Philipp Matthias Hahn
2014-01-07 11:26:02 UTC
The {genres,channels_y}.xml files contain the XML declaration
<?xml encoding="ISO-8859-1"?>
while they are actually UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
---
tv_grab_eu_epgdata | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 4f7f41c..09b527e 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -155,6 +155,7 @@ use HTTP::Request::Common;
=20
# deal with umlauts
use HTML::Entities;
+use Encode qw(decode);
=20
# to parse expiry and start/stop dates
use Date::Format;
@@ -168,12 +169,21 @@ $ua->agent("xmltv/$XMLTV::VERSION");
our(%genre, $channelgroup, %chanid, $country);
our $tmp =3D tempdir(CLEANUP =3D> 1) . '/';
=20
+sub force_utf8 {
+ # 2014-01-07: channels_y.xml still declares encoding=3D"ISO-8859-1",=
while it actually is "UTF-8"
+ my($text) =3D @_;
+ $text =3D decode('utf8', $text);
+ return $text;
+}
+
# set up XML::Twig
our $epg =3D new XML::Twig(twig_handlers =3D> { data =3D> \&printep=
g },
output_encoding =3D> 'UTF-8');
our $channels =3D new XML::Twig(twig_handlers =3D> { data =3D> \&printch=
annels },
+ char_handler =3D> \&force_utf8,
output_encoding =3D> 'UTF-8');
our $genre =3D new XML::Twig(twig_handlers =3D> { data =3D> \&makegen=
rehash },
+ char_handler =3D> \&force_utf8,
output_encoding =3D> 'UTF-8');
=20
# build a hash: epgdata.com channel id -> xmltv channel id
--=20
1.9.2
--IS0zKkzwUGydFO0o--
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
---
tv_grab_eu_epgdata | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 4f7f41c..09b527e 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -155,6 +155,7 @@ use HTTP::Request::Common;
=20
# deal with umlauts
use HTML::Entities;
+use Encode qw(decode);
=20
# to parse expiry and start/stop dates
use Date::Format;
@@ -168,12 +169,21 @@ $ua->agent("xmltv/$XMLTV::VERSION");
our(%genre, $channelgroup, %chanid, $country);
our $tmp =3D tempdir(CLEANUP =3D> 1) . '/';
=20
+sub force_utf8 {
+ # 2014-01-07: channels_y.xml still declares encoding=3D"ISO-8859-1",=
while it actually is "UTF-8"
+ my($text) =3D @_;
+ $text =3D decode('utf8', $text);
+ return $text;
+}
+
# set up XML::Twig
our $epg =3D new XML::Twig(twig_handlers =3D> { data =3D> \&printep=
g },
output_encoding =3D> 'UTF-8');
our $channels =3D new XML::Twig(twig_handlers =3D> { data =3D> \&printch=
annels },
+ char_handler =3D> \&force_utf8,
output_encoding =3D> 'UTF-8');
our $genre =3D new XML::Twig(twig_handlers =3D> { data =3D> \&makegen=
rehash },
+ char_handler =3D> \&force_utf8,
output_encoding =3D> 'UTF-8');
=20
# build a hash: epgdata.com channel id -> xmltv channel id
--=20
1.9.2
--IS0zKkzwUGydFO0o--