Discussion:
[Xmltv-devel] [PATCH 03/16] Anchor regexp for file name suffix
pmhahn+
2014-01-12 14:18:32 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

File name must end with .dtd or .xml, not merely contain it.
---
tv_grab_eu_epgdata | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 0d2d525..162fbc1 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -355,8 +355,8 @@ sub unzip {
my @filelist = $zip->memberNames;
foreach my $filename (@filelist) {
# we only care about .dtd and .xml right now
- my $isdtd = 1 if $filename =~ /\.dtd/;
- my $isxml = 1 if $filename =~ /\.xml/;
+ my $isdtd = 1 if $filename =~ /\.dtd$/;
+ my $isxml = 1 if $filename =~ /\.xml$/;
$zip->extractMember($filename, $tmp . sanitize($filename)) if ($isdtd or $isxml);
push @xmlfilelist, ($tmp . sanitize($filename)) if $isxml;
}
--
1.8.5.2
pmhahn+
2014-01-12 14:18:31 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Looks like a copy-paste error from an 80-column terminal window to me.
---
tv_grab_eu_epgdata | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 751c68e..0d2d525 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -625,8 +625,7 @@ sub list_channels {
$chanid = $main::chanid{$internalchanid};
} else {
$chanid = $internalchanid;
- warn "New channel with ID $internalchanid found. Please update chann
-el_ids file!" unless $opt->{quiet};
+ warn "New channel with ID $internalchanid found. Please update channel_ids file!" unless $opt->{quiet};
}

my $name = $channel->first_child('ch0')->text;
--
1.8.5.2
pmhahn+
2014-01-12 14:18:30 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Add the usual wrapper to make sure the script is processed by perl.
---
tv_grab_eu_epgdata | 3 +++
1 file changed, 3 insertions(+)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index d8c6513..751c68e 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -1,5 +1,8 @@
#!/usr/bin/perl -w

+eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
+ if 0; # not running under some shell
+
=pod

=head1 NAME
--
1.8.5.2
pmhahn+
2014-01-12 14:18:33 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

$expiry_date is only used in this function (downloadepg).
---
tv_grab_eu_epgdata | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 162fbc1..068d636 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -165,7 +165,7 @@ use XMLTV::Memoize; XMLTV::Memoize::check_argv('getstore');
# set user agent
$ua->agent("xmltv/$XMLTV::VERSION");

-our(%genre, $channelgroup, $expiry_date, %chanid, $country);
+our(%genre, $channelgroup, %chanid, $country);
our $tmp = tempdir(CLEANUP => 1) . '/';

# set up XML::Twig
@@ -302,6 +302,7 @@ sub downloadepg {
my $i = 0;
my @filenames;
my $baseurl='http://www.epgdata.com';
+ my $expiry_date = 0;

# we've got to start counting at 0
# if we did "$i <= $days", we'd end up with one zip file too much
--
1.8.5.2
pmhahn+
2014-01-12 14:18:35 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

display-names needs to be a 2-dimensional array.

This fixes the bug introduced in revision 1.39 by yunosh, Thu Feb 7 20:15:53 2013 UTC
display-name element: not writing with-lang whose content is not an array at /usr/share/perl5/XMLTV.pm line 1743.
bad data inside channel element, not writing
---
tv_grab_eu_epgdata | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 0ccc510..d40f4e7 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -595,7 +595,7 @@ sub printchannels {
if ($channel eq $chanid) {
my %ch = (
'id' => $chanid,
- 'display-name' => @names
+ 'display-name' => [ @names ]
);
$writer->write_channel(\%ch);
}
--
1.8.5.2
pmhahn+
2014-01-12 14:18:34 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Improve code readability instead of sprinkling the logic in three
locations.
---
tv_grab_eu_epgdata | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 068d636..0ccc510 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -299,14 +299,13 @@ sub downloadepg {
my $days = shift;
my $offset = shift;
my $pin = shift;
- my $i = 0;
my @filenames;
my $baseurl='http://www.epgdata.com';
my $expiry_date = 0;

# we've got to start counting at 0
# if we did "$i <= $days", we'd end up with one zip file too much
- while ($i < $days) {
+ for (my $i = 0; $i < $days; $i++) {
my $dataoffset = $i + $offset;
my $url = "$baseurl/index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml";
# get file name from content-disposition header
@@ -330,7 +329,6 @@ sub downloadepg {
warn "No more zip files available for download\n" unless $opt->{quiet};
last;
}
- $i++;
}
warn 'Your PIN will expire around ' . time2str('%C', $expiry_date) . "\n" unless $opt->{quiet};
return unzip(@filenames);
--
1.8.5.2
pmhahn+
2014-01-12 14:18:36 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Multiple entries are separated by '|'s:
<ch11>RTL Television|RTL Austria|RTL HH SH</ch11>

Rant: Their XML format clearly shows how NOT to use XML. In addition to
using an XML parser I now also need to write a second parser to
post-process the data ...
---
tv_grab_eu_epgdata | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index d40f4e7..6444438 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -583,7 +583,9 @@ sub printchannels {
}
my @names;
if ($sendung->first_child('ch11')->text) {
- push(@names, [ $sendung->first_child('ch11')->text ]);
+ for my $name (split('[|]', $sendung->first_child('ch11')->text)) {
+ push(@names, [ $name ]);
+ }
}
if ($sendung->first_child('ch0')->text) {
push(@names, [ $sendung->first_child('ch0')->text ]);
--
1.8.5.2
pmhahn+
2014-01-12 14:18:44 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Convert the list of configured channels once into a hash instead of
doing it for every programme.

Also use the hash when generating the channel list.

Before:
real 0m17.136s
user 0m17.025s
sys 0m0.091s
After:
real 0m16.883s
user 0m16.752s
sys 0m0.112s
---
tv_grab_eu_epgdata | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index cf7b868..cb016f4 100755
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -173,7 +173,7 @@ our %grabber_tags = (
# set user agent
$ua->agent("xmltv/$XMLTV::VERSION");

-our(%genre, $channelgroup, %chanid, $country);
+our(%genre, $channelgroup, %chanid, $country, %configuredchannels);
our $tmp = tempdir(CLEANUP => 1) . '/';

sub force_utf8 {
@@ -230,6 +230,10 @@ my %country_tz = (
'nl', 'Europe/Amsterdam',
);

+# push the channel ids we want to grab in an array
+# http://effectiveperl.blogspot.com/
+%configuredchannels = map { $_, 1 } @{$conf->{channel}};
+
sub config_stage {
# shamelessly stolen from http://wiki.xmltv.org/index.php/HowtoWriteAGrabber

@@ -417,10 +421,6 @@ sub printepg {
# but this ought to be OK for now
}

- # alright, let's try this:
- # push the channel ids we want to grab in an array
- # http://effectiveperl.blogspot.com/
- my %configuredchannels = map { $_, 1 } @{$conf->{channel}};
# does the channel we're currently processing exist in the hash?
# BTW: this is not a lot more efficient in our case than looping over a list
# but a few seconds are better than nothing :)
@@ -607,14 +607,12 @@ sub printchannels {
if ($sendung->first_child('ch1')->text) {
$names{ $sendung->first_child('ch1')->text } = 1;
}
- foreach my $channel (@{$conf->{channel}}) {
- if ($channel eq $chanid) {
+ if ($configuredchannels{$chanid}) {
my %ch = (
'id' => $chanid,
'display-name' => [ map { [ $_ ] } keys %names ],
);
$writer->write_channel(\%ch);
- }
}
}
--
1.8.5.2
pmhahn+
2014-01-12 14:18:37 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Display channel names just once. This is not strictly necessary, but
helps with some other XMLTV reading programs not handling this
correctly.
---
tv_grab_eu_epgdata | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 6444438..4f7f41c 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -581,23 +581,23 @@ sub printchannels {
# Maybe we should just return if we don't know the channel id
$chanid = $internalchanid;
}
- my @names;
+ my %names;
if ($sendung->first_child('ch11')->text) {
for my $name (split('[|]', $sendung->first_child('ch11')->text)) {
- push(@names, [ $name ]);
+ $names{ $name } = 1;
}
}
if ($sendung->first_child('ch0')->text) {
- push(@names, [ $sendung->first_child('ch0')->text ]);
+ $names{ $sendung->first_child('ch0')->text } = 1;
}
if ($sendung->first_child('ch1')->text) {
- push(@names, [ $sendung->first_child('ch1')->text ]);
+ $names{ $sendung->first_child('ch1')->text } = 1;
}
foreach my $channel (@{$conf->{channel}}) {
if ($channel eq $chanid) {
my %ch = (
'id' => $chanid,
- 'display-name' => [ @names ]
+ 'display-name' => [ map { [ $_ ] } keys %names ],
);
$writer->write_channel(\%ch);
}
--
1.8.5.2
pmhahn+
2014-01-12 14:18:38 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.

Force a second conversion using UTF-8.

PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
---
tv_grab_eu_epgdata | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 4f7f41c..0708099 100644
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -155,6 +155,7 @@ use HTTP::Request::Common;

# deal with umlauts
use HTML::Entities;
+use Encode qw(decode);

# to parse expiry and start/stop dates
use Date::Format;
@@ -168,10 +169,18 @@ $ua->agent("xmltv/$XMLTV::VERSION");
our(%genre, $channelgroup, %chanid, $country);
our $tmp = tempdir(CLEANUP => 1) . '/';

+sub force_utf8 {
+ # 2014-01-07: channels_y.xml still declares encoding="ISO-8859-1", while it actually is "UTF-8"
+ my($text) = @_;
+ $text = decode('utf8', $text);
+ return $text;
+}
+
# set up XML::Twig
our $epg = new XML::Twig(twig_handlers => { data => \&printepg },
output_encoding => 'UTF-8');
our $channels = new XML::Twig(twig_handlers => { data => \&printchannels },
+ char_handler => \&force_utf8,
output_encoding => 'UTF-8');
our $genre = new XML::Twig(twig_handlers => { data => \&makegenrehash },
output_encoding => 'UTF-8');
--
1.8.5.2
Jan Schneider
2014-01-13 15:11:19 UTC
Permalink
Post by pmhahn+
The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
I think I already reported this once. Please keep us updated if you
get a response (usually within a few days). I'd rather have this fixed
upstream than worked around locally.
--
Jan Schneider
The Horde Project
http://www.horde.org/
https://www.facebook.com/hordeproject
Jan Schneider
2014-03-03 12:49:48 UTC
Permalink
Post by Jan Schneider
Post by pmhahn+
The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
I think I already reported this once. Please keep us updated if you
get a response (usually within a few days). I'd rather have this fixed
upstream than worked around locally.
Any update?
--
Jan Schneider
The Horde Project
http://www.horde.org/
https://www.facebook.com/hordeproject
pmhahn+
2014-03-03 21:51:23 UTC
Permalink
Hello,
Post by Jan Schneider
Post by Jan Schneider
Post by pmhahn+
The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
I think I already reported this once. Please keep us updated if you
get a response (usually within a few days). I'd rather have this fixed
upstream than worked around locally.
Any update?
Still not fixed: include_20140303_de.zip

BYtE
Philipp
--
/ / (_)__ __ ____ __ Philipp Hahn
/ /__/ / _ \/ // /\ \/ /
/____/_/_//_/\_,_/ /_/\_\ ***@pmhahn.de
Philipp Matthias Hahn
2014-04-19 11:21:24 UTC
Permalink
Hello,
Post by Jan Schneider
Post by Jan Schneider
Post by pmhahn+
The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
I think I already reported this once. Please keep us updated if you
get a response (usually within a few days). I'd rather have this fixed
upstream than worked around locally.
Any update?
Still not fixed and in "genre.xml" it's also wrong for "Fußball".

BYtE
Philipp
--
/ / (_)__ __ ____ __ Philipp Hahn
/ /__/ / _ \/ // /\ \/ /
/____/_/_//_/\_,_/ /_/\_\ ***@pmhahn.de
Jan Schneider
2014-04-22 10:26:43 UTC
Permalink
Post by Philipp Matthias Hahn
Hello,
Post by Jan Schneider
Post by Jan Schneider
Post by pmhahn+
The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
I think I already reported this once. Please keep us updated if you
get a response (usually within a few days). I'd rather have this fixed
upstream than worked around locally.
Any update?
Still not fixed and in "genre.xml" it's also wrong for "Fußball".
BYtE
Philipp
I sent them another heads-up a week ago, and at least they replied
with a counter-question. Let's see where it gets this time.
--
Jan Schneider
The Horde Project
http://www.horde.org/
https://www.facebook.com/hordeproject
Jan Schneider
2014-04-30 07:46:25 UTC
Permalink
Post by Jan Schneider
Post by Philipp Matthias Hahn
Hello,
Post by Jan Schneider
Post by Jan Schneider
Post by pmhahn+
The channels_y.xml contains the XML declaration
<?xml encoding="ISO-8859-1"?>
while it actually is UTF-8 encoded. This shows in "WDR Köln" being
printed wrongly in the generated XML file.
Force a second conversion using UTF-8.
PS: I filed a complaint with epgdata.com, hopefully they will fix this
themselves.
I think I already reported this once. Please keep us updated if you
get a response (usually within a few days). I'd rather have this fixed
upstream than worked around locally.
Any update?
Still not fixed and in "genre.xml" it's also wrong for "Fußball".
BYtE
Philipp
I sent them another heads-up a week ago, and at least they replied
with a counter-question. Let's see where it gets this time.
This has finally been fixed now.

Can you prepare an updated series of patches?
--
Jan Schneider
The Horde Project
http://www.horde.org/
https://www.facebook.com/hordeproject
pmhahn+
2014-01-12 14:18:39 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

X-Epgdata-PackageAvailable always 0 for me since the beginning of 2014.
Maybe because my subscription expires in less than 3 weeks ?

As we're only interested in the ZIP file, check for the
Content-Disposition header instead, which gives the name for the ZIP
file.
---
tv_grab_eu_epgdata | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
mode change 100644 => 100755 tv_grab_eu_epgdata

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
old mode 100644
new mode 100755
index 0708099..73bb264
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -320,11 +320,17 @@ sub downloadepg {
# get file name from content-disposition header
warn "url=$url\n" if $opt->{debug};
my $response = $ua->request(GET $url);
- if ($response->{'_headers'}{'x-epgdata-packageavailable'} == 1) {
- my $filename = sanitize($response->{'_headers'}{'content-disposition'});
+
+ my $filename = $response->header('Content-Disposition');
+ unless ($filename) {
+ warn "No more zip files available for download\n" unless $opt->{quiet};
+ last;
+ }
+
+ $filename =~ s/^.*filename=//;
+ $filename = sanitize($filename);
$expiry_date = $response->{'_headers'}{'x-epgdata-timeout'};
$channelgroup = sanitize($response->{'_headers'}{'x-epgdata-channelgroup'});
- $filename =~ s/^.*=//;
($country) = ($filename =~ /[^_]*_[^_]*_([^_]*)_[^_]*/) unless $country; # format: xyz########_########_de_qy.zip

warn "filename=$filename\n" if $opt->{debug};
@@ -334,10 +340,6 @@ sub downloadepg {
print F $response->content;
close(F);
push @filenames, $tmp . $filename;
- } else {
- warn "No more zip files available for download\n" unless $opt->{quiet};
- last;
- }
}
warn 'Your PIN will expire around ' . time2str('%C', $expiry_date) . "\n" unless $opt->{quiet};
return unzip(@filenames);
--
1.8.5.2
Carsten Aulbert
2014-01-12 16:56:56 UTC
Permalink
Hi
Post by pmhahn+
X-Epgdata-PackageAvailable always 0 for me since the beginning of 2014.
Maybe because my subscription expires in less than 3 weeks ?
Maybe, just today it shows 1 again (my PIN will only expire in March)
and I currently get this header:

HTTP/1.1 200 OK
Date: Sun, 12 Jan 2014 16:53:25 GMT
Server: Apache
Expires: Mon, 26 Jul 1997 05:00:00 GMT
Last-Modified: Fri, 10 Jan 2014 18:25:48 +0100
Cache-Control: no-store, no-cache, must-revalidate
Pragma: no-cache
Content-Disposition: attachment; filename="20140112_20140110_de_qy.zip"
Content-Length: 18966761
x-prodadis-isDemo: 0
x-epgdata-isDemo: 0
x-prodadis-dataDepth: q
x-epgdata-dataDepth: q
x-prodadis-channelGroup: y
x-epgdata-channelGroup: y
x-prodadis-language: de
x-epgdata-language: de
x-epgdata-packageAvailable: 1
x-prodadis-timeout: 1395702000
x-epgdata-timeout: 1395702000
x-prodadis-email: ***@miyameca.de
x-epgdata-email: ***@miyameca.de
x-prodadis-isTrial: 0
x-epgdata-isTrial: 0
Content-Type: application/x-zip-compressed


Checked via:

date -d '@1395702000'
Tue Mar 25 00:00:00 CET 2014

Cheers

Carsten
Jan Schneider
2014-01-13 11:28:04 UTC
Permalink
Post by Carsten Aulbert
Hi
Post by pmhahn+
X-Epgdata-PackageAvailable always 0 for me since the beginning of 2014.
Maybe because my subscription expires in less than 3 weeks ?
Maybe, just today it shows 1 again (my PIN will only expire in March)
HTTP/1.1 200 OK
Date: Sun, 12 Jan 2014 16:53:25 GMT
Server: Apache
Expires: Mon, 26 Jul 1997 05:00:00 GMT
Last-Modified: Fri, 10 Jan 2014 18:25:48 +0100
Cache-Control: no-store, no-cache, must-revalidate
Pragma: no-cache
Content-Disposition: attachment; filename="20140112_20140110_de_qy.zip"
Content-Length: 18966761
x-prodadis-isDemo: 0
x-epgdata-isDemo: 0
x-prodadis-dataDepth: q
x-epgdata-dataDepth: q
x-prodadis-channelGroup: y
x-epgdata-channelGroup: y
x-prodadis-language: de
x-epgdata-language: de
x-epgdata-packageAvailable: 1
x-prodadis-timeout: 1395702000
x-epgdata-timeout: 1395702000
x-prodadis-isTrial: 0
x-epgdata-isTrial: 0
Content-Type: application/x-zip-compressed
Tue Mar 25 00:00:00 CET 2014
Yes, I filed a complaint and got a response that it works just fine.
Of course it does now, which makes me think that someone already
noticed it or it was a temporary glitch.

Jan.
--
Jan Schneider
The Horde Project
http://www.horde.org/
https://www.facebook.com/hordeproject
Philipp Matthias Hahn
2014-01-14 22:50:36 UTC
Permalink
Hello,
Post by Carsten Aulbert
Post by pmhahn+
X-Epgdata-PackageAvailable always 0 for me since the beginning of 2014.
Maybe because my subscription expires in less than 3 weeks ?
...
Post by Carsten Aulbert
x-prodadis-isDemo: 0
x-epgdata-isDemo: 0
x-prodadis-dataDepth: q
x-epgdata-dataDepth: q
x-prodadis-channelGroup: y
x-epgdata-channelGroup: y
x-prodadis-language: de
x-epgdata-language: de
x-epgdata-packageAvailable: 1
x-prodadis-timeout: 1395702000
x-epgdata-timeout: 1395702000
x-prodadis-isTrial: 0
x-epgdata-isTrial: 0
Is there some documentation about the meaning of those extra HTTP header
fields? Email, Timeout, Language and ChannelGroup are relatively clear,
but the other ones I know nothing about.

I ask since the code is only interested for the case, where it gets a
ZIP file, for which I would think the Content-Disposition header
perfect.
So if we don't know the exact meaning of X-EpgData-packageAvailable, I
would prefer switching to Content-Disposition.

BYtE
Philipp
--
/ / (_)__ __ ____ __ Philipp Hahn
/ /__/ / _ \/ // /\ \/ /
/____/_/_//_/\_,_/ /_/\_\ ***@pmhahn.de
Jan Schneider
2014-01-15 13:17:50 UTC
Permalink
Post by Philipp Matthias Hahn
Hello,
Post by Carsten Aulbert
Post by pmhahn+
X-Epgdata-PackageAvailable always 0 for me since the beginning of 2014.
Maybe because my subscription expires in less than 3 weeks ?
...
Post by Carsten Aulbert
x-prodadis-isDemo: 0
x-epgdata-isDemo: 0
x-prodadis-dataDepth: q
x-epgdata-dataDepth: q
x-prodadis-channelGroup: y
x-epgdata-channelGroup: y
x-prodadis-language: de
x-epgdata-language: de
x-epgdata-packageAvailable: 1
x-prodadis-timeout: 1395702000
x-epgdata-timeout: 1395702000
x-prodadis-isTrial: 0
x-epgdata-isTrial: 0
Is there some documentation about the meaning of those extra HTTP header
fields? Email, Timeout, Language and ChannelGroup are relatively clear,
but the other ones I know nothing about.
Unfortunately not.
Post by Philipp Matthias Hahn
I ask since the code is only interested for the case, where it gets a
ZIP file, for which I would think the Content-Disposition header
perfect.
So if we don't know the exact meaning of X-EpgData-packageAvailable, I
would prefer switching to Content-Disposition.
Well, the header works correct again, so I don't see a need to change this.
--
Jan Schneider
The Horde Project
http://www.horde.org/
https://www.facebook.com/hordeproject
pmhahn+
2014-01-12 14:18:42 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Just use fixed file-names instead of using the name from
Content-Disposition header and sanitizing it.

The real name is only needed for extracting the channel group and for
nothing else. (I find the name only useful for debugging and probably
for caching, but this doesn't work with epgdata.com)

Also download directly to a file instead of going through the RAM.
---
tv_grab_eu_epgdata | 26 ++++++++++----------------
1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 1016c49..34f7931 100755
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -325,27 +325,21 @@ sub downloadepg {
my $url = $grabber_baseurl . "index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml";
# get file name from content-disposition header
warn "url=$url\n" if $opt->{debug};
- my $response = $ua->request(GET $url);
-
- my $filename = $response->header('Content-Disposition');
+ my $filespec = "$tmp/$i.zip";
+ warn 'Downloading zip file for day ', $dataoffset + 1, "\n" unless $opt->{quiet};
+ my $response = $ua->get($url, ':content_file' => $filespec,);
+
+ $expiry_date = $response->header('X-Epgdata-Timeout');
+ $channelgroup = $response->header('X-Epgdata-ChannelGroup');
+ my $filename = $response->header('Content-Disposition');
+ warn "filename=$filename\n" if $opt->{debug};
unless ($filename) {
warn "No more zip files available for download\n" unless $opt->{quiet};
last;
}
+ ($country) = ($filename =~ /.*filename="[^_]*_[^_]*_([^_]*)_[^_]*/) unless $country; # format: YYYYMMDD_YYYYMMDD_de_qy.zip

- $filename =~ s/^.*filename=//;
- $filename = sanitize($filename);
- $expiry_date = $response->{'_headers'}{'x-epgdata-timeout'};
- $channelgroup = sanitize($response->{'_headers'}{'x-epgdata-channelgroup'});
- ($country) = ($filename =~ /[^_]*_[^_]*_([^_]*)_[^_]*/) unless $country; # format: xyz########_########_de_qy.zip
-
- warn "filename=$filename\n" if $opt->{debug};
- warn 'Downloading zip file for day ', $dataoffset + 1, "\n" unless $opt->{quiet};
- open(F,">$tmp" . $filename);
- binmode(F);
- print F $response->content;
- close(F);
- push @filenames, $tmp . $filename;
+ push @filenames, $filespec;
}
warn 'Your PIN will expire around ' . time2str('%C', $expiry_date) . "\n" unless $opt->{quiet};
return unzip(@filenames);
--
1.8.5.2
Philipp Matthias Hahn
2014-04-19 11:25:05 UTC
Permalink
Hello,
Post by pmhahn+
Just use fixed file-names instead of using the name from
Content-Disposition header and sanitizing it.
...
Post by pmhahn+
+ $expiry_date = $response->header('X-Epgdata-Timeout');
+ $channelgroup = $response->header('X-Epgdata-ChannelGroup');
+ my $filename = $response->header('Content-Disposition');
+ warn "filename=$filename\n" if $opt->{debug};
unless ($filename) {
warn "No more zip files available for download\n" unless $opt->{quiet};
last;
}
This prints a warning and breaks if 21+ days are downloaded. The check
for the last file must happen before extracting the expiry date and
channel groups.

BYtE
Philipp
--
/ / (_)__ __ ____ __ Philipp Hahn
/ /__/ / _ \/ // /\ \/ /
/____/_/_//_/\_,_/ /_/\_\ ***@pmhahn.de
pmhahn+
2014-01-12 14:18:43 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Use in string expansion.
---
tv_grab_eu_epgdata | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 34f7931..cf7b868 100755
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -370,8 +370,8 @@ sub unzip {
# we only care about .dtd and .xml right now
my $isdtd = 1 if $filename =~ /\.dtd$/;
my $isxml = 1 if $filename =~ /\.xml$/;
- $zip->extractMember($filename, $tmp . sanitize($filename)) if ($isdtd or $isxml);
- push @xmlfilelist, ($tmp . sanitize($filename)) if $isxml;
+ $zip->extractMember($filename, "$tmp/" . sanitize($filename)) if ($isdtd or $isxml);
+ push @xmlfilelist, ("$tmp/" . sanitize($filename)) if $isxml;
}
}
return @xmlfilelist;
@@ -380,8 +380,8 @@ sub unzip {

sub processxml {
$writer->start(\%grabber_tags);
- $genre->parsefile($tmp . 'genre.xml');
- $channels->parsefile($tmp . 'channel_' . sanitize($channelgroup) . '.xml');
+ $genre->parsefile("$tmp/genre.xml");
+ $channels->parsefile("$tmp/channel_$channelgroup.xml");
foreach my $xmlfile (@_) {
warn "Processing $xmlfile\n" if $opt->{debug};
$epg->parsefile($xmlfile);
@@ -626,7 +626,7 @@ sub list_channels {
my $pin = $conf->{pin}->[0];
prepareinclude($conf, $opt);
# borrowed from http://www.xmltwig.com/xmltwig/ex_fm1
- $channels->parsefile($tmp . 'channel_' . $channelgroup . '.xml');
+ $channels->parsefile("$tmp/channel_$channelgroup.xml");
my $channel_list = $channels->root;
my @channels = $channel_list->children;
my $xmltv_channel_list = "<tv generator-info-name=\"$grabber_name\">\n";
--
1.8.5.2
pmhahn+
2014-01-12 14:18:41 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Use LWP directly instead of using getstore() to get the HTTP header for
X-Epgdata-Channelgroup.

This saves downloading a 20 MiB file for one day for just getting that
data, which is also available when downloading the include.zip, which is
only 8 KiB.
---
tv_grab_eu_epgdata | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index d4641bc..1016c49 100755
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -142,7 +142,7 @@ I'm sure this list is not complete. Let me know if you encounter additional prob

use strict;
use warnings;
-use LWP::Simple qw($ua getstore);
+use LWP::Simple qw($ua);
use Archive::Zip;
use File::Temp qw/ tempdir /;
use XML::Twig;
@@ -161,8 +161,6 @@ use Encode qw(decode);
use Date::Format;
use DateTime::Format::Strptime;

-use XMLTV::Memoize; XMLTV::Memoize::check_argv('getstore');
-
our $grabber_name = 'tv_grab_eu_epgdata';
our $grabber_baseurl = "http://www.epgdata.com/";
our %grabber_tags = (
@@ -208,7 +206,7 @@ foreach my $line (@lines) {

my ($opt, $conf) = ParseOptions({
grabber_name => $grabber_name,
- capabilities => [qw/baseline manualconfig tkconfig apiconfig cache preferredmethod/],
+ capabilities => [qw/baseline manualconfig tkconfig apiconfig preferredmethod/],
stage_sub => \&config_stage,
listchannels_sub => \&list_channels,
version => '$Id: tv_grab_eu_epgdata,v 1.40 2013/02/08 19:58:21 yunosh Exp $',
@@ -358,9 +356,12 @@ sub prepareinclude {
my ($conf, $opt) = @_;
my $pin = $conf->{pin}->[0];
my $includeurl = $grabber_baseurl . "index.php?action=sendInclude&iOEM=&pin=$pin&dataType=xml";
- warn "Downloading include zip file\n" unless $opt->{quiet};
- getstore($includeurl, $tmp . 'includezip');
- my @zipfiles=($tmp . 'includezip');
+ my $filespec = "$tmp/include.zip";
+ warn "Downloading include zip file $includeurl\n" unless $opt->{quiet};
+ my $response = $ua->get($includeurl, ':content_file' => $filespec,);
+
+ $channelgroup = $response->header('X-Epgdata-ChannelGroup');
+ my @zipfiles=($filespec);
unzip(@zipfiles);
}

@@ -629,8 +630,6 @@ sub printchannels {
sub list_channels {
my ($conf, $opt) = @_;
my $pin = $conf->{pin}->[0];
- # make sure we know $channelgroup
- downloadepg('1','0',$pin);
prepareinclude($conf, $opt);
# borrowed from http://www.xmltwig.com/xmltwig/ex_fm1
$channels->parsefile($tmp . 'channel_' . $channelgroup . '.xml');
--
1.8.5.2
pmhahn+
2014-01-12 14:18:45 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Re-use the code from printchannels instead of hand-printing XML in
list_channels.

This fixes listing all channel names as it is now correctly implemented
by printchannels.

It also fixes a strange encoding bug:
Lower-case "encoding" is only used for the XML declaration, while
upper-case "ENCODING" is used by the XML::Writer to convert from Perl's
internal representation to the external serialization.

The latter one does NOT work with IO::Scalar or IO::String, which leads
to XML::Writer writing ISO-8859-1 data instead of UTF-8. This then
breaks --list-channels.
---
tv_grab_eu_epgdata | 40 ++++++++++++----------------------------
1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index cb016f4..c735870 100755
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -173,7 +173,7 @@ our %grabber_tags = (
# set user agent
$ua->agent("xmltv/$XMLTV::VERSION");

-our(%genre, $channelgroup, %chanid, $country, %configuredchannels);
+our(%genre, $channelgroup, %chanid, $country, %configuredchannels, $allchannels, $writer);
our $tmp = tempdir(CLEANUP => 1) . '/';

sub force_utf8 {
@@ -285,7 +285,7 @@ if (defined $opt->{output}) {
$w_args{encoding} = 'UTF-8';
$w_args{ENCODING} = 'UTF-8';

-our $writer = new XMLTV::Writer(%w_args);
+$writer = new XMLTV::Writer(%w_args);

# determine the timezone
if (not $tz) {
@@ -594,6 +594,7 @@ sub printchannels {
# FIXME: not sure if this is correct.
# Maybe we should just return if we don't know the channel id
$chanid = $internalchanid;
+ warn "New channel with ID $internalchanid found. Please update channel_ids file!" unless $opt->{quiet};
}
my %names;
if ($sendung->first_child('ch11')->text) {
@@ -607,7 +608,7 @@ sub printchannels {
if ($sendung->first_child('ch1')->text) {
$names{ $sendung->first_child('ch1')->text } = 1;
}
- if ($configuredchannels{$chanid}) {
+ if ($configuredchannels{$chanid} or $allchannels) {
my %ch = (
'id' => $chanid,
'display-name' => [ map { [ $_ ] } keys %names ],
@@ -623,31 +624,14 @@ sub list_channels {
my ($conf, $opt) = @_;
my $pin = $conf->{pin}->[0];
prepareinclude($conf, $opt);
- # borrowed from http://www.xmltwig.com/xmltwig/ex_fm1
- $channels->parsefile("$tmp/channel_$channelgroup.xml");
- my $channel_list = $channels->root;
- my @channels = $channel_list->children;
- my $xmltv_channel_list = "<tv generator-info-name=\"$grabber_name\">\n";
-
- foreach my $channel (@channels) {
- my $internalchanid = $channel->first_child('ch4')->text;
- our $chanid;
- if (defined $main::chanid{$internalchanid}) {
- $chanid = $main::chanid{$internalchanid};
- } else {
- $chanid = $internalchanid;
- warn "New channel with ID $internalchanid found. Please update channel_ids file!" unless $opt->{quiet};
- }
+ $allchannels = 1;

- my $name = $channel->first_child('ch0')->text;
- $xmltv_channel_list = <<END;
- $xmltv_channel_list
- <channel id="$chanid">
- <display-name>$name</display-name>
- </channel>
-END
- }
- $xmltv_channel_list = $xmltv_channel_list . '</tv>';
- return $xmltv_channel_list;
+ my $result = "";
+ open(my $fh, ">:utf8", \$result);
+ $writer = new XMLTV::Writer(OUTPUT => $fh, encoding => 'utf-8');
+ $writer->start(\%grabber_tags);
+ $channels->parsefile("$tmp/channel_$channelgroup.xml");
+ $writer->end();
+ return $result;
}
--
1.8.5.2
pmhahn+
2014-01-12 14:18:40 UTC
Permalink
From: Philipp Matthias Hahn <***@pmhahn.de>

Define grabber name and URL globally once and use that everywhere.

Defining the URL globally simplifies setting up a private server for
local testing.
---
tv_grab_eu_epgdata | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/tv_grab_eu_epgdata b/tv_grab_eu_epgdata
index 73bb264..d4641bc 100755
--- a/tv_grab_eu_epgdata
+++ b/tv_grab_eu_epgdata
@@ -163,6 +163,15 @@ use DateTime::Format::Strptime;

use XMLTV::Memoize; XMLTV::Memoize::check_argv('getstore');

+our $grabber_name = 'tv_grab_eu_epgdata';
+our $grabber_baseurl = "http://www.epgdata.com/";
+our %grabber_tags = (
+ 'source-info-url' => $grabber_baseurl,
+ 'source-info-name' => 'epgData',
+ 'generator-info-name' => $grabber_name,
+ 'generator-info-url' => 'http://xmltv.org/',
+);
+
# set user agent
$ua->agent("xmltv/$XMLTV::VERSION");

@@ -186,7 +195,7 @@ our $genre = new XML::Twig(twig_handlers => { data => \&makegenrehash },
output_encoding => 'UTF-8');

# build a hash: epgdata.com channel id -> xmltv channel id
-my $chanids = GetSupplement('tv_grab_eu_epgdata', 'channel_ids');
+my $chanids = GetSupplement($grabber_name, 'channel_ids');

my @lines = split(/[\n\r]+/, $chanids);
foreach my $line (@lines) {
@@ -198,7 +207,7 @@ foreach my $line (@lines) {
}

my ($opt, $conf) = ParseOptions({
- grabber_name => 'tv_grab_eu_epgdata',
+ grabber_name => $grabber_name,
capabilities => [qw/baseline manualconfig tkconfig apiconfig cache preferredmethod/],
stage_sub => \&config_stage,
listchannels_sub => \&list_channels,
@@ -208,7 +217,7 @@ my ($opt, $conf) = ParseOptions({
});

my $pin = $conf->{pin}->[0];
-die 'Sorry, your PIN is not defined. Run tv_grab_eu_epgdata --configure to fix this.\n' unless $pin;
+die "Sorry, your PIN is not defined. Run $grabber_name-configure to fix this.\n" unless $pin;

# country is determined by the filenames downloaded from the server
# and used to determine the time zone if not specified in the config
@@ -233,7 +242,7 @@ sub config_stage {
my $result;
my $configwriter = new XMLTV::Configure::Writer(OUTPUT => \$result,
encoding => 'ISO-8859-1');
- $configwriter->start({ grabber => 'tv_grab_eu_epgdata' });
+ $configwriter->start({ grabber => $grabber_name });
$configwriter->write_string({
id => 'pin',
title => [ [ 'Enter your PIN for epgdata.com', 'en' ] ],
@@ -309,14 +318,13 @@ sub downloadepg {
my $offset = shift;
my $pin = shift;
my @filenames;
- my $baseurl='http://www.epgdata.com';
my $expiry_date = 0;

# we've got to start counting at 0
# if we did "$i <= $days", we'd end up with one zip file too much
for (my $i = 0; $i < $days; $i++) {
my $dataoffset = $i + $offset;
- my $url = "$baseurl/index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml";
+ my $url = $grabber_baseurl . "index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml";
# get file name from content-disposition header
warn "url=$url\n" if $opt->{debug};
my $response = $ua->request(GET $url);
@@ -349,7 +357,7 @@ sub downloadepg {
sub prepareinclude {
my ($conf, $opt) = @_;
my $pin = $conf->{pin}->[0];
- my $includeurl = "http://www.epgdata.com/index.php?action=sendInclude&iOEM=&pin=$pin&dataType=xml";
+ my $includeurl = $grabber_baseurl . "index.php?action=sendInclude&iOEM=&pin=$pin&dataType=xml";
warn "Downloading include zip file\n" unless $opt->{quiet};
getstore($includeurl, $tmp . 'includezip');
my @zipfiles=($tmp . 'includezip');
@@ -376,7 +384,7 @@ sub unzip {


sub processxml {
- $writer->start({ 'generator-info-name' => 'tv_grab_eu_epgdata' });
+ $writer->start(\%grabber_tags);
$genre->parsefile($tmp . 'genre.xml');
$channels->parsefile($tmp . 'channel_' . sanitize($channelgroup) . '.xml');
foreach my $xmlfile (@_) {
@@ -628,7 +636,7 @@ sub list_channels {
$channels->parsefile($tmp . 'channel_' . $channelgroup . '.xml');
my $channel_list = $channels->root;
my @channels = $channel_list->children;
- my $xmltv_channel_list = "<tv generator-info-name=\"tv_grab_eu_epgdata\">\n";
+ my $xmltv_channel_list = "<tv generator-info-name=\"$grabber_name\">\n";

foreach my $channel (@channels) {
my $internalchanid = $channel->first_child('ch4')->text;
--
1.8.5.2
Loading...