-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTrashHTMLParser.pl
More file actions
62 lines (51 loc) · 1.36 KB
/
TrashHTMLParser.pl
File metadata and controls
62 lines (51 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/perl
use strict;
use warnings;

# ParseTrashHTML: HTML::Parser subclass whose callbacks (start/text/...)
# print selected text found in <span> and <a> elements of the page.
package ParseTrashHTML;
use base qw(HTML::Parser);
use LWP::Simple();

# File-scoped state shared by the parser callbacks and the main program.

# Content of the downloaded page; assigned in package main.
my $html;

# Name of the tag whose text is wanted next ("span" or "a").
my $lookupTag = "span";

# Name of the most recently opened tag, recorded by start().
my $curTag = "";

# Page to scrape (query string selects Location=Wörrstadt, Type=Next).
my $url = "http://www.kreis-alzey-worms.eu/verwaltung/abfall/termine.php?Location=W%C3%B6rrstadt&Type=Next";
# Start-tag callback: records the name of the tag that was just opened
# so that the text callback can tell which element its text belongs to.
#   $_[0] - parser object (unused)
#   $_[1] - tag name
# Any further arguments (attributes, original text) are ignored.
sub start {
    my ($parser, $tagName) = @_;

    # remember the tag that was opened last
    $curTag = $tagName;
}
# Text callback: prints text that belongs to the tag currently looked for.
#   $_[0] - parser object (unused)
#   $_[1] - the text chunk
# A <span> containing only white space means the wanted text sits in a
# nested <a>, so the lookup tag is switched to "a" until that text has
# been printed; afterwards the lookup tag goes back to "span".
sub text {
    my ($parser, $chunk) = @_;

    # text of any other tag is of no interest
    return if $curTag ne $lookupTag;

    my $isBlank = ($chunk =~ /^\s*$/);

    if (!$isBlank) {
        # real content: print it and wait for the next <span>
        print "$chunk\n";
        $lookupTag = "span";
        return;
    }

    # only white space: the content is expected inside a following <a>
    $lookupTag = "a";
    return;
}
# Comment callback: HTML comments are deliberately ignored.
#   $_[0] - parser object (unused)
#   $_[1] - comment text (unused)
# Debugging aid, disabled:
#   if ($curTag eq $lookupTag) { print "<!--", $commentText, "-->\n"; }
sub comment {
    my ($parser, $commentText) = @_;
}
# End-tag callback: forgets the last opened tag.
#   $self     - parser object (unused)
#   $tag      - name of the tag being closed (unused)
#   $origtext - original text of the closing tag (unused)
#
# Without this reset, text that follows a closing tag (for example the
# white space between "</span>" and the next "<span>") would still be
# treated as content of the tag opened before it. That either printed
# stray text or switched the lookup tag to "a", so that the following
# <span> was skipped.
#
# Debugging aid, disabled:
#   if ($tag eq $lookupTag) { print "END: $origtext\n"; }
sub end {
    my ($self, $tag, $origtext) = @_;

    # text seen from now on does not directly follow a start tag
    $curTag = "";
    return;
}
package main;

# Download the page and run it through the parser; the parser callbacks
# print the extracted texts to STDOUT.
my $p = ParseTrashHTML->new;

# LWP::Simple::get returns undef when the download fails
$html = LWP::Simple::get($url);
die "Could not fetch $url\n" unless defined $html;

$p->parse( $html );
# signal end of document so that text still buffered by the parser is delivered
$p->eof;
# HTML structures on the page from which the data is extracted:
#
# Case 1:
# <span> <a href="/verwaltung/abfall/abfallarten/restmuell.php">Restabfall</a> </span>
#
# Case 2:
# <span>21.03.2017</span>
#