#!/usr/bin/perl
# spurs2000.pl - avantgo spurs news
#
# Scrapes www.spurs2000.com and prints a plain list of news headlines to
# STDOUT, most recent first.
#
# Steps:
#   1. Fetch the front page and pick out the first news directory name it
#      mentions (text matching news_<3 word chars>_<4 digits>).
#   2. Fetch that directory's index page and collect the href of every
#      <a> tag whose href contains "news2".
#   3. Fetch each linked story and scan its text: every non-blank text
#      chunk that directly follows a chunk containing a dd/dd/dddd date is
#      recorded as a headline.
#
# Dies if the front page or the news index cannot be fetched, or if no news
# directory name is found.  A story that cannot be fetched is skipped with
# a warning.

use strict;
use warnings;

use LWP::Simple;
use HTML::Parser;
use Data::Dumper;    # only needed for the commented-out debugging dumps

my $site_url = 'http://www.spurs2000.com';

# LWP::Simple::get returns undef on failure and does not set $!, so the
# error message names the URL instead.
my $frontpage = get("$site_url/");
die "Could not fetch $site_url/\n" unless defined $frontpage;

# List-context match: assigns the capture, and the assignment evaluates to
# 0 (false) when the pattern does not match.
my ($newsdirmon) = $frontpage =~ /(news_\w\w\w_\d\d\d\d)/
    or die "No news directory name found on $site_url/\n";

my $news_url = "$site_url/news/$newsdirmon";

my $newsidxhtml = get("$news_url/");
die "Could not fetch $news_url/\n" unless defined $newsidxhtml;

# print $newsidxhtml;

# Story links found on the index page.  Declared before the parser runs so
# the callback below always writes into an initialised array.
my @newslinks;

# Start-tag handler for the index page: remembers the href of each anchor
# that points at a story ("news2" somewhere in the href).
sub starttag2 {
    my ($tagname, $attr) = @_;

    return unless $tagname eq 'a';

    # Anchors without an href (e.g. <a name="...">) have nothing to match.
    my $href = $attr->{href};
    return unless defined $href && $href =~ /news2/;

    push @newslinks, $href;
    return;
}

my $p2 = HTML::Parser->new(
    api_version     => 3,
    start_h         => [ \&starttag2, 'tagname, attr' ],
    marked_sections => 1,
);
$p2->parse($newsidxhtml);
$p2->eof;    # signal end of document so nothing stays buffered

@newslinks = reverse @newslinks;    # most recent first

# print Dumper(\@newslinks);

my @headlines;

# Set when the text handler has just seen a date; the next non-blank text
# chunk is then taken as the headline.
my $dateseen = 0;

# Text handler for story pages (receives entity-decoded text).
sub text {
    my ($origtext) = @_;

    if ($origtext =~ m{\d\d/\d\d/\d\d\d\d}) {
        $dateseen = 1;
    }
    elsif ($dateseen && $origtext =~ /\S/) {
        push @headlines, $origtext;
        $dateseen = 0;
    }
    return;
}

# get each headline
for my $newslink (@newslinks) {
    my $story_url = "$news_url/$newslink";
    my $story     = get($story_url);

    # Skip stories that fail to download rather than handing undef to the
    # parser.
    if (!defined $story) {
        warn "Could not fetch $story_url, skipping\n";
        next;
    }

    # A date left pending at the end of one story must not turn the first
    # text of the next story into a headline.
    $dateseen = 0;

    my $p3 = HTML::Parser->new(
        api_version     => 3,
        text_h          => [ \&text, 'dtext' ],
        marked_sections => 1,
    );
    $p3->parse($story);
    $p3->eof;    # flush any trailing text to the handler
}

# print Dumper(\@headlines);

print "Spurs 2000 Headlines\n\nSpurs 2000 Headlines\n\n";

for my $headline (@headlines) {
    print "$headline\n\n\n";
}