perl HTML::TokeParser
#!/usr/local/bin/perl
# Download the Sina front page, save a copy with Unix line endings to
# 1.html, then print the whitespace-trimmed text of every <tr> in that file.
use strict;
use warnings;
use LWP::Simple;
use Data::Dumper;
use HTML::TokeParser;
use Encode;

my $url  = "http://www.sina.com.cn/";
my $file = '1.html';

# LWP::Simple::get returns undef on failure; stop here rather than writing
# an empty file and parsing nothing.
my $content = get($url);
defined $content
    or die "Failed to fetch $url\n";

# Turn a character string into its UTF-8 octets. This yields the same bytes
# as clearing the internal UTF8 flag did, but through a supported API; a
# string that is already plain bytes is left untouched.
utf8::encode($content) if utf8::is_utf8($content);

# Convert CRLF line endings to LF in-process instead of shelling out to
# dos2unix, which may not be installed and whose errors were being hidden.
$content =~ s/\r\n/\n/g;

# Write the page and close the handle BEFORE parsing, so the buffered data
# is actually on disk when HTML::TokeParser opens the file.
open my $out, '>', $file
    or die "Cannot open $file for writing: $!\n";
binmode $out;    # write the octets as-is, with no newline translation
print {$out} $content
    or die "Cannot write to $file: $!\n";
close $out
    or die "Cannot close $file: $!\n";

# The constructor returns undef (with $! set) if the file cannot be opened.
my $p = HTML::TokeParser->new($file)
    or die "Cannot open $file for parsing: $!\n";

# For each <tr> start tag, collect the text up to the next <tr> or </tr>
# and print it on its own line.
while ( $p->get_tag("tr") ) {
    my $text = $p->get_trimmed_text( "tr", "/tr" );
    print $text . "\n";
}
页:
[1]