心海恋歌 发表于 2018-9-1 07:32:18

perl HTML::TokeParser

  #! /usr/local/bin/perl
  # Download the Sina home page, save a copy as 1.html, normalise its line
  # endings with dos2unix, then print the trimmed text that follows every
  # <tr> start tag (up to the next <tr> or </tr>).
  use strict;
  use warnings;
  use LWP::Simple;
  use Data::Dumper;
  use HTML::TokeParser;
  use Encode;

  # LWP::Simple::get returns undef when the request fails.
  my $content = get("http://www.sina.com.cn/");
  defined $content
      or die "Failed to fetch http://www.sina.com.cn/\n";

  # Clear the UTF-8 flag so the page is written out as plain octets.
  Encode::_utf8_off($content);

  open(my $out_fh, ">", "1.html")
      or die "Cannot open 1.html for writing: $!\n";
  print {$out_fh} $content
      or die "Cannot write to 1.html: $!\n";
  # Close (and thereby flush) before any other reader touches the file;
  # otherwise dos2unix and the parser may see a truncated copy.
  close($out_fh)
      or die "Cannot close 1.html: $!\n";

  # Best effort: a missing dos2unix binary is not treated as fatal.
  system("dos2unix ./1.html 2> /dev/null");

  # HTML::TokeParser->new returns undef (with $! set) if the file can't be read.
  my $p = HTML::TokeParser->new('1.html')
      or die "Cannot open 1.html for parsing: $!\n";

  while (my $token = $p->get_tag("tr")) {
      # Collect text until the next row starts or the current one ends.
      my $text = $p->get_trimmed_text("tr", "/tr");
      print $text . "\n";
  }

页: [1]
查看完整版本: perl HTML::TokeParser