#t.pl
# Extracts the index page URLs and the rar download page links from them.
use warnings;
use LWP::Simple;
# getDownloadPage($html)
#
# Scans $html one line at a time. For each line that contains a
# <li class="itm"> entry made of a YYYY-MM-DD date inside <span>...</span>
# followed by an <a href="..."> link, prints "<href> <link text>\n" to the
# currently selected output handle. Only the first such entry on a line is
# reported.
#
# Parameters:
#   $html - page content as a single string; may be undef (for example when
#           the fetch that produced it failed), in which case nothing is
#           printed.
# Returns: nothing.
sub getDownloadPage {
    my ($html) = @_;

    # A failed download hands us undef; splitting it would only emit
    # "uninitialized value" warnings, so bail out early.
    return unless defined $html;

    foreach my $line (split /\n/, $html) {
        # $1 = link target, $2 = link text.
        if ($line=~/<li class="itm">[^<]*<span> *[0-9]{4}-[0-9]{2}-[0-9]{2} *<\/span>[^<]*<a href="([^"> ]*)" *>([^<]*)</) {
            print $1," ",$2,"\n";
        }
    }
    return;
}
# Seed the list of index pages with the section's landing page.
my @indexes = ("http://www.yingyu.com/stxz/chuzhong/zhongkao/");

# Fetch the landing page. LWP::Simple's get() returns undef on failure, so
# the result is checked before it is split.
my $content = get($indexes[0]);
my @hrefs;
if (defined $content) {
    # Every chunk after the first starts with the value of an href attribute.
    @hrefs = split /href="/, $content;
    shift @hrefs;    # drop the text that precedes the first href
}
else {
    warn "Could not fetch index page $indexes[0]\n";
}

# Collect the numbered pagination links (absolute ...index*.shtml URLs whose
# link text starts with digits). The pattern is anchored to the start of the
# chunk and stops at the closing quote, so only the href value itself can
# match, not a URL that merely appears later in the chunk.
foreach my $href (@hrefs) {
    if ($href =~ /\A(http:\/\/[^"]*index[_0-9]*\.shtml)" *>[0-9]+/) {
        push @indexes, $1;
    }
}

# For every index page, print each download page URL together with its
# link text (the page's Chinese name).
foreach my $index (@indexes) {
    $content = get($index);
    if (!defined $content) {
        warn "Could not fetch index page $index\n";
        next;
    }
    getDownloadPage($content);
}