|
# This is a file for recording the frequently used Perl code.
use warnings;
######################################################################
# Open the input file for reading and the output file for appending.
# 'filename' and $output_file are placeholders; set them before running.
# Three-argument open keeps the mode separate from the file name.
open(FILE, '<', 'filename')
    or die "cannot open the file: $!";
open(OUTPUT, '>>', $output_file)
    or die "cannot open the file: $!";

# Simple way: slurp every line into an array, then walk the array.
# This holds the whole file in memory at once.
@file_list = <FILE>;
foreach my $eachline (@file_list) {
    chomp $eachline;
    # NOTE(review): the line terminator was garbled to a space in this file;
    # "\n" is the presumed original -- confirm.
    print OUTPUT "$eachline\n";
}

# A better way: read one line at a time so only one line is held in memory.
# The slurp above left FILE at end-of-file, so rewind first; otherwise this
# loop would never execute.
seek(FILE, 0, 0) or die "cannot rewind the file: $!";
while (defined(my $eachline = <FILE>)) {
    chomp $eachline;
    # ... process $eachline here ...
}
close(FILE) or warn "cannot close the file: $!";
# OUTPUT is intentionally left open: a later snippet in this file prints to it.
# If the file exists.
# The original placeholder read "$filename OR directory/$filename", which Perl
# would treat as one literal path; test the two candidate locations separately.
# NOTE(review): 'directory' is a placeholder directory name -- replace it.
if ((-e $filename) || (-e "directory/$filename")) {
    # ... the file exists in one of the two places ...
}
######################################################################
# Using Hash
# Define a hash (%gene_hash) that maps each gene keyword to a
# comma-separated list of the values seen for it.
%gene_hash = ();

# NOTE(review): @entry is assumed to hold one parsed record as
# (keyword, value) -- confirm against the code that fills it.
if (exists $gene_hash{$entry[0]}) {
    # The keyword is already present: append the new value to its list.
    $gene_hash{$entry[0]} = "$gene_hash{$entry[0]},$entry[1]";
}
else {
    # First time this keyword is seen: start its list. Without this branch
    # nothing would ever be stored in the hash.
    $gene_hash{$entry[0]} = $entry[1];
}

# Print the hash, one record per line. GENEHASH must already be open for
# writing; it is not opened in this snippet.
# NOTE(review): the separator and line terminator were garbled in this file;
# "\t" and "\n" are the presumed originals -- confirm.
foreach my $keyword (keys %gene_hash) {
    print GENEHASH "$keyword\t$gene_hash{$keyword}\n";
}
######################################################################
# Parsing string
# Extract the text between <name> and </name> in $dicxml_line and write it,
# together with $dicid, to the OUTPUT handle (which must already be open).
$dicname_start = index($dicxml_line, "<name>");
if ($dicname_start >= 0) {
    # The format: <name>***</name>, get the dic name ...
    # Locate the closing tag explicitly instead of assuming it is the last
    # seven characters of the line; a trailing newline or any text after
    # </name> would otherwise corrupt the extracted name.
    my $text_start = $dicname_start + length "<name>";
    my $text_end   = index($dicxml_line, "</name>", $text_start);
    if ($text_end >= 0) {
        $dic_name = substr($dicxml_line, $text_start, $text_end - $text_start);
        # NOTE(review): the separator and line terminator were garbled in
        # this file; "\t" and "\n" are the presumed originals -- confirm.
        print OUTPUT "$dicid\t$dic_name\n";
    }
    else {
        warn "no closing </name> tag found in line: $dicxml_line\n";
    }
}
######################################################################
# Crawling HTML pages
# Download the index page for one letter and append it to HTML/html_<letter>.
use LWP::UserAgent;

$letter = 'a';
open(my $letter_html, '>>', "HTML/html_$letter")
    or die "cannot open the file: $!";
$url = "http://cancerweb.ncl.ac.uk/omd/contents/" . $letter . ".html";

# Announce the download before the blocking request is sent.
print "Downloading html_$letter...\n";

# Direct method calls instead of the indirect-object form "new Class(...)".
my $agent    = LWP::UserAgent->new();
my $request  = HTTP::Request->new('GET' => $url);
my $response = $agent->request($request);

if ($response->is_success) {
    print {$letter_html} $response->content();
    # Buffered write errors only surface at close, so check it.
    close($letter_html) or die "cannot close the file: $!";
    # A space after $letter stops Perl from reading the variable name as
    # $letterCompleted.
    print "Downloading html_$letter Completed!\n";
}
else {
    # Do not store an error page as if it were the requested content.
    close($letter_html);
    warn "Downloading html_$letter failed: ", $response->status_line, "\n";
}
######################################################################
# Regular Expressions
### The CHAR or STRING
# one char
.
# one char out of a set or a range
[abcde];
[a-e];
[0-9];
# not
[^a-z];
# 常用字符类的快捷方式
\w 一个单词字符,与[a-zA-Z0-9_]相同
\W 一个非单词字符,与\w相反
\d 一个数字,与[0-9]相同
\D 一个非数字
\s 一个白空间字符,与[ \t\f\r\n]相同
\S 一个非白空间字符
### The Matching Times
# one or more times
+
# 0 or more times
*
# 0 or one time
?
# customize matching times: match 'pat' at least 'n' times, at most 'm' times.
pat{n,m}
### RE中的'或者',选择
/dogs|cats/
/(.*)\sis\s(.*)/ 理解其中\s代表一个白空间字符
#………………………………………………………………………………
# matching the Regular Expressions
m//;
//;
m{};
# replacing the string
s/searchpattern/replacement/
#默认的对$_进行操作,对变量进行匹配或替换使用
$variable=~m//;
$variable=~s///;
#不考虑大小写
m//i;
#全局
m//g;
# When the RE contains parentheses, the text matched by each group is saved
# automatically into the special variables $1, $2, ...
# For example (the match is applied to $_):
if (/(\d{3})-(\d{3})-(\d{4})/) {
    print "The area code is $1";
}
# Because of this, the match results can be assigned to a list: in list
# context with /g the match returns every captured group.
# NOTE(review): the backslashes were lost in this file; \W(f\w\w\w) is the
# presumed original pattern -- confirm.
@f = m/\W(f\w\w\w)/g;
######################################################################
#其他函数
#搜索子串
index();
rindex();
#分割提取标量
substr();
#转换
tr/searchlist/replacementlist/;
#与替换s///的区别在于后者可以是整个list的对应替换
######################################################################
# Using directories
# List every entry of a directory except the . and .. pseudo-entries.
# opendir only works on local filesystem paths, never on URLs.
# NOTE(review): 'crawl_html' is taken from the commented examples below;
# adjust it to the directory you want to read.
my $dir_path = 'crawl_html';
opendir(my $temp_dir, $dir_path)
    or die "cannot open the DIRECTORY: $!";

# Alternatives that read the whole listing at once:
# @FILES = readdir $temp_dir;
# remove the . and .. files!
# @FILES = grep { !/^\.\.?$/ } readdir $temp_dir;
# get all the .txt files!
# @FILES = grep { /\.txt$/i } readdir $temp_dir;

while (defined(my $file = readdir $temp_dir)) {
    # open(FILEH, '<', "$dir_path/$file") || die "cannot open the file: $!";
    # Remove the no-sense . and .. entries. The dots must be escaped: an
    # unescaped ..? would also skip every one- or two-character file name.
    next if $file =~ m{\A\.\.?\z};
    print "$file\n";
}

# Browse the directory (these lines need: use Cwd;)
# print "Your current directory is: ", cwd, "\n";
# chdir 'crawl_html' or warn "Directory crawl_html not accessible: $!";
# print "You are now in: ", cwd, "\n";
closedir($temp_dir);
######################################################################
#使用Perl中的模块module
…… |
|
|