#!
# Merge the per-domain partial result files into one file per index.
#
# For every index 0..17 the five domain files
#   <domain>\sys\test_part_result_by_index(<i>).txt
# are concatenated, in @person order, into fold1_all_text(<i>).txt in the
# current directory. The work is delegated to the Windows cmd.exe
# "copy a+b+c dest" concatenation syntax, so this only runs on Windows.
use strict;
use warnings;

my @person     = qw(canyin jiaotong lvyou shangwu tiyu);    # domain directories
my $last_index = 17;                                        # indices 0..17

for my $i (0 .. $last_index) {
    my $src = join '+',
        map { $_ . "\\sys\\test_part_result_by_index($i).txt" } @person;
    print $src, "\n";

    # "copy" is a cmd.exe built-in, so the single-string (shell) form of
    # system() is required here. /b forces binary mode: when files are
    # combined with "+", copy otherwise works in ASCII mode, which stops at a
    # Ctrl-Z byte in a source file and appends one to the destination.
    my $status = system("copy /b $src fold1_all_text($i).txt");

    # Report a failed merge instead of silently carrying on; keep going so
    # the remaining indices are still processed.
    if ($status != 0) {
        warn "copy failed for index $i (system status $status)\n";
    }
}
2. 简易获取当前目录下所有文件名
一般是用 Cwd 模块,下面这个更简洁
#!
#####
# Rename the files in the current directory: every *.txt file gets the
# prefix "canyin_". The old name is printed before the rename and the new
# name is printed only after the rename has actually succeeded.
use strict;
use warnings;

my $prefix = 'canyin_';

# glob() in list context returns all matching names up front, so files
# renamed during this run are not picked up a second time.
for my $file (glob '*.txt') {
    print "$file\n";
    my $newfile = $prefix . $file;

    # Never overwrite a file that already carries the target name.
    if (-e $newfile) {
        warn "skipping $file: $newfile already exists\n";
        next;
    }

    # rename() returns false on failure; report it rather than printing a
    # new name that does not exist.
    if (!rename($file, $newfile)) {
        warn "can not rename $file to $newfile: $!\n";
        next;
    }
    print "$newfile\n";
}
3. 去除文件每行前标号
在EBMT翻译时,为防止串行一般在每行前加标号,格式如:[0001234]
在评测前,需要去掉之,代码如下: 写成批处理即可运行。
#!
# Strip the leading line label from every line of a file.
#
# Usage: perl <script> INPUTFILE OUTPUTFILE
#
# Each line may start with a bracketed number such as "[0001234]" or
# "[ 12121212 ]". The label, and one space directly after it, are removed;
# everything else is copied to OUTPUTFILE unchanged.
use strict;
use warnings;

my $inputfile  = shift;
my $outputfile = shift;
if (!defined $inputfile || !defined $outputfile) {
    die "usage: $0 inputfile outputfile\n";
}

open my $in,  '<', $inputfile  or die "can not open file : $inputfile\n";
open my $out, '>', $outputfile or die "can not open file : $outputfile\n";

# Read line by line instead of loading the whole file into a list.
while (my $str = <$in>) {
    # Remove the label at the start of the line. Blanks inside the brackets
    # are optional, so both "[0001234]" and "[ 12121212 ]" are matched (the
    # previous pattern required the blanks and missed the compact form).
    $str =~ s/^\[[ \t]*\d+[ \t]*\]//;
    $str =~ s/^ //;    # remove one leading space
    print {$out} $str;
}

close $in;
# Buffered write errors only surface when the handle is closed.
close $out or die "can not close file : $outputfile: $!\n";
4. 从评测结果文件中提取结果:
#!
# Extract system names and NIST5/BLEU5 scores from an evaluation result file.
#
# Usage: perl <script> INPUTFILE OUTPUTFILE
#
# Every occurrence of  <system name="...",  Nist5Score="..."  and
# Bleu5Score="..."  in INPUTFILE is collected. OUTPUTFILE receives the system
# names (one per line), then a "nist5" line followed by the NIST5 scores,
# then a "bleu5" line followed by the BLEU5 scores.
use strict;
use warnings;

my $inputfile  = shift;
my $outputfile = shift;
if (!defined $inputfile || !defined $outputfile) {
    die "usage: $0 inputfile outputfile\n";
}

open my $in,  '<', $inputfile  or die "can not open file,$!\n ";
open my $out, '>', $outputfile or die "can not open >$outputfile,$!\n";

# Slurp the whole file: a plain scalar read would return only the first
# line, so matches on later lines would be lost.
my $contents = do { local $/; <$in> };
$contents = '' unless defined $contents;
close $in;

# Literal text that precedes each value of interest; the value itself runs
# up to the next double quote.
my $sys   = '<system name="';
my $nist5 = 'Nist5Score="';
my $bleu5 = 'Bleu5Score="';

my @system_names = ($contents =~ /\Q$sys\E(.*?)"/gs);
my @nist5score   = ($contents =~ /\Q$nist5\E(.*?)"/gs);
my @bleu5score   = ($contents =~ /\Q$bleu5\E(.*?)"/gs);

print {$out} join("\n", @system_names);
print {$out} "\nnist5\n";
print {$out} join("\n", @nist5score);
print {$out} "\nbleu5\n";
print {$out} join("\n", @bleu5score);

# Buffered write errors only surface when the handle is closed.
close $out or die "can not close $outputfile,$!\n";
5. 把文件随机化
#!/usr/bin/perl
# Purpose: put the lines of the input file into random order (each line is
# one unit).
# Method: repeatedly draw a random integer within the total line count and use
# a marker array to check whether that line has already been output.
# Usage: the first argument is the input file, the second the output file.
use strict;
# NOTE(review): $n and $seed are not referenced in the visible part of this
# script - confirm whether they are still needed.
my $n = 100;
my $seed = 1;
#Do not call srand() (i.e. without an argument) more than once in
#a script. The internal state of the random number generator
#should contain more entropy than can be provided by any seed, so
#calling srand() again actually *loses* randomness.
my $infile = shift;
my $outfile = shift;
# NOTE(review): both die messages name a fixed file ("input.txt" /
# "output.txt") rather than the path that was actually passed in.
open(IN,$infile) || die "无法打开input.txt\n";
open (OUT,">$outfile")|| die "无法打开output.txt\n";
# Read every line of the input into memory at once.
my @all = <IN>;
my $sen_num = @all;# total number of sentences (lines read)
my $cur_num = 0;# number of sentences extracted so far
my @lab ;# marker array, one entry per input line
# Marker values stored in / compared against the entries of @lab.
my $selected = 100;
my $non_selected = 0;
# Seed the random number generator once (see the srand() note above).
srand();
# Initialise the marker array first: every line starts out as not selected.
my $i=0;
for($i=0;$i<@all;$i++)
{
$lab[$i] = $non_selected;
}
# Holds the randomly drawn line index.
my $num = 0;
while($cur_num < $sen_num)
{
$num = int rand($sen_num);
if($lab[$num] != $selected)
{
print OUT @all[$num];