|
目前最流行的机器学习语言当属Python,它的两大库NumPy和SciPy十分强大。本人学习Perl的初衷是做爬虫,但做数据挖掘又不得不碰到科学计算,而Perl中没有一个像NumPy和SciPy这么强大的module,相关功能零散地分布在许多module中,所以只能自己写点函数,以备自用。我学习Perl大约只有5个月的时间,所以以下代码存在许多不足之处,请大家多多见谅,能提意见更佳。
本节主要介绍以下几个功能:
# Example 1: sum of a vector.
my @ex1 = (1 .. 5);
print sum(\@ex1), "\n";

# sum($vec) - add up the elements of a vector.
#
#   $vec : reference to an array of numbers
#
# Returns the numeric total of all elements; an empty array yields 0.
sub sum {
    my ($vec) = @_;
    my $total = 0;
    $total += $_ for @{$vec};
    return $total;
}
13
# Example 2: arithmetic mean of a vector.
my @ex2 = (1 .. 5);
print mean(\@ex2), "\n";

# mean($vec) - arithmetic mean of a vector.
#
#   $vec : reference to a non-empty array of numbers
#
# Returns the sum of the elements divided by their count.
# Dies with a descriptive message when the array is empty, rather than
# letting the division below fail with "Illegal division by zero".
sub mean {
    my ($vec) = @_;
    my $count = scalar @{$vec};
    die "mean: vector must not be empty!" if $count == 0;

    my $total = 0;
    $total += $_ for @{$vec};
    return $total / $count;
}
27
28
# Example 3: sample variance of a vector.
my @ex3 = (1 .. 5);
print var(\@ex3), "\n";

# var($vec) - sample variance of a vector (divisor n - 1).
#
#   $vec : reference to an array of at least two numbers
#
# Returns the sum of squared deviations from the mean, divided by
# (n - 1). Dies when fewer than two elements are supplied, because the
# divisor would then be zero (or negative for an empty array).
sub var {
    my ($vec) = @_;
    my $n = scalar @{$vec};
    die "var: vector length must be at least 2!" if $n < 2;

    my $m = mean($vec);
    my @squared = map { my $d = $_ - $m; $d * $d } @{$vec};

    # Divide once instead of multiplying by 1/(n-1): a single rounding
    # step rather than two.
    return sum(\@squared) / ($n - 1);
}
41
# Example 4: sample standard deviation of a vector.
my @ex4 = (1 .. 5);
print sd(\@ex4), "\n";

# sd($vec) - sample standard deviation of a vector.
#
#   $vec : reference to an array of numbers, passed straight to var()
#
# Returns the square root of var($vec).
sub sd {
    my ($vec) = @_;
    my $variance = var($vec);
    return sqrt($variance);
}
51
# Example 5: correlation coefficient of two vectors.
my @ex51 = (2, 3, 4, 9, 10);
my @ex52 = (1 .. 5);
print corr(\@ex51, \@ex52), "\n";

# corr($v1, $v2) - sample correlation coefficient of two vectors.
#
#   $v1, $v2 : references to arrays of numbers with the same length
#              (at least two elements each)
#
# Returns sum((x - mean_x) * (y - mean_y)) / (sd_x * sd_y * (n - 1)).
# Dies when the lengths differ or are below 2. If either vector has a
# standard deviation of zero the final division fails with Perl's
# "Illegal division by zero".
sub corr {
    my ($v1, $v2) = @_;
    my $n = scalar @{$v1};
    die "corr: vectors must have the same length!" if $n != scalar @{$v2};
    die "corr: vector length must be at least 2!"  if $n < 2;

    # Compute the means once; they do not change inside the loop.
    my $m1 = mean($v1);
    my $m2 = mean($v2);

    my $cross = 0;
    for my $i (0 .. $n - 1) {
        $cross += ($v1->[$i] - $m1) * ($v2->[$i] - $m2);
    }
    return $cross / (sd($v1) * sd($v2) * ($n - 1));
}
69
70
# Example 6: skewness coefficient of a vector.
my @ex6 = (2, 3, 4, 9, 10, 12);
print skew(\@ex6), "\n";

# skew($vec) - sample skewness coefficient of a vector.
#
#   $vec : reference to an array of at least three numbers
#
# Returns n * sum((x - mean)^3) / ((n - 1) * (n - 2) * sd^3).
# Dies when fewer than three elements are supplied, since (n - 2) would
# make the divisor zero. A vector with zero standard deviation makes the
# final division fail with Perl's "Illegal division by zero".
sub skew {
    my ($vec) = @_;
    my $n = scalar @{$vec};
    die "vector length must be at least 3!" if $n < 3;

    my $m = mean($vec);
    my $s = sd($vec);    # computed once and reused for sd^3
    my @cubed = map { my $d = $_ - $m; $d * $d * $d } @{$vec};
    return ($n * sum(\@cubed)) / (($n - 1) * ($n - 2) * $s * $s * $s);
}
87
# Example 7: kurtosis coefficient of a vector.
my @ex7 = (1, 9, 4, 9, 10, 12);
print kurt(\@ex7), "\n";

# kurt($vec) - kurtosis coefficient of a vector.
#
#   $vec : reference to an array of at least two numbers
#
# Returns sum((x - mean)^4) / ((n - 1) * sd^4). Nothing is subtracted
# from the ratio, so this is not an "excess" kurtosis.
# NOTE(review): other definitions (e.g. bias-corrected excess kurtosis)
# give different values - confirm this is the intended one.
# Dies when fewer than two elements are supplied, because (n - 1) would
# make the divisor zero. A vector with zero standard deviation makes the
# final division fail with Perl's "Illegal division by zero".
sub kurt {
    my ($vec) = @_;
    my $n = scalar @{$vec};
    die "vector length must be at least 2!" if $n < 2;

    my $m = mean($vec);
    my $s = sd($vec);    # computed once and reused for sd^4
    my @fourth = map { my $d = $_ - $m; $d * $d * $d * $d } @{$vec};
    return sum(\@fourth) / (($n - 1) * $s * $s * $s * $s);
}
|
|
|