'''
Created on 2015-1-19
@author: xuzhengzhu
'''
#input files
import xlrd,openpyxl
import pandas as pd
from sklearn import cross_validation
from dataAnaly import modelChose
from sklearn.metrics import r2_score
import numpy as np
file=pd.ExcelFile('e:\\report.xlsx')
data=file.parse('Sheet1')
n=len(data)
#init data
x=data[['myjg','tjg']]
y=data['byjg']
models=['linear_model.SGDRegressor','GradientBoostingRegressor','RandomForestRegressor','AdaBoostRegressor','BaggingRegressor','linear_model.LinearRegression','linear_model.LogisticRegression','svm.svr','svm.NuSVR']
m=len(models)
k=10
R2=np.zeros(k)
z=2
count=0
modelCount=0
#lookup get model object
for modelCount in range(m-1):
clf=modelChose.modelChose(models[modelCount])
R2=np.zeros(k)
count=0
#lookup folds
for train_index,test_index in cross_validation.KFold(n-z,n_folds=k):
x_train,x_test=x.ix[train_index],x.ix[test_index]
y_train,y_test=y[train_index],y[test_index]
clf.fit(x_train,y_train)
y_predict=clf.predict(x_test);
r2=r2_score(y_test,y_predict)
#print 'computed %d time(s) and R square is:%f ' %(count+1,r2)
R2[count]=r2
count+=1
print 'model choose is :',models[modelCount],'the mean of R2 is :',np.mean(R2)
y_validation = clf.predict(x.ix[(n-z):n])
r2_val=r2_score(y.ix[(n-z):n],y_validation)
print 'model choose is :',models[modelCount],'the validation ser R square is :%f ',r2_val
#print pd.DataFrame({'y_true':y.ix[(n-z):n,],'y_validation':y_validation})
modelCount+=1
modelTest.py
modelChose.py
测试结果:
model choose is : linear_model.SGDRegressor the mean of R2 is : -4.40149514377e+158
model choose is : linear_model.SGDRegressor the validation ser R square is :%f -1.69950873171e+175
model choose is : GradientBoostingRegressor the mean of R2 is : 0.06842532769
model choose is : GradientBoostingRegressor the validation ser R square is :%f -0.706828939678
model choose is : RandomForestRegressor the mean of R2 is : 0.0656454293629
model choose is : RandomForestRegressor the validation ser R square is :%f -1.62440546968
model choose is : AdaBoostRegressor the mean of R2 is : 0.0678670360111
model choose is : AdaBoostRegressor the validation ser R square is :%f -0.743162901308
model choose is : BaggingRegressor the mean of R2 is : 0.0913739612188
model choose is : BaggingRegressor the validation ser R square is :%f -1.11141498216
model choose is : linear_model.LinearRegression the mean of R2 is : 0.0976952970181
model choose is : linear_model.LinearRegression the validation ser R square is :%f -15.3631379961
model choose is : linear_model.LogisticRegression the mean of R2 is : -0.224099722992
model choose is : linear_model.LogisticRegression the validation ser R square is :%f 0.588585017836
model choose is : svm.svr the mean of R2 is : -0.243679440381
model choose is : svm.svr the validation ser R square is :%f -1.21033155027