1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
| # -*- coding: cp936 -*-
import xlrd
class DataComparison():
compare_result = ''
f_dir = r''
datasrc = ''
datadst = ''
f1 = ''
f2 = ''
col_sets_sheet1 = []
col_sets_sheet2 = []
f1_sheet_index = 0
f2_sheet_index = 0
if_with_title = '1'
def __init__(self, dataList):
############### Set Parameters #############
compare_result = dataList[0]
datasrc = dataList[1]
datadst = dataList[2]
# Read the 2 excel files to be compared.
# Set data file name
f1 = dataList[3]
f2 = dataList[4]
# Set col numbers in the 1st sheet
col_sets_sheet1 = dataList[5] #
col_sets_sheet2 = dataList[6] #
# Set sheet in files to be compared.
# The 1st sheet as default
f1_sheet_index = dataList[7]
f2_sheet_index = dataList[8]
# Set if excel sheet has title(only 1st row as title)
if_with_title = dataList[9]
############### Load Data & Compare #############
print "Start Loading..."
book1 = xlrd.open_workbook(f1)
book2 = xlrd.open_workbook(f2)
print "Loading Files OK!"
b1sheet = book1.sheet_by_index(f1_sheet_index)
b2sheet = book2.sheet_by_index(f2_sheet_index)
print "Ranging Data"
col_1_b1sheet = b1sheet.col_values(col_sets_sheet1[0])
col_2_b1sheet = b1sheet.col_values(col_sets_sheet1[1])
col_1_b2sheet = b2sheet.col_values(col_sets_sheet2[0])
col_2_b2sheet = b2sheet.col_values(col_sets_sheet2[1])
if if_with_title == '1':
print "Removing titles"
col1_b1sheet = col_1_b1sheet[1:]
col2_b1sheet = col_2_b1sheet[1:]
col1_b2sheet = col_1_b2sheet[1:]
col2_b2sheet = col_2_b2sheet[1:]
print "Round columns with number type"
for i, val in enumerate(col2_b1sheet):
col2_b1sheet = round(val,2)
for i, val in enumerate(col2_b2sheet):
col2_b2sheet = round(val,2)
# Get sum value of number cols
sum1 = sum(col2_b1sheet)
sum2 = sum(col2_b2sheet)
print "Do zipping..."
zip1 = zip(col1_b1sheet, col2_b1sheet)
zip2 = zip(col1_b2sheet, col2_b2sheet)
print "Sort ziped list"
zip1.sort()
zip2.sort()
print "Compare rows in summary"
len1 = len(zip1)
len2 = len(zip2)
print len1, '--', len2
compare_result = compare_result+":\n 总数比对:"
if len1 == len2:
print "记录总数比较一致!"
compare_result = compare_result+"结果一致,均为"+str(len1)+"条;"
else:
print "记录总数不一致!"
compare_result = compare_result+"结果不一致,"+ datasrc +"为"+str(len1)+"条,"+datadst +"为"+str(len2)+"条;"
print sum1, '--', sum2
if round(sum1,2) == round(sum2,2):
print "总金额比较一致!"
compare_result = compare_result+"总金额比较一致,"+"均为"+str(sum1)+";\n"
else:
print "总金额不一致!"
compare_result = compare_result+"总金额不一致,"+datasrc +"为"+str(sum1)+","+datadst +"为"+str(sum2)+";\n"
print "Get different rows in detail"
cnt = 0
for c in range(len(zip1)):
if zip1[c] != zip2[c]:
cnt += 1
print zip1[c], '--', zip2[c]
if cnt >0:
print cnt, "行比较有差异!"
compare_result = compare_result + " 明细比对:存在"+str(cnt)+"条差异。"
else:
print "未发现明细差异数据"
compare_result = compare_result + " 明细比对:无差异。"
print compare_result
self.compare_result = compare_result
print "Finished!"
def getResult(self):
return self.compare_result
datasrc = "源数据1"
datadst = "源数据2"
f1_sheet_index = 0
f2_sheet_index = 0
if_with_title = '1'
compare_result = "比对结果:"
f1 = r'D:\tmp\src001.xlsx'
f2 = r'D:\tmp\tmp001.xlsx'
col_sets_sheet1 = [2,4] #取第3、5列
col_sets_sheet2 = [1,2] #取第2、3列
# 组装dataList
dataList = [compare_result, datasrc, datadst, f1, f2, col_sets_sheet1, col_sets_sheet2, f1_sheet_index, f2_sheet_index, if_with_title]
dc1 = DataComparison(dataList)
# 打印比较结果
print dc1.getResult()
|