python csv文件处理脚本 python csv

121thre · 发表于 2015-11-27 11:33:51

最近有一个需求，需要将csv文件通过http接口的post方法导入到数据库，于是写了一个脚本。主要是在字符编码这一块踩了不少坑，最后终于完成了，可适用于windows和linux。功能上，csv的列没有顺序要求。直接贴下脚本！

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

#!/usr/bin/env python
#coding=utf-8

from itertools import izip
import urllib,urllib2,urllib2,json,csv,sys,time,chardet

# Path of the CSV file to import, taken from the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage hint (written to stderr, exit status 1) instead of
    # letting a bare IndexError traceback reach the operator.
    sys.exit("Usage: python %s <file.csv>" % sys.argv[0])
csv_file = sys.argv[1]

# Columns the CSV header must contain; their order in the file is free.
Check_Head = (
    "name",
    "phone",
    "isSell",
    "origin",
    "type",
    "city",
    "brand",
    "series",
    "model",
    "year",
    "plate",
    "remark",
)

# Per-column "required" flag: True means the value must not be empty,
# False means the value may be left empty.
keys = {
    "name": True,
    "phone": True,
    "isSell": True,
    "origin": True,
    "type": False,
    "city": False,
    "brand": False,
    "series": False,
    "model": False,
    "year": False,
    "plate": False,
    "remark": False,
}

# POST helper
def http_post(data, url):
    """Serialize *data* as JSON, send it to *url* and return the reply body.

    Args:
        data: JSON-serializable object; ``json.dumps(data)`` becomes the
            request body.
        url: Address the request is sent to.

    Returns:
        Whatever ``response.read()`` yields for the reply.

    Raises:
        Any error raised by ``json.dumps`` or ``urllib2.urlopen`` propagates
        to the caller unchanged.
    """
    payload = json.dumps(data)
    request = urllib2.Request(url, payload)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()

# Detect the character encoding of the CSV file that is about to be processed.
def Codeing(file):
    """Return the encoding name chardet detects for the file at path *file*.

    The whole file is read in binary mode and handed to ``chardet.detect``.

    Args:
        file: Path of the file to inspect.

    Returns:
        The detected encoding name. When chardet reports no encoding
        (``None``, e.g. for an empty file) ``"utf-8"`` is returned so that
        callers can still pass the value to ``str.decode``.

    Exits the script with status 1, after printing the error, if the file
    cannot be opened or read or detection fails.
    """
    try:
        # ``with`` closes the handle even when read() or detect() raises.
        with open(file, 'rb') as f:
            detected = chardet.detect(f.read())
        return detected["encoding"] or "utf-8"
    except Exception as err:
        print("%s : %s" % (Exception, err))
        # Non-zero status so that shells and wrappers see the failure.
        sys.exit(1)

# Encoding of the CSV file; passed to the check/import functions, which use
# it to decode every cell they read.
decode = Codeing(csv_file)
# Encoding used for the interactive prompts. sys.stdin.encoding is None when
# stdin is not a terminal (e.g. input is piped), which would make
# ``.encode(encode)`` fail with a TypeError, so fall back to UTF-8.
encode = sys.stdin.encoding or "utf-8"

# Import confirmation prompt.
def Export_Inquiry():
    """Ask the operator to confirm the import and wait for a valid answer.

    The prompt is repeated until the stripped answer is exactly ``Y`` or
    ``N``. ``Y`` prints the "import started" notice and returns; ``N`` ends
    the script through ``sys.exit()``.
    """
    # The prompt is re-encoded to the console encoding held in the
    # module-level ``encode``; it does not change, so encode it once.
    prompt = "确认导入请输入Y,退出请输入N :".decode('utf-8').encode(encode)
    answer = None
    while answer != "Y":
        answer = raw_input(prompt).strip()
        if answer == "N":
            sys.exit()
    print(u"已开始导入，导入过程中请勿退出！")

# Target environment selection.
def Export_Env():
    """Ask which environment to import into and return its endpoint URL.

    The prompt is repeated until the stripped answer is exactly ``prod`` or
    ``uat``; a notice naming the chosen environment is printed before
    returning.

    Returns:
        The register_carneed endpoint URL of the chosen environment.
    """
    # Encoded once with the console encoding held in the module-level
    # ``encode``; the prompt text never changes between attempts.
    prompt = "请输入导入环境prod/uat ? :".decode('utf-8').encode(encode)
    while True:
        export_env = raw_input(prompt).strip()
        if export_env == "prod":
            print(u"你要导入的是生产环境,请确认是否需要导入 ?")
            return 'http://www.xxxxx.com/webapi/public/register_carneed'
        if export_env == "uat":
            print(u"你要导入的是UAT环境,请确认是否需要导入 ?")
            return 'http://uat.xxxx.com/webapi/public/register_carneed'




def _check_header(row, Head, coding):
    """Validate the header *row* against *Head* and return it as a tuple.

    Exits the script with status 1 when the column count differs from
    ``len(Head)``, or when a name is not in *Head* or appears twice.
    """
    if len(row) != len(Head):
        print(u"请检查你的csv文件列数与要求不一致；")
        print(u"你导入为%s 列：%s" % (len(row), row))
        print(u"正确应该%s 列：%s" % (len(Head), list(Head)))
        sys.exit(1)
    err_head = []
    list_head = []
    for name in row:
        name = name.strip().decode(coding)
        # A repeated name is rejected as well: with the column count already
        # matching, a duplicate means some expected column is missing, which
        # would otherwise surface later as a KeyError.
        if name not in Head or name in list_head:
            err_head.append(name)
        else:
            list_head.append(name)
    if err_head:
        print(u"列名有误:%s 请确认是否与以下匹配" % err_head)
        print(list(Head))
        sys.exit(1)
    return tuple(list_head)


def Check_Csv(csv_file, Head, coding):
    """Validate *csv_file* before it is imported.

    The first row must be a header holding exactly the names in *Head*, in
    any order. Every following row is decoded with *coding* and each column
    flagged True in the module-level ``keys`` must be non-empty.

    Args:
        csv_file: Path of the file; the name must end in ``.csv``.
        Head: Sequence of the expected column names.
        coding: Encoding used to decode each cell.

    Returns:
        A dict ``{"loop": n, "list_head": cols}``: ``n`` is the number of
        data rows that were reported as invalid, ``cols`` is the header as a
        tuple in file order.

    Exits the script with a non-zero status when the file name does not end
    in ``.csv``, the file cannot be opened, the header is wrong (an empty
    file counts as a header with zero columns), or a data row is blank.
    """
    suffix = csv_file.split(".")[-1]
    if suffix != "csv":
        sys.exit("请输入csv文件")
    try:
        f = open(csv_file, 'rb')
    except Exception as err:
        print("%s : %s" % (Exception, err))
        sys.exit(1)

    bad_rows = 0
    # try/finally closes the file on every path, including sys.exit();
    # the original close() call sat after the return and never ran.
    try:
        reader = csv.reader(f)
        # An empty file yields no header row; treating it as [] routes it
        # through the normal column-count error instead of a NameError.
        list_head = _check_header(next(reader, []), Head, coding)
        for row in reader:
            if len(row) == 0:
                # Blank rows are not skipped: the operator must remove them.
                print(u"第%s行为空，请处理" % reader.line_num)
                sys.exit(1)
            if len(row) < len(list_head):
                # Too few cells: report the row instead of crashing while
                # pairing cells with column names.
                print(u"第%s行列数不足：应为%s 列，实际%s 列"
                      % (reader.line_num, len(list_head), len(row)))
                bad_rows += 1
                continue
            # Cells are decoded with the detected encoding and re-encoded as
            # UTF-8; cells beyond the header width are ignored by zip().
            data = dict(
                (key, cell.strip().decode(coding).encode("UTF-8"))
                for key, cell in zip(list_head, row)
            )
            # Collect required columns whose value is empty so the message
            # can name them together with the line number.
            IsNull_Key = {}
            for k, required in keys.items():
                if required is True and data[k] == "":
                    IsNull_Key[k] = data[k]
            if IsNull_Key:
                print(u"第%s行有空值：%s" % (reader.line_num, IsNull_Key))
                bad_rows += 1
    finally:
        f.close()
    return {"loop": bad_rows, "list_head": list_head}

def Import_Csv(csv_file, list_head, url, coding):
    """POST every data row of *csv_file* to *url*, one request per row.

    The header row is skipped. Each cell is stripped, decoded with *coding*
    and re-encoded as UTF-8; columns flagged False in the module-level
    ``keys`` are dropped from the payload when empty. A per-row result line
    is printed, followed by a summary of how many rows were imported.

    NOTE(review): rows are assumed to be well-formed already; the script
    runs Check_Csv on the same file before calling this function.

    Args:
        csv_file: Path of the CSV file.
        list_head: Column names in file order, as returned by Check_Csv.
        url: Endpoint that receives each row through ``http_post``.
        coding: Encoding used to decode each cell.

    Exits the script with status 1 as soon as a request, or the parsing of
    its reply, raises.
    """
    imported = 0
    f = open(csv_file, 'rb')
    # try/finally closes the file on every path, including sys.exit().
    try:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            cells = iter(row)
            data = {}
            for key in list_head:
                data[key] = next(cells).strip().decode(coding).encode("UTF-8")
            # Optional columns that were left empty are not sent at all.
            for k, required in keys.items():
                if required is False and data[k] == "":
                    del data[k]

            try:
                # Throttle: pause one second before every even-numbered line.
                if reader.line_num % 2 == 0:
                    time.sleep(1)
                resp = http_post(data=data, url=url)
                result = json.loads(resp)
                # Decode only for display: interpolating raw non-ASCII bytes
                # into a unicode message raises UnicodeDecodeError in
                # Python 2, which would abort the import mid-way.
                shown = resp.decode("UTF-8", "replace")
                if result["success"] is False:
                    print(u"请注意：第%s行导入失败！ %s " % (reader.line_num, shown))
                else:
                    print(u"成功导入第%s行：%s" % (reader.line_num, shown))
                    imported += 1
            except Exception as err:
                print("%s : %s" % (Exception, err))
                sys.exit(1)
    finally:
        f.close()
    # Report rows that actually succeeded; the former ``line_num - 1`` also
    # counted failed rows and produced -1 for an empty file.
    print(u"导入已完成，共导入%s行。" % imported)


print("############################################")
print(u"###正在校验csv文件格式，请稍等.........#####")
print("############################################")

# Validate first; nothing is sent to the server unless every row passes.
result = Check_Csv(csv_file, Check_Head, decode)
loop = result["loop"]
list_head = result["list_head"]
if loop > 0:
    print(u"请按以上提示处理后再进行导入！")
    # Non-zero status: the file was rejected, nothing was imported.
    sys.exit(1)
else:
    print(u"文件格式校验已完成，请选择导入环境：")
    url = Export_Env()
    Export_Inquiry()
    Import_Csv(csv_file, list_head, url, decode)

print("############################################")
print(u"################导入完成！##################")
print("############################################")

发这里也就当做笔记啦！

账号		自动登录	找回密码
密码			立即注册

wirelessnetview好用的无线分析工具

亿图图示专家(EDraw Max) V7.9 中文破解版

zabbix3.4.1安装部署+微信推送信息+大屏显

Red Hat OpenShift I: Containers & Kubern

2025 年，C++ 还能“硬核”多久？

RH199 RHCSA Rapid Track

Red Hat RHCE 8 (EX294) Cert Guide

[经验分享] python csv文件处理脚本 python csv

相关帖子

浏览过的版块

扫码加入运维网微信交流群