|
使用 Python 爬虫抓取几十家门店在美团外卖上的排名,将结果写入数据库,最后在前端页面显示
爬虫脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
| #!/usr/bin/env python
# encoding: utf-8
"""
@version: ??
@author: phpergao
@license: Apache Licence
@file: meituan_paiming.py
@time: 2016/8/1 15:16
"""
import urllib,json,re
import urllib.parse
import http.cookiejar
import urllib.request,datetime,time,SQL
from multiprocessing import Process
import collections
def main(store_name,paiming):
    """Insert one ranking row for a store into ``dbo.meituan_paiming``.

    Args:
        store_name: Store title to record. The caller in this file passes
            text scraped from the listing page, so it is treated as
            untrusted when building the SQL statement.
        paiming: Ranking value; the caller in this file passes the 1-based
            position of the store in the scraped listing.

    Raises:
        ValueError: If ``paiming`` cannot be converted to an integer.
    """
    # NOTE(review): host and credentials are hard-coded here; consider
    # moving them to configuration.
    ms = SQL.MSSQL(host='192.168.72.172',user="stdservice",pwd="7数据库密码",db="stddata")

    # One clock read, so the date and time parts cannot straddle midnight.
    sj = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The statement is assembled as text, so double any single quote in the
    # scraped name to keep it inside its string literal, and force the rank
    # to be a plain integer.
    safe_name = str(store_name).replace("'", "''")
    rank = int(paiming)

    ms.ExecNonQuery('''
    INSERT INTO dbo.meituan_paiming
    VALUES ( '{}' , -- store_name - char(20)
    '{}' , -- paiming - int
    ' ' , -- dingwei_address - char(500)
    '{}' -- updatetime - char(50)
    )
    '''.format(safe_name,rank,sj))
def paiming(url):
    """Fetch one Meituan waimai listing page and record matching stores' ranks.

    The page at ``url`` is downloaded, restaurant titles are extracted in
    page order with a regular expression, and every title containing the
    substring "72" is printed and stored via ``main(title, position)``,
    where ``position`` is the 1-based index of the title in the page.

    Args:
        url: Address of the listing page to download.
    """
    # Build ONE opener that carries both the cookie jar and the browser
    # User-Agent, and fetch through it. Previously the header was attached
    # to a second opener that was never used, so requests went out with the
    # default urllib User-Agent.
    cj = http.cookiejar.LWPCookieJar()
    cookies_support = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(cookies_support, urllib.request.HTTPHandler)
    # Pretend to be a desktop browser.
    User_Agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
    opener.addheaders = [('User-Agent', User_Agent)]
    # Kept for backward compatibility: the opener is still installed globally.
    urllib.request.install_opener(opener)

    ret = opener.open(url).read()
    titles = re.findall(r'''<div data-title="(.*?)" data-bulletin=".*?\n*?.*?" data-poiid=".*?" class="restaurant" data-all=".*?"''',str(ret,'utf8'))

    sj = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    for rank, title in enumerate(titles, 1):
        # NOTE(review): "72" presumably identifies the tracked brand's
        # stores by name -- confirm.
        if "72" in title:
            print(title,rank,sj)
            main(title,rank)
            # NOTE(review): the original indentation was lost; "ok" is
            # assumed to be printed after each successful insert -- confirm.
            print("ok")
# Listing-page URLs to crawl, one per entry; each one is passed to
# paiming() by the __main__ block of this script.
# NOTE(review): the trailing path segment looks like a geohash of a delivery
# location -- confirm.
urllist = [
"http://waimai.meituan.com/home/ws0e9gmds0u7",
"http://waimai.meituan.com/home/ws0edu48zvm8",
"http://waimai.meituan.com/home/ws0e6v9brqq8",
"http://waimai.meituan.com/home/ws0ec83j0fbm",
"http://waimai.meituan.com/home/ws0efv4veqks",
"http://waimai.meituan.com/home/ws0edg19uxt6",
"http://waimai.meituan.com/home/ws0esdnh56um",
"http://waimai.meituan.com/home/ws0e937zdbph",
"http://waimai.meituan.com/home/ws0e3rfy1wxb",
"http://waimai.meituan.com/home/ws0eddzptu5e",
"http://waimai.meituan.com/home/ws0eeh52gdry",
"http://waimai.meituan.com/home/ws0e7jmpm28g",
"http://waimai.meituan.com/home/ws0e9pqmfr47",
"http://waimai.meituan.com/home/ws0edyvqgmrt",
"http://waimai.meituan.com/home/ws0edjnftj0h",
"http://waimai.meituan.com/home/ws0e3u0fb2gx",
"http://waimai.meituan.com/home/ws0ed9dq6x1f",
"http://waimai.meituan.com/home/ws0eehrxexqc",
"http://waimai.meituan.com/home/ws0dec6tzjwm",
"http://waimai.meituan.com/home/ws0e3txh5ym6",
"http://waimai.meituan.com/home/ws0ecqzp6n82",
"http://waimai.meituan.com/home/ws0e4g1dxshy",
"http://waimai.meituan.com/home/ws0g8ejh80rp",
"http://waimai.meituan.com/home/ws0ec37vje4d",
"http://waimai.meituan.com/home/ws0eg711k1t1",
"http://waimai.meituan.com/home/ws0dgmq924yy",
"http://waimai.meituan.com/home/ws0cff7x3m4u",
"http://waimai.meituan.com/home/ws0cg5zd5g4y",
"http://waimai.meituan.com/home/ws0ghenxxz82",
"http://waimai.meituan.com/home/ws0cfvkuzvtk",
"http://waimai.meituan.com/home/ws14dtvjhqm4",
"http://waimai.meituan.com/home/ws100stcewjn",
"http://waimai.meituan.com/home/ws104zssdsyp",
"http://waimai.meituan.com/home/ws102hkctrhh",
"http://waimai.meituan.com/home/ws10m19qgq7h",
"http://waimai.meituan.com/home/ws1079s3ek0m",
"http://waimai.meituan.com/home/ws0cq7hwhebm",
"http://waimai.meituan.com/home/ws10hyydu2f0",
"http://waimai.meituan.com/home/ws06vy2w07yr"
]
# Script entry point: start one child process per listing URL, each running
# paiming() on that URL.
# NOTE(review): the original indentation was lost in extraction; the sleep
# is assumed to sit inside the loop, spacing process starts 10 seconds
# apart -- confirm against the real script.
if __name__ == '__main__':
    for i in urllist:
        p = Process(target=paiming, args=(i,))
        p.start()
        time.sleep(10)
|
操作数据库的脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
| #!/usr/bin/env python
# encoding: utf-8
"""
@version: ??
@author: phpergao
@license: Apache Licence
@file: SQL.py
@time: 2016/7/25 17:56
"""
import pymssql
class MSSQL:
    """Minimal convenience wrapper around pymssql.

    A new connection is opened for every call to :meth:`ExecQuery` or
    :meth:`ExecNonQuery` and closed again before the call returns.

    The original notes for this class state that pymssql can be downloaded
    from http://www.lfd.uci.edu/~gohlke/pythonlibs/#pymssql and that TCP/IP
    must be enabled in SQL Server Configuration Manager.

    Example::

        ms = MSSQL(host="localhost", user="sa", pwd="123456",
                   db="PythonWeiboStatistics")
        for (id, NickName) in ms.ExecQuery("SELECT id,NickName FROM WeiBoUser"):
            print(str(id), NickName)
        ms.ExecNonQuery("insert into WeiBoUser values('2','3')")
    """

    def __init__(self,host,user,pwd,db):
        """Store the connection settings; no connection is opened yet."""
        self.host = host
        self.user = user
        self.pwd = pwd
        self.db = db

    def __GetConnect(self):
        """Open a connection (stored on ``self.conn``) and return a cursor.

        Raises:
            NameError: If no database name is configured, or if no cursor
                could be obtained from the connection.
        """
        if not self.db:
            # ``raise(NameError, "...")`` would raise a tuple, which is a
            # TypeError in Python 3; instantiate the exception instead.
            raise NameError("没有设置数据库信息")
        self.conn = pymssql.connect(host=self.host,user=self.user,password=self.pwd,database=self.db,charset="utf8")
        cur = self.conn.cursor()
        if not cur:
            # Do not leave the freshly opened connection dangling.
            self.conn.close()
            raise NameError("连接数据库失败")
        return cur

    def ExecQuery(self,sql,params=None):
        """Run a query and return all rows.

        Args:
            sql: The SQL text to execute.
            params: Optional parameters forwarded to ``cursor.execute`` for
                placeholder substitution. When omitted, ``sql`` is executed
                exactly as given.

        Returns:
            The result of ``cursor.fetchall()``: a list whose elements are
            the result rows, each row being a tuple of column values.
        """
        cur = self.__GetConnect()
        try:
            if params is None:
                cur.execute(sql)
            else:
                cur.execute(sql, params)
            return cur.fetchall()
        finally:
            # Always release the connection, even when execute() raises.
            self.conn.close()

    def ExecNonQuery(self,sql,params=None):
        """Run a statement that returns no rows and commit it.

        Args:
            sql: The SQL text to execute.
            params: Optional parameters forwarded to ``cursor.execute`` for
                placeholder substitution. When omitted, ``sql`` is executed
                exactly as given.
        """
        cur = self.__GetConnect()
        try:
            if params is None:
                cur.execute(sql)
            else:
                cur.execute(sql, params)
            self.conn.commit()
        finally:
            # Always release the connection, even when execute() raises.
            self.conn.close()
def main(ip,username,password,dbname,cmd):
    """Execute ``cmd`` as a query against the given server and print each row.

    Args:
        ip: Database server host.
        username: Login name.
        password: Login password.
        dbname: Database to connect to.
        cmd: SQL query text passed to ``MSSQL.ExecQuery``.
    """
    client = MSSQL(host=ip,user=username,pwd=password,db=dbname)
    rows = client.ExecQuery(cmd)
    # Each row is printed as returned by ExecQuery.
    for row in rows:
        print(row)
# Running this module directly does nothing; it is meant to be imported.
if __name__ == '__main__':
    pass
|
前端PHP网页脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
| <!DOCTYPE html>
<html>
<head>
<?php
// Page bootstrap: load the shared configuration, start the session and send
// visitors who are not logged in to the login page.
// NOTE(review): this block runs after markup has already been emitted
// (<!DOCTYPE html><html><head>), so session_start() and header() only work
// when output buffering is enabled; ideally move it above the DOCTYPE.
include "config.php";
session_start();
if(!isset($_SESSION['username'])){
    $home_url = 'logIn.php';
    header('Location:'.$home_url);
    // Stop here: without exit the rest of the page, including the ranking
    // table, would still be rendered and sent to the unauthenticated client.
    exit;
}
// Counters initialised to zero; they are not read in the visible part of
// this page -- presumably used by the included fragments (verify).
$ms=0;
$mt=0;
$ds=0;
$dt=0;
?>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>主页</title>
<script src="js/jquery-1.9.1.min.js" type="text/javascript"></script>
<script src="js/bootstrap-select.js" type="text/javascript"></script>
<script type="text/javascript" src="grid.js"></script>
<link id="bs-css" href="css/bootstrap-cerulean.min.css" rel="stylesheet">
<link href="css/charisma-app.css" rel="stylesheet">
<link href="css/bootstrap-select.css" rel="stylesheet">
<link href='bower_components/fullcalendar/dist/fullcalendar.css' rel='stylesheet'>
<link href='bower_components/fullcalendar/dist/fullcalendar.print.css' rel='stylesheet' media='print'>
<link href='bower_components/chosen/chosen.min.css' rel='stylesheet'>
<link href='bower_components/colorbox/example3/colorbox.css' rel='stylesheet'>
<link href='bower_components/responsive-tables/responsive-tables.css' rel='stylesheet'>
<link href='bower_components/bootstrap-tour/build/css/bootstrap-tour.min.css' rel='stylesheet'>
<link href='css/jquery.noty.css' rel='stylesheet'>
<link href='css/noty_theme_default.css' rel='stylesheet'>
<link href='css/elfinder.min.css' rel='stylesheet'>
<link href='css/elfinder.theme.css' rel='stylesheet'>
<link href='css/jquery.iphone.toggle.css' rel='stylesheet'>
<link href='css/uploadify.css' rel='stylesheet'>
<link href='css/animate.min.css' rel='stylesheet'>
<link href="grid.css" type="text/css" rel="stylesheet">
<script type="text/javascript" src="laydate/laydate.js"></script>
<script type="text/javascript" src="bower_components/responsive-tables/responsive-tables.js"></script>
<script type="text/javascript"> laydate.skin('danlan');</script>
<script src="js/jquery.noty.js"></script>
</head>
<body>
<!-- topbar starts -->
<?php include 'dropdownmenu.php' ?>
<!-- topbar ends -->
<div class="ch-container" style="position:relative;top:80px">
<div class="row">
<?php include 'navi.php'; ?>
<div id="content" class="col-lg-10 col-sm-10">
<!-- content starts -->
<div id="dlayerreport">
</div>
<div class="row">
<div class="box col-md-12">
<div class="box-inner">
<div class="box-header well">
<h2><i class="glyphicon glyphicon-info-sign"></i>美团店铺排名</h2>
<div class="box-icon">
<a href="orderlist.php" class="btn btn-minimize btn-round btn-default"><i class="glyphicon glyphicon-chevron-up"></i></a>
</div>
</div>
<div class="box-content row">
<div class="col-lg-7 col-md-12" style="width:100%;">
<div id="wepaydailysales" class="box-content" style="width:100%;">
<form id="grid_form_id">
<table class="table table-striped table-bordered bootstrap-datatable datatable responsive">
<thead><tr>
<!--<th class="th1" style="width:5%;">排序</th>-->
<th class="th1" style="width:15%;">店铺</th>
<th class="th2" style="width:15%;">排名</th>
<th class="th5" style="width:25%;">更新时间</th>
<th class="th4" style="width:25%;"> 定位地址</th>
</tr></thead>
<?php
// Render one table row per record of dbo.meituan_paiming, ordered by rank
// ascending. Columns: store name, rank, update time, located address.
$serverName = "localhost";
$connectionInfo = array( "UID"=>"stdservice", "PWD"=>"数据库密码","Database"=>"STDdata");
$conn = sqlsrv_connect( $serverName, $connectionInfo);
if($conn === false)
{
    // Connection failed: log the driver errors and show a single notice row
    // instead of calling the query functions on an invalid handle.
    error_log(print_r(sqlsrv_errors(), true));
    echo '<tr><td colspan="4">数据库连接失败</td></tr>';
}
else
{
    $queryString = "SELECT * FROM dbo.meituan_paiming ORDER BY paiming aSC";
    if($result = sqlsrv_query($conn,$queryString))
    {
        while($row = sqlsrv_fetch_array( $result,SQLSRV_FETCH_ASSOC))
        {
            // Text columns are converted from GBK to UTF-8 as before, then
            // HTML-escaped: the store name originates from scraped pages and
            // must not be able to inject markup into this page.
            $store   = iconv("gbk//ignore", "utf-8",$row['store_name']);
            $updated = iconv("gbk//ignore", "utf-8",$row['updatetime']);
            $address = iconv("gbk//ignore", "utf-8",$row['dingwei_address']);

            // Open the row explicitly; previously only the closing </tr>
            // was emitted.
            echo '<tr>';
            echo '<td>'.htmlspecialchars((string)$store, ENT_QUOTES, 'UTF-8').'</td>';
            echo '<td>'.htmlspecialchars((string)$row['paiming'], ENT_QUOTES, 'UTF-8').'</td>';
            echo '<td>'.htmlspecialchars((string)$updated, ENT_QUOTES, 'UTF-8').'</td>';
            echo '<td>'.htmlspecialchars((string)$address, ENT_QUOTES, 'UTF-8').'</td>';
            echo '</tr>';
        }
        sqlsrv_free_stmt($result);
    }
    else
    {
        // Query failed: record why, and render no rows.
        error_log(print_r(sqlsrv_errors(), true));
    }
    sqlsrv_close($conn);
}
?>
<script language="javascript">
/**
 * Fill the upload dialog with the selected item.
 *
 * Writes itemid into the #itemid input and shows itemname as the text of
 * the #itemname heading.
 *
 * @param {string} itemid   Identifier placed in the #itemid form field.
 * @param {string} itemname Name displayed in the dialog header.
 */
function show(itemid,itemname) {
    // .val() updates the field's live value (not just its default attribute).
    $("#itemid").val(itemid);
    // .text() inserts the name as plain text, so markup inside an item name
    // cannot be injected into the page.
    $("#itemname").text(itemname);
}
</script>
</table></form></div>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="modal fade" id="menu" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true" style="display: none;">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<button type="button" class="close" data-dismiss="modal">×</button>
<h3 id="itemname"></h3>
</div>
<div class="modal-body">
<form class="form-horizontal" action="action_updateimage.php" method="post" enctype="multipart/form-data" >
<fieldset>
<div class="input-group input-group-lg">
<div style="display:none;" class="col-sm-8"><input type="text" name="itemid" id="itemid" class="form-control" readonly ="readonly"></div>
</div>
<div class="input-group input-group-lg">
<span class="input-group-addon"><i class="glyphicon glyphicon-folder-open red"></i></span>
<input type="file" name="FileUpload1" id="FileUpload1" class="btn btn-primary green" />
</div>
</div>
<div class="clearfix"></div><br>
<p style="text-align:center"> <button type="submit" name="submit" class="btn btn-primary" style="text-align:center">确认</button></p>
</form>
<div class="modal-footer">
<a href="rider" class="btn btn-default" data-dismiss="modal">关闭</a>
</div>
</div>
</div>
</div>
<?php include 'userprof.php'; ?>
<?php include 'footer.php'; ?>
</div>
</body>
</html>
|
最后执行完成后在前端显示的效果图:
|
|