用 Python 爬虫爬取百度外卖店铺排名

iyuytre 发表于 2016-8-2 09:28:48

源代码如下:
#!/usr/bin/env python
# encoding: utf-8

"""
@version: ??
@author: phpergao
@license: Apache Licence
@file: baidu_paiming.py
@time: 2016/8/1 11:10
"""

import requests,re,urllib,codeop,urllib.request,nturl2path,macurl2path

# Identifiers that chapaiming() substitutes into the shop-list URL, one per
# entry. The trailing comments are the romanized labels from the original
# script (presumably the delivery location each identifier stands for).
urllist = [
    "f7a2bee997ef68e8",  # Liying
    "3b246a0864597e50",  # Suifeng
    "0ebf88697141f32f",  # Guancheng
    "eff209d4a7f538ca",  # Ligang
    "57f9e38e087acf61",  # Goushu
]
def chapaiming(urllist, keyword="72", opener=None):
    """Print and return the rank of the first shop whose name contains *keyword*.

    Result pages of the Baidu Waimai shop-list endpoint are requested one
    after another (40 shops per request, as fixed by the URL). Shop names
    are extracted from each response with a regular expression and counted
    in the order they appear; the count carries over from page to page, so
    the reported rank is the shop's 1-based position in the whole listing.

    Args:
        urllist: A single identifier string that is substituted into the
            URL path. The name is kept for backward compatibility; it is
            one identifier, not a list.
        keyword: Substring searched for in each shop name. Defaults to
            "72", the value this script originally hard-coded.
        opener: Optional object exposing ``open(url)`` that returns a
            response usable as a context manager with a ``read()`` method
            yielding bytes. When omitted, a ``urllib.request`` opener that
            sends a desktop-browser User-Agent header is created.

    Returns:
        The 1-based rank of the first matching shop, or ``None`` when a
        page containing no shop names is reached before any match.
    """
    if opener is None:
        # Present the request as coming from a regular desktop browser.
        user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
        opener = urllib.request.build_opener()
        # addheaders must be a list of (name, value) tuples.
        opener.addheaders = [("User-Agent", user_agent)]

    # Compiled once; both patterns are reused for every page.
    shop_pattern = re.compile(r'''"shop_name":"(.*?)","shop_announcement":''')
    address_pattern = re.compile(r'''"poi_address":"(.*?)"},"sortby":''')
    url_template = "http://waimai.baidu.com/waimai/shoplist/{}?display=json&page={}&count=40"

    rank = 0  # incremented before each comparison, so the first shop is rank 1
    page = 1
    while True:
        url = url_template.format(urllist, page)
        # Close the response as soon as the body has been read.
        with opener.open(url) as response:
            # 'unicode_escape' turns backslash escapes in the raw body
            # (presumably \uXXXX-encoded shop names) into real characters.
            text = response.read().decode('unicode_escape')

        shop_names = shop_pattern.findall(text)
        if not shop_names:
            # No shops on this page: the listing is exhausted, so stop
            # instead of requesting further pages forever.
            return None

        address = address_pattern.findall(text)
        for shop_name in shop_names:
            rank += 1
            if keyword in str(shop_name):
                print(shop_name, "排名在:{},定位地址:{}".format(rank, address))
                return rank
        page += 1

if __name__ == "__main__":
    # Script entry point: look up the ranking once for every identifier in
    # the module-level urllist.
    for shop_list_id in urllist:
        chapaiming(shop_list_id)

页: [1]

运维网's Archiver

用 Python 爬虫爬取百度外卖店铺排名