iyuytre 发表于 2016-8-2 09:28:48

用python爬虫爬取百度外卖店铺排名



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
# encoding: utf-8

"""
@version: ??
@author: phpergao
@license: Apache Licence
@file: baidu_paiming.py
@time: 2016/8/1 11:10
"""

import requests,re,urllib,codeop,urllib.request,nturl2path,macurl2path

# Identifiers that chapaiming() substitutes into the shop-list URL, one per
# location. The trailing labels are transliterated from the original Chinese
# notes and are presumably the place each id stands for.
urllist = [
    "f7a2bee997ef68e8",  # Liying
    "3b246a0864597e50",  # Suifeng
    "0ebf88697141f32f",  # Guancheng
    "eff209d4a7f538ca",  # Ligang
    "57f9e38e087acf61",  # Goushu (literally "book purchase")
]
def chapaiming(urllist, keyword='72', max_pages=100, opener=None):
    """Find and print the rank of the first shop whose name contains *keyword*.

    Shop-list pages (40 entries each) are requested one after another for the
    given location id, and shop names are pulled out of the raw response text
    with a regular expression. Shops are counted in the order they appear,
    starting from 1 on the first page, until a name containing *keyword* is
    found.

    Args:
        urllist: A single location id string that is substituted into the
            shop-list URL. (Despite the name it is one id, not a list; the
            name is kept for backward compatibility.)
        keyword: Substring that identifies the shop of interest.
        max_pages: Upper bound on the number of pages requested, so that a
            shop that is not listed cannot cause an endless loop.
        opener: Optional object with an ``open(url)`` method returning a
            closable, readable response usable as a context manager (for
            example a ``urllib.request.OpenerDirector``). When omitted, a new
            opener that sends a browser-like User-Agent header is built.

    Returns:
        The 1-based rank of the first matching shop, or ``None`` when no shop
        matched before the listing ran out or *max_pages* was reached.
    """
    if opener is None:
        # Pretend to be a regular browser.
        User_Agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
        opener = urllib.request.build_opener()
        # addheaders must be a list of (name, value) tuples.
        opener.addheaders = [('User-Agent', User_Agent)]

    rank = 0  # number of shops seen so far across all pages
    for page in range(1, max_pages + 1):
        url = "http://waimai.baidu.com/waimai/shoplist/{}?display=json&page={}&count=40".format(urllist, page)
        # Close the response as soon as the payload has been read.
        with opener.open(url) as response:
            # 'unicode_escape' turns \uXXXX sequences in the payload into
            # characters; this presumes the body is otherwise plain ASCII.
            ret = response.read().decode('unicode_escape')

        shop_names = re.findall(r'''"shop_name":"(.*?)","shop_announcement":''', ret)
        address = re.findall(r'''"poi_address":"(.*?)"},"sortby":''', ret)

        if not shop_names:
            # An empty page means the listing is exhausted: stop searching.
            break

        for shop_name in shop_names:
            rank += 1
            if keyword in str(shop_name):
                print(shop_name,"排名在:{},定位地址:{}".format(rank,address))
                return rank

    return None




if __name__ == "__main__":
    # Run one ranking lookup per configured location id.
    for location_id in urllist:
        chapaiming(location_id)





页: [1]
查看完整版本: 用python爬虫爬取百度外卖店铺排名