loyalxuan 发表于 2015-12-1 12:06:17

python简单爬虫定时推送同花顺直播及荐股至邮箱


1、初衷:实践
2、技术:python requests Template
3、思路:根据直播页面获取评价最高的前十博主,定时爬取最新的消息和实战股票
4、思路:python 编辑简单邮件html模板
5、难点:邮件html模板的设计,还需要邮箱支持爬虫文件
'''
# coding: utf-8
import requests
from lxml import etree
from sendmail import sendmail
import sys, time
from string import Template
reload(sys)
sys.setdefaultencoding('utf-8')

1 定义类
class thszb:
    """Scrape the 10jqka live-broadcast pages and e-mail a digest.

    On construction the index page at ``url`` is fetched, the bloggers found
    there are ranked by their ``isAgree`` count, and the latest broadcast
    message of the ten highest-ranked bloggers is collected.  ``send_mail``
    then renders those messages, plus each blogger's stock table, into HTML
    and hands the result to the ``sendmail`` helper.

    This is Python 2 code: the byte-string ``.decode(...)`` calls depend on
    the module having switched the default encoding to UTF-8.

    NOTE(review): the published listing lost its list subscripts.  Every
    "first XPath result" access and the column positions in
    ``get_zb_stock`` are reconstructed assumptions -- verify them against
    the live pages.
    """

    # Class-level defaults only; __init__ gives every instance its own lists
    # so that collected results are not shared between instances.
    ths_bz_list = []
    zb_contest_list = []
    stock_list = []

    def __init__(self, url):
        """Store ``url`` and immediately scrape bloggers and their messages."""
        self.url = url
        self.ths_bz_list = []
        self.zb_contest_list = []
        self.stock_list = []
        self.get_ths_bz()
        self.get_zb_content_list()

    @staticmethod
    def _first(values, default=u""):
        """Return the first XPath result, or ``default`` when there is none."""
        return values[0] if values else default

    @staticmethod
    def _gb(text):
        """Re-encode ``text`` as GB18030 bytes (Python 2 idiom)."""
        return text.decode('utf-8').encode('GB18030')

    def get_ths_bz(self):
        """Collect basic information about every blogger on the index page.

        Fills ``self.ths_bz_list`` with one dict per blogger (keys ``id``,
        ``title``, ``url``, ``isAgree``, ``oper``), sorted by ``isAgree`` in
        descending order.
        """
        t_html = etree.HTML(requests.get(self.url).text).xpath('/html/body/div/a')
        tlist = []
        for t in t_html:
            stat_id = str(self._first(t.xpath('@data-statid')))
            client_url = str(self._first(t.xpath('@data-clienturl')))
            d = {}
            # Keep only what follows the last '_' / '=' of the attribute.
            d["id"] = stat_id.rsplit('_', 1)[-1]
            d["title"] = self._gb(self._first(t.xpath('div/strong/text()'))).strip()
            d["url"] = client_url.rsplit('=', 1)[-1]
            d["isAgree"] = int(self._first(t.xpath('div/p/text()'), 0))
            d["oper"] = self.get_zb_stock_url(d["url"])
            tlist.append(d)
        self.ths_bz_list = sorted(tlist, key=lambda s: s["isAgree"], reverse=True)

    def get_zb_content_list(self):
        """Fetch the newest broadcast message of the ten top-ranked bloggers.

        Appends one dict per blogger (keys ``gpurl``, ``title``, ``zbtime``,
        ``zbtext``) to ``self.zb_contest_list`` and returns that list.
        Bloggers whose page has no message block are skipped.
        """
        top_bloggers = self.ths_bz_list[:10]
        print(top_bloggers)
        for bz in top_bloggers:
            t_html = etree.HTML(requests.get(bz["url"]).text).xpath('//*[@id="J_Mlist"]/div')
            if len(t_html) > 0:
                t = t_html[0]
                zbtime = self._first(t.xpath("p/span/text()"))
                zbtext = self._gb(self._first(t.xpath("p/text()"))).strip()
                print("   %s %s:%s" % (bz['oper'], zbtime, zbtext))
                self.zb_contest_list.append(
                    {"gpurl": bz['oper'], "title": bz['title'], "zbtime": zbtime, "zbtext": zbtext})
        return self.zb_contest_list

    def get_zb_stock_url(self, bz_url):
        """Return the blogger's stock-page URL, or ``None`` if the page has none."""
        html = requests.get(bz_url).text
        t_html_gp = etree.HTML(html).xpath('//*[@id="gotracelink"]/@data-iosurl')
        if len(t_html_gp) > 0:
            return t_html_gp[0]
        return None

    def get_zb_stock(self, gp_url):
        """Return the blogger's stock rows, preceded by a header row.

        Each row is a dict with the keys ``code``, ``name``, ``date``,
        ``money`` and ``rate``.  When ``gp_url`` is empty only the header row
        is returned; rows that cannot be parsed are skipped.
        """
        stock_list = [{
            "code": self._gb(u"股票编码"),
            "name": self._gb(u"股票名称"),
            "date": self._gb(u"买入日期"),
            "money": self._gb(u"盈亏金额"),
            "rate": self._gb(u"盈利率"),
        }]
        if not gp_url:
            # get_zb_stock_url() found no link for this blogger.
            return stock_list
        t_html = etree.HTML(requests.get(gp_url).text).xpath('//*[@id="infoTpl"]/ul')
        for t in t_html:
            try:
                # NOTE(review): column positions are assumed (see class doc).
                labels = t.xpath('li/div/text()')
                values = t.xpath('li/text()')
                stock_list.append({
                    "code": labels[0],
                    "name": self._gb(labels[1]),
                    "date": str(values[0]).strip(),
                    "money": str(values[1]).strip(),
                    "rate": str(values[2]).strip(),
                })
            except (IndexError, UnicodeError):
                # Malformed row: best effort, leave it out of the table.
                continue
        return stock_list

    def send_mail(self):
        """Render all collected messages and stock tables and mail them.

        Prints whether the ``sendmail`` helper reported success.
        """
        mymail = sendmail([''])
        tt = Template(mymail.title_template)
        tt_gp = Template(mymail.table_template)
        sections = []
        for zb in self.zb_contest_list:
            rows = []
            for gp in self.get_zb_stock(zb["gpurl"]):
                row = dict(gp)
                # The row template always references ${isBold}; default it so
                # the header row and non-profitable rows are still rendered.
                row.setdefault("isBold", "")
                try:
                    if float(row["money"]) > 0.0:
                        row["isBold"] = 'style="color: #F00; font-weight: bold;"'
                except (TypeError, ValueError):
                    # Non-numeric amount (e.g. the header row): no highlight.
                    pass
                rows.append(tt_gp.substitute(row))
            sections.append(str(tt.substitute(zb)) + "".join(rows) + "</table>")
        s = "".join(sections)
        if mymail.send_mail(u'同花顺直播 %s ' % time.strftime("%Y-%m-%d %H:%M", time.localtime()), " %s" % (s)):
            print("send_mail ok!^_^")
        else:
            print("send_mail fail!~_~")


# The script entry point refers to the class as ``ths_zb``.
ths_zb = thszb
'''
if __name__ == '__main__':
    # Script entry point: scrape the live-broadcast index page and mail the
    # resulting digest.
    ths = thszb("http://t.10jqka.com.cn/m/zhibo/index.html")
    ths.send_mail()
'''发送邮件
import smtplib
from email.mime.text import MIMEText

1 邮件类
class sendmail:
    """Send an HTML e-mail through an SMTP server.

    The class also carries the two ``string.Template`` sources used to
    render the digest: ``title_template`` opens one section (message block
    plus the opening ``<table>`` tag) and ``table_template`` renders one
    table row.
    """

    # Section header; placeholders: $title, $zbtime, $zbtext.  The <table>
    # opened here is closed by the caller.
    title_template = '''
    <div id="J_Mlist">
        <strong style="color:red;">$title $zbtime </strong>
        <div>
            <p>
            $zbtext
            </p>
        </div>
    </div>
    <table width="400" border="1">
    '''

    # One table row; placeholders: ${isBold} (extra <tr> attributes),
    # ${code}, ${name}, ${date}, ${money}, ${rate}.
    table_template = '''
    <tr ${isBold}>
        <td>${code}</td>
        <td>${name}</td>
        <td>${date}</td>
        <td><span class="ping">${money}</span></td>
        <td>${rate}</td>
    </tr>
    '''

    # SMTP settings; the credentials are left blank in the published listing.
    mailto_list = []
    mail_host = "smtp.126.com"
    mail_user = ""
    mail_pass = ""
    mail_postfix = "126.com"

    def __init__(self, mailto_list):
        """Remember the list of recipient addresses."""
        self.mailto_list = mailto_list

    def send_mail(self, sub, content):
        """Send ``content`` as an HTML message with subject ``sub``.

        Returns ``True`` on success.  On any failure the error is printed
        and ``False`` is returned.
        """
        # The subject doubles as the display name of the sender.
        me = sub + "<" + self.mail_user + "@" + self.mail_postfix + ">"
        msg = MIMEText(content, _subtype="html", _charset="gb2312")
        msg["Subject"] = sub
        msg["From"] = me
        msg["To"] = ";".join(self.mailto_list)
        try:
            s = smtplib.SMTP()
            try:
                s.connect(self.mail_host)
                s.login(self.mail_user, self.mail_pass)
                s.sendmail(me, self.mailto_list, msg.as_string())
            finally:
                # Release the connection even when sending fails.
                s.close()
            return True
        except Exception as e:
            print(str(e))
            return False
效果展示

页: [1]
查看完整版本: python简单爬虫定时推送同花顺直播及荐股至邮箱