def getAppUrl(page):
    """Return the href of the "apps" page link found in *page*.

    *page* is parsed as XML with ``minidom.parseString``; parse errors are
    not caught here and propagate to the caller.  The link is looked up
    among the ``<a>`` elements of the fourth ``<div>`` below the first
    ``<body>``, and the first href containing ``'apps.do'`` is returned.

    When no such link exists -- including when the page has no ``<body>``
    or fewer than four ``<div>`` elements -- a "link not found" message is
    printed and ``None`` is returned.
    """
    doc = minidom.parseString(page)
    try:
        # Positional lookup: the link is expected in the 4th <div> of <body>.
        appDiv = doc.getElementsByTagName('body')[0].getElementsByTagName('div')[3]
        anchors = appDiv.getElementsByTagName('a')
    except IndexError:
        # Page layout differs from what is expected; treat it exactly like
        # "link not found" instead of crashing with an IndexError.
        anchors = []
    for a in anchors:
        url = a.getAttribute('href')
        # getAttribute yields an empty string when the attribute is absent.
        if url and 'apps.do' in url:
            return url
    # The UTF-8 literal is re-encoded as GBK before printing
    # (presumably for a GBK console -- confirm against the target platform).
    print('没有找到“应用”页面的链接'.decode('utf-8').encode('gbk'))
    return None
def getFarmUrl(page):
    """Return the href of the farm application link found in *page*.

    *page* is parsed as XML with ``minidom.parseString``; parse errors are
    not caught here and propagate to the caller.  The link is looked up
    among the ``<a>`` elements of the sixth ``<div>`` below the first
    ``<body>``, and the first href containing ``'appid=53429'`` is
    returned.

    When no such link exists -- including when the page has no ``<body>``
    or fewer than six ``<div>`` elements -- a "link not found" message is
    printed and ``None`` is returned.
    """
    doc = minidom.parseString(page)
    try:
        # Positional lookup: the link is expected in the 6th <div> of <body>.
        farmDiv = doc.getElementsByTagName('body')[0].getElementsByTagName('div')[5]
        anchors = farmDiv.getElementsByTagName('a')
    except IndexError:
        # Page layout differs from what is expected; treat it exactly like
        # "link not found" instead of crashing with an IndexError.
        anchors = []
    for a in anchors:
        url = a.getAttribute('href')
        # getAttribute yields an empty string when the attribute is absent.
        if url and 'appid=53429' in url:
            return url
    # The UTF-8 literal is re-encoded as GBK before printing
    # (presumably for a GBK console -- confirm against the target platform).
    print('没有找到“人人农场”页面的链接'.decode('utf-8').encode('gbk'))
    return None
def harvestFarm(user, passwd):
    """Log in as *user* and follow every "harvest" link of that farm.

    Steps, in order:

    1. ``login(user, passwd)`` supplies the page passed to ``getAppUrl``.
    2. The resulting URL is fetched with ``visitUrl`` and passed to
       ``getFarmUrl``.
    3. The farm page is scanned with ``TargetsParser`` for action pages.
    4. Each action page is fetched and scanned with ``HarvestLinkParser``;
       when a harvest link is present it is visited.

    Progress messages are printed at the start and at the end.  If either
    the apps link or the farm link cannot be found, the function returns
    early (the lookup helper has already printed why) instead of handing
    ``None`` to ``visitUrl``.
    """
    # UTF-8 literals are re-encoded as GBK before printing; *user* is
    # concatenated as-is between the two encoded fragments.
    print('收获'.decode('utf-8').encode('gbk') + user +
          '的农场……'.decode('utf-8').encode('gbk'))

    appurl = getAppUrl(login(user, passwd))
    if not appurl:
        # getAppUrl returns None when the link is missing.
        return
    farmurl = getFarmUrl(visitUrl(appurl))
    if not farmurl:
        # getFarmUrl returns None when the link is missing.
        return

    tp = TargetsParser()
    tp.feed(visitUrl(farmurl))
    for url in tp.getTargets():
        hlp = HarvestLinkParser()
        hlp.feed(visitUrl(url))
        # An empty string means the page offered no harvest link.
        harvestLink = hlp.getHarvestLink()
        if harvestLink:
            visitUrl(harvestLink)
    print('收工离开此农场'.decode('utf-8').encode('gbk'))
class TargetsParser(HTMLParser):
    """Collect absolute URLs of farm action pages linked from fed HTML.

    Every ``<a>`` start tag whose ``href`` contains one of
    ``ACTION_PAGES`` contributes ``BASE_URL + href`` to the result list,
    in document order.  Anchors without an ``href``, or with an ``href``
    that has no value, are ignored.
    """

    # Prefix prepended verbatim to every collected href.
    BASE_URL = 'http://mapp.renren.com'
    # An href is collected when it contains any of these substrings.
    ACTION_PAGES = ('myCropAction.php', 'myTreeAction.php',
                    'myAnimalAction.php', 'myMachineAction.php')

    def __init__(self):
        self.targets = []
        # Explicit base-class call, matching the file's existing style.
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Record the href of an ``<a>`` tag when it names an action page."""
        if tag != 'a':
            return
        for key, value in attrs:
            # A valueless attribute (``<a href>``) is reported with a None
            # value; a substring test on None would raise TypeError.
            if key != 'href' or not value:
                continue
            if any(page in value for page in self.ACTION_PAGES):
                self.targets.append(self.BASE_URL + value)

    def getTargets(self):
        """Return the list of collected URLs (the live list, not a copy)."""
        return self.targets
class HarvestLinkParser(HTMLParser):
    """Find the "harvest all" link in fed HTML.

    Every ``<a>`` start tag whose ``href`` contains ``HARVEST_PAGE`` sets
    the result to ``BASE_URL + href``; when several anchors match, the last
    one wins.  The result stays ``''`` when nothing matches.  Anchors
    without an ``href``, or with an ``href`` that has no value, are
    ignored.
    """

    # Prefix prepended verbatim to the matched href.
    BASE_URL = 'http://mapp.renren.com'
    # Substring identifying the harvest link; the comma is part of the
    # marker and is kept exactly as the original code matched it.
    HARVEST_PAGE = 'wap,reapAllAction.php'

    def __init__(self):
        self.link = ''
        # Explicit base-class call, matching the file's existing style.
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Remember the href of an ``<a>`` tag when it is the harvest link."""
        if tag != 'a':
            return
        for key, value in attrs:
            # A valueless attribute (``<a href>``) is reported with a None
            # value; a substring test on None would raise TypeError.
            if key == 'href' and value and self.HARVEST_PAGE in value:
                self.link = self.BASE_URL + value

    def getHarvestLink(self):
        """Return the harvest URL, or ``''`` when none has been seen."""
        return self.link
if __name__ == '__main__':
    # Script entry point: harvest the farm of every configured account.
    # Each entry is a (login name, password) pair passed to harvestFarm.
    userList = [
        ('email', 'password'),
    ]
    for account, secret in userList:
        harvestFarm(account, secret)