def getTags(self, userid):
    """Return the values of the last three of this user's tags, ranked by weight.

    The tag list is fetched with ``self.AppCli.tags.get(uid=userid)``, sorted
    by each tag's ``'weight'`` entry in descending order, and the final three
    entries of that ordering are kept.  Every value stored in those tags,
    except the weight itself, is collected.

    Args:
        userid: user id passed as ``uid`` to the tags endpoint.

    Returns:
        A list of tag values, or ``None`` when the request raises.
    """
    try:
        tags = self.AppCli.tags.get(uid=userid)
    except Exception:
        # Best effort: report the failure and signal it with None.
        print('get tags failed')
        return None

    # Tags are read as mappings below, so look the weight up by key as well;
    # this works for plain dicts and for dicts that also expose attributes.
    ranked = sorted(tags, key=operator.itemgetter('weight'), reverse=True)

    # NOTE(review): after a descending sort this slice keeps the three
    # lowest-weight tags -- confirm that "last three" is really intended.
    # Slicing already copes with fewer than three tags, so no length check.
    userTags = []
    for tag in ranked[-3:]:
        userTags.extend(tag[key] for key in tag if key != 'weight')
    return userTags
4.获得用户已关注的人:
def getFocus(self, userid):
    """Return the ids of the users that the given user follows.

    Calls ``self.AppCli.friendships.friends.ids.get(uid=userid)`` and returns
    the ``'ids'`` entry of the response.

    Args:
        userid: user id passed as ``uid`` to the friends-ids endpoint.

    Returns:
        The value stored under ``'ids'`` in the response, or ``None`` when
        the request or the lookup raises.
    """
    try:
        # The remote call is the step most likely to fail, so it has to sit
        # inside the guarded region together with the response lookup.
        focus = self.AppCli.friendships.friends.ids.get(uid=userid)
        return focus.get('ids')
    except Exception:
        # Best effort: report the failure and signal it with None.
        print('get focus failed')
        return None
5.对3中获得的用户标签进行分词处理:(之前要写个class进行分词处理,本文最后给出完整源码)
from wordSegmentation import tokenizer

tkr = tokenizer()
# Append every tag, UTF-8 encoded, to the running string in one step, then
# segment the combined string into words.
lstrwords += ''.join(tag.encode('utf-8') for tag in userTags)
words = tkr.parse(lstrwords)
6.根据5中获得的关键词+新浪api中搜索接口最终给出用户未关注但感兴趣的用户:
# Membership is tested once per search hit, so build the lookup set a single
# time.  A missing follow list (None) is treated as "follows nobody" instead
# of raising a TypeError on the first membership test.
followed = set(userFocus or ())

for keyword in words:
    query = keyword.decode('utf-8')
    # Debug output, presumably for a GBK console; 'replace' keeps a character
    # that GBK cannot represent from aborting the whole recommendation run.
    print(query.encode('gbk', 'replace'))

    # Ask the search-suggestion endpoint for up to ten users per keyword.
    searchUsers = self.AppCli.search.suggestions.users.get(q=query, count=10)

    for se_user in searchUsers:
        uid = se_user['uid']
        # Skip users the current user already follows.
        if uid not in followed:
            recommendUsers[uid] = se_user['screen_name'].encode('utf-8')
------ 实际运行:
下面是自己微博的例子,我的标签是:
运行推荐程序后得到的结果为:
红线框中为推荐结果,这些微博用户都是与被推荐用户标签一致并具有较高影响力,同时也是最有可能给用户传递效用较高信息的用户。(图中只标注了部分用户)