#coding=utf-8
import time

import requests
from bs4 import BeautifulSoup
import pymongo

client = pymongo.MongoClient('localhost', 27017)
shujuku = client['rss']
mongo_hostloc = shujuku['loc']


# Fetch the newest Hostloc threads
def get_new_loc():
    response = requests.get(
        'http://www.hostloc.com/forum.php?mod=forumdisplay&fid=45'
        '&filter=author&orderby=dateline'
    )
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    # Select the thread-title links once, outside the loop
    ut = soup.select('#threadlisttableid > tbody > tr > th > a.s.xst')

    list_data = []
    for i in range(2, 12):
        title = ut[i].get_text()
        url_loc = 'http://www.hostloc.com/' + ut[i].get('href')
        data = {
            '_id': url_loc[52:58],  # thread id sliced out of the URL
            'title': title,
            'url': url_loc,
        }
        # Threads are ordered newest-first, so stop at the first one
        # that is already in the database
        if not mongo_hostloc.find_one({'_id': data['_id']}):
            list_data.append(data)
        else:
            break

    # Insert in reverse order so the oldest new thread is stored first
    list_data.reverse()
    for data in list_data:
        mongo_hostloc.insert_one(data)
        # Send the notification
        push_vx(data)

    # Trim the collection so it keeps only the latest 10 records
    count = mongo_hostloc.count_documents({})
    if count > 10:
        for data2 in mongo_hostloc.find().limit(count - 10):
            mongo_hostloc.delete_one({'_id': data2['_id']})


# Push a WeChat notification via PushBear (ServerChan)
def push_vx(data):
    post_data = {
        'text': data['title'],
        'desp': data['url'],
        'sendkey': 'your ServerChan sendkey',
    }
    requests.post('https://pushbear.ftqq.com/sub', data=post_data)
    time.sleep(1)


get_new_loc()
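
# Usage note (an assumption, not stated in the original): the script runs a
# single check and exits, so it is presumably scheduled externally, e.g. via
# cron. A minimal in-process alternative would be a polling loop like the
# commented sketch below; the 60-second interval is arbitrary.
#
# if __name__ == '__main__':
#     while True:
#         try:
#             get_new_loc()
#         except Exception as exc:  # keep polling despite network/parse errors
#             print('check failed:', exc)
#         time.sleep(60)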