#coding=utf-8
import requests
from bs4 import BeautifulSoup
import pymongo
import time
client = pymongo.MongoClient('localhost', 27017)
shujuku = client['rss']
mongo_hostloc = shujuku['loc']
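
# Optional fail-fast check (a sketch; assumes a local mongod on the default
# port). MongoClient connects lazily, so 'ping' forces a connection attempt
# and raises ServerSelectionTimeoutError if MongoDB is unreachable.
client.admin.command('ping')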
# Fetch new hostloc threads
def get_new_loc():
    response = requests.get('http://www.hostloc.com/forum.php?mod=forumdisplay&fid=45&filter=author&orderby=dateline')
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    # Select all thread links once; indices 2-11 skip the pinned threads at the top
    ut = soup.select('#threadlisttableid > tbody > tr > th > a.s.xst')
    list_data = []
    for i in range(2, 12):
        title = ut[i].get_text()
        url_loc = 'http://www.hostloc.com/' + ut[i].get('href')
        data = {
            '_id': url_loc[52:58],  # the thread id ('tid=' digits) sliced out of the URL
            'title': title,
            'url': url_loc
        }
        # Stop at the first thread already in the database: the list is
        # ordered newest-first, so everything after it has been seen before
        if not mongo_hostloc.find_one(data):
            list_data.append(data)
        else:
            break
    # Insert oldest-first so the newest threads end up last
    list_data.reverse()
    for data in list_data:
        mongo_hostloc.insert_one(data)
        # Send the push notification
        push_vx(data)
    # Trim the collection so it keeps only the 10 newest records
    count = mongo_hostloc.count_documents({})
    if count > 10:
        for data2 in mongo_hostloc.find().limit(count - 10):
            mongo_hostloc.delete_one({'_id': data2['_id']})
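
# A sturdier id-extraction sketch (an assumption-laden alternative to the
# fixed url_loc[52:58] slice above: it presumes the links keep a 'tid='
# query parameter, and 'extract_tid' is a hypothetical helper, not part of
# the original script). It survives thread ids longer or shorter than six digits.
import re

def extract_tid(url):
    match = re.search(r'tid=(\d+)', url)
    return match.group(1) if match else url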
# Send a WeChat notification via PushBear
def push_vx(data):
    post_data = {
        'text': data['title'],
        'desp': data['url'],
        'sendkey': 'your PushBear SendKey'  # fill in your own key here
    }
    requests.post('https://pushbear.ftqq.com/sub', data=post_data)
    time.sleep(1)  # throttle back-to-back pushes
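
# A minimal hardening sketch (assumption: a failed push should be logged and
# skipped rather than crash the whole run; 'safe_push_vx' is a hypothetical
# wrapper, not part of the original script):
def safe_push_vx(data):
    try:
        push_vx(data)
    except requests.RequestException as e:
        print('push failed for %s: %s' % (data['url'], e))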
if __name__ == '__main__':
    get_new_loc()
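    # To poll continuously instead of running once (a sketch; the 300-second
    # interval is an arbitrary choice, and an external scheduler such as cron
    # works just as well), replace the call above with:
    #     while True:
    #         get_new_loc()
    #         time.sleep(300)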