利用爬虫实现订阅网站最新内容并发邮件通知

2017-4-15 Frank Python

[TOC]

实现功能

通过 Scrapy 抓取网站最新的一条内容,并与 Redis 中保存的上一条记录作对比;如果内容有更新,则把新记录保存到 Redis,并发邮件通知内容已更新。

准备

scrapy
redis

bjjs_spider.py

# -*- coding: utf-8 -*-
# Module setup for the bjjs spider: imports plus two Python 2 specific tweaks.
import scrapy
import sys
# Extend the import search path with the parent of the current working
# directory; presumably this is what makes the ``tutorial`` package importable
# when the spider is started from inside the project -- TODO confirm.
sys.path.append("..")
from tutorial.models.bjjs_redis import BjjsRedis
from tutorial.componets.email_send import Email
# Python 2 only: ``site`` removes ``sys.setdefaultencoding`` at startup, so the
# module is reloaded to get it back. Switching the default codec to UTF-8 lets
# implicit str <-> unicode conversions of non-ASCII text succeed.
reload(sys)
sys.setdefaultencoding( "utf-8" )

'''
# 自住型商品房 内容更新邮件通知
http://www.bjjs.gov.cn/bjjs/fwgl/zzxspzf/index.shtml
'''
class BjjsSpider(scrapy.Spider):
    """Watch the bjjs.gov.cn listing page and e-mail when its top entry changes.

    The newest list entry (title + date) is compared with the value stored in
    Redis under ``bjjs_last_news``. When they differ, a notification e-mail is
    sent and, once the send succeeded, the stored value is replaced.
    """
    name = "bjjs"
    allowed_domains = ["www.bjjs.gov.cn"]
    start_urls = (
        'http://www.bjjs.gov.cn/bjjs/fwgl/zzxspzf/index.shtml',
    )
    # Site root; not referenced anywhere inside this class.
    domains = "http://www.bjjs.gov.cn"

    def parse(self, response):
        """Check the first list entry of ``response`` and notify on change.

        response: the Scrapy response for the listing page in ``start_urls``.
        Returns None; the method only has side effects (Redis write, e-mail,
        progress output on stdout).
        """
        print("crwwl start:--------------------------------------->")
        entries = response.xpath('//div[@class="tzgg_list_box"]/ul/li')
        if not entries:
            # Nothing matched the list selector, so there is nothing to compare.
            return

        # Only the first entry of the list is of interest.
        newest = entries[0]
        titles = newest.xpath('a/text()').extract()
        dates = newest.xpath('span/text()').extract()
        if not titles or not dates:
            # Guard against markup without a link text or date instead of
            # failing with an IndexError.
            print("latest entry has no title or date, skipped")
            return
        title = titles[0]
        fingerprint = title + dates[0]

        # Compare with the entry remembered from the previous run.
        br = BjjsRedis()
        last_news = br.get("bjjs_last_news")
        if isinstance(last_news, bytes):
            # Redis hands back raw bytes; decode explicitly so the comparison
            # does not depend on the interpreter's default encoding.
            last_news = last_news.decode('utf-8')
        if last_news == fingerprint:
            return

        # E-mail notification.
        print("email notice")
        link = u'<a href="%s">查看</a>' % self.start_urls[0]
        # The body is handed over as UTF-8 encoded HTML.
        content = (title + link).encode('utf-8').strip()
        if Email().send_mail(['xxx#qq.com'], "自住型商品房内容更新了", content):
            # Remember the entry only after the mail went out, so a failed
            # send is retried on the next crawl instead of being lost.
            br.save("bjjs_last_news", fingerprint)
            print("send success")
        else:
            print("send fail")

bjjs_redis.py

# Module setup for the Redis helper: imports and project configuration.
import redis
import sys
# Extend the import search path three directory levels up (relative to the
# current working directory); presumably that is where the ``config`` package
# lives -- TODO confirm.
sys.path.append("../../..")
from config.config import Config
# Load the project configuration once at import time; ``configs`` is read by
# BjjsRedis below for the Redis connection settings.
conf = Config()
configs = conf.getConfig()
class BjjsRedis:
    """Thin key/value wrapper around one Redis connection (database 0)."""

    def __init__(self):
        """Open the connection using the ``REDIS`` section of ``configs``."""
        settings = configs.REDIS
        self.conn = redis.Redis(
            host=settings['host'],
            port=settings['port'],
            db=0,
            password=settings['pass'],
        )

    def save(self, key, val, ex=None):
        """Store ``val`` under ``key``; ``ex`` is passed through to ``set``.

        Returns None.
        """
        client = self.conn
        client.set(key, val, ex)

    def get(self, key):
        """Return whatever the connection's ``get`` yields for ``key``."""
        client = self.conn
        return client.get(key)

email_send.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Import smtplib and MIMEText
# http://blog.csdn.net/stonexing5/article/details/6371605
# [Main ways of sending all kinds of e-mail with Python](http://www.cnblogs.com/xiaowuyi/archive/2012/03/17/2404015.html)
# [Example code for sending e-mail with Python (HTML, images, attachments)](http://www.jb51.net/article/34498.htm)
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

import sys
# Python 2 only: reload ``sys`` to regain ``setdefaultencoding`` (removed by
# ``site`` at startup) and make UTF-8 the codec for implicit str <-> unicode
# conversions.
reload(sys)
sys.setdefaultencoding( "utf-8" )
# Extend the import search path three directory levels up (relative to the
# current working directory); presumably that is where the ``config`` package
# lives -- TODO confirm.
sys.path.append("../../..")
from config.config import Config
# Load the project configuration once at import time; ``configs.MAIL`` is read
# by Email.send_mail below.
conf = Config()
configs = conf.getConfig()
class Email():
    """Send a single HTML e-mail through the SMTP server in ``configs.MAIL``."""

    def send_mail(self,to_list,sub,content):
        '''
        Send ``content`` as a UTF-8 HTML message.

        to_list: list of recipient addresses
        sub: subject line
        content: HTML body
        Returns True when the message was handed to the server, False when
        any step raised (the error text is printed).

        Example: send_mail(["aaa#126.com"],"sub","content")
        '''
        # Imported locally so the module's import block stays untouched.
        from email.header import Header

        # Envelope sender, built from the configured account.
        me=configs.MAIL['user']+"<"+configs.MAIL['user']+"@"+configs.MAIL['postfix']+">"
        # HTML body, declared as UTF-8.
        msg = MIMEText(content, 'html', 'utf-8')

        # Wrap the subject so non-ASCII text is emitted as an RFC 2047
        # encoded word instead of raw 8-bit bytes.
        msg['Subject'] = Header(sub, 'utf-8')
        # NOTE(review): the From header is hard-coded and differs from the
        # envelope sender ``me`` -- confirm this is intended.
        msg['From'] = "Frank<frank.feng@liangcuntu.com>"
        # Address lists in headers are comma-separated (RFC 5322), not ';'.
        msg['To'] = ", ".join(to_list)

        s = None
        try:
            s = smtplib.SMTP()
            s.connect(configs.MAIL['host'])
            s.login(configs.MAIL['user'],configs.MAIL['pass'])
            s.sendmail(me, to_list, msg.as_string())
            return True
        except Exception as e:
            # Best-effort send: report the problem and signal failure.
            print(str(e))
            return False
        finally:
            # Always release the connection, including when login or
            # sendmail failed.
            if s is not None:
                s.close()

发表评论 登录

Top