接上篇《从头开始搭建一个爬虫网站》,本篇对网站进行一些优化,并添加一些功能。
先来添加个百度统计吧。
Jinja2 模板引擎
Flask-Bootstrap
<script>
// Baidu analytics loader: keeps (or creates) the global _hmt queue, then
// injects the hm.js tracker script ahead of the first <script> on the page.
var _hmt = _hmt || [];
(function () {
  var trackerTag = document.createElement("script");
  trackerTag.src = "//hm.baidu.com/hm.js?436cb9b289c0af4b9ed90e2d2e944cb7";

  // Use the first existing <script> element as the insertion anchor.
  var anchorTag = document.getElementsByTagName("script")[0];
  anchorTag.parentNode.insertBefore(trackerTag, anchorTag);
})();
</script>
$ vim hello.py
app = Flask(__name__)
# BOOTSTRAP_SERVE_LOCAL is a Flask-Bootstrap setting; set it once, before the
# extension is initialised. (The original assigned it a second time after
# Bootstrap(app), which was redundant.)
# NOTE(review): per the Flask-Bootstrap docs this serves the bundled local
# static files rather than CDN copies; confirm for the installed version.
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
Bootstrap(app)
添加定时脚本
$ vim bin/crawl.sh
#!/usr/bin/env bash
#
# Runs the Scrapy spider "joke" from the project's tutorial/ directory.
#
# Manual run:   sh bin/crawl.sh
# Permissions:  chmod -R 755 /opt/www/crawl/bin/crawl.sh
# Cron entry (install with `crontab -e`):
#   0 15 * * * /opt/www/crawl/bin/crawl.sh >> /tmp/crawl.log 2>&1

# Absolute path of the directory holding this script, and its parent directory.
# Quoted so that paths containing spaces survive; abort if it cannot be resolved.
basepath=$(cd "$(dirname "$0")" && pwd) || exit 1
project_dir=$(dirname "$basepath")

# Stop here if tutorial/ is missing instead of running scrapy from the wrong directory.
cd "$project_dir/tutorial" || exit 1

echo "start"

# Load the login environment before invoking scrapy.
# NOTE(review): presumably needed because cron starts with a minimal environment; confirm.
. /etc/profile
. /root/.bash_profile

/usr/local/bin/scrapy crawl joke

echo "finish"
# Create static/joke, then move four directories out of tutorial/ into it.
# NOTE(review): presumably these are the image folders fetched by the crawler; confirm.
mkdir -p static/joke
cd tutorial
for asset_dir in jokejimg upfilesnew UpFilesnew UpFiles; do
    mv "$asset_dir" ../static/joke
done
-- Rewrite URLs embedded in joke.content, applied in this order:
--   1. http://gaoxiao.jokeji.cn      -> /static/joke
--   2. src="http://www.jokeji.cn     -> src="/static/joke
--   3. href="http://www.jokeji.cn    -> href="http://joke.liangcuntu.com
-- The nested REPLACE calls run innermost-first, which is the same order as
-- three separate statements but needs only one pass over the table.
-- The WHERE clause makes the affected rows explicit: only rows that contain
-- at least one of the old URLs are considered.
UPDATE joke
SET content = REPLACE(
        REPLACE(
            REPLACE(content, 'http://gaoxiao.jokeji.cn', '/static/joke'),
            'src="http://www.jokeji.cn', 'src="/static/joke'
        ),
        'href="http://www.jokeji.cn', 'href="http://joke.liangcuntu.com'
    )
WHERE content LIKE '%http://gaoxiao.jokeji.cn%'
   OR content LIKE '%src="http://www.jokeji.cn%'
   OR content LIKE '%href="http://www.jokeji.cn%';
-- Renumber joke.id so that ids follow publication time (created_at ascending).
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)

-- Step 1: staging table with the columns that are copied back and forth.
-- No AUTO_INCREMENT start value is set, so staging ids simply count up in
-- insertion order.
CREATE TABLE `joke_copy` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `title` varchar(255) DEFAULT NULL,
  `category_id` int(11) DEFAULT NULL,
  `content` text,
  `view_count` int(11) DEFAULT NULL COMMENT '浏览量',
  `link` varchar(255) DEFAULT NULL COMMENT '抓取链接',
  `created_at` datetime DEFAULT NULL,
  `updated_at` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP,
  `is_deleted` tinyint(1) DEFAULT '0',
  PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

-- Step 2: copy every row into the staging table in publication order.
-- The old id is the tie-breaker so rows sharing a created_at value keep a
-- deterministic relative order.
INSERT INTO joke_copy (title, category_id, content, view_count, link, created_at, updated_at, is_deleted)
SELECT title, category_id, content, view_count, link, created_at, updated_at, is_deleted
FROM joke
ORDER BY created_at ASC, id ASC;

-- Step 3: intentionally empty the whole table so the ids can be reassigned.
-- NOTE(review): per the MySQL manual, TRUNCATE resets the AUTO_INCREMENT counter; confirm for this server version.
TRUNCATE TABLE joke;

-- Step 4: copy the rows back. Staging ids already encode the
-- (created_at, old id) order, so ordering by them reproduces it exactly.
INSERT INTO joke (title, category_id, content, view_count, link, created_at, updated_at, is_deleted)
SELECT title, category_id, content, view_count, link, created_at, updated_at, is_deleted
FROM joke_copy
ORDER BY id ASC;

-- joke_copy is left in place as a backup; drop it manually once the result
-- has been verified.
1. http://www.html5tricks.com/demo/jquery-bootstrap-dropdown-menu/index.html
2. http://www.html5tricks.com/jquery-bootstrap-dropdown-menu.html
标签: python
发表评论 登录: