Two Python code examples of multithreaded scraping
    This article presents two Python multithreaded scraping code examples using the threading, Queue, urllib2, and MySQLdb modules.
    Example 1:
    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    import threading
    import Queue
    import sys
    import urllib2
    import re
    import MySQLdb
    #
    # Database settings
    #
    db_host = '127.0.0.1'
    db_user = 'xxxx'
    db_passwd = 'xxxxxxxx'
    db_name = 'xxxx'
    #
    # Variables
    #
    thread_limit = 3
    jobs = Queue.Queue(5)
    singlelock = threading.Lock()
    info = Queue.Queue()
    def workerbee(inputlist):
        for x in xrange(thread_limit):
            print 'thread {0} started.'.format(x)
            t = spider()
            t.start()
        for i in inputlist:
            try:
                jobs.put(i, block=True, timeout=5)
            except:
                singlelock.acquire()
                print 'the queue is full!'
                singlelock.release()
        # wait for the threads to finish
        singlelock.acquire()        # acquire the lock so we can print
        print 'waiting for threads to finish.'
        singlelock.release()        # release the lock
        jobs.join()              # this call waits for all queued jobs to be processed.
        # while not jobs.empty():
        #   print jobs.get()
    def gettitle(url, time=10):
        response = urllib2.urlopen(url, timeout=time)
        html = response.read()
        response.close()
        reg = r'<title>(.*?)</title>'
        title = re.compile(reg).findall(html)
        # title = title[0].decode('gb2312', 'replace').encode('utf-8')
        title = title[0]
        return title
    class spider(threading.Thread):
        def run(self):
            while 1:
                try:
                    job = jobs.get(True, 1)
                    singlelock.acquire()
                    title = gettitle(job[1])
                    info.put([job[0], title], block=True, timeout=5)
                    # print 'this {0} is {1}'.format(job[1], title)
                    singlelock.release()
                    jobs.task_done()
                except:
                    break
    if __name__ == '__main__':
        con = None
        urls = []
        try:
            con = MySQLdb.connect(db_host, db_user, db_passwd, db_name)
            cur = con.cursor()
            cur.execute('select id,url from `table_name` where `status`=0 limit 10')
            rows = cur.fetchall()
            for row in rows:
                # print row
                urls.append([row[0], row[1]])
            workerbee(urls)
            while not info.empty():
                print info.get()
        finally:
            if con:
                con.close()
    Example 2:
    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    # filename: robot.py
    import threading, Queue, sys, urllib2, re
    #
    # Variables
    #
    thread_limit = 3        # number of worker threads
    jobs = Queue.Queue(5)      # job queue of length 5
    singlelock = threading.Lock()    # a thread lock, to keep output from interleaving
    urls = ['http://xxx.com/w/n/2013-04-28/1634703505.shtml','http://xxx.com/w/n/2013-04-28/1246703487.shtml','http://xxx.com/w/n/2013-04-28/1028703471.shtml','http://xxx.com/w/n/2013-04-27/1015703426.shtml','http://xxx.com/w/n/2013-04-26/1554703373.shtml','http://xxx.com/w/n/2013-04-26/1512703346.shtml','http://xxx.com/w/n/2013-04-26/1453703334.shtml','http://xxx.com/w/n/2013-04-26/1451703333.shtml','http://xxx.com/w/n/2013-04-26/1445703329.shtml','http://xxx.com/w/n/2013-04-26/1434703322.shtml','http://xxx.com/w/n/2013-04-26/1433703321.shtml','http://xxx.com/w/n/2013-04-26/1433703320.shtml','http://xxx.com/w/n/2013-04-26/1429703318.shtml','http://xxx.com/w/n/2013-04-26/1429703317.shtml','http://xxx.com/w/n/2013-04-26/1409703297.shtml','http://xxx.com/w/n/2013-04-26/1406703296.shtml','http://xxx.com/w/n/2013-04-26/1402703292.shtml','http://xxx.com/w/n/2013-04-26/1353703286.shtml','http://xxx.com/w/n/2013-04-26/1348703284.shtml','http://xxx.com/w/n/2013-04-26/1327703275.shtml','http://xxx.com/w/n/2013-04-26/1239703265.shtml','http://xxx.com/w/n/2013-04-26/1238703264.shtml','http://xxx.com/w/n/2013-04-26/1231703262.shtml','http://xxx.com/w/n/2013-04-26/1229703261.shtml','http://xxx.com/w/n/2013-04-26/1228703260.shtml','http://xxx.com/w/n/2013-04-26/1223703259.shtml','http://xxx.com/w/n/2013-04-26/1218703258.shtml','http://xxx.com/w/n/2013-04-26/1202703254.shtml','http://xxx.com/w/n/2013-04-26/1159703251.shtml','http://xxx.com/w/n/2013-04-26/1139703233.shtml']
    def workerbee(inputlist):
      for x in xrange(thread_limit):
        print 'thread {0} started.'.format(x)
        t = spider()
        t.start()
      for i in inputlist:
        try:
          jobs.put(i, block=True, timeout=5)
        except:
          singlelock.acquire()
          print 'the queue is full!'
          singlelock.release()
      # wait for the threads to finish
      singlelock.acquire()    # acquire the lock so we can print
      print 'waiting for threads to finish.'
      singlelock.release()    # release the lock
      jobs.join()       # this call waits for all queued jobs to be processed.
      # while not jobs.empty():
      #  print jobs.get()
    def gettitle(url, time=10):
      response = urllib2.urlopen(url, timeout=time)
      html = response.read()
      response.close()
      reg = r'<title>(.*?)</title>'
      title = re.compile(reg).findall(html)
      title = title[0].decode('gb2312', 'replace').encode('utf-8')
      return title
    class spider(threading.Thread):
      def run(self):
        while 1:
          try:
            job = jobs.get(True, 1)
            singlelock.acquire()
            title = gettitle(job)
            print 'this {0} is {1}'.format(job, title)
            singlelock.release()
            jobs.task_done()
          except:
            break
    if __name__ == '__main__':
      workerbee(urls)
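    Both examples target Python 2 (Queue, urllib2, xrange, print statements). On Python 3 the same worker/queue pattern can be sketched as below; this is a minimal illustration, not the original code, and the network fetch is replaced with in-memory HTML strings (an assumption made here so the sketch runs offline) while keeping the same regex title extraction:

    ```python
    # Python 3 sketch of the bounded-queue worker pattern from the examples above.
    import queue
    import re
    import threading

    THREAD_LIMIT = 3
    jobs = queue.Queue(5)    # bounded job queue, as in the original
    results = queue.Queue()  # collected [id, title] pairs

    def gettitle(html):
        # same <title> regex extraction as the original gettitle()
        m = re.search(r'<title>(.*?)</title>', html)
        return m.group(1) if m else ''

    def spider():
        while True:
            try:
                job = jobs.get(True, 1)  # wait up to 1s for work, then exit
            except queue.Empty:
                break
            results.put([job[0], gettitle(job[1])])
            jobs.task_done()

    def workerbee(inputlist):
        for _ in range(THREAD_LIMIT):
            threading.Thread(target=spider).start()
        for item in inputlist:
            jobs.put(item, block=True, timeout=5)
        jobs.join()  # returns once every queued job has been task_done()'d

    # Hypothetical stand-in data for the DB rows / URL list of the originals.
    pages = [[1, '<title>first</title>'], [2, '<title>second</title>']]
    workerbee(pages)
    titles = sorted(results.queue)  # worker completion order is nondeterministic
    print(titles)
    ```

    The design point carries over unchanged: a bounded `Queue` provides backpressure between the producer and the workers, and `task_done()`/`join()` replaces manual thread bookkeeping when waiting for all work to finish.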