最近写了一个批量下载网页图片的类。下面直接给出代码,演示如何对类中的方法进行多线程调用:
import os
import re
import threading
import time

try:  # Python 2
    import urllib2
    from urlparse import urljoin
except ImportError:  # Python 3: the same APIs live under urllib.*
    import urllib.request as urllib2
    from urllib.parse import urljoin
class DownPic:
    """Batch-download the .jpg/.gif/.png images referenced by a web page.

    ``saveimgs`` downloads the images one after another; ``thst`` spreads the
    same downloads over several threads.  Images are saved as ``1.jpg``,
    ``2.png``, ... numbered by their position in the page.
    """

    # ``src=`` + optional opening quote + a value ending in an image suffix.
    # The value may not contain quotes, whitespace or angle brackets, so a
    # single match can never run across several tags on the same line (the
    # previous ``.+`` pattern did exactly that).
    _IMG_SRC_RE = re.compile(r'''src=["']?[^"'\s<>]+\.(?:jpg|gif|png)''')

    def __init__(self):
        # Some servers refuse requests that carry no browser-like User-Agent.
        self.user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
        self.headers = {'User-Agent': self.user_agent}

    def geturls(self, url):
        """Return the image ``src=`` fragments found in the page at *url*.

        Each entry keeps its leading ``src=`` and optional opening quote
        (for example ``src="/img/a.png``), the same shape this method has
        always returned, so callers that slice the prefix off keep working.

        Returns an empty list when the page cannot be opened.
        """
        try:
            request = urllib2.Request(url, headers=self.headers)
            fp = urllib2.urlopen(request)
        except Exception:
            # Best effort: report and carry on instead of falling through to
            # ``fp.read()`` with ``fp`` undefined.
            print('cannot open the URL')
            return []
        try:
            html = fp.read()
        finally:
            fp.close()
        if not isinstance(html, str):
            # Python 3 hands back bytes; the pattern above is a text pattern.
            html = html.decode('utf-8', 'replace')
        return self._IMG_SRC_RE.findall(html)

    @staticmethod
    def _absolute_url(page_url, src_fragment):
        """Turn one ``geturls`` fragment into an absolute image URL."""
        raw = src_fragment[len('src='):].lstrip('"\'')
        # urljoin handles absolute, root-relative, page-relative and
        # protocol-relative references alike.
        return urljoin(page_url, raw)

    def _plan_downloads(self, url, filepath):
        """Return ``[(image_url, target_path), ...]`` for the page at *url*."""
        jobs = []
        for number, fragment in enumerate(self.geturls(url), 1):
            image_url = self._absolute_url(url, fragment)
            # The pattern guarantees a 4-character suffix (.jpg/.gif/.png).
            target = os.path.join(filepath, str(number) + image_url[-4:])
            jobs.append((image_url, target))
        return jobs

    @staticmethod
    def _ensure_dir(path):
        """Create the output directory *path* if it does not exist yet."""
        if not os.path.isdir(path):
            os.makedirs(path)

    def _download(self, image_url, target_path):
        """Fetch *image_url* into *target_path*; return True on success."""
        print(image_url)
        try:
            fp = urllib2.urlopen(image_url)
            try:
                data = fp.read()
            finally:
                fp.close()
        except Exception:
            # Skip this image; never write undefined or stale data.
            print('异常图片地址:' + image_url)
            return False
        with open(target_path, 'wb') as out:
            out.write(data)
        return True

    def _download_all(self, jobs):
        """Run ``_download`` for every ``(image_url, target_path)`` in *jobs*."""
        for image_url, target_path in jobs:
            self._download(image_url, target_path)

    def saveimgs(self, url, filepath):
        """Download every image on the page at *url* into *filepath*.

        Images that cannot be fetched are reported and skipped.

        Returns 1 once all images have been attempted.
        """
        jobs = self._plan_downloads(url, filepath)
        if jobs:
            self._ensure_dir(filepath)
        self._download_all(jobs)
        return 1

    def thst(self, url, filepath, num_threads=10):
        """Download the images on the page at *url* using several threads.

        The page is fetched once and the image list is split between the
        workers, so each image is downloaded exactly once and no two threads
        write the same file.  (Previously every thread downloaded the whole
        page's images to the same file names.)

        *num_threads* is an upper bound; no more threads are started than
        there are images.
        """
        jobs = self._plan_downloads(url, filepath)
        if not jobs:
            return
        self._ensure_dir(filepath)
        num_threads = max(1, min(num_threads, len(jobs)))
        # Stride slices give every worker a disjoint share; with no shared
        # mutable state there is nothing to lock.
        workers = [
            threading.Thread(target=self._download_all,
                             args=(jobs[start::num_threads],))
            for start in range(num_threads)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
if __name__ == "__main__":
    # The time used to be passed as a second print argument, which left the
    # "%s" placeholder in the output; format it into the message instead.
    print("程序执行开始时间:%s" % time.ctime())
    pics = DownPic()
    pics.thst("http://news.baidu.com/", r"c:\123")
    # Single-threaded alternative (returns 1 when finished):
    #     pics.saveimgs("http://news.baidu.com/", r"c:\123")
    print("程序执行结束时间:%s" % time.ctime())
- 本文固定链接: http://ttfde.top/index.php/post/307.html
- 转载请注明: admin 于 TTF的家园 发表
《本文》有 0 条评论