最近写了一个批量下载网页图片的类,下面演示如何用多线程调用类中的方法,直接给出代码:
# -*- coding: utf-8 -*-
# Batch-download the images referenced by a web page, optionally spreading
# the downloads over several threads.  Runs on Python 2 and Python 3.
import os
import re
import threading
import time
from contextlib import closing

try:  # Python 2 module names
    import httplib
    import urllib2
    from urlparse import urljoin
except ImportError:  # Python 3: same APIs under their new module names
    import http.client as httplib
    import urllib.request as urllib2
    from urllib.parse import urljoin

# Matches a ``src=`` attribute whose value ends in .jpg/.gif/.png.  The value
# is bounded by quotes, whitespace and angle brackets so that two images on
# the same line are never merged into a single match.
_IMG_SRC_RE = re.compile(br'''src=["']?[^"'\s<>]+\.(?:jpg|gif|png)''')

# Everything a failed fetch is expected to raise: URLError/HTTPError and
# socket errors are IOError subclasses, malformed URLs raise ValueError.
_FETCH_ERRORS = (IOError, ValueError, httplib.HTTPException)


def _to_native_str(raw):
    """Return *raw* as the interpreter's native ``str`` type.

    On Python 2 the bytes read from the network already are ``str``; on
    Python 3 they are decoded as latin-1, which cannot fail on any byte.
    """
    return raw if isinstance(raw, str) else raw.decode('latin-1')


class DownPic:
    """Downloader for the .jpg/.gif/.png images referenced by a web page."""

    def __init__(self):
        self.user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
        self.headers = {'User-Agent': self.user_agent}

    def _fetch(self, url):
        """Return the raw bytes served at *url*, always closing the response."""
        request = urllib2.Request(url, headers=self.headers)
        with closing(urllib2.urlopen(request)) as response:
            return response.read()

    def geturls(self, url):
        """Return the image ``src=`` fragments found in the page at *url*.

        Each item is the raw matched text (``src=``, an optional opening
        quote, then the link), in page order.  If the page cannot be opened
        a message is printed and an empty list is returned.
        """
        try:
            page = self._fetch(url)
        except _FETCH_ERRORS:
            print('cannot open the URL')
            return []
        return [_to_native_str(match) for match in _IMG_SRC_RE.findall(page)]

    def _plan(self, url, filepath):
        """Create *filepath* if needed and return ``[(index, image_url), ...]``.

        ``index`` starts at 1 and follows page order; it becomes the output
        file name, so every image has a unique target even when the jobs are
        split across threads.
        """
        if not os.path.isdir(filepath):
            os.makedirs(filepath)
        jobs = []
        for index, fragment in enumerate(self.geturls(url), 1):
            link = fragment[len('src='):].lstrip('"\'')
            # urljoin leaves absolute links untouched and resolves relative
            # ones against the page URL.
            jobs.append((index, urljoin(url, link)))
        return jobs

    def _save_jobs(self, jobs, filepath):
        """Download every ``(index, image_url)`` in *jobs* into *filepath*.

        An image that cannot be fetched is reported and skipped; nothing is
        written for it.
        """
        for index, link in jobs:
            print(link)
            try:
                data = self._fetch(link)
            except _FETCH_ERRORS:
                print('异常图片地址:' + link)
                continue
            filename = os.path.join(filepath, str(index) + os.path.splitext(link)[1])
            with open(filename, 'wb') as out:
                out.write(data)

    def saveimgs(self, url, filepath):
        """Download all images of the page at *url* into directory *filepath*.

        Files are named ``<position on page><extension>``.  Runs on the
        calling thread and returns 1 when finished.
        """
        self._save_jobs(self._plan(url, filepath), filepath)
        return 1

    def thst(self, url, filepath, thread_num=10):
        """Download all images of the page at *url* using worker threads.

        The page is fetched once and the images are dealt out round-robin to
        at most *thread_num* threads, so each image is downloaded exactly
        once and no two threads write the same file.  Blocks until all
        workers have finished.
        """
        jobs = self._plan(url, filepath)
        worker_count = min(max(1, thread_num), len(jobs))
        workers = [
            threading.Thread(target=self._save_jobs,
                             args=(jobs[start::worker_count], filepath))
            for start in range(worker_count)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()


if __name__ == "__main__":
    print("程序执行开始时间:%s" % time.ctime())
    pics = DownPic()
    pics.thst("http://news.baidu.com/", r"c:\123")
    # pics.saveimgs(url, filepath) does the same work on a single thread
    # and returns 1 when it is done.
    print("程序执行结束时间:%s" % time.ctime())
- 本文固定链接: http://ttfde.top/index.php/post/307.html
- 转载请注明: admin 于 TTF的家园 发表
《本文》有 0 条评论