Skip to content

Instantly share code, notes, and snippets.

@licsber
Created June 4, 2020 16:10
Show Gist options
  • Select an option

  • Save licsber/67652d7399b7d3b9e7b2050a8c7058c6 to your computer and use it in GitHub Desktop.

Select an option

Save licsber/67652d7399b7d3b9e7b2050a8c7058c6 to your computer and use it in GitHub Desktop.
python写的一个多线程爬虫 用时五分钟
import threading
import requests
import urllib3
import queue
import os
urllib3.disable_warnings()
class Downloader:
    """Download many files concurrently from a "name,url" listing.

    Typical use: call :meth:`download` to queue the entries of a listing
    file, then :meth:`start` to launch the worker threads.

    Attributes:
        path: Directory that downloaded files are written into.
        header: HTTP headers sent with every request.
        q: Queue of pending ``(destination_path, url)`` tuples.
        thread_num: Number of worker threads launched by :meth:`start`.
        max_retries: Extra attempts made for an item after its first
            failed attempt.
        threads: Worker threads launched so far, so callers may ``join``
            them.
    """

    def __init__(self, save_path='down/', headers=None, thread_num=50,
                 max_retries=3):
        """Configure the downloader.

        Args:
            save_path: Directory to store downloads in.
            headers: Request headers; a Safari User-Agent is used when
                omitted.
            thread_num: How many worker threads :meth:`start` spawns.
            max_retries: How many times a failed item is retried before it
                is given up on.
        """
        if headers is None:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) '
                              'AppleWebKit/605.1.15 (KHTML, like Gecko) '
                              'Version/13.1 Safari/605.1.15'
            }
        self.path = save_path
        self.header = headers
        self.q = queue.Queue()
        self.thread_num = thread_num
        self.max_retries = max_retries
        self.threads = []
        # A generator must not be advanced by two threads at once
        # ("ValueError: generator already executing"), so workers take
        # this lock around every next() call on the shared generator.
        self._gen_lock = threading.Lock()

    def download(self, file):
        """Queue every entry of the listing *file* for download.

        Each line is expected to look like ``name,url``. Blank lines and
        lines without both a name and a URL are skipped instead of
        aborting the whole run.

        Args:
            file: Path of the text file holding the listing.
        """
        # makedirs copes with nested save paths, and exist_ok avoids the
        # check-then-create race of exists() + mkdir().
        os.makedirs(self.path, exist_ok=True)
        with open(file, 'r') as f:
            for line in f:
                fields = line.strip().split(',')
                if len(fields) < 2:
                    continue
                name, url = fields[0], fields[1]
                if not name or not url:
                    continue
                # join() works whether or not save_path ends in a separator.
                self.q.put((os.path.join(self.path, name), url))

    def gen(self):
        """Yield queued ``(destination_path, url)`` items until none remain."""
        while True:
            # get_nowait() never blocks, unlike empty() followed by get(),
            # which can hang if another consumer takes the last item in
            # between the two calls.
            try:
                item = self.q.get_nowait()
            except queue.Empty:
                return
            yield item

    def _fetch(self, name, url):
        """Try to download *url* into the file *name*; return True on success."""
        attempts = max(0, self.max_retries) + 1
        last_error = None
        for _ in range(attempts):
            try:
                r = requests.get(url, verify=False, headers=self.header,
                                 timeout=5)
            except Exception as exc:
                # Worker boundary: any request failure only costs one
                # attempt, it must not kill the thread.
                last_error = exc
                continue
            if r.status_code == 200:
                with open(name, 'wb') as f:
                    f.write(r.content)
                return True
            last_error = 'HTTP %s' % r.status_code
        # Out of attempts: report the item instead of retrying forever.
        print(name, url, last_error)
        return False

    def loop(self, gen):
        """Worker body: download items drawn from *gen* until it is exhausted.

        Args:
            gen: Iterator of ``(destination_path, url)`` tuples, possibly
                shared with other worker threads.
        """
        while True:
            with self._gen_lock:
                item = next(gen, None)
            if item is None:
                print(threading.current_thread())
                break
            name, url = item
            # Retries happen in place (bounded by max_retries) rather than
            # by re-queueing: an item re-queued after the shared generator
            # finished would never be picked up again.
            self._fetch(name, url)

    def start(self):
        """Launch ``thread_num`` worker threads sharing one item generator.

        Returns immediately; the started threads are kept in
        ``self.threads``.
        """
        gen = self.gen()
        for _ in range(self.thread_num):
            t = threading.Thread(target=self.loop, args=(gen,))
            t.start()
            self.threads.append(t)
if __name__ == '__main__':
    # Script entry point: queue every "name,url" entry listed in img.txt,
    # then launch the worker threads that perform the downloads.
    downloader = Downloader()
    downloader.download('img.txt')
    downloader.start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment