结构
生产者生成网址并放入队列
多个消费者从队列中取出网址
1 from queue import Queue
2 import time, threading, requests
3
# Page-URL template for the target site; '{}' receives the page number.
url_base = 'http://www.qiushibaike.com/8hr/page/{}/'
# Request headers; left empty here.
header = {}


def load_data():
    """Return the list of page URLs to crawl.

    The page numbers are fixed (1, 3, 6 and 7); each one is substituted
    into ``url_base`` in that order.
    """
    page_numbers = (1, 3, 6, 7)
    return list(map(url_base.format, page_numbers))
9
# Producer
def produce(q):
    """Put every URL returned by ``load_data()`` on the queue, then return.

    Args:
        q: Queue-like object exposing ``put``. With a bounded
            ``queue.Queue`` the call blocks while the queue is full, which
            throttles the producer to the consumers' pace.
    """
    # Return once everything is queued instead of spinning in an endless
    # loop, so the producer thread can finish and stops burning CPU.
    for url in load_data():
        q.put(url)
18
# Consumer
def consume(q):
    """Take URLs off the queue and report each one until told to stop.

    Every item is printed together with the thread that handled it. A
    ``None`` item is treated as a shutdown signal: the function returns when
    it receives one, so the consuming thread can finish. One ``None`` stops
    exactly one consumer. If no ``None`` is ever queued, the function keeps
    waiting for new items indefinitely.

    Args:
        q: Queue-like object exposing a blocking ``get``.
    """
    # iter(callable, sentinel) keeps calling q.get() and stops as soon as it
    # returns None, giving the loop a clean exit path.
    for download_url in iter(q.get, None):
        # The actual download is left disabled in this demo:
        # requests.get(download_url, headers=header)
        print('thread is {} content is {}'.format(threading.current_thread(), download_url))
25
def main():
    """Start one producer thread and two consumer threads on a shared queue."""
    q = Queue(4)  # bounded queue: at most four URLs wait at any time
    # Create the producer first and the two consumers after it, then start
    # them in that same order.
    workers = [threading.Thread(target=produce, args=[q])]
    workers += [threading.Thread(target=consume, args=[q]) for _ in range(2)]
    for worker in workers:
        worker.start()


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
类
爬虫类需要继承 threading.Thread 类
初始化方法中需要先调用父类的初始化方法(super().__init__())
创建对象后调用 start(),线程启动后会自动执行类中的 run 方法
# class ConsumeSpider(threading.Thread):
#     def __init__(self):
#         super().__init__()
#         pass
#
#     def run(self):
#         pass
#
# c3 = ConsumeSpider()
# c3.start()
协程(coroutine):比线程更轻量的多任务方式。所有协程运行在同一个线程内,由程序自身在多个任务之间协作式地切换,不需要操作系统进行线程上下文切换;在 Python 中可以使用 yield(生成器)实现
1 import time, threading
2
def task_1():
    """Generator task number one.

    Each step prints the '-----1-----' marker with the current thread,
    pauses for one second, then yields so the caller can run another task.
    The generator never finishes on its own.
    """
    marker = '-----1-----'
    while True:
        print(marker, threading.current_thread())
        time.sleep(1)
        yield
8
9
def task_2():
    """Generator task number two.

    Each step prints the '-----2-----' marker with the current thread,
    pauses for one second, then yields so the caller can run another task.
    The generator never finishes on its own.
    """
    marker = '-----2-----'
    while True:
        print(marker, threading.current_thread())
        time.sleep(1)
        yield
15
16
def main():
    """Alternate forever between the two generator tasks.

    Both generators are created up front; each pass of the outer loop
    advances task_1 by one step and then task_2 by one step.
    """
    tasks = (task_1(), task_2())
    while True:
        for task in tasks:
            next(task)


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
27
请使用手机"扫一扫"x