Talk is cheap

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Date : 2018-07-25 00:24:28
# @Author : Simon (simon.xie@codewalker.me)
# @Link : http://www.codewalker.me
# @Version : 1.0.0

import asyncio
import json
import os
import time
from random import randint
from urllib.request import Request, urlopen
import aiofiles

import aiohttp
import async_timeout


async def download_coroutine(url, headers, session):
    """Download ``url`` through ``session`` and save it to disk as a PDF.

    The response body is streamed to the target file in 1 KiB chunks so the
    whole document is never held in memory. The request and the download
    together are bounded by a 10 second timeout.

    Args:
        url: Address of the document to fetch. Its basename, with ``.pdf``
            appended, is used as the local file name.
        headers: HTTP headers sent with the GET request.
        session: Open ``aiohttp.ClientSession`` used to issue the request.

    Returns:
        None.

    Raises:
        asyncio.TimeoutError: If the download does not finish in 10 seconds.
    """
    path = '/path/to/some/where/you/like/'
    filename = path + os.path.basename(url) + '.pdf'
    # async_timeout must be entered with ``async with``; the synchronous
    # ``with`` form is not supported by current releases of the library.
    async with async_timeout.timeout(10):
        # Forward the caller's headers; previously they were silently dropped.
        async with session.get(url, headers=headers) as response:
            async with aiofiles.open(filename, 'wb') as f_handle:
                while True:
                    chunk = await response.content.read(1024)
                    if not chunk:
                        break
                    await f_handle.write(chunk)
    # Leaving the ``session.get`` context already releases the connection,
    # so no explicit ``response.release()`` is needed.
    return None

def _fetch_json(url, headers):
    """Blocking helper: GET ``url`` and return the decoded JSON document."""
    req = Request(url, headers=headers)
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(req) as resp:
        data = resp.read()
    return json.loads(data.decode('utf-8'))


async def url_list(url, headers):
    """Fetch the list of download URLs published by the API at ``url``.

    The API is expected to answer with a UTF-8 JSON object whose ``data``
    key holds the list of URLs.

    Args:
        url: Address of the JSON API endpoint.
        headers: HTTP headers sent with the request.

    Returns:
        The value stored under the ``data`` key of the JSON response.

    Raises:
        urllib.error.URLError: If the request fails.
        KeyError: If the response has no ``data`` key.
    """
    # ``urlopen`` blocks, so run it in the default executor instead of
    # stalling the event loop for the duration of the request.
    loop = asyncio.get_event_loop()
    d = await loop.run_in_executor(None, _fetch_json, url, headers)
    return d['data']

async def run(loop, url_api, headers):
    """Fetch the URL list from ``url_api`` and download every entry concurrently.

    Args:
        loop: Event loop driving this coroutine. Kept for backward
            compatibility with existing callers; the HTTP session binds to
            the running loop on its own.
        url_api: Address of the JSON API that lists the URLs to download.
        headers: HTTP headers used for the API call and for each download.
    """
    urls = await url_list(url_api, headers)
    # Passing ``loop=`` to ClientSession is deprecated in aiohttp; a session
    # created inside a coroutine uses the running loop automatically.
    async with aiohttp.ClientSession() as session:
        tasks = [download_coroutine(url, headers, session) for url in urls]
        await asyncio.gather(*tasks)

def main(url_api):
    """Download every document listed by ``url_api`` and report the time taken.

    Args:
        url_api: Address of the JSON API that lists the URLs to download.
    """
    # perf_counter measures wall-clock time; time.clock() was removed in
    # Python 3.8 and only counted CPU time on most platforms anyway.
    start_time = time.perf_counter()
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gb2312,utf-8',
        'User-Agent': 'Mozilla/5.0(Windows;U;WindowsNT6.1;en-us)AppleWebKit/534.50(KHTML,likeGecko)Version/5.1Safari/534.50',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'Keep-alive',
    }
    # Create the loop explicitly: get_event_loop() without a running loop is
    # deprecated in recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(run(loop, url_api, headers))
    finally:
        # Always release the loop's resources, even when a download fails.
        loop.close()
    elapsed = time.perf_counter() - start_time
    # '%.2f' prints two decimals; the old '%f.2' printed six plus a stray '.2'.
    print('total time cost:%.2f' % elapsed)

if __name__ == '__main__':
    # Script entry point: the API endpoint that lists the documents to fetch.
    url_api = 'http://www.someapi.com'
    main(url_api=url_api)