Skip to content

Commit bdf8964

Browse files
committed
add unsplash
1 parent 7e35a83 commit bdf8964

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

crawler/unsplash.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#-*-encoding:utf-8-*-
2+
import os, shutil
3+
import requests
4+
from bs4 import BeautifulSoup
5+
import json
6+
import threading
7+
8+
# Site root. Not referenced by the code visible in this file.
Url = 'https://unsplash.com/'
# First feed page requested by the crawler.
Start = 'https://unsplash.com/napi/feeds/home?after=95cc4cd0-4529-11e8-8080-8001050033a3'
# Template for follow-up feed pages; 'xxx' is replaced with the cursor taken
# from the previous response's 'next_page' field.
Pattern = 'https://unsplash.com/napi/feeds/home?after=xxx'
# Request headers sent with every call. The Client-ID can be supplied through
# the UNSPLASH_CLIENT_ID environment variable so it does not have to live in
# source control; the original hard-coded value remains the fallback.
# NOTE(review): the fallback is a credential committed to the repository --
# consider rotating it and dropping the default.
Auth = {
    'Authorization': 'Client-ID ' + os.environ.get(
        'UNSPLASH_CLIENT_ID',
        '72664f05b2aee9ed032f9f4084f0ab55aafe02704f8b7f8ef9e28acbec372d09',
    ),
}
# Directory (relative to the working directory) where photos are written.
Des = 'data'
# Number of feed pages to crawl.
Pages = 50
14+
15+
16+
def GetJson(target, timeout=30):
    """Fetch *target* and return its decoded JSON body.

    Args:
        target: URL to request; the module-level ``Auth`` headers are sent.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON payload when the server answers 200, otherwise
        ``None`` (the status code is printed).

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    # NOTE(review): verify=False disables TLS certificate checking while the
    # Authorization header is being sent -- kept as in the original, but it
    # should be removed unless the environment really requires it.
    req = requests.get(target, headers=Auth, verify=False, timeout=timeout)
    if req.status_code != 200:
        print('response error: ', req.status_code)
        return None
    return req.json()
22+
23+
def SavePhoto(photo, regular_size=True, timeout=60):
    """Download one photo and write it to ``Des/<name>.jpg``.

    Args:
        photo: Mapping with the keys ``'m_size'`` (regular-size URL),
            ``'l_size'`` (full-size URL) and ``'name'`` (file name stem).
        regular_size: When true download the ``'m_size'`` URL, otherwise the
            ``'l_size'`` URL.
        timeout: Seconds to wait for the server before giving up.

    Nothing is written when the request fails or the server does not answer
    200, so error pages are never saved as ``.jpg`` files.
    """
    target = photo['m_size'] if regular_size else photo['l_size']
    try:
        # NOTE(review): verify=False disables TLS certificate checking --
        # kept as in the original; remove it unless it is really required.
        req = requests.get(target, headers=Auth, verify=False, timeout=timeout)
    except requests.RequestException as err:
        # This runs inside a worker thread: report the failure and give up on
        # this photo instead of dumping a traceback.
        print('download error: ', photo['name'], err)
        return
    if req.status_code != 200:
        print('response error: ', req.status_code)
        return
    path = os.path.join(Des, photo['name'] + '.jpg')
    with open(path, 'wb') as f:
        f.write(req.content)
32+
33+
if __name__ == '__main__':
    # Make sure the download directory exists before any worker writes to it.
    if not os.path.exists(Des):
        os.mkdir(Des)

    js = GetJson(Start)
    # Pages are numbered from 1, so iterate up to and including `Pages` to
    # save exactly `Pages` feed pages.
    for cur_page in range(1, Pages + 1):
        if js is None:
            # GetJson already reported the failed request; nothing to parse.
            break
        print('saving page: ', cur_page)

        photos = [
            {
                'id': photo['id'],
                'm_size': photo['urls']['regular'],
                'l_size': photo['urls']['full'],
                'name': photo['id'],
            }
            for photo in js['photos']
        ]
        # One download thread per photo. The threads are deliberately not
        # joined, so fetching the next feed page overlaps with the downloads;
        # they are non-daemon threads, so the interpreter still waits for
        # them before exiting.
        for photo in photos:
            threading.Thread(target=SavePhoto, args=(photo, )).start()

        if cur_page == Pages:
            # Last requested page: no need to fetch another feed page.
            break

        # The cursor for the following page is the value after the first '='
        # in the 'next_page' field of the current response.
        next_page = js.get('next_page')
        if not next_page or '=' not in next_page:
            print('no next page after page: ', cur_page)
            break
        after_value = next_page.split('=')[1]
        js = GetJson(Pattern.replace('xxx', after_value))

0 commit comments

Comments
 (0)