well, git is good at preserving the symlink, that's for sure...
This commit is contained in:
parent
d52e2e7050
commit
45b211785f
|
@ -1 +0,0 @@
|
||||||
../xkcd/download_xkcd.py
|
|
33
download_xkcd.py
Normal file
33
download_xkcd.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Inclusive bounds of the comic numbers to fetch.
FIRST_COMIC = 1
LAST_COMIC = 1633
|
||||||
|
|
||||||
|
def download(img_url, i):
    """Fetch one comic image and save it in the current directory.

    The file is named ``<i zero-padded to 4 digits>_<last path segment of
    img_url>`` and is written in 1 KiB chunks, so the whole image is never
    held in memory at once.

    Args:
        img_url: Absolute URL of the image to fetch.
        i: Comic number, used as the numeric file-name prefix.

    Returns:
        None. Prints "Got <file_name>" after a successful save. When the
        server answers with an HTTP error status (>= 400), prints a
        "Skipped" notice instead and writes no file.
    """
    img = requests.get(img_url, stream=True)
    try:
        # Without this check the body of an error page (404, 500, ...)
        # would be stored under an image file name.
        if img.status_code >= 400:
            print("Skipped %s (HTTP %d)" % (img_url, img.status_code))
            return

        file_name = "%04d_%s" % (i, img_url.split("/")[-1])
        with open(file_name, "wb") as f:
            for chunk in img.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection until it is fully read
        # or closed; close explicitly so the error paths release it too.
        img.close()

    # Single-argument parenthesised form works under Python 2 and 3 alike.
    print("Got %s" % file_name)
|
||||||
|
|
||||||
|
|
||||||
|
def run():
    """Download the image of every comic from FIRST_COMIC to LAST_COMIC.

    For each comic number the page ``http://xkcd.com/<number>/`` is fetched
    and parsed; the first ``<img>`` inside ``<div id="comic">`` is located
    and, when its ``src`` is protocol-relative (starts with ``//``), the
    image is handed to ``download`` with an ``http:`` prefix. Pages lacking
    the div, the image, or a protocol-relative ``src`` are silently skipped.
    """
    for number in range(FIRST_COMIC, LAST_COMIC + 1):
        page = requests.get("http://xkcd.com/%d/" % number)
        document = BeautifulSoup(page.text, "html.parser")

        comic_div = document.find("div", id="comic")
        if not comic_div:
            continue

        image_tag = comic_div.find("img")
        if not image_tag:
            continue

        source = image_tag["src"]
        if not source.startswith("//"):
            continue

        download("http:" + source, number)
|
||||||
|
|
||||||
|
|
||||||
|
# Start the download loop only when this file is executed as a script.
if __name__ == "__main__":
    run()
|
Loading…
Reference in New Issue
Block a user