well, git is good at preserving the symlink, that's for sure...

This commit is contained in:
Alexander Weinhold 2016-02-10 22:34:38 +01:00
parent d52e2e7050
commit 45b211785f

View File

@@ -1 +0,0 @@
../xkcd/download_xkcd.py

33
download_xkcd.py Normal file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env python2
import requests
from bs4 import BeautifulSoup
# Inclusive range of comic numbers that run() iterates over.
FIRST_COMIC = 1
# Hard-coded upper bound; presumably the newest comic when this was
# written -- raise it to fetch later comics.
LAST_COMIC = 1633
def download(img_url, i):
    """Download one comic image into the current working directory.

    The target file is named ``<i zero-padded to 4 digits>_<last path
    segment of img_url>``.

    Args:
        img_url: Absolute URL of the image to fetch.
        i: Comic number, used as the numeric file-name prefix.

    Returns:
        None. Prints ``Got <file_name>`` on success, or a ``Failed ...``
        line (and writes no file) when the server answers with an error
        status.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the request times out.
    """
    file_name = "%04d_%s" % (i, img_url.split("/")[-1])
    # A timeout keeps one stalled connection from hanging the whole run.
    img = requests.get(img_url, stream=True, timeout=30)
    try:
        # Without this check an error page would be saved under an image
        # file name.
        if not img.ok:
            print("Failed %s (HTTP %d)" % (img_url, img.status_code))
            return
        with open(file_name, "wb") as f:
            for chunk in img.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection until it is fully read
        # or closed; close explicitly so the early return cannot leak it.
        img.close()
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Got %s" % file_name)
def run():
    """Fetch every comic page in the configured range and save its image.

    For each number from FIRST_COMIC to LAST_COMIC (inclusive) the page
    ``http://xkcd.com/<n>/`` is requested, and the ``src`` of the first
    ``<img>`` inside ``<div id="comic">`` is handed to ``download``.
    Pages that return an error status, or that contain no such image, are
    skipped so a single odd page does not abort the whole run.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            a page request times out.
    """
    for i in range(FIRST_COMIC, LAST_COMIC + 1):
        # A timeout keeps one stalled connection from hanging the loop.
        p = requests.get("http://xkcd.com/%d/" % i, timeout=30)
        if not p.ok:
            # Skip instead of raising: a number without a page should not
            # stop the remaining downloads.
            continue
        soup = BeautifulSoup(p.text, "html.parser")
        div = soup.find("div", id="comic")
        if div is None:
            continue
        img = div.find("img")
        if img is None:
            continue
        # .get() avoids a KeyError on an <img> tag without a src attribute.
        src = img.get("src")
        if not src:
            continue
        if src.startswith("//"):
            # Protocol-relative URL: prepend the scheme.
            download("http:" + src, i)
        elif src.startswith(("http://", "https://")):
            # Already absolute; previously these were silently skipped.
            download(src, i)
# Script entry point: start downloading only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()