snippets/download_xkcd.py

#!/usr/bin/env python2

import requests
from bs4 import BeautifulSoup

# Inclusive range of xkcd comic numbers that run() iterates over.
FIRST_COMIC = 1
LAST_COMIC = 1633

def download(img_url, i):
    """Fetch one image and save it in the current working directory.

    The file is named ``NNNN_<name>``, where ``NNNN`` is ``i`` zero-padded
    to four digits and ``<name>`` is the last ``/``-separated segment of
    ``img_url``.

    Args:
        img_url: Absolute URL of the image to fetch.
        i: Comic number, used as the file name prefix.

    Returns:
        None. When the server answers with an error status, a message is
        printed and no file is written.
    """
    img = requests.get(img_url, stream=True)
    try:
        if not img.ok:
            # Without this check the body of an error page would be saved
            # under an image file name.
            print("Failed %s (HTTP %d)" % (img_url, img.status_code))
            return
        file_name = "%04d_%s" % (i, img_url.split("/")[-1])
        with open(file_name, "wb") as f:
            for chunk in img.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    finally:
        # With stream=True the connection is held until the response is
        # closed, so release it explicitly on every path.
        img.close()
    # Single-argument parenthesised form prints identically on Python 2
    # and is also valid Python 3.
    print("Got %s" % file_name)


def run():
    """Download the image of every comic from FIRST_COMIC to LAST_COMIC.

    For each comic number the page ``http://xkcd.com/<n>/`` is fetched and
    the first ``<img>`` inside ``<div id="comic">`` is passed to
    ``download``. Pages without that div, without an image in it, or whose
    image has no usable ``src`` are skipped silently.
    """
    for i in range(FIRST_COMIC, LAST_COMIC + 1):
        p = requests.get("http://xkcd.com/%d/" % i)
        soup = BeautifulSoup(p.text, "html.parser")
        div = soup.find("div", id="comic")
        if not div:
            continue
        img = div.find("img")
        if not img:
            continue
        # .get() avoids a KeyError on an <img> that has no src attribute.
        src = img.get("src", "")
        if src.startswith("//"):
            # Protocol-relative URL: supply the scheme ourselves.
            download("http:" + src, i)
        elif src.startswith(("http://", "https://")):
            # Already absolute; previously these images were dropped.
            download(src, i)
        

# Start downloading only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
well, git is good at preserving the symlink, that's for sure... 2016-02-10 22:34:38 +01:00			`#!/usr/bin/env python2`

			`import requests`
			`from bs4 import BeautifulSoup`

			`FIRST_COMIC = 1`
			`LAST_COMIC = 1633`

			`def download(img_url, i):`
			`img = requests.get(img_url, stream=True)`
			`file_name = "%04d_%s" %(i, img_url.split("/")[-1])`
			`with open(file_name ,"wb") as f:`
			`for chunk in img.iter_content(chunk_size=1024):`
			`if chunk:`
			`f.write(chunk)`
			`print "Got %s" %file_name`


			`def run():`
			`for i in range(FIRST_COMIC, LAST_COMIC + 1):`
			`p = requests.get("http://xkcd.com/%d/" %i)`
			`soup = BeautifulSoup(p.text, "html.parser")`
			`div = soup.find("div", id="comic")`
			`if div:`
			`img = div.find("img")`
			`if img:`
			`src = img["src"]`
			`if src.startswith("//"):`
			`download("http:" + src, i)`


			`if __name__ == "__main__":`
			`run()`