From ee0909738959de7614787c20b28c6b364276a819 Mon Sep 17 00:00:00 2001
From: Alexander Weinhold
Date: Wed, 10 Feb 2016 22:40:26 +0100
Subject: [PATCH] abstruse goose downloader

---
 download_abstrusegoose.py | 62 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 download_abstrusegoose.py

diff --git a/download_abstrusegoose.py b/download_abstrusegoose.py
new file mode 100644
index 0000000..9419b8e
--- /dev/null
+++ b/download_abstrusegoose.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python2
+"""Download every Abstruse Goose comic image into the current directory."""
+
+import requests
+from bs4 import BeautifulSoup
+
+FIRST_COMIC = 1
+LAST_COMIC = 575
+
+# Seconds to wait on the server before giving up. requests applies no
+# timeout unless asked, so one stalled connection would hang the whole run.
+REQUEST_TIMEOUT = 30
+
+
+def download(img_url, i):
+    """Save the image at img_url to the current directory.
+
+    The file is named "<i zero-padded to 4 digits>_<last URL path segment>".
+    Raises requests.RequestException if the request fails or the server
+    answers with an HTTP error status.
+    """
+    img = requests.get(img_url, stream=True, timeout=REQUEST_TIMEOUT)
+    try:
+        # Do not save an HTML error page under an image file name.
+        img.raise_for_status()
+        file_name = "%04d_%s" % (i, img_url.split("/")[-1])
+        with open(file_name, "wb") as f:
+            for chunk in img.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+    finally:
+        # A streamed response keeps its connection until it is closed.
+        img.close()
+    print("Got %s" % file_name)
+
+
+def run():
+    """Download the image of every comic from FIRST_COMIC to LAST_COMIC.
+
+    The image is the first <img> inside the first <section> of the comic
+    page. A comic whose page or image request fails is reported and skipped
+    so that one failure does not abort the rest of the run.
+    """
+    for i in range(FIRST_COMIC, LAST_COMIC + 1):
+        try:
+            p = requests.get("http://abstrusegoose.com/%d" % i,
+                             timeout=REQUEST_TIMEOUT)
+            p.raise_for_status()
+            soup = BeautifulSoup(p.text, "html.parser")
+            section = soup.find("section")
+            img = section.find("img") if section is not None else None
+            # Tag.get returns None for a missing attribute, whereas
+            # img["src"] would raise KeyError.
+            src = img.get("src") if img is not None else None
+            if src:
+                download(src, i)
+        except requests.RequestException as e:
+            print("Skipping comic %d: %s" % (i, e))
+
+
+if __name__ == "__main__":
+    run()