From 45b211785f8dfd4ab73aaba606ecb1bd09f195f2 Mon Sep 17 00:00:00 2001
From: Alexander Weinhold <gutmet@allfex.org>
Date: Wed, 10 Feb 2016 22:34:38 +0100
Subject: [PATCH] well, git is good at preserving the symlink, that's for
 sure...

---
 download_xkcd.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)
 mode change 120000 => 100644 download_xkcd.py

diff --git a/download_xkcd.py b/download_xkcd.py
deleted file mode 120000
index 38eaee4..0000000
--- a/download_xkcd.py
+++ /dev/null
@@ -1 +0,0 @@
-../xkcd/download_xkcd.py
\ No newline at end of file
diff --git a/download_xkcd.py b/download_xkcd.py
new file mode 100644
index 0000000..4d10c0a
--- /dev/null
+++ b/download_xkcd.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python2
+
+import requests
+from bs4 import BeautifulSoup
+
+FIRST_COMIC = 1  # number of the first comic run() fetches (inclusive)
+LAST_COMIC = 1633  # number of the last comic run() fetches (inclusive); hard-coded -- presumably the newest comic when written, TODO confirm
+
+def download(img_url, i):
+    """Stream img_url into "<i zero-padded to 4 digits>_<URL basename>"; report 4xx/5xx replies."""
+    img = requests.get(img_url, stream=True)
+    file_name = "%04d_%s" % (i, img_url.split("/")[-1])
+    if img.ok:  # never store an HTTP error page under an image file name
+        with open(file_name, "wb") as f:
+            f.writelines(chunk for chunk in img.iter_content(chunk_size=1024) if chunk)
+    print("Got %s" % file_name if img.ok else "HTTP %d for %s" % (img.status_code, img_url))
+
+
+def run():
+    """Fetch each comic page numbered FIRST_COMIC..LAST_COMIC (inclusive) and download() its image."""
+    for i in range(FIRST_COMIC, LAST_COMIC + 1):
+        page_url = "http://xkcd.com/%d/" % i
+        soup = BeautifulSoup(requests.get(page_url).text, "html.parser")
+        div = soup.find("div", id="comic")
+        img = div.find("img") if div else None
+        src = img.get("src") if img else None  # .get(): an <img> without src must not raise KeyError
+        if src:  # pages lacking <div id="comic"><img src=...> are skipped
+            # urljoin resolves "//host/x", "/x" and absolute URLs alike against the page URL
+            download(requests.compat.urljoin(page_url, src), i)
+        
+
+if __name__ == "__main__":  # start downloading only when executed as a script, not on import
+    run()