From 0308e0260891159526480c952ee1afbda5479e0b Mon Sep 17 00:00:00 2001
From: David Trail
Date: Fri, 16 Mar 2012 16:42:27 +0100
Subject: [PATCH] Initial obtaining of user data.

---
 .gitignore  |  1 +
 README.rst  |  6 +++++-
 shreddit.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 .gitignore
 create mode 100755 shreddit.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2d46485
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+data.json
diff --git a/README.rst b/README.rst
index bc6b11a..3ba56d9 100644
--- a/README.rst
+++ b/README.rst
@@ -6,8 +6,12 @@ Details
 When one deletes their account on Reddit it does nothing with their comment
 history other than obscure the author (replaces with [deleted]) which is not
 good enough for some people.
 
+Usage
+-----------
+python2 shreddit.py UserName
+
 Caveats
-----------
+-----------
 - Only your previous 1,000 comments are accessible on Reddit. So good luck deleting the others. There may be ways to hack around this via iterating using sorting by top/best/controversial/new but for now I am unsure.
 - Would make life easier if Reddit just did a "DELETE * FROM abc_def WHERE user_id = 1337"
diff --git a/shreddit.py b/shreddit.py
new file mode 100755
index 0000000..6392c34
--- /dev/null
+++ b/shreddit.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python2
+
+import sys
+from json import loads, dumps
+from urllib2 import urlopen
+from time import sleep
+
+if len(sys.argv) != 2:
+    raise Exception("You must specify a user")
+
+user = sys.argv[1]
+
+sub_section = 'comments'
+after = ''
+
+init_url = 'http://www.reddit.com/user/{user}/{section}/.json?after=%s'.format(user=user, section=sub_section)
+next_url = init_url % after
+
+http = urlopen(next_url).read()
+json = loads(http)
+
+datum = []
+while True:
+    print "Grabbing IDs for after: ", after
+    after = json['data']['after']
+    children = json['data']['children']
+
+    # This bit fills datum with the id (for removal) and the date (for saving recent posts)
+    for child in children:
+        child_data = child['data']
+        if 'id' in child_data:
+            datum.append({'id': child_data['id'], 'date': child_data['created_utc']})
+
+    if after is None:
+        break
+
+    next_url = init_url % after
+    http = urlopen(next_url).read()
+    json = loads(http)
+    sleep(2)  # don't want to hammer reddit too hard
+
+print "Script collected all available data."
+
+f = open('data.json', 'w')
+f.write(dumps(datum))
+f.close()
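
The script stores each comment's created_utc alongside its id; the inline comment notes the date is kept "for saving recent posts". A minimal sketch of how the resulting data.json could later be split into recent comments to keep and older ones to remove (Python 3 for illustration; the 30-day cut-off and the keep/remove split are assumptions, not part of this patch)::

    import json
    import time

    KEEP_DAYS = 30  # assumed cut-off; the patch itself does not define one

    # data.json is the list of {'id': ..., 'date': created_utc} dicts written by shreddit.py
    with open('data.json') as fh:
        datum = json.load(fh)

    cutoff = time.time() - KEEP_DAYS * 24 * 60 * 60
    recent = [d for d in datum if d['date'] >= cutoff]  # keep these
    old = [d for d in datum if d['date'] < cutoff]      # candidates for removal

    print('%d recent, %d older comment IDs' % (len(recent), len(old)))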
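
The README caveat about the 1,000-comment listing limit mentions iterating over the different sortings as a possible workaround. A speculative sketch of that idea (untested, as the README itself says; whether the extra sort orders actually surface additional comments is not verified by this patch, and the user name is a placeholder)::

    import json
    import time
    from urllib.request import Request, urlopen

    user = 'UserName'  # placeholder, as in the README usage line
    base = 'https://www.reddit.com/user/{user}/comments/.json?sort={sort}&after={after}'

    seen = set()
    for sort in ('new', 'top', 'controversial'):  # sort values assumed to be accepted by the listing
        after = ''
        while True:
            url = base.format(user=user, sort=sort, after=after)
            req = Request(url, headers={'User-Agent': 'shreddit-sketch'})
            listing = json.loads(urlopen(req).read())
            for child in listing['data']['children']:
                seen.add(child['data']['id'])
            after = listing['data']['after']
            if after is None:
                break
            time.sleep(2)  # same politeness delay as shreddit.py

    print('%d unique comment IDs collected' % len(seen))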