Initial obtaining of user data.

13 years ago · 0308e02608
parent f8a66beb6b
commit 0308e02608
3 changed files with 52 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 data.json
--- a/README.rst
+++ b/README.rst
@ -6,8 +6,12 @@ Details
 When one deletes their account on Reddit it does nothing with their comment history other than
 obscure the author (replaces with [deleted]) which is not good enough for some people.
 Usage
 -----------
 python2 shreddit.py UserName
 Caveats
----------
+-----------
 - Only your previous 1,000 comments are accessible on Reddit. So good luck deleting the others. There may be ways to work around this by iterating over the listing with different sort orders (top/best/controversial/new), but for now I am unsure.
 - Would make life easier if Reddit just did a "DELETE * FROM abc_def WHERE user_id = 1337"
--- a/shreddit.py
+++ b/shreddit.py
@ -0,0 +1,46 @@
 #!/usr/bin/env python2
 import sys
 from json import loads, dumps
 from urllib2 import urlopen
 from time import sleep
 if len(sys.argv) != 2:
    raise Exception("You must specifiy a user")
 user = sys.argv[1]
 sub_section = 'comments'
 after = ''
 init_url = 'http://www.reddit.com/user/{user}/comments/.json?after=%s'.format(user=user)
 next_url = init_url % after
 http = urlopen(next_url).read()
 json = loads(http)
 datum = []
 while True:
    print "Grabing IDs for after: ", after
    after = json['data']['after']
    children = json['data']['children']
    # This bit fills datum with the id (for removal) and the date (for saving recent posts)
    for child in children:
        child_data = child['data']
        if 'id' in child_data:
            datum.append({'id': child_data[u'id'], 'date': child_data['created_utc']})
    if after == None:
        break
    next_url = init_url % after
    http = urlopen(next_url).read()
    json = loads(http)
    sleep(2) # don't want to hammer reddit to hard
 print "Script collected all available data."
 f = open('data.json', 'w')
 f.write(dumps(datum))
 f.close()