From 48047a7884ec65b3b25c3fb9bc41d85f3fd3ac1a Mon Sep 17 00:00:00 2001 From: Scott Date: Fri, 15 Jul 2016 20:45:45 -0500 Subject: [PATCH 01/14] Changed name for fork in setup --- .gitignore | 66 ++++++++++++--- Makefile | 17 ++++ install.sh | 11 --- lambda_handler.py | 12 +++ oauth_test.py | 17 ---- run.sh | 4 - setup.py | 36 +++++++++ shreddit.py | 189 ------------------------------------------- shreddit/__init__.py | 0 shreddit/app.py | 29 +++++++ shreddit/oauth.py | 22 +++++ shreddit/shredder.py | 172 +++++++++++++++++++++++++++++++++++++++ 12 files changed, 342 insertions(+), 233 deletions(-) create mode 100644 Makefile delete mode 100755 install.sh create mode 100644 lambda_handler.py delete mode 100644 oauth_test.py delete mode 100755 run.sh create mode 100644 setup.py delete mode 100755 shreddit.py create mode 100644 shreddit/__init__.py create mode 100644 shreddit/app.py create mode 100644 shreddit/oauth.py create mode 100644 shreddit/shredder.py diff --git a/.gitignore b/.gitignore index 4c36359..156795a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,55 @@ -.venv -pip-selfcheck.json -.errors -2015*.txt -2016*.txt -2017*.txt -*.cfg -*.pyc -shreddit.conf -shreddit.yml -praw.ini +# Docs +docs/_build/ + +# Byte-compiled / optimized / DLL files __pycache__/ -.*.swp +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# PyBuilder +target/ + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..376dc78 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +help: + @echo "build - Build package" + @echo "install - Install package to local system" + @echo "clean - Clean built artifacts" + @echo "test - Run test suite with coverage" + +build: + python setup.py build + python setup.py bdist_wheel + +install: + pip install dist/*.whl --upgrade --force-reinstall --no-deps + python setup.py clean + +clean: + find . -type f -name "*.pyc" -delete + rm -rf ./build ./dist ./*.egg-info diff --git a/install.sh b/install.sh deleted file mode 100755 index b00c7b1..0000000 --- a/install.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env sh - -virtualenv . -source ./bin/activate -pip install -r requirements.txt - -if [ ! -f "shreddit.yml" ]; then - cp "shreddit.yml.example" "shreddit.yml" - $EDITOR shreddit.yml -fi - diff --git a/lambda_handler.py b/lambda_handler.py new file mode 100644 index 0000000..e7e7d97 --- /dev/null +++ b/lambda_handler.py @@ -0,0 +1,12 @@ +"""This module contains the handler function called by AWS. +""" +from shreddit.shredder import shred +import yaml + + +def lambda_handler(event, context): + with open("shreddit.yml") as fh: + config = yaml.safe_load(fh) + if not config: + raise Exception("No config options passed!") + shred(config) diff --git a/oauth_test.py b/oauth_test.py deleted file mode 100644 index 563d0c6..0000000 --- a/oauth_test.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python -''' -Simple script to check if your oauth is working. 
-''' -import praw -import sys - -r = praw.Reddit('Shreddit oauth test') -try: - r.refresh_access_information() - if r.is_oauth_session(): - sys.exit(0) - else: - sys.exit(2) -except: - sys.exit(1) - diff --git a/run.sh b/run.sh deleted file mode 100755 index b7b4f25..0000000 --- a/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env sh -source ./bin/activate -pip install --upgrade praw -python ./shreddit.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..724d001 --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +"""Setup script for shreddit. +""" +from setuptools import setup +from codecs import open +from os import path + +VERSION = "2.0.0" +DESCRIPTION = " Remove your comment history on Reddit as deleting an account does not do so." + +here = path.abspath(path.dirname(__file__)) + +with open(path.join(here, "README.md"), encoding='utf-8') as filein: + long_description = filein.read() + +with open(path.join(here, "requirements.txt"), encoding="utf-8") as filein: + requirements = [line.strip() for line in filein.readlines()] + +setup( + name="shreddit", + version=VERSION, + description=DESCRIPTION, + long_description=long_description, + url="https://github.com/scott-hand/Shreddit", + author="Scott Hand", + author_email="scott@vkgfx.com", + classifiers=["Development Status :: 3 - Alpha", + "Intended Audience :: End Users/Desktop", + "Programming Language :: Python :: 2"], + packages=["shreddit"], + install_requires=requirements, + entry_points={ + "console_scripts": [ + "shreddit=shreddit.app:main" + ] + } +) diff --git a/shreddit.py b/shreddit.py deleted file mode 100755 index 1e744c9..0000000 --- a/shreddit.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import logging -import argparse -import json -import yaml -import praw - -from re import sub -from random import shuffle, randint -from datetime import datetime, timedelta -from praw.errors import (InvalidUser, InvalidUserPass, RateLimitExceeded, - 
HTTPException, OAuthAppRequired) -from praw.objects import Comment, Submission - -logging.basicConfig(stream=sys.stdout) -log = logging.getLogger(__name__) -log.setLevel(level=logging.WARNING) - -try: - from loremipsum import get_sentence # This only works on Python 2 -except ImportError: - def get_sentence(): - return '''I have been Shreddited for privacy!''' - - os_wordlist = '/usr/share/dict/words' - if os.name == 'posix' and os.path.isfile(os_wordlist): - # Generate a random string of words from our system's dictionary - fh = open(os_wordlist) - words = fh.read().splitlines() - fh.close() - shuffle(words) - - def get_sentence(): - return ' '.join(words[:randint(50, 150)]) - -assert get_sentence - -parser = argparse.ArgumentParser() -parser.add_argument( - '-c', - '--config', - help="config file to use instead of the default shreddit.cfg" -) -args = parser.parse_args() - -if args.config: - config_file = args.config -else: - config_file = 'shreddit.yml' - -with open(config_file, 'r') as fh: - config = yaml.safe_load(fh) -if config is None: - raise Exception("No config options passed!") - -save_directory = config.get('save_directory', '.') - -r = praw.Reddit(user_agent="shreddit/4.2") -if save_directory: - r.config.store_json_result = True - -if config.get('verbose', True): - log.setLevel(level=logging.DEBUG) - -try: - # Try to login with OAuth2 - r.refresh_access_information() - log.debug("Logged in with OAuth.") -except (HTTPException, OAuthAppRequired) as e: - log.warning('''You should migrate to OAuth2 using get_secret.py before - Reddit disables this login method.''') - try: - try: - r.login(config['username'], config['password']) - except InvalidUserPass: - r.login() # Supply details on the command line - except InvalidUser as e: - raise InvalidUser("User does not exist.", e) - except InvalidUserPass as e: - raise InvalidUserPass("Specified an incorrect password.", e) - except RateLimitExceeded as e: - raise RateLimitExceeded("You're doing that too much.", 
e) - -log.info("Logged in as {user}.".format(user=r.user)) -log.debug("Deleting messages before {time}.".format( - time=datetime.now() - timedelta(hours=config['hours']))) - -whitelist = config.get('whitelist', []) -whitelist_ids = config.get('whitelist_ids', []) - -if config.get('whitelist'): - log.debug("Keeping messages from subreddits {subs}".format( - subs=', '.join(whitelist)) - ) - - -def get_things(after=None): - limit = None - item = config.get('item', 'comments') - sort = config.get('sort', 'new') - log.debug("Deleting items: {item}".format(item=item)) - if item == "comments": - return r.user.get_comments(limit=limit, sort=sort) - elif item == "submitted": - return r.user.get_submitted(limit=limit, sort=sort) - elif item == "overview": - return r.user.get_overview(limit=limit, sort=sort) - else: - raise Exception("Your deletion section is wrong") - - -def remove_things(things): - for thing in things: - log.debug('Starting remove function on: {thing}'.format(thing=thing)) - # Seems to be in users's timezone. Unclear. - thing_time = datetime.fromtimestamp(thing.created_utc) - # Exclude items from being deleted unless past X hours. - after_time = datetime.now() - timedelta(hours=config.get('hours', 24)) - if thing_time > after_time: - if thing_time + timedelta(hours=config.get('nuke_hours', 4320)) < datetime.utcnow(): - pass - continue - # For edit_only we're assuming that the hours aren't altered. - # This saves time when deleting (you don't edit already edited posts). 
- if config.get('edit_only'): - end_time = after_time - timedelta(hours=config.get('hours', 24)) - if thing_time < end_time: - continue - - if str(thing.subreddit).lower() in config.get('whitelist', []) \ - or thing.id in config.get('whitelist_ids', []): - continue - - if config.get('whitelist_distinguished') and thing.distinguished: - continue - if config.get('whitelist_gilded') and thing.gilded: - continue - if 'max_score' in config and thing.score > config['max_score']: - continue - - if config.get('save_directory'): - save_directory = config['save_directory'] - if not os.path.exists(save_directory): - os.makedirs(save_directory) - with open("%s/%s.json" % (save_directory, thing.id), "w") as fh: - json.dump(thing.json_dict, fh) - - if config.get('trial_run'): # Don't do anything, trial mode! - log.debug("Would have deleted {thing}: '{content}'".format( - thing=thing.id, content=thing)) - continue - - if config.get('clear_vote'): - thing.clear_vote() - - if isinstance(thing, Submission): - log.info('Deleting submission: #{id} {url}'.format( - id=thing.id, - url=thing.url.encode('utf-8')) - ) - elif isinstance(thing, Comment): - rep_format = config.get('replacement_format') - if rep_format == 'random': - replacement_text = get_sentence() - elif rep_format == 'dot': - replacement_text = '.' 
- else: - replacement_text = rep_format - - msg = '/r/{3}/ #{0} with:\n\t"{1}" to\n\t"{2}"'.format( - thing.id, - sub(b'\n\r\t', ' ', thing.body[:78].encode('utf-8')), - replacement_text[:78], - thing.subreddit - ) - - if config.get('edit_only'): - log.info('Editing (not removing) {msg}'.format(msg=msg)) - else: - log.info('Editing and deleting {msg}'.format(msg=msg)) - - thing.edit(replacement_text) - if not config.get('edit_only'): - thing.delete() - -remove_things(get_things()) diff --git a/shreddit/__init__.py b/shreddit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/shreddit/app.py b/shreddit/app.py new file mode 100644 index 0000000..e00e17c --- /dev/null +++ b/shreddit/app.py @@ -0,0 +1,29 @@ +"""This module contains script entrypoints for shreddit. +""" +import argparse +import yaml +from shreddit.oauth import oauth_test +from shreddit.shredder import shred + + +def main(): + parser = argparse.ArgumentParser(description="Command-line frontend to the shreddit library.") + parser.add_argument("-c", "--config", help="Config file to use instead of the default shreddit.cfg") + parser.add_argument("-p", "--praw", help="PRAW config (if not ./praw.ini)") + parser.add_argument("-t", "--test-oauth", help="Perform OAuth test and exit", action="store_true") + args = parser.parse_args() + + if args.test_oauth: + oauth_test(args.praw) + return + + with open(args.config or "shreddit.yml") as fh: + config = yaml.safe_load(fh) + if not config: + raise Exception("No config options passed!") + + shred(config, args.praw) + + +if __name__ == "__main__": + main() diff --git a/shreddit/oauth.py b/shreddit/oauth.py new file mode 100644 index 0000000..8276c43 --- /dev/null +++ b/shreddit/oauth.py @@ -0,0 +1,22 @@ +"""This module contains a function that tests OAuth session validity. 
+""" +import os +import praw + + +def oauth_test(praw_ini): + if praw_ini: + # PRAW won't panic if the file is invalid, so check first + if not os.path.exists(praw_ini): + print("PRAW configuration file \"{}\" not found.".format(praw_ini)) + return + praw.settings.CONFIG.read(praw_ini) + r = praw.Reddit("Shreddit oauth test") + try: + r.refresh_access_information() + if r.is_oauth_session(): + print("Session is valid.") + else: + print("Session is not a valid OAuth session.") + except Exception as e: + print("Error encountered while checking credentials:\n{}".format(e)) diff --git a/shreddit/shredder.py b/shreddit/shredder.py new file mode 100644 index 0000000..5f53010 --- /dev/null +++ b/shreddit/shredder.py @@ -0,0 +1,172 @@ +import os +import sys +import logging +import argparse +import json +import yaml +import praw +from re import sub +from random import shuffle, randint +from datetime import datetime, timedelta +from praw.errors import (InvalidUser, InvalidUserPass, RateLimitExceeded, HTTPException, OAuthAppRequired) +from praw.objects import Comment, Submission +try: + from loremipsum import get_sentence # This only works on Python 2 +except ImportError: + def get_sentence(): + return "I have been Shreddited for privacy!" 
+ + os_wordlist = "/usr/share/dict/words" + if os.name == "posix" and os.path.isfile(os_wordlist): + # Generate a random string of words from our system's dictionary + fh = open(os_wordlist) + words = fh.read().splitlines() + fh.close() + shuffle(words) + + def get_sentence(): + return " ".join(words[:randint(50, 150)]) +assert get_sentence + + +def shred(config, praw_ini=None): + logging.basicConfig(stream=sys.stdout) + log = logging.getLogger("shreddit") + log.setLevel(level=logging.WARNING) + + if praw_ini: + # PRAW won't panic if the file is invalid, so check first + if not os.path.exists(praw_ini): + print("PRAW configuration file \"{}\" not found.".format(praw_ini)) + return + praw.settings.CONFIG.read(praw_ini) + + save_directory = config.get("save_directory", ".") + + r = praw.Reddit(user_agent="shreddit/4.2") + if save_directory: + r.config.store_json_result = True + + if config.get("verbose", True): + log.setLevel(level=logging.DEBUG) + + try: + # Try to login with OAuth2 + r.refresh_access_information() + log.debug("Logged in with OAuth.") + except (HTTPException, OAuthAppRequired) as e: + log.warning("You should migrate to OAuth2 using get_secret.py before Reddit disables this login method.") + try: + try: + r.login(config["username"], config["password"]) + except InvalidUserPass: + r.login() # Supply details on the command line + except InvalidUser as e: + raise InvalidUser("User does not exist.", e) + except InvalidUserPass as e: + raise InvalidUserPass("Specified an incorrect password.", e) + except RateLimitExceeded as e: + raise RateLimitExceeded("You're doing that too much.", e) + + log.info("Logged in as {user}.".format(user=r.user)) + log.debug("Deleting messages before {time}.".format( + time=datetime.now() - timedelta(hours=config["hours"]))) + + whitelist = config.get("whitelist", []) + whitelist_ids = config.get("whitelist_ids", []) + + if config.get("whitelist"): + log.debug("Keeping messages from subreddits {subs}".format(subs=", 
".join(whitelist))) + + remove_things(r, config, log, get_things(r, config, log)) + + +def get_things(r, config, log, after=None): + limit = None + item = config.get("item", "comments") + sort = config.get("sort", "new") + log.debug("Deleting items: {item}".format(item=item)) + if item == "comments": + return r.user.get_comments(limit=limit, sort=sort) + elif item == "submitted": + return r.user.get_submitted(limit=limit, sort=sort) + elif item == "overview": + return r.user.get_overview(limit=limit, sort=sort) + else: + raise Exception("Your deletion section is wrong") + + +def remove_things(r, config, log, things): + for thing in things: + log.debug("Starting remove function on: {thing}".format(thing=thing)) + # Seems to be in users's timezone. Unclear. + thing_time = datetime.fromtimestamp(thing.created_utc) + # Exclude items from being deleted unless past X hours. + after_time = datetime.now() - timedelta(hours=config.get("hours", 24)) + if thing_time > after_time: + if thing_time + timedelta(hours=config.get("nuke_hours", 4320)) < datetime.utcnow(): + pass + continue + # For edit_only we're assuming that the hours aren't altered. + # This saves time when deleting (you don't edit already edited posts). 
+ if config.get("edit_only"): + end_time = after_time - timedelta(hours=config.get("hours", 24)) + if thing_time < end_time: + continue + + if str(thing.subreddit).lower() in config.get("whitelist", []) \ + or thing.id in config.get("whitelist_ids", []): + continue + + if config.get("whitelist_distinguished") and thing.distinguished: + continue + if config.get("whitelist_gilded") and thing.gilded: + continue + if "max_score" in config and thing.score > config["max_score"]: + continue + + if config.get("save_directory"): + save_directory = config["save_directory"] + if not os.path.exists(save_directory): + os.makedirs(save_directory) + with open("%s/%s.json" % (save_directory, thing.id), "w") as fh: + json.dump(thing.json_dict, fh) + + if config.get("trial_run"): # Don't do anything, trial mode! + log.debug("Would have deleted {thing}: '{content}'".format( + thing=thing.id, content=thing)) + continue + + if config.get("clear_vote"): + thing.clear_vote() + + if isinstance(thing, Submission): + log.info("Deleting submission: #{id} {url}".format( + id=thing.id, + url=thing.url.encode("utf-8")) + ) + elif isinstance(thing, Comment): + rep_format = config.get("replacement_format") + if rep_format == "random": + replacement_text = get_sentence() + elif rep_format == "dot": + replacement_text = "." 
+ else: + replacement_text = rep_format + + msg = '/r/{3}/ #{0} with:\n\t"{1}" to\n\t"{2}"'.format( + thing.id, + sub(b"\n\r\t", " ", thing.body[:78].encode("utf-8")), + replacement_text[:78], + thing.subreddit + ) + + if config.get("edit_only"): + log.info("Editing (not removing) {msg}".format(msg=msg)) + else: + log.info("Editing and deleting {msg}".format(msg=msg)) + + thing.edit(replacement_text) + if not config.get("edit_only"): + thing.delete() + From 39a625b75ba5dceb7b44faed066dea49501a3ed6 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 17 Jul 2016 21:23:21 -0500 Subject: [PATCH 02/14] Tweaking width to make it easier to hack on --- shreddit/shredder.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/shreddit/shredder.py b/shreddit/shredder.py index 5f53010..d4f3cb0 100644 --- a/shreddit/shredder.py +++ b/shreddit/shredder.py @@ -114,8 +114,7 @@ def remove_things(r, config, log, things): if thing_time < end_time: continue - if str(thing.subreddit).lower() in config.get("whitelist", []) \ - or thing.id in config.get("whitelist_ids", []): + if str(thing.subreddit).lower() in config.get("whitelist", []) or thing.id in config.get("whitelist_ids", []): continue if config.get("whitelist_distinguished") and thing.distinguished: @@ -141,10 +140,7 @@ def remove_things(r, config, log, things): thing.clear_vote() if isinstance(thing, Submission): - log.info("Deleting submission: #{id} {url}".format( - id=thing.id, - url=thing.url.encode("utf-8")) - ) + log.info("Deleting submission: #{id} {url}".format(id=thing.id, url=thing.url.encode("utf-8"))) elif isinstance(thing, Comment): rep_format = config.get("replacement_format") if rep_format == "random": @@ -154,12 +150,9 @@ def remove_things(r, config, log, things): else: replacement_text = rep_format - msg = '/r/{3}/ #{0} with:\n\t"{1}" to\n\t"{2}"'.format( - thing.id, - sub(b"\n\r\t", " ", thing.body[:78].encode("utf-8")), - replacement_text[:78], - thing.subreddit - ) + msg = 
'/r/{3}/ #{0} with:\n\t"{1}" to\n\t"{2}"'.format(thing.id, sub(b"\n\r\t", " ", + thing.body[:78].encode("utf-8")), + replacement_text[:78], thing.subreddit) if config.get("edit_only"): log.info("Editing (not removing) {msg}".format(msg=msg)) From 07d3c3df0c65d6114e235fdbdaed60b6ae071b85 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 17 Jul 2016 21:42:28 -0500 Subject: [PATCH 03/14] Moved loremipsum import logic out of main shreddit code --- shreddit/shredder.py | 19 +------------------ shreddit/util.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 shreddit/util.py diff --git a/shreddit/shredder.py b/shreddit/shredder.py index d4f3cb0..4408e70 100644 --- a/shreddit/shredder.py +++ b/shreddit/shredder.py @@ -6,27 +6,10 @@ import json import yaml import praw from re import sub -from random import shuffle, randint from datetime import datetime, timedelta from praw.errors import (InvalidUser, InvalidUserPass, RateLimitExceeded, HTTPException, OAuthAppRequired) from praw.objects import Comment, Submission -try: - from loremipsum import get_sentence # This only works on Python 2 -except ImportError: - def get_sentence(): - return "I have been Shreddited for privacy!" - - os_wordlist = "/usr/share/dict/words" - if os.name == "posix" and os.path.isfile(os_wordlist): - # Generate a random string of words from our system's dictionary - fh = open(os_wordlist) - words = fh.read().splitlines() - fh.close() - shuffle(words) - - def get_sentence(): - return " ".join(words[:randint(50, 150)]) -assert get_sentence +from shreddit.util import get_sentence def shred(config, praw_ini=None): diff --git a/shreddit/util.py b/shreddit/util.py new file mode 100644 index 0000000..28baec0 --- /dev/null +++ b/shreddit/util.py @@ -0,0 +1,24 @@ +"""This module contains common utilities for the rest of the package. +""" +import random + + +WORDLIST = "/usr/share/dict/words" +STATIC_TEXT = "I have been Shreddited for privacy!" 
+ + +try: + from loremipsum import get_sentence +except ImportError: + def get_sentence(): + """This keeps the mess of dealing with the loremipsum library out of the shredding code. Until the maintainer of + the loremipsum package uploads a version that works with Python 3 to pypi, it is necessary to provide a drop-in + replacement. The current solution is to return a static text string unless the operating system has a word list. + If that is the case, use it instead. + """ + try: + lines = [line.strip() for line in open(WORDLIST).readlines()] + return " ".join(random.sample(lines, random.randint(50, 150))) + except IOError: + # The word list wasn't available... + return STATIC_TEXT From 83fd3510cd5cd374a8df51b53b1542d2b9086d41 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 18 Jul 2016 04:36:24 -0500 Subject: [PATCH 04/14] Split up shredder into a class. Added support for >1000 items --- requirements.txt | 1 + shreddit.yml.example | 4 + shreddit/app.py | 6 +- shreddit/shredder.py | 296 ++++++++++++++++++++++++------------------- 4 files changed, 174 insertions(+), 133 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1f3ff5c..560f4fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ six==1.10.0 tornado==4.3 update-checker==0.11 wheel==0.24.0 +arrow diff --git a/shreddit.yml.example b/shreddit.yml.example index f23bf0d..96365ad 100644 --- a/shreddit.yml.example +++ b/shreddit.yml.example @@ -68,4 +68,8 @@ save_directory: /tmp # options: [random, dot, "user entered string"] replacement_format: random +# Batch cooldown +# This controls how long (in seconds) to wait between each set of 1000 deletions. 
+batch_cooldown: 10 + # vim: syntax=yaml ts=2 diff --git a/shreddit/app.py b/shreddit/app.py index e00e17c..3ada978 100644 --- a/shreddit/app.py +++ b/shreddit/app.py @@ -2,8 +2,9 @@ """ import argparse import yaml +import logging from shreddit.oauth import oauth_test -from shreddit.shredder import shred +from shreddit.shredder import Shredder def main(): @@ -22,7 +23,8 @@ def main(): if not config: raise Exception("No config options passed!") - shred(config, args.praw) + shredder = Shredder(config, args.praw) + shredder.shred() if __name__ == "__main__": diff --git a/shreddit/shredder.py b/shreddit/shredder.py index 4408e70..242c083 100644 --- a/shreddit/shredder.py +++ b/shreddit/shredder.py @@ -3,8 +3,10 @@ import sys import logging import argparse import json +import arrow import yaml import praw +import time from re import sub from datetime import datetime, timedelta from praw.errors import (InvalidUser, InvalidUserPass, RateLimitExceeded, HTTPException, OAuthAppRequired) @@ -12,137 +14,169 @@ from praw.objects import Comment, Submission from shreddit.util import get_sentence -def shred(config, praw_ini=None): - logging.basicConfig(stream=sys.stdout) - log = logging.getLogger("shreddit") - log.setLevel(level=logging.WARNING) - - if praw_ini: - # PRAW won't panic if the file is invalid, so check first - if not os.path.exists(praw_ini): - print("PRAW configuration file \"{}\" not found.".format(praw_ini)) - return - praw.settings.CONFIG.read(praw_ini) - - save_directory = config.get("save_directory", ".") - - r = praw.Reddit(user_agent="shreddit/4.2") - if save_directory: - r.config.store_json_result = True - - if config.get("verbose", True): - log.setLevel(level=logging.DEBUG) - - try: - # Try to login with OAuth2 - r.refresh_access_information() - log.debug("Logged in with OAuth.") - except (HTTPException, OAuthAppRequired) as e: - log.warning("You should migrate to OAuth2 using get_secret.py before Reddit disables this login method.") +class Shredder(object): 
+ """This class stores state for configuration, API objects, logging, etc. It exposes a shred() method that + application code can call to start it. + """ + def __init__(self, config, praw_ini=None): + logging.basicConfig() + self._logger = logging.getLogger("shreddit") + self._logger.setLevel(level=logging.DEBUG if config.get("verbose", True) else logging.INFO) + self._logger.info(config) + + self._praw_ini = praw_ini + self._username, self._password = config["username"], config["password"] + self._connect(praw_ini, self._username, self._password) + + if config.get("save_directory", "."): + self._r.config.store_json_result = True + + # Read some information from the config and store it + # TODO: Handle this in a much cleaner way + self._whitelist = set(config.get("whitelist", [])) + self._whitelist_ids = set(config.get("whitelist_ids", [])) + self._item = config.get("item", "comments") + self._sort = config.get("sort", "new") + self._whitelist_dist = config.get("whitelist_distinguished", False) + self._whitelist_gild = config.get("whitelist_gilded", False) + self._max_score = config.get("max_score", None) + self._recent_cutoff = arrow.now().replace(hours=-config.get("hours", 24)) + self._nuke_cutoff = arrow.now().replace(hours=-config.get("nuke_hours", 4320)) + self._save = config.get("save_directory", None) + self._trial = config.get("trial_run", False) + self._clear_vote = config.get("clear_vote", False) + self._repl_format = config.get("replacement_format") + self._edit_only = config.get("edit_only", False) + self._batch_cooldown = config.get("batch_cooldown", 10) + if self._save: + if not os.path.exists(self._save): + os.makedirs(self._save) + self._limit = None + self._logger.info("Deleting ALL items before {}".format(self._nuke_cutoff)) + self._logger.info("Deleting items not whitelisted until {}".format(self._recent_cutoff)) + self._logger.info("Ignoring ALL items after {}".format(self._recent_cutoff)) + self._logger.info("Targeting {} sorted by 
{}".format(self._item, self._sort)) + if self._whitelist: + self._logger.info("Keeping items from subreddits {}".format(", ".join(self._whitelist))) + if self._save: + self._logger.info("Saving deleted items to: {}".format(self._save)) + if self._trial: + self._logger.info("Trial run - no deletion will be performed") + + def shred(self): + deleted = self._remove_things(self._get_things()) + if deleted >= 1000: + # This user has more than 1000 items to handle, which angers the gods of the Reddit API. So chill for a + # while and do it again. + self._logger.info("Finished deleting 1000 items. " \ + "Waiting {} seconds and continuing...".format(self._batch_cooldown)) + time.sleep(self._batch_cooldown) + self._connect(None, self._username, self._password) + self.shred() + + def _connect(self, praw_ini, username, password): + self._r = praw.Reddit(user_agent="shreddit/4.2") + if praw_ini: + # PRAW won't panic if the file is invalid, so check first + if not os.path.exists(praw_ini): + print("PRAW configuration file \"{}\" not found.".format(praw_ini)) + return + praw.settings.CONFIG.read(praw_ini) try: + # Try to login with OAuth2 + self._r.refresh_access_information() + self._logger.debug("Logged in with OAuth.") + except (HTTPException, OAuthAppRequired) as e: + self._logger.warning("You should migrate to OAuth2 using get_secret.py before Reddit disables this login " + "method.") try: - r.login(config["username"], config["password"]) - except InvalidUserPass: - r.login() # Supply details on the command line - except InvalidUser as e: - raise InvalidUser("User does not exist.", e) - except InvalidUserPass as e: - raise InvalidUserPass("Specified an incorrect password.", e) - except RateLimitExceeded as e: - raise RateLimitExceeded("You're doing that too much.", e) - - log.info("Logged in as {user}.".format(user=r.user)) - log.debug("Deleting messages before {time}.".format( - time=datetime.now() - timedelta(hours=config["hours"]))) - - whitelist = 
config.get("whitelist", []) - whitelist_ids = config.get("whitelist_ids", []) - - if config.get("whitelist"): - log.debug("Keeping messages from subreddits {subs}".format(subs=", ".join(whitelist))) - - remove_things(r, config, log, get_things(r, config, log)) - - -def get_things(r, config, log, after=None): - limit = None - item = config.get("item", "comments") - sort = config.get("sort", "new") - log.debug("Deleting items: {item}".format(item=item)) - if item == "comments": - return r.user.get_comments(limit=limit, sort=sort) - elif item == "submitted": - return r.user.get_submitted(limit=limit, sort=sort) - elif item == "overview": - return r.user.get_overview(limit=limit, sort=sort) - else: - raise Exception("Your deletion section is wrong") - - -def remove_things(r, config, log, things): - for thing in things: - log.debug("Starting remove function on: {thing}".format(thing=thing)) - # Seems to be in users's timezone. Unclear. - thing_time = datetime.fromtimestamp(thing.created_utc) - # Exclude items from being deleted unless past X hours. - after_time = datetime.now() - timedelta(hours=config.get("hours", 24)) - if thing_time > after_time: - if thing_time + timedelta(hours=config.get("nuke_hours", 4320)) < datetime.utcnow(): - pass - continue - # For edit_only we're assuming that the hours aren't altered. - # This saves time when deleting (you don't edit already edited posts). 
- if config.get("edit_only"): - end_time = after_time - timedelta(hours=config.get("hours", 24)) - if thing_time < end_time: - continue - - if str(thing.subreddit).lower() in config.get("whitelist", []) or thing.id in config.get("whitelist_ids", []): - continue - - if config.get("whitelist_distinguished") and thing.distinguished: - continue - if config.get("whitelist_gilded") and thing.gilded: - continue - if "max_score" in config and thing.score > config["max_score"]: - continue - - if config.get("save_directory"): - save_directory = config["save_directory"] - if not os.path.exists(save_directory): - os.makedirs(save_directory) - with open("%s/%s.json" % (save_directory, thing.id), "w") as fh: - json.dump(thing.json_dict, fh) - - if config.get("trial_run"): # Don't do anything, trial mode! - log.debug("Would have deleted {thing}: '{content}'".format( - thing=thing.id, content=thing)) - continue - - if config.get("clear_vote"): - thing.clear_vote() - - if isinstance(thing, Submission): - log.info("Deleting submission: #{id} {url}".format(id=thing.id, url=thing.url.encode("utf-8"))) - elif isinstance(thing, Comment): - rep_format = config.get("replacement_format") - if rep_format == "random": - replacement_text = get_sentence() - elif rep_format == "dot": - replacement_text = "." 
- else: - replacement_text = rep_format - - msg = '/r/{3}/ #{0} with:\n\t"{1}" to\n\t"{2}"'.format(thing.id, sub(b"\n\r\t", " ", - thing.body[:78].encode("utf-8")), - replacement_text[:78], thing.subreddit) - - if config.get("edit_only"): - log.info("Editing (not removing) {msg}".format(msg=msg)) + try: + self._r.login(username, password) + except InvalidUserPass: + self._r.login() # Supply details on the command line + except InvalidUser as e: + raise InvalidUser("User does not exist.", e) + except InvalidUserPass as e: + raise InvalidUserPass("Specified an incorrect password.", e) + except RateLimitExceeded as e: + raise RateLimitExceeded("You're doing that too much.", e) + self._logger.info("Logged in as {user}.".format(user=self._r.user)) + + def _check_item(self, item): + """Returns True if the item is whitelisted, False otherwise. + """ + if str(item.subreddit).lower() in self._whitelist or item.id in self._whitelist_ids: + return True + if self._whitelist_dist and item.distinguished: + return True + if self._whitelist_gild and item.gilded: + return True + if self._max_score is not None and item.score > self._max_score: + return True + return False + + def _save_item(self, item): + with open(os.path.join(self._save, item.id), "w") as fh: + json.dump(item.json_dict, fh) + + def _remove_submission(self, sub): + self._logger.info("Deleting submission: #{id} {url}".format(id=sub.id, url=sub.url.encode("utf-8"))) + + def _remove_comment(self, comment): + if self._repl_format == "random": + replacement_text = get_sentence() + elif self._repl_format == "dot": + replacement_text = "." 
+ else: + replacement_text = self._repl_format + + short_text = sub(b"\n\r\t", " ", comment.body[:35].encode("utf-8")) + msg = "/r/{}/ #{} ({}) with: {}".format(comment.subreddit, comment.id, short_text, replacement_text) + + if self._edit_only: + self._logger.info("Editing (not removing) {msg}".format(msg=msg)) + else: + self._logger.info("Editing and deleting {msg}".format(msg=msg)) + if not self._trial: + comment.edit(replacement_text) + + def _remove(self, item): + if self._save: + self._save_item(item) + if self._clear_vote: + item.clear_vote() + if isinstance(item, Submission): + self._remove_submission(item) + elif isinstance(item, Comment): + self._remove_comment(item) + if not self._edit_only and not self._trial: + item.delete() + + def _remove_things(self, items): + for idx, item in enumerate(items): + self._logger.debug("Examining: {}".format(item)) + created = arrow.get(item.created_utc) + if created <= self._nuke_cutoff: + self._logger.debug("Item occurs prior to nuke cutoff") + self._remove(item) + elif created > self._recent_cutoff: + self._logger.debug("Skipping due to: too recent") + continue + elif self._check_item(item): + self._logger.debug("Skipping due to: whitelisted") + continue else: - log.info("Editing and deleting {msg}".format(msg=msg)) - - thing.edit(replacement_text) - if not config.get("edit_only"): - thing.delete() - + self._remove(item) + if not idx % 10: + self._logger.info("{} items handled.".format(idx + 1)) + return idx + 1 + + def _get_things(self): + if self._item == "comments": + return self._r.user.get_comments(limit=self._limit, sort=self._sort) + elif self._item == "submitted": + return self._r.user.get_submitted(limit=self._limit, sort=self._sort) + elif self._item == "overview": + return self._r.user.get_overview(limit=self._limit, sort=self._sort) + else: + raise Exception("Your deletion section is wrong") From 7a26e1a0777962266a5c93b7ccfe66d411e2e351 Mon Sep 17 00:00:00 2001 From: Scott Date: Sat, 13 Aug 2016 16:25:18 
-0500 Subject: [PATCH 05/14] Probably a good idea to avoid config commits --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 156795a..de77ea0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +shreddit.yml +praw.ini + # Docs docs/_build/ From e1dcf9516ad8ca970a40af2665c135f16b7c1552 Mon Sep 17 00:00:00 2001 From: Scott Date: Sat, 13 Aug 2016 16:26:21 -0500 Subject: [PATCH 06/14] Config loading is no longer a huge mess --- shreddit/__init__.py | 20 ++++++++++++++++ shreddit/app.py | 13 +++++++---- shreddit/shredder.py | 54 ++++++++++++++++---------------------------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/shreddit/__init__.py b/shreddit/__init__.py index e69de29..c5e676e 100644 --- a/shreddit/__init__.py +++ b/shreddit/__init__.py @@ -0,0 +1,20 @@ +default_config = {"username": None, + "password": None, + "verbose": True, + "save_directory": "/tmp", + "whitelist": [], + "whitelist_ids": [], + "item": "overview", + "sort": "new", + "whitelist_distinguished": True, + "whitelist_gilded": True, + "max_score": 100, + "hours": 24, + "nuke_hours": 4320, + "keep_a_copy": False, + "save_directory": None, + "trial_run": False, + "clear_vote": False, + "replacement_format": "random", + "edit_only": False, + "batch_cooldown": 10} diff --git a/shreddit/app.py b/shreddit/app.py index 3ada978..98e6a47 100644 --- a/shreddit/app.py +++ b/shreddit/app.py @@ -3,6 +3,7 @@ import argparse import yaml import logging +from shreddit import default_config from shreddit.oauth import oauth_test from shreddit.shredder import Shredder @@ -19,11 +20,15 @@ def main(): return with open(args.config or "shreddit.yml") as fh: - config = yaml.safe_load(fh) - if not config: - raise Exception("No config options passed!") + # Not doing a simple update() here because it's preferable to only set attributes that are "whitelisted" as + # configuration options in the form of default values. 
+ user_config = yaml.safe_load(fh) + for option in default_config: + if option in user_config: + default_config[option] = user_config[option] - shredder = Shredder(config, args.praw) + # TODO: Validate config options + shredder = Shredder(default_config, args.praw) shredder.shred() diff --git a/shreddit/shredder.py b/shreddit/shredder.py index 242c083..c2d1c6e 100644 --- a/shreddit/shredder.py +++ b/shreddit/shredder.py @@ -23,44 +23,30 @@ class Shredder(object): self._logger = logging.getLogger("shreddit") self._logger.setLevel(level=logging.DEBUG if config.get("verbose", True) else logging.INFO) self._logger.info(config) + self.__dict__.update({"_{}".format(k): config[k] for k in config}) self._praw_ini = praw_ini - self._username, self._password = config["username"], config["password"] self._connect(praw_ini, self._username, self._password) - if config.get("save_directory", "."): + if self._save_directory: self._r.config.store_json_result = True - # Read some information from the config and store it - # TODO: Handle this in a much cleaner way - self._whitelist = set(config.get("whitelist", [])) - self._whitelist_ids = set(config.get("whitelist_ids", [])) - self._item = config.get("item", "comments") - self._sort = config.get("sort", "new") - self._whitelist_dist = config.get("whitelist_distinguished", False) - self._whitelist_gild = config.get("whitelist_gilded", False) - self._max_score = config.get("max_score", None) - self._recent_cutoff = arrow.now().replace(hours=-config.get("hours", 24)) - self._nuke_cutoff = arrow.now().replace(hours=-config.get("nuke_hours", 4320)) - self._save = config.get("save_directory", None) - self._trial = config.get("trial_run", False) - self._clear_vote = config.get("clear_vote", False) - self._repl_format = config.get("replacement_format") - self._edit_only = config.get("edit_only", False) - self._batch_cooldown = config.get("batch_cooldown", 10) - if self._save: - if not os.path.exists(self._save): - os.makedirs(self._save) + 
self._recent_cutoff = arrow.now().replace(hours=-self._hours) + self._nuke_cutoff = arrow.now().replace(hours=-self._nuke_hours) + if self._save_directory: + if not os.path.exists(self._save_directory): + os.makedirs(self._save_directory) self._limit = None + self._logger.info("Deleting ALL items before {}".format(self._nuke_cutoff)) self._logger.info("Deleting items not whitelisted until {}".format(self._recent_cutoff)) self._logger.info("Ignoring ALL items after {}".format(self._recent_cutoff)) self._logger.info("Targeting {} sorted by {}".format(self._item, self._sort)) if self._whitelist: self._logger.info("Keeping items from subreddits {}".format(", ".join(self._whitelist))) - if self._save: - self._logger.info("Saving deleted items to: {}".format(self._save)) - if self._trial: + if self._save_directory: + self._logger.info("Saving deleted items to: {}".format(self._save_directory)) + if self._trial_run: self._logger.info("Trial run - no deletion will be performed") def shred(self): @@ -107,28 +93,28 @@ class Shredder(object): """ if str(item.subreddit).lower() in self._whitelist or item.id in self._whitelist_ids: return True - if self._whitelist_dist and item.distinguished: + if self._whitelist_distinguished and item.distinguished: return True - if self._whitelist_gild and item.gilded: + if self._whitelist_gilded and item.gilded: return True if self._max_score is not None and item.score > self._max_score: return True return False def _save_item(self, item): - with open(os.path.join(self._save, item.id), "w") as fh: + with open(os.path.join(self._save_directory, item.id), "w") as fh: json.dump(item.json_dict, fh) def _remove_submission(self, sub): self._logger.info("Deleting submission: #{id} {url}".format(id=sub.id, url=sub.url.encode("utf-8"))) def _remove_comment(self, comment): - if self._repl_format == "random": + if self._replacement_format == "random": replacement_text = get_sentence() - elif self._repl_format == "dot": + elif self._replacement_format 
== "dot": replacement_text = "." else: - replacement_text = self._repl_format + replacement_text = self._replacement_format short_text = sub(b"\n\r\t", " ", comment.body[:35].encode("utf-8")) msg = "/r/{}/ #{} ({}) with: {}".format(comment.subreddit, comment.id, short_text, replacement_text) @@ -137,11 +123,11 @@ class Shredder(object): self._logger.info("Editing (not removing) {msg}".format(msg=msg)) else: self._logger.info("Editing and deleting {msg}".format(msg=msg)) - if not self._trial: + if not self._trial_run: comment.edit(replacement_text) def _remove(self, item): - if self._save: + if self._save_directory: self._save_item(item) if self._clear_vote: item.clear_vote() @@ -149,7 +135,7 @@ class Shredder(object): self._remove_submission(item) elif isinstance(item, Comment): self._remove_comment(item) - if not self._edit_only and not self._trial: + if not self._edit_only and not self._trial_run: item.delete() def _remove_things(self, items): From 4d91de901bc220d56f0056abcbcff7e18a69c9f0 Mon Sep 17 00:00:00 2001 From: Scott Date: Sun, 14 Aug 2016 21:37:47 -0500 Subject: [PATCH 07/14] Bug fixes and rate limit hand holding for PRAW --- shreddit/shredder.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/shreddit/shredder.py b/shreddit/shredder.py index c2d1c6e..912e278 100644 --- a/shreddit/shredder.py +++ b/shreddit/shredder.py @@ -22,7 +22,6 @@ class Shredder(object): logging.basicConfig() self._logger = logging.getLogger("shreddit") self._logger.setLevel(level=logging.DEBUG if config.get("verbose", True) else logging.INFO) - self._logger.info(config) self.__dict__.update({"_{}".format(k): config[k] for k in config}) self._praw_ini = praw_ini @@ -37,6 +36,7 @@ class Shredder(object): if not os.path.exists(self._save_directory): os.makedirs(self._save_directory) self._limit = None + self._api_calls = [] self._logger.info("Deleting ALL items before {}".format(self._nuke_cutoff)) self._logger.info("Deleting items 
not whitelisted until {}".format(self._recent_cutoff)) @@ -44,18 +44,18 @@ class Shredder(object): self._logger.info("Targeting {} sorted by {}".format(self._item, self._sort)) if self._whitelist: self._logger.info("Keeping items from subreddits {}".format(", ".join(self._whitelist))) - if self._save_directory: + if self._keep_a_copy and self._save_directory: self._logger.info("Saving deleted items to: {}".format(self._save_directory)) if self._trial_run: self._logger.info("Trial run - no deletion will be performed") def shred(self): deleted = self._remove_things(self._get_things()) + self._logger.info("Finished deleting {} items. ".format(deleted)) if deleted >= 1000: # This user has more than 1000 items to handle, which angers the gods of the Reddit API. So chill for a # while and do it again. - self._logger.info("Finished deleting 1000 items. " \ - "Waiting {} seconds and continuing...".format(self._batch_cooldown)) + self._logger.info("Waiting {} seconds and continuing...".format(self._batch_cooldown)) time.sleep(self._batch_cooldown) self._connect(None, self._username, self._password) self.shred() @@ -120,27 +120,38 @@ class Shredder(object): msg = "/r/{}/ #{} ({}) with: {}".format(comment.subreddit, comment.id, short_text, replacement_text) if self._edit_only: - self._logger.info("Editing (not removing) {msg}".format(msg=msg)) + self._logger.debug("Editing (not removing) {msg}".format(msg=msg)) else: - self._logger.info("Editing and deleting {msg}".format(msg=msg)) + self._logger.debug("Editing and deleting {msg}".format(msg=msg)) if not self._trial_run: comment.edit(replacement_text) + self._api_calls.append(int(time.time())) def _remove(self, item): - if self._save_directory: + if self._keep_a_copy and self._save_directory: self._save_item(item) if self._clear_vote: item.clear_vote() + self._api_calls.append(int(time.time())) if isinstance(item, Submission): self._remove_submission(item) elif isinstance(item, Comment): self._remove_comment(item) if not 
self._edit_only and not self._trial_run: item.delete() + self._api_calls.append(int(time.time())) def _remove_things(self, items): - for idx, item in enumerate(items): - self._logger.debug("Examining: {}".format(item)) + self._logger.info("Loading items to delete...") + to_delete = [item for item in items] + self._logger.info("Done. Starting on batch of {} items...".format(len(to_delete))) + for idx, item in enumerate(to_delete): + minute_ago = arrow.now().replace(minutes=-1).timestamp + self._api_calls = [api_call for api_call in self._api_calls if api_call >= minute_ago] + if len(self._api_calls) >= 60: + self._logger.info("Sleeping 10 seconds to wait out API cooldown...") + time.sleep(10) + self._logger.debug("Examining item {}: {}".format(idx + 1, item)) created = arrow.get(item.created_utc) if created <= self._nuke_cutoff: self._logger.debug("Item occurs prior to nuke cutoff") @@ -153,8 +164,6 @@ class Shredder(object): continue else: self._remove(item) - if not idx % 10: - self._logger.info("{} items handled.".format(idx + 1)) return idx + 1 def _get_things(self): From 1dc6f3216468bf576a2d1142a68963dcc111cc98 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:21:06 -0600 Subject: [PATCH 08/14] Limited README to 80 chars --- README.md | 112 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index a8fd19a..3724142 100644 --- a/README.md +++ b/README.md @@ -4,49 +4,81 @@ Shreddit YAML Upgrade ----------- -**Note! Important! The latest version uses YAML format over the old simpleconfigparser! -This means you'll have to migrate your config file to yaml!** +**Note! Important! The latest version uses YAML format over the old +simpleconfigparser! This means you'll have to migrate your config file to +yaml!** User Login deprecation -------------------- -Reddit intends to disable username-password based authentication to access its APIs in the near future. 
You can specify your username and password in the `shreddit.yml` or the `praw.ini` to make it work **FOR NOW**. But consider looking at the [OAuth2 instructions](#oauth2-instructions) if you intend to use this program in the future. +Reddit intends to disable username-password based authentication to access its +APIs in the near future. You can specify your username and password in the +`shreddit.yml` or the `praw.ini` to make it work **FOR NOW**. But consider +looking at the [OAuth2 instructions](#oauth2-instructions) if you intend to use +this program in the future. Description ----------- -Shreddit is a Python command line program which will take a user's post history on the website [Reddit](http://reddit.com) and after having the user edit a config file will systematically go through the user's history deleting one post/submission at a time until only those whitelisted remain. +Shreddit is a Python command line program which will take a user's post history +on the website [Reddit](http://reddit.com) and after having the user edit a +config file will systematically go through the user's history deleting one +post/submission at a time until only those whitelisted remain. -**Note:** When it became known that post edits were *not* saved but post deletions *were* saved, code was added to edit your post prior to deletion. In fact you can actually turn off deletion all together and just have lorem ipsum (or a message about Shreddit) but this will increase how long it takes the script to run as it will be going over all of your messages every run! +**Note:** When it became known that post edits were *not* saved but post +deletions *were* saved, code was added to edit your post prior to deletion. In +fact you can actually turn off deletion all together and just have lorem ipsum +(or a message about Shreddit) but this will increase how long it takes the +script to run as it will be going over all of your messages every run! 
-Basically it lets you maintain your normal reddit account while having your history scrubbed after a certain amount of time. +Basically it lets you maintain your normal reddit account while having your +history scrubbed after a certain amount of time. Installation ([Click here for Windows instructions](#for-windows-users)) ----------- -The way I personally install Shreddit is via a handy tool called `virtualenv` which may come with your package manager or may be a part of your Python package in your distro (have a search if you can't find it). Both Python 2 and 3 are supported. +The way I personally install Shreddit is via a handy tool called `virtualenv` +which may come with your package manager or may be a part of your Python package +in your distro (have a search if you can't find it). Both Python 2 and 3 are +supported. 1. Clone the repository -2. Enter the repository's directory and run `virtualenv .` (this creates a virtual environment) -3. Run the following command, you must run this *every time* you wish to run the script `source ./bin/activate`. -4. This installs the required modules locally to your Shreddit virtual environment `pip install -r requirements.txt`. -5. Copy `shreddit.yml.example` to something else and edit it to your liking. +2. Enter the repository's directory and run `virtualenv .` (this creates a + virtual environment) +3. Run the following command, you must run this *every time* you wish to run + the script `source ./bin/activate`. +4. This installs the required modules locally to your Shreddit virtual + environment `pip install -r requirements.txt`. +5. Copy `shreddit.yml.example` to something else and edit it to your liking. - Make sure you specify your username and password in the file. - - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to use username-password based authentication. + - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to + use username-password based authentication. 6. 
Run `python shreddit.py -c YOUR_CONFIG_FILE.yml`. -Alternatively try to run `./install.sh` and it will attempt to do it all for you. +Alternatively try to run `./install.sh` and it will attempt to do it all for +you. Notes: -- The script *does* work with Python versions 2 and 3 but people often get in a mess with pip versions, python versions and virtulenv versions. Make sure that your Python/pip/virtualenv are all the same version. If you ran the above code it *should* work as stated. -- If in doubt try running `python3` instead of just `python` - the same goes for `pip3` and `virtualenv3` (exchange for 2 if you wish, though I advise using version 2). -- It's useful to have it run as an event, you can set this up as you like but I suggest `cron` via `crontab -e` and adding a line such as `@hourly cd $HOME/Shreddit && source bin/activate && python shreddit.py -c YOUR_CONFIG_FILE.cfg`. See below for more. -- Adding your password to the praw.ini and adding the additional output line can provide extra debugging help. +- The script *does* work with Python versions 2 and 3 but people often get in a + mess with pip versions, python versions and virtulenv versions. Make sure + that your Python/pip/virtualenv are all the same version. If you ran the above + code it *should* work as stated. +- If in doubt try running `python3` instead of just `python` - the same goes for + `pip3` and `virtualenv3` (exchange for 2 if you wish, though I advise using + version 2). +- It's useful to have it run as an event, you can set this up as you like but I + suggest `cron` via `crontab -e` and adding a line such as + `@hourly cd $HOME/Shreddit && source bin/activate && + python shreddit.py -c YOUR_CONFIG_FILE.cfg`. + See below for more. +- Adding your password to the praw.ini and adding the additional output line + can provide extra debugging help. Cron examples ----------- -- Run crontab -e to edit your cron file. 
If you have access to something like vixie-cron then each user can have their own personal cron job! +- Run crontab -e to edit your cron file. If you have access to something like + vixie-cron then each user can have their own personal cron job! - Run every hour on the hour `0 * * * * cd /home/$USER/Shreddit/ && source bin/activate && ./shreddit.py` @@ -57,19 +89,28 @@ Cron examples - Run once a month on the 1st of the month `0 0 1 * * cd /home/$USER/Shreddit/ && source bin/activate && ./shreddit.py` -If for some reason you get an error saying `source: not found` in your logs, change `source` to `.`. The source command would become `. bin/activate`. This is caused by your cron jobs running in shell, not bash, and the source command is a dot. +If for some reason you get an error saying `source: not found` in your logs, +change `source` to `.`. The source command would become `. bin/activate`. This +is caused by your cron jobs running in shell, not bash, and the source command +is a dot. For Windows users ----------------- -1. Make sure you have python installed. [Click here for the Python download page](https://www.python.org/downloads/). +1. Make sure you have python installed. + [Click here for the Python download page](https://www.python.org/downloads/). - **Note:** Install either `python 2.x` or `python 3.x`, not both. -2. Clone the repository (or download and extract the [zip file](https://github.com/dragsubil/Shreddit/archive/master.zip)) +2. Clone the repository (or download and extract the + [zip file](https://github.com/dragsubil/Shreddit/archive/master.zip)) 3. Open command prompt and type `cd ` -4. Type `pip install -r requirements.txt` in the open command prompt window to download and install the required additional modules. -5. Open the `shreddit.yml.example` and edit it to your liking and rename the file to `your-config-filename.yml`. +4. 
Type `pip install -r requirements.txt` in the open command prompt window to + download and install the required additional modules. +5. Open the `shreddit.yml.example` and edit it to your liking and rename the + file to `your-config-filename.yml`. - Make sure you specify your username and password in the file. - - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to use username-password based authentication. -6. Type `python shreddit.py -c your-config-filename.yml` in the open command prompt window to run the program. + - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to + use username-password based authentication. +6. Type `python shreddit.py -c your-config-filename.yml` in the open command + prompt window to run the program. OAuth2 Instructions ------------------- @@ -77,8 +118,10 @@ OAuth2 Instructions 2. Click on 'Create app'. - Fill in the name and select the 'script' option - Under "redirect uri" put http://127.0.0.1:65010 -3. Copy from or rename `praw.ini.example` to `praw.ini` and open it. Enter the values from the Reddit page. - - oauth\_client\_id = { The ID displayed next to the icon thingy (under "personal use script") } +3. Copy from or rename `praw.ini.example` to `praw.ini` and open it. Enter the + values from the Reddit page. + - oauth\_client\_id = { The ID displayed next to the icon thingy (under + "personal use script") } - oauth\_client\_secret = { The secret } - oauth\_redirect\_uri = http://127.0.0.1:65010 - Save the file. @@ -89,6 +132,15 @@ OAuth2 Instructions Caveats ----------- -- Only your previous 1,000 comments are accessible on Reddit. So good luck deleting the others. There may be ways to hack around this via iterating using sorting by top/best/controversial/new but for now I am unsure. I believe it best to set the script settings and run it as a cron job and then it won't be a problem unless you post *a lot*. 
I do, however, think that it may be a caching issue and perhaps after a certain time period your post history would, once again, become available as a block of 1,000. So you needn't despair yet! - -- We are relying on Reddit admin words that they do not store edits, deleted posts are still stored in the database they are merely inaccessible to the public. +- Only your previous 1,000 comments are accessible on Reddit. So good luck + deleting the others. There may be ways to hack around this via iterating + using sorting by top/best/controversial/new but for now I am unsure. I + believe it best to set the script settings and run it as a cron job and then + it won't be a problem unless you post *a lot*. I do, however, think that it + may be a caching issue and perhaps after a certain time period your post + history would, once again, become available as a block of 1,000. So you + needn't despair yet! + +- We are relying on Reddit admin words that they do not store edits, deleted + posts are still stored in the database they are merely inaccessible to the + public. From fb6e326bc99fe7f884b8cd7afaa085956b3b5c64 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:31:03 -0600 Subject: [PATCH 09/14] Updating README with packaged version usage --- README.md | 56 +++++++++++++++++++++---------------------------- shreddit/app.py | 2 +- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 3724142..7063423 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ Shreddit -======= +======== YAML Upgrade ------------ +------------ **Note! Important! The latest version uses YAML format over the old simpleconfigparser! This means you'll have to migrate your config file to yaml!** User Login deprecation --------------------- +---------------------- Reddit intends to disable username-password based authentication to access its APIs in the near future. 
You can specify your username and password in the @@ -21,9 +21,9 @@ Description ----------- Shreddit is a Python command line program which will take a user's post history -on the website [Reddit](http://reddit.com) and after having the user edit a -config file will systematically go through the user's history deleting one -post/submission at a time until only those whitelisted remain. +on the website [Reddit](http://reddit.com), and will systematically go through +the user's history deleting one post/submission at a time until only those +whitelisted remain. **Note:** When it became known that post edits were *not* saved but post deletions *were* saved, code was added to edit your post prior to deletion. In @@ -31,11 +31,11 @@ fact you can actually turn off deletion all together and just have lorem ipsum (or a message about Shreddit) but this will increase how long it takes the script to run as it will be going over all of your messages every run! -Basically it lets you maintain your normal reddit account while having your -history scrubbed after a certain amount of time. +It allows you to maintain your normal reddit account while having your history +scrubbed after a certain amount of time. Installation ([Click here for Windows instructions](#for-windows-users)) ------------ +------------------------------------------------------------------------ The way I personally install Shreddit is via a handy tool called `virtualenv` which may come with your package manager or may be a part of your Python package in your distro (have a search if you can't find it). Both Python 2 and 3 are @@ -52,11 +52,6 @@ supported. - Make sure you specify your username and password in the file. - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to use username-password based authentication. -6. Run `python shreddit.py -c YOUR_CONFIG_FILE.yml`. - -Alternatively try to run `./install.sh` and it will attempt to do it all for -you. 
- Notes: @@ -69,15 +64,15 @@ Notes: version 2). - It's useful to have it run as an event, you can set this up as you like but I suggest `cron` via `crontab -e` and adding a line such as - `@hourly cd $HOME/Shreddit && source bin/activate && - python shreddit.py -c YOUR_CONFIG_FILE.cfg`. - See below for more. + `@hourly cd $HOME/Shreddit && source bin/activate && shreddit` See below for + more. - Adding your password to the praw.ini and adding the additional output line can provide extra debugging help. Cron examples ------------ -- Run crontab -e to edit your cron file. If you have access to something like +------------- + +- Run `crontab -e` to edit your cron file. If you have access to something like vixie-cron then each user can have their own personal cron job! - Run every hour on the hour @@ -96,7 +91,8 @@ is a dot. For Windows users ----------------- -1. Make sure you have python installed. + +1. Make sure you have Python installed. [Click here for the Python download page](https://www.python.org/downloads/). - **Note:** Install either `python 2.x` or `python 3.x`, not both. 2. Clone the repository (or download and extract the @@ -105,15 +101,15 @@ For Windows users 4. Type `pip install -r requirements.txt` in the open command prompt window to download and install the required additional modules. 5. Open the `shreddit.yml.example` and edit it to your liking and rename the - file to `your-config-filename.yml`. + file to `shreddit.yml`. - Make sure you specify your username and password in the file. - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to use username-password based authentication. -6. Type `python shreddit.py -c your-config-filename.yml` in the open command - prompt window to run the program. +6. Type `shreddit` in the open command prompt window to run the program. OAuth2 Instructions ------------------- + 1. Visit: https://www.reddit.com/prefs/apps 2. Click on 'Create app'. 
- Fill in the name and select the 'script' option @@ -131,15 +127,11 @@ OAuth2 Instructions Caveats ------------ -- Only your previous 1,000 comments are accessible on Reddit. So good luck - deleting the others. There may be ways to hack around this via iterating - using sorting by top/best/controversial/new but for now I am unsure. I - believe it best to set the script settings and run it as a cron job and then - it won't be a problem unless you post *a lot*. I do, however, think that it - may be a caching issue and perhaps after a certain time period your post - history would, once again, become available as a block of 1,000. So you - needn't despair yet! +------- + +- Certain limitations in the Reddit API and the PRAW library make it difficult + to delete more than 1,000 comments. While deleting >1000 comments is planned, + it is necessary right now to rerun the program until they are all deleted. - We are relying on Reddit admin words that they do not store edits, deleted posts are still stored in the database they are merely inaccessible to the diff --git a/shreddit/app.py b/shreddit/app.py index 98e6a47..0eeaca8 100644 --- a/shreddit/app.py +++ b/shreddit/app.py @@ -10,7 +10,7 @@ from shreddit.shredder import Shredder def main(): parser = argparse.ArgumentParser(description="Command-line frontend to the shreddit library.") - parser.add_argument("-c", "--config", help="Config file to use instead of the default shreddit.cfg") + parser.add_argument("-c", "--config", help="Config file to use instead of the default shreddit.yml") parser.add_argument("-p", "--praw", help="PRAW config (if not ./praw.ini)") parser.add_argument("-t", "--test-oauth", help="Perform OAuth test and exit", action="store_true") args = parser.parse_args() From 325d2d680bd3d0e5d026793ab9eb3733dd185cd5 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:43:35 -0600 Subject: [PATCH 10/14] Tweaked some installation things to make life easier on Windows --- README.md | 14 
++++++-------- requirements.txt | 12 ++++++------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 7063423..edb97dd 100644 --- a/README.md +++ b/README.md @@ -95,17 +95,15 @@ For Windows users 1. Make sure you have Python installed. [Click here for the Python download page](https://www.python.org/downloads/). - **Note:** Install either `python 2.x` or `python 3.x`, not both. -2. Clone the repository (or download and extract the - [zip file](https://github.com/dragsubil/Shreddit/archive/master.zip)) -3. Open command prompt and type `cd ` -4. Type `pip install -r requirements.txt` in the open command prompt window to - download and install the required additional modules. -5. Open the `shreddit.yml.example` and edit it to your liking and rename the +2. Clone the repository (or download and extract the zip file) +3. Open command prompt to the folder with the zip file (Shreddit-master.zip), + and type `pip install -U Shreddit-master.zip` +4. Open `shreddit.yml.example` in the zip edit it to your liking, and rename the file to `shreddit.yml`. - - Make sure you specify your username and password in the file. + - Make sure you specify credentials in the file. - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to use username-password based authentication. -6. Type `shreddit` in the open command prompt window to run the program. +5. Type `shreddit` in the open command prompt window to run the program. 
OAuth2 Instructions ------------------- diff --git a/requirements.txt b/requirements.txt index 560f4fb..b45e408 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ +arrow +decorator +praw==3.6.0 +PyYAML +requests +six backports-abc==0.4 -decorator==4.0.6 -praw==3.5.0 -PyYAML==3.11 -requests==2.8.1 -six==1.10.0 tornado==4.3 update-checker==0.11 wheel==0.24.0 -arrow From 7f794828896a6862be5bfc21cb9803ec1647a424 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:45:06 -0600 Subject: [PATCH 11/14] Removing this for the merge --- Makefile | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index 376dc78..0000000 --- a/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -help: - @echo "build - Build package" - @echo "install - Install package to local system" - @echo "clean - Clean built artifacts" - @echo "test - Run test suite with coverage" - -build: - python setup.py build - python setup.py bdist_wheel - -install: - pip install dist/*.whl --upgrade --force-reinstall --no-deps - python setup.py clean - -clean: - find . 
-type f -name "*.pyc" -delete - rm -rf ./build ./dist ./*.egg-info From 7667beef14beb34b188fd639a881ea878e468d7a Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:50:07 -0600 Subject: [PATCH 12/14] Making setup.py a bit more correct --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 724d001..c6b8ee3 100644 --- a/setup.py +++ b/setup.py @@ -23,11 +23,12 @@ setup( url="https://github.com/scott-hand/Shreddit", author="Scott Hand", author_email="scott@vkgfx.com", - classifiers=["Development Status :: 3 - Alpha", + classifiers=["Development Status :: 4 - Beta", "Intended Audience :: End Users/Desktop", - "Programming Language :: Python :: 2"], + "Programming Language :: Python"], packages=["shreddit"], - install_requires=requirements, + install_requires=["arrow", "backports-abc", "decorator", "praw", "PyYAML", + "requests", "six", "tornado", "update-checker", "wheel"], entry_points={ "console_scripts": [ "shreddit=shreddit.app:main" From 931116b395eb73074dc686712a18e54869bafc95 Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:55:49 -0600 Subject: [PATCH 13/14] Streamlined non-Windows usage info --- README.md | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index edb97dd..7a870bd 100644 --- a/README.md +++ b/README.md @@ -36,32 +36,15 @@ scrubbed after a certain amount of time. Installation ([Click here for Windows instructions](#for-windows-users)) ------------------------------------------------------------------------ -The way I personally install Shreddit is via a handy tool called `virtualenv` -which may come with your package manager or may be a part of your Python package -in your distro (have a search if you can't find it). Both Python 2 and 3 are -supported. - 1. Clone the repository -2. Enter the repository's directory and run `virtualenv .` (this creates a +2. Run `python setup.py install`. 
Usually this is run in the context of a + virtualenv or with administrative permissions for system-wide installation. virtual environment) -3. Run the following command, you must run this *every time* you wish to run - the script `source ./bin/activate`. -4. This installs the required modules locally to your Shreddit virtual - environment `pip install -r requirements.txt`. -5. Copy `shreddit.yml.example` to something else and edit it to your liking. - - Make sure you specify your username and password in the file. +3. Copy `shreddit.yml.example` to `shreddit.yml` and edit it to your liking. + - Make sure you specify your credentials in the file. - See the [OAuth2 instructions](#oauth2-instructions) if you don't want to use username-password based authentication. -Notes: - -- The script *does* work with Python versions 2 and 3 but people often get in a - mess with pip versions, python versions and virtulenv versions. Make sure - that your Python/pip/virtualenv are all the same version. If you ran the above - code it *should* work as stated. -- If in doubt try running `python3` instead of just `python` - the same goes for - `pip3` and `virtualenv3` (exchange for 2 if you wish, though I advise using - version 2). - It's useful to have it run as an event, you can set this up as you like but I suggest `cron` via `crontab -e` and adding a line such as `@hourly cd $HOME/Shreddit && source bin/activate && shreddit` See below for @@ -76,18 +59,19 @@ Cron examples vixie-cron then each user can have their own personal cron job! 
- Run every hour on the hour - `0 * * * * cd /home/$USER/Shreddit/ && source bin/activate && ./shreddit.py` + `0 * * * * shreddit -c <full path to shreddit.yml>` - Run at 3am every morning - `0 3 * * * cd /home/$USER/Shreddit/ && source bin/activate && ./shreddit.py` + `0 3 * * * shreddit -c <full path to shreddit.yml>` - Run once a month on the 1st of the month - `0 0 1 * * cd /home/$USER/Shreddit/ && source bin/activate && ./shreddit.py` + `0 0 1 * * shreddit -c <full path to shreddit.yml>` -If for some reason you get an error saying `source: not found` in your logs, -change `source` to `.`. The source command would become `. bin/activate`. This -is caused by your cron jobs running in shell, not bash, and the source command -is a dot. +If virtualenv was used, be sure to add +`source /full/path/to/venv/bin/activate &&` +before the command. For example: +`0 * * * * source /full/path/to/venv/bin/activate && +shreddit -c <full path to shreddit.yml>` For Windows users ----------------- From b5525f3289a0d4409d1dc436eb148577a77f2afb Mon Sep 17 00:00:00 2001 From: Scott Date: Mon, 21 Nov 2016 21:56:31 -0600 Subject: [PATCH 14/14] Fixing typos --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7a870bd..fbb8d5b 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ Cron examples If virtualenv was used, be sure to add `source /full/path/to/venv/bin/activate &&` before the command. For example: + `0 * * * * source /full/path/to/venv/bin/activate && shreddit -c <full path to shreddit.yml>`