Split up shredder into a class. Added support for >1000 items

pull/69/head
Scott 8 years ago
parent 07d3c3df0c
commit 83fd3510cd

@ -7,3 +7,4 @@ six==1.10.0
tornado==4.3 tornado==4.3
update-checker==0.11 update-checker==0.11
wheel==0.24.0 wheel==0.24.0
arrow

@ -68,4 +68,8 @@ save_directory: /tmp
# options: [random, dot, "user entered string"] # options: [random, dot, "user entered string"]
replacement_format: random replacement_format: random
# Batch cooldown
# This controls how long (in seconds) to wait between each set of 1000 deletions.
batch_cooldown: 10
# vim: syntax=yaml ts=2 # vim: syntax=yaml ts=2

@ -2,8 +2,9 @@
""" """
import argparse import argparse
import yaml import yaml
import logging
from shreddit.oauth import oauth_test from shreddit.oauth import oauth_test
from shreddit.shredder import shred from shreddit.shredder import Shredder
def main(): def main():
@ -22,7 +23,8 @@ def main():
if not config: if not config:
raise Exception("No config options passed!") raise Exception("No config options passed!")
shred(config, args.praw) shredder = Shredder(config, args.praw)
shredder.shred()
if __name__ == "__main__": if __name__ == "__main__":

@ -3,8 +3,10 @@ import sys
import logging import logging
import argparse import argparse
import json import json
import arrow
import yaml import yaml
import praw import praw
import time
from re import sub from re import sub
from datetime import datetime, timedelta from datetime import datetime, timedelta
from praw.errors import (InvalidUser, InvalidUserPass, RateLimitExceeded, HTTPException, OAuthAppRequired) from praw.errors import (InvalidUser, InvalidUserPass, RateLimitExceeded, HTTPException, OAuthAppRequired)
@ -12,137 +14,169 @@ from praw.objects import Comment, Submission
from shreddit.util import get_sentence from shreddit.util import get_sentence
def shred(config, praw_ini=None): class Shredder(object):
logging.basicConfig(stream=sys.stdout) """This class stores state for configuration, API objects, logging, etc. It exposes a shred() method that
log = logging.getLogger("shreddit") application code can call to start it.
log.setLevel(level=logging.WARNING) """
def __init__(self, config, praw_ini=None):
    """Configure logging, log in to Reddit and cache the run settings.

    :param config: Dict-like object of options. ``username`` and ``password`` are required; every other key
        falls back to the default given below.
    :param praw_ini: Optional path to a PRAW configuration file; it is passed straight to ``_connect``.
    :raises KeyError: If ``username`` or ``password`` is missing from ``config``.
    """
    logging.basicConfig()
    self._logger = logging.getLogger("shreddit")
    self._logger.setLevel(level=logging.DEBUG if config.get("verbose", True) else logging.INFO)
    # Log the effective configuration, but never write the account password to the log output.
    loggable_config = dict(config)
    if "password" in loggable_config:
        loggable_config["password"] = "********"
    self._logger.info(loggable_config)

    self._praw_ini = praw_ini
    self._username, self._password = config["username"], config["password"]
    self._connect(praw_ini, self._username, self._password)

    # Read some information from the config and store it
    # TODO: Handle this in a much cleaner way
    self._save = config.get("save_directory", None)
    if self._save:
        # The raw JSON of each item is only needed when items are written to disk before removal.
        self._r.config.store_json_result = True
    # Subreddit names are lowercased before being compared against the whitelist, so the whitelist entries
    # are normalised the same way; otherwise a mixed-case entry could never match.
    self._whitelist = set(name.lower() for name in (config.get("whitelist") or []))
    self._whitelist_ids = set(config.get("whitelist_ids") or [])
    self._item = config.get("item", "comments")
    self._sort = config.get("sort", "new")
    self._whitelist_dist = config.get("whitelist_distinguished", False)
    self._whitelist_gild = config.get("whitelist_gilded", False)
    self._max_score = config.get("max_score", None)
    # shift() is Arrow's relative-offset API; passing plural units to replace() is rejected by later Arrow
    # releases. Both cutoffs are derived from the same instant so they stay consistent with each other.
    now = arrow.now()
    self._recent_cutoff = now.shift(hours=-config.get("hours", 24))
    self._nuke_cutoff = now.shift(hours=-config.get("nuke_hours", 4320))
    self._trial = config.get("trial_run", False)
    self._clear_vote = config.get("clear_vote", False)
    self._repl_format = config.get("replacement_format")
    self._edit_only = config.get("edit_only", False)
    self._batch_cooldown = config.get("batch_cooldown", 10)
    if self._save and not os.path.exists(self._save):
        os.makedirs(self._save)
    # None is passed as the ``limit`` argument of the listing calls.
    self._limit = None

    self._logger.info("Deleting ALL items before {}".format(self._nuke_cutoff))
    self._logger.info("Deleting items not whitelisted until {}".format(self._recent_cutoff))
    self._logger.info("Ignoring ALL items after {}".format(self._recent_cutoff))
    self._logger.info("Targeting {} sorted by {}".format(self._item, self._sort))
    if self._whitelist:
        self._logger.info("Keeping items from subreddits {}".format(", ".join(self._whitelist)))
    if self._save:
        self._logger.info("Saving deleted items to: {}".format(self._save))
    if self._trial:
        self._logger.info("Trial run - no deletion will be performed")
def shred(self):
    """Process the account's items in batches until a batch comes back with fewer than 1000 items.

    Each pass fetches a listing with ``_get_things`` and hands it to ``_remove_things``. When that call
    reports 1000 or more items, the method waits ``batch_cooldown`` seconds, logs in again and repeats.
    A loop is used instead of recursion so that stack depth does not grow with the number of batches.
    """
    while True:
        handled = self._remove_things(self._get_things())
        if handled < 1000:
            break
        # This user has more than 1000 items to handle, which angers the gods of the Reddit API. So chill for
        # a while and do it again.
        # NOTE(review): _remove_things appears to count items examined rather than items removed, so a listing
        # of 1000 items that are all skipped would repeat indefinitely - confirm and guard if needed.
        self._logger.info("Finished deleting 1000 items. "
                          "Waiting {} seconds and continuing...".format(self._batch_cooldown))
        time.sleep(self._batch_cooldown)
        self._connect(None, self._username, self._password)
def _connect(self, praw_ini, username, password):
self._r = praw.Reddit(user_agent="shreddit/4.2")
if praw_ini: if praw_ini:
# PRAW won't panic if the file is invalid, so check first # PRAW won't panic if the file is invalid, so check first
if not os.path.exists(praw_ini): if not os.path.exists(praw_ini):
print("PRAW configuration file \"{}\" not found.".format(praw_ini)) print("PRAW configuration file \"{}\" not found.".format(praw_ini))
return return
praw.settings.CONFIG.read(praw_ini) praw.settings.CONFIG.read(praw_ini)
save_directory = config.get("save_directory", ".")
r = praw.Reddit(user_agent="shreddit/4.2")
if save_directory:
r.config.store_json_result = True
if config.get("verbose", True):
log.setLevel(level=logging.DEBUG)
try: try:
# Try to login with OAuth2 # Try to login with OAuth2
r.refresh_access_information() self._r.refresh_access_information()
log.debug("Logged in with OAuth.") self._logger.debug("Logged in with OAuth.")
except (HTTPException, OAuthAppRequired) as e: except (HTTPException, OAuthAppRequired) as e:
log.warning("You should migrate to OAuth2 using get_secret.py before Reddit disables this login method.") self._logger.warning("You should migrate to OAuth2 using get_secret.py before Reddit disables this login "
"method.")
try: try:
try: try:
r.login(config["username"], config["password"]) self._r.login(username, password)
except InvalidUserPass: except InvalidUserPass:
r.login() # Supply details on the command line self._r.login() # Supply details on the command line
except InvalidUser as e: except InvalidUser as e:
raise InvalidUser("User does not exist.", e) raise InvalidUser("User does not exist.", e)
except InvalidUserPass as e: except InvalidUserPass as e:
raise InvalidUserPass("Specified an incorrect password.", e) raise InvalidUserPass("Specified an incorrect password.", e)
except RateLimitExceeded as e: except RateLimitExceeded as e:
raise RateLimitExceeded("You're doing that too much.", e) raise RateLimitExceeded("You're doing that too much.", e)
self._logger.info("Logged in as {user}.".format(user=self._r.user))
log.info("Logged in as {user}.".format(user=r.user))
log.debug("Deleting messages before {time}.".format( def _check_item(self, item):
time=datetime.now() - timedelta(hours=config["hours"]))) """Returns True if the item is whitelisted, False otherwise.
"""
whitelist = config.get("whitelist", []) if str(item.subreddit).lower() in self._whitelist or item.id in self._whitelist_ids:
whitelist_ids = config.get("whitelist_ids", []) return True
if self._whitelist_dist and item.distinguished:
if config.get("whitelist"): return True
log.debug("Keeping messages from subreddits {subs}".format(subs=", ".join(whitelist))) if self._whitelist_gild and item.gilded:
return True
remove_things(r, config, log, get_things(r, config, log)) if self._max_score is not None and item.score > self._max_score:
return True
return False
def get_things(r, config, log, after=None):
limit = None def _save_item(self, item):
item = config.get("item", "comments") with open(os.path.join(self._save, item.id), "w") as fh:
sort = config.get("sort", "new") json.dump(item.json_dict, fh)
log.debug("Deleting items: {item}".format(item=item))
if item == "comments": def _remove_submission(self, sub):
return r.user.get_comments(limit=limit, sort=sort) self._logger.info("Deleting submission: #{id} {url}".format(id=sub.id, url=sub.url.encode("utf-8")))
elif item == "submitted":
return r.user.get_submitted(limit=limit, sort=sort) def _remove_comment(self, comment):
elif item == "overview": if self._repl_format == "random":
return r.user.get_overview(limit=limit, sort=sort) replacement_text = get_sentence()
elif self._repl_format == "dot":
replacement_text = "."
else: else:
raise Exception("Your deletion section is wrong") replacement_text = self._repl_format
def remove_things(r, config, log, things): short_text = sub(b"\n\r\t", " ", comment.body[:35].encode("utf-8"))
for thing in things: msg = "/r/{}/ #{} ({}) with: {}".format(comment.subreddit, comment.id, short_text, replacement_text)
log.debug("Starting remove function on: {thing}".format(thing=thing))
# Seems to be in users's timezone. Unclear.
thing_time = datetime.fromtimestamp(thing.created_utc)
# Exclude items from being deleted unless past X hours.
after_time = datetime.now() - timedelta(hours=config.get("hours", 24))
if thing_time > after_time:
if thing_time + timedelta(hours=config.get("nuke_hours", 4320)) < datetime.utcnow():
pass
continue
# For edit_only we're assuming that the hours aren't altered.
# This saves time when deleting (you don't edit already edited posts).
if config.get("edit_only"):
end_time = after_time - timedelta(hours=config.get("hours", 24))
if thing_time < end_time:
continue
if str(thing.subreddit).lower() in config.get("whitelist", []) or thing.id in config.get("whitelist_ids", []): if self._edit_only:
continue self._logger.info("Editing (not removing) {msg}".format(msg=msg))
else:
if config.get("whitelist_distinguished") and thing.distinguished: self._logger.info("Editing and deleting {msg}".format(msg=msg))
continue if not self._trial:
if config.get("whitelist_gilded") and thing.gilded: comment.edit(replacement_text)
continue
if "max_score" in config and thing.score > config["max_score"]: def _remove(self, item):
if self._save:
self._save_item(item)
if self._clear_vote:
item.clear_vote()
if isinstance(item, Submission):
self._remove_submission(item)
elif isinstance(item, Comment):
self._remove_comment(item)
if not self._edit_only and not self._trial:
item.delete()
def _remove_things(self, items):
for idx, item in enumerate(items):
self._logger.debug("Examining: {}".format(item))
created = arrow.get(item.created_utc)
if created <= self._nuke_cutoff:
self._logger.debug("Item occurs prior to nuke cutoff")
self._remove(item)
elif created > self._recent_cutoff:
self._logger.debug("Skipping due to: too recent")
continue continue
elif self._check_item(item):
if config.get("save_directory"): self._logger.debug("Skipping due to: whitelisted")
save_directory = config["save_directory"]
if not os.path.exists(save_directory):
os.makedirs(save_directory)
with open("%s/%s.json" % (save_directory, thing.id), "w") as fh:
json.dump(thing.json_dict, fh)
if config.get("trial_run"): # Don't do anything, trial mode!
log.debug("Would have deleted {thing}: '{content}'".format(
thing=thing.id, content=thing))
continue continue
if config.get("clear_vote"):
thing.clear_vote()
if isinstance(thing, Submission):
log.info("Deleting submission: #{id} {url}".format(id=thing.id, url=thing.url.encode("utf-8")))
elif isinstance(thing, Comment):
rep_format = config.get("replacement_format")
if rep_format == "random":
replacement_text = get_sentence()
elif rep_format == "dot":
replacement_text = "."
else: else:
replacement_text = rep_format self._remove(item)
if not idx % 10:
msg = '/r/{3}/ #{0} with:\n\t"{1}" to\n\t"{2}"'.format(thing.id, sub(b"\n\r\t", " ", self._logger.info("{} items handled.".format(idx + 1))
thing.body[:78].encode("utf-8")), return idx + 1
replacement_text[:78], thing.subreddit)
def _get_things(self):
if config.get("edit_only"): if self._item == "comments":
log.info("Editing (not removing) {msg}".format(msg=msg)) return self._r.user.get_comments(limit=self._limit, sort=self._sort)
elif self._item == "submitted":
return self._r.user.get_submitted(limit=self._limit, sort=self._sort)
elif self._item == "overview":
return self._r.user.get_overview(limit=self._limit, sort=self._sort)
else: else:
log.info("Editing and deleting {msg}".format(msg=msg)) raise Exception("Your deletion section is wrong")
thing.edit(replacement_text)
if not config.get("edit_only"):
thing.delete()

Loading…
Cancel
Save