Tidy up a few broken datastore paths

pull/11/head
Leigh Morresi 4 years ago
parent 9f964b6d3f
commit 93ee65fe53

@ -18,7 +18,7 @@ RUN echo "commit: $SOURCE_COMMIT branch: $SOURCE_BRANCH" >/source.txt
RUN [ ! -d "/datastore" ] && mkdir /datastore RUN [ ! -d "/datastore" ] && mkdir /datastore
CMD [ "python", "./backend.py" ] CMD [ "python", "./backend.py" , "-d", "/datastore"]

@ -9,6 +9,9 @@ import eventlet
import eventlet.wsgi import eventlet.wsgi
import backend import backend
from backend import store
def main(argv): def main(argv):
ssl_mode = False ssl_mode = False
port = 5000 port = 5000
@ -17,14 +20,14 @@ def main(argv):
try: try:
opts, args = getopt.getopt(argv, "sd:p:", "purge") opts, args = getopt.getopt(argv, "sd:p:", "purge")
except getopt.GetoptError: except getopt.GetoptError:
print('backend.py -s SSL enable -p [port]') print('backend.py -s SSL enable -p [port] -d [datastore path]')
sys.exit(2) sys.exit(2)
for opt, arg in opts: for opt, arg in opts:
# if opt == '--purge': # if opt == '--purge':
# Remove history, the actual files you need to delete manually. # Remove history, the actual files you need to delete manually.
# for uuid, watch in datastore.data['watching'].items(): # for uuid, watch in datastore.data['watching'].items():
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) # watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
if opt == '-s': if opt == '-s':
ssl_mode = True ssl_mode = True
@ -36,19 +39,21 @@ def main(argv):
datastore_path = arg datastore_path = arg
# @todo finalise SSL config, but this should get you in the right direction if you need it. # Kinda weird to tell them both where `datastore_path` is right..
app_config = {'datastore_path': datastore_path}
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
app = backend.changedetection_app(app_config, datastore)
app = backend.changedetection_app({'datastore_path':datastore_path})
if ssl_mode: if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)), eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen(('', port)),
certfile='cert.pem', certfile='cert.pem',
keyfile='privkey.pem', keyfile='privkey.pem',
server_side=True), app) server_side=True), app)
else: else:
eventlet.wsgi.server(eventlet.listen(('', port)), backend.changedetection_app()) eventlet.wsgi.server(eventlet.listen(('', port)), app)
if __name__ == '__main__':
main(sys.argv)
#print (__name__) if __name__ == '__main__':
main(sys.argv[1:])

@ -24,10 +24,9 @@ import queue
from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for from flask import Flask, render_template, request, send_file, send_from_directory, safe_join, abort, redirect, url_for
datastore=None
# Local # Local
from backend import store
running_update_threads = [] running_update_threads = []
ticker_thread = None ticker_thread = None
@ -75,13 +74,14 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
# return timeago.format(timestamp, time.time()) # return timeago.format(timestamp, time.time())
# return datetime.datetime.utcfromtimestamp(timestamp).strftime(format) # return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)
def changedetection_app(config=None): def changedetection_app(config=None, datastore_o=None):
global datastore
datastore = datastore_o
# Hmm # Hmm
app.config.update(dict(DEBUG=True)) app.config.update(dict(DEBUG=True))
app.config.update(config or {}) app.config.update(config or {})
datastore = store.ChangeDetectionStore(datastore_path=app.config['datastore_path'])
# Setup cors headers to allow all domains # Setup cors headers to allow all domains
# https://flask-cors.readthedocs.io/en/latest/ # https://flask-cors.readthedocs.io/en/latest/
@ -454,37 +454,38 @@ class Worker(threading.Thread):
self.q.task_done() self.q.task_done()
# Thread runner to check every minute, look for new watches to feed into the Queue. # Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks(): def ticker_thread_check_time_launch_checks():
# Spin up Workers. # Spin up Workers.
for _ in range(datastore.data['settings']['requests']['workers']): for _ in range(datastore.data['settings']['requests']['workers']):
new_worker = Worker(update_q) print ("...")
running_update_threads.append(new_worker) new_worker = Worker(update_q)
new_worker.start() running_update_threads.append(new_worker)
new_worker.start()
# Every minute check for new UUIDs to follow up on # Every minute check for new UUIDs to follow up on
while True: while True:
minutes = datastore.data['settings']['requests']['minutes_between_check'] minutes = datastore.data['settings']['requests']['minutes_between_check']
for uuid, watch in datastore.data['watching'].items(): for uuid, watch in datastore.data['watching'].items():
if watch['last_checked'] <= time.time() - (minutes * 60): if watch['last_checked'] <= time.time() - (minutes * 60):
update_q.put(uuid) update_q.put(uuid)
if app.config['STOP_THREADS']: if app.config['STOP_THREADS']:
return return
time.sleep(1) time.sleep(1)
# Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON
# by just running periodically in one thread, according to python, dict updates are threadsafe. # by just running periodically in one thread, according to python, dict updates are threadsafe.
def save_datastore(): def save_datastore():
global stop_threads global stop_threads
while True: while True:
if stop_threads: if app.config['STOP_THREADS']:
return return
if datastore.needs_write: if datastore.needs_write:
datastore.sync_to_json() datastore.sync_to_json()
time.sleep(1) time.sleep(1)

@ -20,7 +20,10 @@ class perform_site_check():
return return
def ensure_output_path(self): def ensure_output_path(self):
os.mkdir(self.output_path) try:
os.mkdir(self.output_path)
except FileExistsError:
print (self.output_path, "already exists.")
def save_response_stripped_output(self, output, fname): def save_response_stripped_output(self, output, fname):
@ -40,7 +43,7 @@ class perform_site_check():
"last_checked": timestamp "last_checked": timestamp
} }
self.output_path = "/datastore/{}".format(uuid) self.output_path = "{}/{}".format(self.datastore.datastore_path,uuid)
self.ensure_output_path() self.ensure_output_path()
extra_headers = self.datastore.get_val(uuid, 'headers') extra_headers = self.datastore.get_val(uuid, 'headers')

@ -7,6 +7,7 @@ from threading import Lock, Thread
from copy import deepcopy from copy import deepcopy
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods? # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :) # Open a github issue if you know something :)
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change # https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
@ -42,7 +43,7 @@ class ChangeDetectionStore:
'tag': None, 'tag': None,
'last_checked': 0, 'last_checked': 0,
'last_changed': 0, 'last_changed': 0,
'last_viewed': 0, # history key value of the last viewed via the [diff] link 'last_viewed': 0, # history key value of the last viewed via the [diff] link
'newest_history_key': "", 'newest_history_key': "",
'title': None, 'title': None,
'previous_md5': "", 'previous_md5': "",
@ -58,7 +59,7 @@ class ChangeDetectionStore:
self.__data['build_sha'] = f.read() self.__data['build_sha'] = f.read()
try: try:
with open('/datastore/url-watches.json') as json_file: with open("{}/url-watches.json".format(self.datastore_path)) as json_file:
from_disk = json.load(json_file) from_disk = json.load(json_file)
# @todo isn't there a way to do this dict.update recursively? # @todo isn't there a way to do this dict.update recursively?
@ -85,7 +86,7 @@ class ChangeDetectionStore:
# First run: the JSON file doesn't exist yet. # First run: the JSON file doesn't exist yet.
except (FileNotFoundError, json.decoder.JSONDecodeError): except (FileNotFoundError, json.decoder.JSONDecodeError):
print("Creating JSON store") print("Creating JSON store at", self.datastore_path)
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid') self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
@ -106,9 +107,6 @@ class ChangeDetectionStore:
return 0 return 0
def set_last_viewed(self, uuid, timestamp): def set_last_viewed(self, uuid, timestamp):
self.data['watching'][uuid].update({'last_viewed': str(timestamp)}) self.data['watching'][uuid].update({'last_viewed': str(timestamp)})
self.needs_write = True self.needs_write = True
@ -122,7 +120,7 @@ class ChangeDetectionStore:
if isinstance(d, dict): if isinstance(d, dict):
if update_obj is not None and dict_key in update_obj: if update_obj is not None and dict_key in update_obj:
self.__data['watching'][uuid][dict_key].update(update_obj[dict_key]) self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
del(update_obj[dict_key]) del (update_obj[dict_key])
self.__data['watching'][uuid].update(update_obj) self.__data['watching'][uuid].update(update_obj)
self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid) self.__data['watching'][uuid]['newest_history_key'] = self.get_newest_history_key(uuid)
@ -167,7 +165,6 @@ class ChangeDetectionStore:
def add_watch(self, url, tag): def add_watch(self, url, tag):
with self.lock: with self.lock:
# @todo use a common generic version of this # @todo use a common generic version of this
new_uuid = str(uuid_builder.uuid4()) new_uuid = str(uuid_builder.uuid4())
_blank = deepcopy(self.generic_definition) _blank = deepcopy(self.generic_definition)
@ -185,8 +182,7 @@ class ChangeDetectionStore:
def sync_to_json(self): def sync_to_json(self):
with open("{}/url-watches.json".format(self.datastore_path), 'w') as json_file:
with open('/datastore/url-watches.json', 'w') as json_file:
json.dump(self.__data, json_file, indent=4) json.dump(self.__data, json_file, indent=4)
print("Re-saved index") print("Re-saved index")

@ -2,7 +2,8 @@
import pytest import pytest
import backend import backend
from backend import store
import os
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
# Much better boilerplate than the docs # Much better boilerplate than the docs
@ -10,7 +11,20 @@ import backend
@pytest.fixture @pytest.fixture
def app(request): def app(request):
app = backend.changedetection_app({'datastore_path':'./datastorexxx'})
datastore_path ="./test-datastore"
try:
os.mkdir(datastore_path)
except FileExistsError:
pass
# Kinda weird to tell them both where `datastore_path` is right..
app_config = {'datastore_path': datastore_path}
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
app = backend.changedetection_app(app_config, datastore)
app.debug = True app.debug = True
def teardown(): def teardown():

Loading…
Cancel
Save