|
|
@ -3,35 +3,37 @@ import uuid as uuid_builder
|
|
|
|
import validators
|
|
|
|
import validators
|
|
|
|
import os.path
|
|
|
|
import os.path
|
|
|
|
from os import path
|
|
|
|
from os import path
|
|
|
|
|
|
|
|
from threading import Lock, Thread
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
|
|
|
|
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
|
|
|
|
# Open a github issue if you know something :)
|
|
|
|
# Open a github issue if you know something :)
|
|
|
|
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
|
|
|
|
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
|
|
|
|
class ChangeDetectionStore:
|
|
|
|
class ChangeDetectionStore:
|
|
|
|
|
|
|
|
lock = Lock()
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self):
|
|
|
|
def __init__(self):
|
|
|
|
self.needs_write = False
|
|
|
|
self.needs_write = False
|
|
|
|
|
|
|
|
|
|
|
|
self.__data = {
|
|
|
|
self.__data = {
|
|
|
|
'note' : "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
|
|
|
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
|
|
|
'watching': {},
|
|
|
|
'watching': {},
|
|
|
|
'tag': "0.22",
|
|
|
|
'tag': "0.23",
|
|
|
|
'settings': {
|
|
|
|
'settings': {
|
|
|
|
'headers': {
|
|
|
|
'headers': {
|
|
|
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
|
|
|
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
|
|
|
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
|
|
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
|
|
|
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
|
|
|
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
|
|
|
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
|
|
|
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
|
|
|
},
|
|
|
|
},
|
|
|
|
'requests': {
|
|
|
|
'requests': {
|
|
|
|
'timeout': 15, # Default 15 seconds
|
|
|
|
'timeout': 15, # Default 15 seconds
|
|
|
|
'minutes_between_check': 3 * 60, # Default 3 hours
|
|
|
|
'minutes_between_check': 3 * 60, # Default 3 hours
|
|
|
|
'workers': 10 # Number of threads, lower is better for slow connections
|
|
|
|
'workers': 10 # Number of threads, lower is better for slow connections
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base definition for all watchers
|
|
|
|
# Base definition for all watchers
|
|
|
|
self.generic_definition = {
|
|
|
|
self.generic_definition = {
|
|
|
|
'url': None,
|
|
|
|
'url': None,
|
|
|
@ -41,8 +43,8 @@ class ChangeDetectionStore:
|
|
|
|
'title': None,
|
|
|
|
'title': None,
|
|
|
|
'previous_md5': None,
|
|
|
|
'previous_md5': None,
|
|
|
|
'uuid': str(uuid_builder.uuid4()),
|
|
|
|
'uuid': str(uuid_builder.uuid4()),
|
|
|
|
'headers' : {}, # Extra headers to send
|
|
|
|
'headers': {}, # Extra headers to send
|
|
|
|
'history' : {} # Dict of timestamp and output stripped filename
|
|
|
|
'history': {} # Dict of timestamp and output stripped filename
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if path.isfile('/source.txt'):
|
|
|
|
if path.isfile('/source.txt'):
|
|
|
@ -67,7 +69,6 @@ class ChangeDetectionStore:
|
|
|
|
if 'requests' in from_disk['settings']:
|
|
|
|
if 'requests' in from_disk['settings']:
|
|
|
|
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
|
|
|
|
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Reinitialise each `watching` with our generic_definition in the case that we add a new var in the future.
|
|
|
|
# Reinitialise each `watching` with our generic_definition in the case that we add a new var in the future.
|
|
|
|
# @todo pretty sure theres a python we todo this with an abstracted(?) object!
|
|
|
|
# @todo pretty sure theres a python we todo this with an abstracted(?) object!
|
|
|
|
i = 0
|
|
|
|
i = 0
|
|
|
@ -85,20 +86,28 @@ class ChangeDetectionStore:
|
|
|
|
self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
|
|
|
|
self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
|
|
|
|
self.add_watch(url='https://changedetection.io', tag='Tech news')
|
|
|
|
self.add_watch(url='https://changedetection.io', tag='Tech news')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_watch(self, uuid, update_obj):
    """Merge the fields in ``update_obj`` into the watch stored under ``uuid``.

    Keys whose default in ``self.generic_definition`` is a dict (such as
    ``headers`` and ``history``) are merged into the watch's existing nested
    dict instead of replacing it; all remaining keys overwrite the stored
    values. ``needs_write`` is set once the merge has completed.

    Args:
        uuid: Key of the watch inside the store's ``watching`` dict.
        update_obj: Mapping of field names to new values. It is copied
            before merging, so the caller's object is left unmodified.

    Raises:
        KeyError: If no watch is stored under ``uuid``.
    """
    # The context manager releases the lock even when the lookup below
    # raises; a bare acquire()/release() pair would leave it held forever.
    with self.lock:
        watch = self.__data['watching'][uuid]

        # Work on a shallow copy so nested keys can be popped without
        # touching the dict the caller handed in.
        remaining = dict(update_obj)

        # In python 3.9 we have the |= dict operator, but that still will
        # lose data on nested structures, so nested dicts are merged by hand.
        for dict_key, default in self.generic_definition.items():
            if isinstance(default, dict) and dict_key in remaining:
                watch[dict_key].update(remaining.pop(dict_key))

        # Update with the remaining (non-nested) values.
        watch.update(remaining)
        self.needs_write = True
|
|
|
|
|
|
|
|
|
|
|
|
@property
def data(self):
    """Expose the store's underlying dictionary.

    The internally held dict object itself is returned (not a copy), so
    changes made through the returned value are changes to the store.
    """
    store_contents = self.__data
    return store_contents
|
|
|
|
|
|
|
|
|
|
|
|
def get_all_tags(self):
|
|
|
|
def get_all_tags(self):
|
|
|
|
tags=[]
|
|
|
|
tags = []
|
|
|
|
for uuid, watch in self.data['watching'].items():
|
|
|
|
for uuid, watch in self.data['watching'].items():
|
|
|
|
|
|
|
|
|
|
|
|
# Support for comma separated list of tags.
|
|
|
|
# Support for comma separated list of tags.
|
|
|
@ -111,10 +120,11 @@ class ChangeDetectionStore:
|
|
|
|
return tags
|
|
|
|
return tags
|
|
|
|
|
|
|
|
|
|
|
|
def delete(self, uuid):
    """Remove the watch stored under ``uuid`` and mark the store as dirty.

    Args:
        uuid: Key of the watch inside the store's ``watching`` dict.

    Raises:
        KeyError: If no watch is stored under ``uuid``. ``needs_write`` is
            left untouched in that case.
    """
    # ``with`` guarantees the lock is released when the uuid is unknown;
    # a bare acquire()/release() pair would leave it held after the KeyError.
    with self.lock:
        del self.__data['watching'][uuid]
        self.needs_write = True
|
|
|
|
|
|
|
|
|
|
|
|
def url_exists(self, url):
|
|
|
|
def url_exists(self, url):
|
|
|
|
|
|
|
|
|
|
|
@ -130,7 +140,7 @@ class ChangeDetectionStore:
|
|
|
|
return self.data['watching'][uuid].get(val)
|
|
|
|
return self.data['watching'][uuid].get(val)
|
|
|
|
|
|
|
|
|
|
|
|
def add_watch(self, url, tag):
|
|
|
|
def add_watch(self, url, tag):
|
|
|
|
|
|
|
|
self.lock.acquire()
|
|
|
|
print("Adding", url, tag)
|
|
|
|
print("Adding", url, tag)
|
|
|
|
# # @todo deal with exception
|
|
|
|
# # @todo deal with exception
|
|
|
|
# validators.url(url)
|
|
|
|
# validators.url(url)
|
|
|
@ -146,13 +156,15 @@ class ChangeDetectionStore:
|
|
|
|
|
|
|
|
|
|
|
|
self.data['watching'][new_uuid] = _blank
|
|
|
|
self.data['watching'][new_uuid] = _blank
|
|
|
|
self.needs_write = True
|
|
|
|
self.needs_write = True
|
|
|
|
|
|
|
|
self.lock.release()
|
|
|
|
return new_uuid
|
|
|
|
return new_uuid
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sync_to_json(self):
    """Write the whole data store to ``/datastore/url-watches.json``.

    ``self.data`` is serialised as JSON with a four-space indent while the
    store lock is held. ``needs_write`` is cleared only after the file has
    been written and closed.

    Raises:
        OSError: If the file cannot be opened or written.
        TypeError: If the stored data contains a value ``json.dump`` cannot
            serialise.
    """
    print("Saving index")

    # ``with`` releases the lock even if opening or dumping fails; a bare
    # acquire()/release() pair would leave it held and block later callers.
    with self.lock:
        with open('/datastore/url-watches.json', 'w') as json_file:
            json.dump(self.data, json_file, indent=4)
        self.needs_write = False
|
|
|
|
|
|
|
|
|
|
|
|
# body of the constructor
|
|
|
|
# body of the constructor
|
|
|
|