From 4b50ebb5c9b3aebc99145103a346419acd0314ab Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 15 Sep 2022 15:07:13 +0200 Subject: [PATCH] Use proxies.json instead of proxies.txt --- changedetectionio/__init__.py | 27 ++++++++++++++++++++++++-- changedetectionio/fetch_site_status.py | 21 ++++++++++++-------- changedetectionio/store.py | 20 +++++-------------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 06c52d61..23693446 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -547,6 +547,7 @@ def changedetection_app(config=None, datastore_o=None): # Defaults for proxy choice if datastore.proxy_list is not None: # When enabled + # @todo # Radio needs '' not None, or incase that the chosen one no longer exists if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list): default['proxy'] = '' @@ -560,7 +561,10 @@ def changedetection_app(config=None, datastore_o=None): # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead del form.proxy else: - form.proxy.choices = [('', 'Default')] + datastore.proxy_list + form.proxy.choices = [('', 'Default')] + for p in datastore.proxy_list: + form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) + if request.method == 'POST' and form.validate(): extra_update_obj = {} @@ -1368,6 +1372,8 @@ def ticker_thread_check_time_launch_checks(): import random from changedetectionio import update_worker + proxy_last_called_time = {} + recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20)) print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds) @@ -1428,10 +1434,27 @@ def ticker_thread_check_time_launch_checks(): if watch.jitter_seconds == 0: watch.jitter_seconds = random.uniform(-abs(jitter), jitter) - seconds_since_last_recheck = now - watch['last_checked'] + if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: + + # Proxies can be set to have a limit on seconds between which they can be called + watch_proxy = watch.get('proxy') + if watch_proxy and any([watch_proxy in p for p in datastore.proxy_list]): + # Proxy may also have some threshold minimum + proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0)) + if proxy_list_reuse_time_minimum: + proxy_last_used_time = proxy_last_called_time.get(watch_proxy, 0) + time_since_proxy_used = time.time() - proxy_last_used_time + if time_since_proxy_used < proxy_list_reuse_time_minimum: + # Not enough time difference reached, skip this watch + print("Skipped UUID {} on proxy {}, not enough time between proxy requests".format(uuid, watch_proxy)) + continue + else: + # Record the last used time + proxy_last_called_time[watch_proxy] = int(time.time()) + # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it. priority = int(time.time()) print( diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index ed84c0fd..fdc4d7dd 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -21,6 +21,7 @@ class perform_site_check(): self.datastore = datastore # If there was a proxy list enabled, figure out what proxy_args/which proxy to use + # Returns the proxy as a URL # if watch.proxy use that # fetcher.proxy_override = watch.proxy or main config proxy # Allows override the proxy on a per-request basis @@ -33,18 +34,19 @@ class perform_site_check(): # If its a valid one if any([watch['proxy'] in p for p in self.datastore.proxy_list]): - proxy_args = watch['proxy'] + proxy_args = self.datastore.proxy_list.get(watch['proxy']).get('url') # not valid (including None), try the system one else: system_proxy = self.datastore.data['settings']['requests']['proxy'] # Is not None and exists - if any([system_proxy in p for p in self.datastore.proxy_list]): - proxy_args = system_proxy + if self.datastore.proxy_list.get(): + proxy_args = self.datastore.proxy_list.get(system_proxy).get('url') # Fallback - Did not resolve anything, use the first available if proxy_args is None: - proxy_args = self.datastore.proxy_list[0][0] + first_default = list(self.datastore.proxy_list)[0] + proxy_args = self.datastore.proxy_list.get(first_default).get('url') return proxy_args @@ -68,6 +70,8 @@ class perform_site_check(): stripped_text_from_html = "" watch = self.datastore.data['watching'].get(uuid) + if not watch: + return # Protect against file:// access if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False): @@ -90,7 +94,7 @@ class perform_site_check(): if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']: request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '') - timeout = self.datastore.data['settings']['requests']['timeout'] + timeout = self.datastore.data['settings']['requests'].get('timeout') url = watch.get('url') request_body = self.datastore.data['watching'][uuid].get('body') request_method = self.datastore.data['watching'][uuid].get('method') @@ -110,9 +114,10 @@ class perform_site_check(): # If the klass doesnt exist, just use a default klass = getattr(content_fetcher, "html_requests") - - proxy_args = self.set_proxy_from_list(watch) - fetcher = klass(proxy_override=proxy_args) + proxy_url = self.set_proxy_from_list(watch) + if proxy_url: + print ("UUID {} Using proxy {}".format(uuid, proxy_url)) + fetcher = klass(proxy_override=proxy_url) # Configurable per-watch or global extra delay before extracting text (for webDriver types) system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None) diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 53c50b79..11f25283 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -113,9 +113,7 @@ class ChangeDetectionStore: self.__data['settings']['application']['api_access_token'] = secret # Proxy list support - available as a selection in settings when text file is imported - # CSV list - # "name, address", or just "name" - proxy_list_file = "{}/proxies.txt".format(self.datastore_path) + proxy_list_file = "{}/proxies.json".format(self.datastore_path) if path.isfile(proxy_list_file): self.import_proxy_list(proxy_list_file) @@ -437,18 +435,10 @@ class ChangeDetectionStore: unlink(item) def import_proxy_list(self, filename): - import csv - with open(filename, newline='') as f: - reader = csv.reader(f, skipinitialspace=True) - # @todo This loop can could be improved - l = [] - for row in reader: - if len(row): - if len(row)>=2: - l.append(tuple(row[:2])) - else: - l.append(tuple([row[0], row[0]])) - self.proxy_list = l if len(l) else None + with open(filename) as f: + self.proxy_list = json.load(f) + print ("Registered proxy list", list(self.proxy_list.keys())) + # Run all updates