Use proxies.json instead of proxies.txt

proxies-json-data
dgtlmoon 2 years ago
parent 22638399c1
commit 4b50ebb5c9

@ -547,6 +547,7 @@ def changedetection_app(config=None, datastore_o=None):
# Defaults for proxy choice # Defaults for proxy choice
if datastore.proxy_list is not None: # When enabled if datastore.proxy_list is not None: # When enabled
# @todo
# Radio needs '' not None, or in case the chosen one no longer exists # Radio needs '' not None, or in case the chosen one no longer exists
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list): if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
default['proxy'] = '' default['proxy'] = ''
@ -560,7 +561,10 @@ def changedetection_app(config=None, datastore_o=None):
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
del form.proxy del form.proxy
else: else:
form.proxy.choices = [('', 'Default')] + datastore.proxy_list form.proxy.choices = [('', 'Default')]
for p in datastore.proxy_list:
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
if request.method == 'POST' and form.validate(): if request.method == 'POST' and form.validate():
extra_update_obj = {} extra_update_obj = {}
@ -1368,6 +1372,8 @@ def ticker_thread_check_time_launch_checks():
import random import random
from changedetectionio import update_worker from changedetectionio import update_worker
proxy_last_called_time = {}
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20)) recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds) print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
@ -1428,10 +1434,27 @@ def ticker_thread_check_time_launch_checks():
if watch.jitter_seconds == 0: if watch.jitter_seconds == 0:
watch.jitter_seconds = random.uniform(-abs(jitter), jitter) watch.jitter_seconds = random.uniform(-abs(jitter), jitter)
seconds_since_last_recheck = now - watch['last_checked'] seconds_since_last_recheck = now - watch['last_checked']
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
# Proxies can be set to have a limit on seconds between which they can be called
watch_proxy = watch.get('proxy')
if watch_proxy and any([watch_proxy in p for p in datastore.proxy_list]):
# Proxy may also have some threshold minimum
proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
if proxy_list_reuse_time_minimum:
proxy_last_used_time = proxy_last_called_time.get(watch_proxy, 0)
time_since_proxy_used = time.time() - proxy_last_used_time
if time_since_proxy_used < proxy_list_reuse_time_minimum:
# Not enough time difference reached, skip this watch
print("Skipped UUID {} on proxy {}, not enough time between proxy requests".format(uuid, watch_proxy))
continue
else:
# Record the last used time
proxy_last_called_time[watch_proxy] = int(time.time())
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it. # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time()) priority = int(time.time())
print( print(

@ -21,6 +21,7 @@ class perform_site_check():
self.datastore = datastore self.datastore = datastore
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use # If there was a proxy list enabled, figure out what proxy_args/which proxy to use
# Returns the proxy as a URL
# if watch.proxy use that # if watch.proxy use that
# fetcher.proxy_override = watch.proxy or main config proxy # fetcher.proxy_override = watch.proxy or main config proxy
# Allows override the proxy on a per-request basis # Allows override the proxy on a per-request basis
@ -33,18 +34,19 @@ class perform_site_check():
# If it's a valid one # If it's a valid one
if any([watch['proxy'] in p for p in self.datastore.proxy_list]): if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
proxy_args = watch['proxy'] proxy_args = self.datastore.proxy_list.get(watch['proxy']).get('url')
# not valid (including None), try the system one # not valid (including None), try the system one
else: else:
system_proxy = self.datastore.data['settings']['requests']['proxy'] system_proxy = self.datastore.data['settings']['requests']['proxy']
# Is not None and exists # Is not None and exists
if any([system_proxy in p for p in self.datastore.proxy_list]): if self.datastore.proxy_list.get():
proxy_args = system_proxy proxy_args = self.datastore.proxy_list.get(system_proxy).get('url')
# Fallback - Did not resolve anything, use the first available # Fallback - Did not resolve anything, use the first available
if proxy_args is None: if proxy_args is None:
proxy_args = self.datastore.proxy_list[0][0] first_default = list(self.datastore.proxy_list)[0]
proxy_args = self.datastore.proxy_list.get(first_default).get('url')
return proxy_args return proxy_args
@ -68,6 +70,8 @@ class perform_site_check():
stripped_text_from_html = "" stripped_text_from_html = ""
watch = self.datastore.data['watching'].get(uuid) watch = self.datastore.data['watching'].get(uuid)
if not watch:
return
# Protect against file:// access # Protect against file:// access
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False): if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
@ -90,7 +94,7 @@ class perform_site_check():
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']: if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '') request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
timeout = self.datastore.data['settings']['requests']['timeout'] timeout = self.datastore.data['settings']['requests'].get('timeout')
url = watch.get('url') url = watch.get('url')
request_body = self.datastore.data['watching'][uuid].get('body') request_body = self.datastore.data['watching'][uuid].get('body')
request_method = self.datastore.data['watching'][uuid].get('method') request_method = self.datastore.data['watching'][uuid].get('method')
@ -110,9 +114,10 @@ class perform_site_check():
# If the klass doesn't exist, just use a default # If the klass doesn't exist, just use a default
klass = getattr(content_fetcher, "html_requests") klass = getattr(content_fetcher, "html_requests")
proxy_url = self.set_proxy_from_list(watch)
proxy_args = self.set_proxy_from_list(watch) if proxy_url:
fetcher = klass(proxy_override=proxy_args) print ("UUID {} Using proxy {}".format(uuid, proxy_url))
fetcher = klass(proxy_override=proxy_url)
# Configurable per-watch or global extra delay before extracting text (for webDriver types) # Configurable per-watch or global extra delay before extracting text (for webDriver types)
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None) system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)

@ -113,9 +113,7 @@ class ChangeDetectionStore:
self.__data['settings']['application']['api_access_token'] = secret self.__data['settings']['application']['api_access_token'] = secret
# Proxy list support - available as a selection in settings when text file is imported # Proxy list support - available as a selection in settings when text file is imported
# CSV list proxy_list_file = "{}/proxies.json".format(self.datastore_path)
# "name, address", or just "name"
proxy_list_file = "{}/proxies.txt".format(self.datastore_path)
if path.isfile(proxy_list_file): if path.isfile(proxy_list_file):
self.import_proxy_list(proxy_list_file) self.import_proxy_list(proxy_list_file)
@ -437,18 +435,10 @@ class ChangeDetectionStore:
unlink(item) unlink(item)
def import_proxy_list(self, filename): def import_proxy_list(self, filename):
import csv with open(filename) as f:
with open(filename, newline='') as f: self.proxy_list = json.load(f)
reader = csv.reader(f, skipinitialspace=True) print ("Registered proxy list", list(self.proxy_list.keys()))
# @todo This loop could be improved
l = []
for row in reader:
if len(row):
if len(row)>=2:
l.append(tuple(row[:2]))
else:
l.append(tuple([row[0], row[0]]))
self.proxy_list = l if len(l) else None
# Run all updates # Run all updates

Loading…
Cancel
Save