From d4715e2bc8cd4fc82589072d41d4ed535ebb25ff Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 19 Sep 2022 13:14:35 +0200 Subject: [PATCH] Tidy up proxies.json logic, adding tests (#955) --- changedetectionio/__init__.py | 7 +-- changedetectionio/fetch_site_status.py | 37 ++------------- changedetectionio/run_all_tests.sh | 46 ++++++++++++++++++- changedetectionio/store.py | 30 ++++++++++++ .../tests/proxy_list/__init__.py | 2 + .../tests/proxy_list/conftest.py | 14 ++++++ .../tests/proxy_list/proxies.json-example | 10 ++++ changedetectionio/tests/proxy_list/squid.conf | 41 +++++++++++++++++ .../tests/proxy_list/test_multiple_proxy.py | 38 +++++++++++++++ .../tests/proxy_list/test_proxy.py | 19 ++++++++ docker-compose.yml | 2 + 11 files changed, 207 insertions(+), 39 deletions(-) create mode 100644 changedetectionio/tests/proxy_list/__init__.py create mode 100644 changedetectionio/tests/proxy_list/conftest.py create mode 100644 changedetectionio/tests/proxy_list/proxies.json-example create mode 100644 changedetectionio/tests/proxy_list/squid.conf create mode 100644 changedetectionio/tests/proxy_list/test_multiple_proxy.py create mode 100644 changedetectionio/tests/proxy_list/test_proxy.py diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 5b8a38af..07242bf3 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -1444,12 +1444,7 @@ def ticker_thread_check_time_launch_checks(): if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: # Proxies can be set to have a limit on seconds between which they can be called - watch_proxy = watch.get('proxy') - if not watch_proxy: - watch_proxy = datastore.data['settings']['requests']['proxy'] - if not watch_proxy: - watch_proxy = list(datastore.proxy_list.keys())[0] - + watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid) if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()): # Proxy may also have some threshold 
minimum proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0)) diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index 26113353..79e282b5 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -20,36 +20,6 @@ class perform_site_check(): super().__init__(*args, **kwargs) self.datastore = datastore - # If there was a proxy list enabled, figure out what proxy_args/which proxy to use - # Returns the proxy as a URL - # if watch.proxy use that - # fetcher.proxy_override = watch.proxy or main config proxy - # Allows override the proxy on a per-request basis - # ALWAYS use the first one is nothing selected - - def set_proxy_from_list(self, watch): - proxy_args = None - if self.datastore.proxy_list is None: - return None - - # If its a valid one - if watch['proxy'] and watch['proxy'] in list(self.datastore.proxy_list.keys()): - proxy_args = self.datastore.proxy_list.get(watch['proxy']).get('url') - - # not valid (including None), try the system one - else: - system_proxy = self.datastore.data['settings']['requests']['proxy'] - # Is not None and exists - if self.datastore.proxy_list.get(system_proxy): - proxy_args = self.datastore.proxy_list.get(system_proxy).get('url') - - # Fallback - Did not resolve anything, use the first available - if proxy_args is None: - first_default = list(self.datastore.proxy_list)[0] - proxy_args = self.datastore.proxy_list.get(first_default).get('url') - - return proxy_args - # Doesn't look like python supports forward slash auto enclosure in re.findall # So convert it to inline flag "foobar(?i)" type configuration def forward_slash_enclosed_regex_to_options(self, regex): @@ -114,9 +84,12 @@ class perform_site_check(): # If the klass doesnt exist, just use a default klass = getattr(content_fetcher, "html_requests") - proxy_url = self.set_proxy_from_list(watch) - if proxy_url: + proxy_id = 
self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
+        proxy_url = None
+        if proxy_id:
+            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
             print ("UUID {} Using proxy {}".format(uuid, proxy_url))
+
         fetcher = klass(proxy_override=proxy_url)
 
         # Configurable per-watch or global extra delay before extracting text (for webDriver types)
diff --git a/changedetectionio/run_all_tests.sh b/changedetectionio/run_all_tests.sh
index ce428f12..e4ea3bac 100755
--- a/changedetectionio/run_all_tests.sh
+++ b/changedetectionio/run_all_tests.sh
@@ -48,4 +48,55 @@ pytest tests/test_errorhandling.py
 pytest tests/visualselector/test_fetch_data.py
 
 unset PLAYWRIGHT_DRIVER_URL
-docker kill $$-test_browserless
\ No newline at end of file
+docker kill $$-test_browserless
+
+# Test proxy list handling, starting two squids on different ports
+# Each squid adds a different header to the response, which is the main thing we test for.
+docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
+docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge
+
+
+# So, basic HTTP as env var test
+export HTTP_PROXY=http://localhost:3128
+export HTTPS_PROXY=http://localhost:3128
+pytest tests/proxy_list/test_proxy.py
+# Each check below greps the squid access log; a missing request must fail the whole run
+# (after stopping both squids), otherwise this script always exits 0 and the test proves nothing.
+docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io
+if [ $? -ne 0 ]
+then
+  echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
+  docker kill $$-squid-one $$-squid-two
+  exit 1
+fi
+unset HTTP_PROXY
+unset HTTPS_PROXY
+
+
+# 2nd test actually choose the preferred proxy from proxies.json
+cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
+# Makes a watch use a preferred proxy
+pytest tests/proxy_list/test_multiple_proxy.py
+
+# Should be a request in the default "first" squid
+docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io
+if [ $? -ne 0 ]
+then
+  echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
+  docker kill $$-squid-one $$-squid-two
+  exit 1
+fi
+
+# And one in the 'second' squid (user selects this as preferred)
+docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io
+if [ $? -ne 0 ]
+then
+  echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
+  docker kill $$-squid-one $$-squid-two
+  exit 1
+fi
+
+# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
+docker kill $$-squid-one
+docker kill $$-squid-two
+
+
diff --git a/changedetectionio/store.py b/changedetectionio/store.py
index 11f25283..4eb5dcd0 100644
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -440,6 +440,35 @@ class ChangeDetectionStore:
             print ("Registered proxy list", list(self.proxy_list.keys()))
 
 
+    def get_preferred_proxy_for_watch(self, uuid):
+        """
+        Returns the preferred proxy by ID key
+
+        Resolution order: the watch's own 'proxy' setting, then the system-wide
+        settings/requests/proxy setting, then the first entry of the proxy list.
+        A configured key that is not present in the proxy list is treated as unset.
+
+        :param uuid: UUID
+        :return: proxy "key" id, or None when the proxy list is missing or empty
+        """
+
+        # Nothing to choose from (no proxy list loaded, or it has no entries)
+        if not self.proxy_list:
+            return None
+
+        # .get() yields None for a uuid that is not in the datastore, treat that as "no watch setting"
+        watch = self.data['watching'].get(uuid) or {}
+
+        # Per-watch setting first, then the system one - each only counts when it is a known key
+        candidates = (watch.get('proxy'), self.data['settings']['requests'].get('proxy'))
+        for proxy_id in candidates:
+            if proxy_id and proxy_id in self.proxy_list:
+                return proxy_id
+
+        # Fallback - Did not resolve anything (unset OR stale key), use the first available,
+        # same as the set_proxy_from_list() logic this method replaces
+        return next(iter(self.proxy_list))
+
 
     # Run all updates
     # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
diff --git a/changedetectionio/tests/proxy_list/__init__.py b/changedetectionio/tests/proxy_list/__init__.py
new file mode 100644
index 00000000..085b3d78
--- /dev/null
+++ b/changedetectionio/tests/proxy_list/__init__.py
@@ -0,0 +1,2 @@
+"""Tests for the app."""
+
diff --git a/changedetectionio/tests/proxy_list/conftest.py b/changedetectionio/tests/proxy_list/conftest.py
new file mode 100644
index 00000000..95812e2e
--- /dev/null
+++ b/changedetectionio/tests/proxy_list/conftest.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python3
+
+from .. import conftest
+
+#def pytest_addoption(parser):
+#    parser.addoption("--url_suffix", action="store", default="identifier for request")
+
+
+#def pytest_generate_tests(metafunc):
+#    # This is called for every test.
Only get/set command line arguments +# # if the argument is specified in the list of test "fixturenames". +# option_value = metafunc.config.option.url_suffix +# if 'url_suffix' in metafunc.fixturenames and option_value is not None: +# metafunc.parametrize("url_suffix", [option_value]) \ No newline at end of file diff --git a/changedetectionio/tests/proxy_list/proxies.json-example b/changedetectionio/tests/proxy_list/proxies.json-example new file mode 100644 index 00000000..0ae2178c --- /dev/null +++ b/changedetectionio/tests/proxy_list/proxies.json-example @@ -0,0 +1,10 @@ +{ + "proxy-one": { + "label": "One", + "url": "http://127.0.0.1:3128" + }, + "proxy-two": { + "label": "two", + "url": "http://127.0.0.1:3129" + } +} diff --git a/changedetectionio/tests/proxy_list/squid.conf b/changedetectionio/tests/proxy_list/squid.conf new file mode 100644 index 00000000..615b154d --- /dev/null +++ b/changedetectionio/tests/proxy_list/squid.conf @@ -0,0 +1,41 @@ +acl localnet src 0.0.0.1-0.255.255.255 # RFC 1122 "this" network (LAN) +acl localnet src 10.0.0.0/8 # RFC 1918 local private network (LAN) +acl localnet src 100.64.0.0/10 # RFC 6598 shared address space (CGN) +acl localnet src 169.254.0.0/16 # RFC 3927 link-local (directly plugged) machines +acl localnet src 172.16.0.0/12 # RFC 1918 local private network (LAN) +acl localnet src 192.168.0.0/16 # RFC 1918 local private network (LAN) +acl localnet src fc00::/7 # RFC 4193 local private network range +acl localnet src fe80::/10 # RFC 4291 link-local (directly plugged) machines +acl localnet src 159.65.224.174 +acl SSL_ports port 443 +acl Safe_ports port 80 # http +acl Safe_ports port 21 # ftp +acl Safe_ports port 443 # https +acl Safe_ports port 70 # gopher +acl Safe_ports port 210 # wais +acl Safe_ports port 1025-65535 # unregistered ports +acl Safe_ports port 280 # http-mgmt +acl Safe_ports port 488 # gss-http +acl Safe_ports port 591 # filemaker +acl Safe_ports port 777 # multiling http +acl CONNECT method CONNECT + 
+http_access deny !Safe_ports +http_access deny CONNECT !SSL_ports +http_access allow localhost manager +http_access deny manager +http_access allow localhost +http_access allow localnet +http_access deny all +http_port 3128 +coredump_dir /var/spool/squid +refresh_pattern ^ftp: 1440 20% 10080 +refresh_pattern ^gopher: 1440 0% 1440 +refresh_pattern -i (/cgi-bin/|\?) 0 0% 0 +refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims +refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims +refresh_pattern \/InRelease$ 0 0% 0 refresh-ims +refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims +refresh_pattern . 0 20% 4320 +logfile_rotate 0 + diff --git a/changedetectionio/tests/proxy_list/test_multiple_proxy.py b/changedetectionio/tests/proxy_list/test_multiple_proxy.py new file mode 100644 index 00000000..fcd286eb --- /dev/null +++ b/changedetectionio/tests/proxy_list/test_multiple_proxy.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from ..util import live_server_setup + +def test_preferred_proxy(client, live_server): + time.sleep(1) + live_server_setup(live_server) + time.sleep(1) + url = "http://chosen.changedetection.io" + + res = client.post( + url_for("import_page"), + # Because a URL wont show in squid/proxy logs due it being SSLed + # Use plain HTTP or a specific domain-name here + data={"urls": url}, + follow_redirects=True + ) + + assert b"1 Imported" in res.data + + time.sleep(2) + res = client.post( + url_for("edit_page", uuid="first"), + data={ + "css_filter": "", + "fetch_backend": "html_requests", + "headers": "", + "proxy": "proxy-two", + "tag": "", + "url": url, + }, + follow_redirects=True + ) + assert b"Updated watch." 
in res.data + time.sleep(2) + # Now the request should appear in the second-squid logs diff --git a/changedetectionio/tests/proxy_list/test_proxy.py b/changedetectionio/tests/proxy_list/test_proxy.py new file mode 100644 index 00000000..1f4c5ff4 --- /dev/null +++ b/changedetectionio/tests/proxy_list/test_proxy.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +import time +from flask import url_for +from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client + +# just make a request, we will grep in the docker logs to see it actually got called +def test_check_basic_change_detection_functionality(client, live_server): + live_server_setup(live_server) + res = client.post( + url_for("import_page"), + # Because a URL wont show in squid/proxy logs due it being SSLed + # Use plain HTTP or a specific domain-name here + data={"urls": "http://one.changedetection.io"}, + follow_redirects=True + ) + + assert b"1 Imported" in res.data + time.sleep(3) diff --git a/docker-compose.yml b/docker-compose.yml index 696eb89b..65417ee7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,8 @@ services: hostname: changedetection volumes: - changedetection-data:/datastore +# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support +# - ./proxies.json:/datastore/proxies.json # environment: # Default listening port, can also be changed with the -p option