From c1c8de310477acac150be4a9f4ba2c83c578bcf3 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 11 Oct 2024 00:19:19 +0200 Subject: [PATCH] Fixing proxy checker (#2696) --- .../blueprint/check_proxies/__init__.py | 14 ++-- changedetectionio/processors/__init__.py | 6 +- changedetectionio/static/js/recheck-proxy.js | 24 ++++--- .../styles/scss/parts/_extra_proxies.scss | 6 +- changedetectionio/static/styles/styles.css | 29 ++++---- .../tests/test_preview_endpoints.py | 72 +++++++++++++++++++ 6 files changed, 122 insertions(+), 29 deletions(-) create mode 100644 changedetectionio/tests/test_preview_endpoints.py diff --git a/changedetectionio/blueprint/check_proxies/__init__.py b/changedetectionio/blueprint/check_proxies/__init__.py index 8d7df73f..28fe5eba 100644 --- a/changedetectionio/blueprint/check_proxies/__init__.py +++ b/changedetectionio/blueprint/check_proxies/__init__.py @@ -1,4 +1,7 @@ +import importlib from concurrent.futures import ThreadPoolExecutor + +from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse from changedetectionio.store import ChangeDetectionStore from functools import wraps @@ -30,7 +33,6 @@ def construct_blueprint(datastore: ChangeDetectionStore): def long_task(uuid, preferred_proxy): import time from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions - from changedetectionio.processors.text_json_diff import text_json_diff from changedetectionio.safe_jinja import render as jinja_render status = {'status': '', 'length': 0, 'text': ''} @@ -38,8 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore): contents = '' now = time.time() try: - update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid) - update_handler.call_browser() + processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor") + update_handler = processor_module.perform_site_check(datastore=datastore, + watch_uuid=uuid + ) + + 
update_handler.call_browser(preferred_proxy_id=preferred_proxy) # title, size is len contents not len xfer except content_fetcher_exceptions.Non200ErrorCodeReceived as e: if e.status_code == 404: @@ -48,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"}) else: status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"}) - except text_json_diff.FilterNotFoundInResponse: + except FilterNotFoundInResponse: status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"}) except content_fetcher_exceptions.EmptyReply as e: if e.status_code == 403 or e.status_code == 401: diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index 54ffcea7..c243f07d 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -18,6 +18,7 @@ class difference_detection_processor(): screenshot = None watch = None xpath_data = None + preferred_proxy = None def __init__(self, *args, datastore, watch_uuid, **kwargs): super().__init__(*args, **kwargs) @@ -26,7 +27,8 @@ class difference_detection_processor(): # Generic fetcher that should be extended (requests, playwright etc) self.fetcher = Fetcher() - def call_browser(self): + def call_browser(self, preferred_proxy_id=None): + from requests.structures import CaseInsensitiveDict # Protect against file:// access @@ -42,7 +44,7 @@ class difference_detection_processor(): prefer_fetch_backend = self.watch.get('fetch_backend', 'system') # Proxy ID "key" - preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid')) + preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid')) # Pluggable content self.fetcher if not prefer_fetch_backend 
or prefer_fetch_backend == 'system': diff --git a/changedetectionio/static/js/recheck-proxy.js b/changedetectionio/static/js/recheck-proxy.js index 5380fe4d..c55bafe3 100644 --- a/changedetectionio/static/js/recheck-proxy.js +++ b/changedetectionio/static/js/recheck-proxy.js @@ -1,14 +1,14 @@ $(function () { /* add container before each proxy location to show status */ - - var option_li = $('.fetch-backend-proxy li').filter(function() { - return $("input",this)[0].value.length >0; - }); - - //var option_li = $('.fetch-backend-proxy li'); var isActive = false; - $(option_li).prepend('
'); - $(option_li).append('
'); + + function setup_html_widget() { + var option_li = $('.fetch-backend-proxy li').filter(function () { + return $("input", this)[0].value.length > 0; + }); + $(option_li).prepend('
'); + $(option_li).append('
'); + } function set_proxy_check_status(proxy_key, state) { // select input by value name @@ -59,8 +59,14 @@ $(function () { } $('#check-all-proxies').click(function (e) { + e.preventDefault() - $('body').addClass('proxy-check-active'); + + if (!$('body').hasClass('proxy-check-active')) { + setup_html_widget(); + $('body').addClass('proxy-check-active'); + } + $('.proxy-check-details').html(''); $('.proxy-status').html('').fadeIn(); $('.proxy-timing').html(''); diff --git a/changedetectionio/static/styles/scss/parts/_extra_proxies.scss b/changedetectionio/static/styles/scss/parts/_extra_proxies.scss index ed6de397..3aec343c 100644 --- a/changedetectionio/static/styles/scss/parts/_extra_proxies.scss +++ b/changedetectionio/static/styles/scss/parts/_extra_proxies.scss @@ -25,15 +25,19 @@ ul#requests-extra_proxies { body.proxy-check-active { #request { + // Padding set by flex layout + /* .proxy-status { width: 2em; } + */ .proxy-check-details { font-size: 80%; color: #555; display: block; - padding-left: 4em; + padding-left: 2em; + max-width: 500px; } .proxy-timing { diff --git a/changedetectionio/static/styles/styles.css b/changedetectionio/static/styles/styles.css index 71a285b8..9e350d35 100644 --- a/changedetectionio/static/styles/styles.css +++ b/changedetectionio/static/styles/styles.css @@ -119,19 +119,22 @@ ul#requests-extra_proxies { #request label[for=proxy] { display: inline-block; } -body.proxy-check-active #request .proxy-status { - width: 2em; } - -body.proxy-check-active #request .proxy-check-details { - font-size: 80%; - color: #555; - display: block; - padding-left: 4em; } - -body.proxy-check-active #request .proxy-timing { - font-size: 80%; - padding-left: 1rem; - color: var(--color-link); } +body.proxy-check-active #request { + /* + .proxy-status { + width: 2em; + } + */ } + body.proxy-check-active #request .proxy-check-details { + font-size: 80%; + color: #555; + display: block; + padding-left: 2em; + max-width: 500px; } + body.proxy-check-active 
#request .proxy-timing { + font-size: 80%; + padding-left: 1rem; + color: var(--color-link); } #recommended-proxy { display: grid; diff --git a/changedetectionio/tests/test_preview_endpoints.py b/changedetectionio/tests/test_preview_endpoints.py new file mode 100644 index 00000000..e1c8c747 --- /dev/null +++ b/changedetectionio/tests/test_preview_endpoints.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +import time +from flask import url_for +from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks + + +# `subtractive_selectors` should still work in `source:` type requests +def test_fetch_pdf(client, live_server, measure_memory_usage): + import shutil + shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf") + + live_server_setup(live_server) + test_url = url_for('test_pdf_endpoint', _external=True) + # Add our URL to the import page + res = client.post( + url_for("import_page"), + data={"urls": test_url}, + follow_redirects=True + ) + + assert b"1 Imported" in res.data + + wait_for_all_checks(client) + + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + # PDF header should not be there (it was converted to text) + assert b'PDF' not in res.data[:10] + assert b'hello world' in res.data + + # So we know if the file changes in other ways + import hashlib + original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() + # We should have one + assert len(original_md5) > 0 + # And it's going to be in the document + assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data + + shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf") + changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() + res = client.get(url_for("form_watch_checknow"), follow_redirects=True) + assert b'1 watches queued for rechecking.' 
in res.data + + wait_for_all_checks(client) + + # Now something should be ready, indicated by having a 'unviewed' class + res = client.get(url_for("index")) + assert b'unviewed' in res.data + + # The original checksum should not be here anymore (cdio adds it to the bottom of the text) + + res = client.get( + url_for("preview_page", uuid="first"), + follow_redirects=True + ) + + assert original_md5.encode('utf-8') not in res.data + assert changed_md5.encode('utf-8') in res.data + + res = client.get( + url_for("diff_history_page", uuid="first"), + follow_redirects=True + ) + + assert original_md5.encode('utf-8') in res.data + assert changed_md5.encode('utf-8') in res.data + + assert b'here is a change' in res.data