Abstract out the fetch handlers for different fetch types

2 years ago · 425f8ea632
parent fefc39427b
commit 425f8ea632
9 changed files with 47 additions and 44 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -500,7 +500,7 @@ def changedetection_app(config=None, datastore_o=None):
        import hashlib
-        from changedetectionio import fetch_site_status
+        from changedetectionio.fetch_processor import json_html_plaintext
        # Get the most recent one
        newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
@ -514,7 +514,7 @@ def changedetection_app(config=None, datastore_o=None):
                      encoding='utf-8') as file:
                raw_content = file.read()
-                handler = fetch_site_status.perform_site_check(datastore=datastore)
+                handler = json_html_plaintext.perform_site_check(datastore=datastore)
                stripped_content = html_tools.strip_ignore_text(raw_content,
                                                             datastore.data['watching'][uuid]['ignore_text'])
--- a/changedetectionio/fetch_processor/init.py
+++ b/changedetectionio/fetch_processor/init.py
--- a/changedetectionio/fetch_processor/fetch_processor.py
+++ b/changedetectionio/fetch_processor/fetch_processor.py
@ -0,0 +1,37 @@
 class fetch_processor():
    """
    Base class for all fetch processors.

    Known/planned subclasses:
    - json_html_plaintext
    - image (future)

    Holds the datastore reference and the proxy-selection logic shared by
    the processors.
    """

    def __init__(self, *args, datastore, **kwargs):
        """
        :param datastore: keyword-only; object exposing `proxy_list` and
            `data['settings']['requests']['proxy']`, which are read by
            `set_proxy_from_list`.
        """
        # Forward any remaining arguments to the next class in the MRO
        super().__init__(*args, **kwargs)
        self.datastore = datastore

    def _is_known_proxy(self, candidate):
        """Return True when `candidate` appears in any entry of the datastore proxy list."""
        # None can never name a proxy; bailing out early also avoids a TypeError
        # from `None in "some string"` should an entry be a plain string.
        if candidate is None:
            return False
        # NOTE(review): entries are presumably (key, label) sequences, given the
        # `proxy_list[0][0]` fallback below - confirm against the datastore.
        return any(candidate in entry for entry in self.datastore.proxy_list)

    # If there was a proxy list enabled, figure out what proxy_args/which proxy to use
    # if watch.proxy use that
    # fetcher.proxy_override = watch.proxy or main config proxy
    # Allows overriding the proxy on a per-request basis
    # ALWAYS use the first one if nothing is selected
    def set_proxy_from_list(self, watch):
        """
        Resolve which proxy should be used for `watch`.

        Order of preference:
        1. the watch's own 'proxy' value, when it is in the proxy list
        2. the system-wide proxy from the settings, when it is in the proxy list
        3. the first entry of the proxy list

        :param watch: mapping that may carry a 'proxy' key
        :return: the selected proxy, or None when no proxy list is
            configured (None or empty)
        """
        proxy_list = self.datastore.proxy_list

        # No proxy list configured. An empty list is treated the same as None,
        # otherwise the first-entry fallback below would raise IndexError.
        if not proxy_list:
            return None

        # The watch's own choice wins when it is a valid one
        # (.get so that a watch without a 'proxy' key just falls through)
        watch_proxy = watch.get('proxy')
        if self._is_known_proxy(watch_proxy):
            return watch_proxy

        # Not valid (including None), try the system one
        system_proxy = self.datastore.data['settings']['requests']['proxy']
        if self._is_known_proxy(system_proxy):
            return system_proxy

        # Fallback - did not resolve anything, use the first available
        return proxy_list[0][0]
--- a/changedetectionio/fetch_processor/json_html_plaintext.py
+++ b/changedetectionio/fetch_processor/json_html_plaintext.py
@ -9,45 +9,14 @@ from changedetectionio import content_fetcher, html_tools
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 from .fetch_processor import fetch_processor
 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
-class perform_site_check():
+class perform_site_check(fetch_processor):
    screenshot = None
    xpath_data = None
    def __init__(self, *args, datastore, **kwargs):
        super().__init__(*args, **kwargs)
        self.datastore = datastore
    # If there was a proxy list enabled, figure out what proxy_args/which proxy to use
    # if watch.proxy use that
    # fetcher.proxy_override = watch.proxy or main config proxy
    # Allows override the proxy on a per-request basis
    # ALWAYS use the first one if nothing is selected
    def set_proxy_from_list(self, watch):
        proxy_args = None
        if self.datastore.proxy_list is None:
            return None
        # If it's a valid one
        if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
            proxy_args = watch['proxy']
        # not valid (including None), try the system one
        else:
            system_proxy = self.datastore.data['settings']['requests']['proxy']
            # Is not None and exists
            if any([system_proxy in p for p in self.datastore.proxy_list]):
                proxy_args = system_proxy
        # Fallback - Did not resolve anything, use the first available
        if proxy_args is None:
            proxy_args = self.datastore.proxy_list[0][0]
        return proxy_args
    # Doesn't look like python supports forward slash auto enclosure in re.findall
    # So convert it to inline flag "foobar(?i)" type configuration
    def forward_slash_enclosed_regex_to_options(self, regex):
@ -315,4 +284,5 @@ class perform_site_check():
        if not watch.get('previous_md5'):
            watch['previous_md5'] = fetched_md5
        # @todo text_content_before_ignored_filter can be removed? save it here?
        return changed_detected, update_obj, text_content_before_ignored_filter
--- a/changedetectionio/tests/test_css_selector.py
+++ b/changedetectionio/tests/test_css_selector.py
@ -47,7 +47,6 @@ def set_modified_response():
 # Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
 def test_css_filter_output():
    from changedetectionio import fetch_site_status
    from inscriptis import get_text
    # Check text with sub-parts renders correctly
--- a/changedetectionio/tests/test_element_removal.py
+++ b/changedetectionio/tests/test_element_removal.py
@ -71,7 +71,6 @@ def set_modified_response():
 def test_element_removal_output():
    from changedetectionio import fetch_site_status
    from inscriptis import get_text
    # Check text with sub-parts renders correctly
--- a/changedetectionio/tests/test_ignore_regex_text.py
+++ b/changedetectionio/tests/test_ignore_regex_text.py
@ -1,7 +1,5 @@
 #!/usr/bin/python3
 import time
 from flask import url_for
 from . util import live_server_setup
 from changedetectionio import html_tools
@ -11,7 +9,7 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_regex_text_func():
-    from changedetectionio import fetch_site_status
+    from ..fetch_processor import json_html_plaintext
    test_content = """
    but sometimes we want to remove the lines.
@ -23,7 +21,7 @@ def test_strip_regex_text_func():
    ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
-    fetcher = fetch_site_status.perform_site_check(datastore=False)
+    fetcher = json_html_plaintext.perform_site_check(datastore=False)
    stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
    assert b"but 1 lines" in stripped_content
--- a/changedetectionio/tests/test_ignore_text.py
+++ b/changedetectionio/tests/test_ignore_text.py
@ -11,7 +11,7 @@ def test_setup(live_server):
 # Unit test of the stripper
 # Always we are dealing in utf-8
 def test_strip_text_func():
-    from changedetectionio import fetch_site_status
+    from ..fetch_processor import json_html_plaintext
    test_content = """
    Some content
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -117,9 +117,9 @@ class update_worker(threading.Thread):
                os.unlink(full_path)
    def run(self):
-        from changedetectionio import fetch_site_status
+        from changedetectionio.fetch_processor import json_html_plaintext
-        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
+        update_handler = json_html_plaintext.perform_site_check(datastore=self.datastore)
        while not self.app.config.exit.is_set():