second attempt at plugins

1 year ago · 70842193b0
parent 0285d00f13
commit 70842193b0
8 changed files with 90 additions and 80 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -16,6 +16,7 @@ import logging
 import os
 import pytz
 import queue
+import sys
 import threading
 import time
 import timeago
@ -80,6 +81,9 @@ csrf = CSRFProtect()
 csrf.init_app(app)
 notification_debug_log=[]

+from pathlib import Path
+sys.path.append(os.path.join(Path.home(), 'changedetectionio-plugins'))
+
 watch_api = Api(app, decorators=[csrf.exempt])

 def init_app_secret(datastore_path):
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@ -76,7 +76,7 @@ class Watch(Resource):
        # Properties are not returned as a JSON, so add the required props manually
        watch['history_n'] = watch.history_n
        watch['last_changed'] = watch.last_changed
-
+        watch['viewed'] = watch.viewed
        return watch

    @auth.check_token
--- a/changedetectionio/blueprint/browser_steps/init.py
+++ b/changedetectionio/blueprint/browser_steps/init.py
@ -97,7 +97,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            proxy=proxy)

        # For test
-        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
+        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://exbaseample.com?time="+str(time.time()))

        return browsersteps_start_session

--- a/changedetectionio/blueprint/check_proxies/init.py
+++ b/changedetectionio/blueprint/check_proxies/init.py
@ -41,7 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        now = time.time()
        try:
            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
-            update_handler.call_browser()
+            update_handler.fetch_content()
        # title, size is len contents not len xfer
        except content_fetcher.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@ -4,10 +4,8 @@ import hashlib
 import re
 from changedetectionio import content_fetcher
 from copy import deepcopy
-from distutils.util import strtobool
-
-class difference_detection_processor():

+class difference_detection_processor_interface():
    browser_steps = None
    datastore = None
    fetcher = None
@ -15,52 +13,36 @@ class difference_detection_processor():
    watch = None
    xpath_data = None

-    def __init__(self, *args, datastore, watch_uuid, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.datastore = datastore
-        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))

-    def call_browser(self):
-
-        # Protect against file:// access
-        if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
-            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
-                raise Exception(
-                    "file:// type access is denied for security reasons."
-                )
-
-        url = self.watch.link
-
-        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
-        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
-
-        # Proxy ID "key"
-        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
+    @abstractmethod
+    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+        update_obj = {'last_notification_error': False, 'last_error': False}
+        some_data = 'xxxxx'
+        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
+        changed_detected = False
+        return changed_detected, update_obj, ''.encode('utf-8')

-        # Pluggable content self.fetcher
-        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
-            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

-        # In the case that the preferred fetcher was a browser config with custom connection URL..
-        # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
-        browser_connection_url = None
-        if prefer_fetch_backend.startswith('extra_browser_'):
-            (t, key) = prefer_fetch_backend.split('extra_browser_')
-            connection = list(
-                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
-            if connection:
-                prefer_fetch_backend = 'base_html_playwright'
-                browser_connection_url = connection[0].get('browser_connection_url')
+class text_content_difference_detection_processor(difference_detection_processor_interface):

+    def __init__(self, *args, datastore, watch_uuid, prefer_fetch_backend, **kwargs):
+        self.datastore = datastore
+        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
+        self.prefer_fetch_backend = prefer_fetch_backend
+        super().__init__(*args, **kwargs)

+        ########################################
+        # Attach the correct fetcher and proxy #
+        ########################################
        # Grab the right kind of 'fetcher', (playwright, requests, etc)
-        if hasattr(content_fetcher, prefer_fetch_backend):
-            fetcher_obj = getattr(content_fetcher, prefer_fetch_backend)
+        if hasattr(content_fetcher, self.prefer_fetch_backend):
+            fetcher_obj = getattr(content_fetcher, self.prefer_fetch_backend)
        else:
            # If the klass doesnt exist, just use a default
            fetcher_obj = getattr(content_fetcher, "html_requests")

-
+        # Proxy ID "key"
+        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
        proxy_url = None
        if preferred_proxy_id:
            proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
@ -69,9 +51,23 @@ class difference_detection_processor():
        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
         # When browser_connection_url is None, the fetcher should work out the best defaults itself (OS env vars etc.)
        self.fetcher = fetcher_obj(proxy_override=proxy_url,
-                                   browser_connection_url=browser_connection_url
+                                   browser_connection_url=None # Default, let each fetcher work it out
                                   )

+    def fetch_content(self):
+
+        url = self.watch.link
+
+        # In the case that the preferred fetcher was a browser config with custom connection URL..
+        # @todo - on save watch, if it's extra_browser_ then it should be obvious it will use playwright (like if it's requests now..)
+        if self.prefer_fetch_backend.startswith('extra_browser_'):
+            (t, key) = self.prefer_fetch_backend.split('extra_browser_')
+            connection = list(
+                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
+            if connection:
+                prefer_fetch_backend = 'base_html_playwright'
+                browser_connection_url = connection[0].get('browser_connection_url')
+
        if self.watch.has_browser_steps:
            self.fetcher.browser_steps = self.watch.get('browser_steps', [])
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
@ -115,14 +111,6 @@ class difference_detection_processor():

        # After init, call run_changedetection() which will do the actual change-detection

-    @abstractmethod
-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
-        update_obj = {'last_notification_error': False, 'last_error': False}
-        some_data = 'xxxxx'
-        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
-        changed_detected = False
-        return changed_detected, update_obj, ''.encode('utf-8')
-

 def available_processors():
    from . import restock_diff, text_json_diff
--- a/changedetectionio/processors/restock_diff.py
+++ b/changedetectionio/processors/restock_diff.py
@ -1,8 +1,9 @@

 import hashlib
 import urllib3
-from . import difference_detection_processor
+#from . import browser_content_difference_detection_processor
 from copy import deepcopy
+from . import text_content_difference_detection_processor

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@ -15,7 +16,7 @@ class UnableToExtractRestockData(Exception):
        self.status_code = status_code
        return

-class perform_site_check(difference_detection_processor):
+class perform_site_check(text_content_difference_detection_processor):
    screenshot = None
    xpath_data = None

--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@ -10,8 +10,8 @@ import urllib3
 from changedetectionio import content_fetcher, html_tools
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from copy import deepcopy
-from . import difference_detection_processor
 from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
+from . import text_content_difference_detection_processor

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@ -31,7 +31,7 @@ class PDFToHTMLToolNotFound(ValueError):

 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
-class perform_site_check(difference_detection_processor):
+class perform_site_check(text_content_difference_detection_processor):

    def run_changedetection(self, uuid, skip_when_checksum_same=True):
        changed_detected = False
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@ -1,9 +1,13 @@
+import importlib
 import os
+import re
 import threading
 import queue
 import time
+from distutils.util import strtobool

 from changedetectionio import content_fetcher, html_tools
+
 from .processors.text_json_diff import FilterNotFoundInResponse
 from .processors.restock_diff import UnableToExtractRestockData

@ -15,6 +19,7 @@ from .processors.restock_diff import UnableToExtractRestockData
 import logging
 import sys

+
 class update_worker(threading.Thread):
    current_uuid = None

@ -24,6 +29,7 @@ class update_worker(threading.Thread):
        self.app = app
        self.notification_q = notification_q
        self.datastore = datastore
+
        super().__init__(*args, **kwargs)

    def queue_notification_for_watch(self, n_object, watch):
@ -209,7 +215,7 @@ class update_worker(threading.Thread):
        from .processors import text_json_diff, restock_diff

        while not self.app.config.exit.is_set():
-            update_handler = None
+            change_processor = None

            try:
                queued_item_data = self.q.get(block=False)
@ -230,35 +236,46 @@ class update_worker(threading.Thread):
                    now = time.time()

                    try:
-                        # Processor is what we are using for detecting the "Change"
-                        processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
-                        # if system...
+                        # Protect against file:// access
+                        if re.search(r'^file://', self.datastore.data['watching'][uuid].get('url', '').strip(), re.IGNORECASE):
+                            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
+                                raise Exception(
+                                    "file:// type access is denied for security reasons."
+                                )

-                        # Abort processing when the content was the same as the last fetch
-                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
+                        prefer_fetch_backend = self.datastore.data['watching'][uuid].get('fetch_backend', 'system')
+                        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
+                            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

+                        processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')

-                        # @todo some way to switch by name
-                        # Init a new 'difference_detection_processor'
+                        processor = 'cdio_whois_diff'

-                        if processor == 'restock_diff':
-                            update_handler = restock_diff.perform_site_check(datastore=self.datastore,
-                                                                             watch_uuid=uuid
-                                                                             )
+                        if processor in ['text_json_diff', 'restock_diff']:
+                            base_processor_module = f"changedetectionio.processors.{processor}"
                        else:
-                            # Used as a default and also by some tests
-                            update_handler = text_json_diff.perform_site_check(datastore=self.datastore,
-                                                                               watch_uuid=uuid
-                                                                               )
+                            # Each plugin is one processor exactly
+                            base_processor_module = f"{processor}.processor"
+
+# It's correct that the processor dictates which fetcher it uses, I think
+
+                        # these should inherit the right fetcher too
+                        module = importlib.import_module(base_processor_module)
+                        change_processor = getattr(module, 'perform_site_check')
+                        change_processor = change_processor(datastore=self.datastore,
+                                                            watch_uuid=uuid,
+                                                            prefer_fetch_backend=prefer_fetch_backend
+                                                            )

                        # Clear last errors (move to preflight func?)
                        self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None

-                        update_handler.call_browser()
-
-                        changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
-                                                                                    skip_when_checksum_same=skip_when_same_checksum,
-                                                                                    )
+                        skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
+                        # Each processor extends base class of the kind of fetcher it needs to run anyway
+                        change_processor.fetch_content()
+                        changed_detected, update_obj, contents = change_processor.run_changedetection(uuid,
+                                                                                                      skip_when_checksum_same=skip_when_same_checksum
+                                                                                                      )

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@ -465,10 +482,10 @@ class update_worker(threading.Thread):
                                                                           })

                        # Always save the screenshot if it's available
-                        if update_handler.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
-                        if update_handler.xpath_data:
-                            self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
+                        if change_processor.screenshot:
+                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=change_processor.screenshot)
+                        if change_processor.xpath_data:
+                            self.datastore.save_xpath_data(watch_uuid=uuid, data=change_processor.xpath_data)


                self.current_uuid = None  # Done