From 321ab19ffbf492ed34a567604ba93109dea3e826 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Tue, 30 Jul 2024 17:10:21 +0200
Subject: [PATCH] Ability to use our own plugins to scrape extra data

---
 .../processors/restock_diff/hookspecs.py      | 26 ++++++++++++++++++
 .../processors/restock_diff/plugin_manager.py | 29 ++++++++++++++++++++
 .../processors/restock_diff/processor.py      | 21 ++++++++++++++
 .../templates/watch-overview.html             |  2 +-
 requirements.txt                              |  3 ++
 5 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 changedetectionio/processors/restock_diff/hookspecs.py
 create mode 100644 changedetectionio/processors/restock_diff/plugin_manager.py

diff --git a/changedetectionio/processors/restock_diff/hookspecs.py b/changedetectionio/processors/restock_diff/hookspecs.py
new file mode 100644
index 00000000..e14cb5a9
--- /dev/null
+++ b/changedetectionio/processors/restock_diff/hookspecs.py
@@ -0,0 +1,26 @@
+import pluggy
+from typing import Dict, Optional
+from changedetectionio.model import Watch as Watch
+
+# Single definition of the namespace; the plugin manager imports it from here
+plugin_namespace = "changedetectionio.restock_price_scraper"
+hookspec = pluggy.HookspecMarker(plugin_namespace)
+
+class HookSpec:
+    """Hook specifications that restock/price scraper plugins can implement."""
+
+    @hookspec
+    def scrape_price_restock(self, watch: Watch.model, html_content: str, screenshot: bytes, update_obj: Dict) -> Optional[Dict]:
+        """
+        Scrape price and restock data from html_content and/or screenshot and return via update_obj
+
+        Args:
+            watch (Watch.model): The watch object containing watch configuration.
+            html_content (str): The HTML content to scrape.
+            screenshot (bytes): The screenshot data.
+            update_obj (Dict): The dictionary to update with scraped data.
+
+        Returns:
+            Optional[Dict]: The updated dictionary with the scraped price data, or None if no update is made.
+        """
+
diff --git a/changedetectionio/processors/restock_diff/plugin_manager.py b/changedetectionio/processors/restock_diff/plugin_manager.py
new file mode 100644
index 00000000..c3702f65
--- /dev/null
+++ b/changedetectionio/processors/restock_diff/plugin_manager.py
@@ -0,0 +1,29 @@
+import importlib.metadata
+
+import pluggy
+
+from .hookspecs import HookSpec, plugin_namespace
+
+
+def _entry_points_for(group):
+    """Return the installed entry points registered under `group`.
+
+    The dict-style `entry_points().get(group)` lookup was deprecated in Python 3.10
+    and removed in 3.12; `.select(group=...)` replaces it. Older interpreters
+    return a plain dict keyed by group, which has no `.select()`.
+    """
+    discovered = importlib.metadata.entry_points()
+    if hasattr(discovered, "select"):
+        return discovered.select(group=group)
+    return discovered.get(group, [])
+
+
+# Create a pluggy.PluginManager instance
+pm = pluggy.PluginManager(plugin_namespace)
+
+# Register the hook specifications
+pm.add_hookspecs(HookSpec)
+
+# Automatically discover and register plugins using entry points
+for entry_point in _entry_points_for(plugin_namespace):
+    plugin = entry_point.load()
+    pm.register(plugin())
diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py
index b2184e35..2dc728b7 100644
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -119,6 +119,8 @@ class perform_site_check(difference_detection_processor):
     xpath_data = None
 
     def run_changedetection(self, watch, skip_when_checksum_same=True):
+        from .plugin_manager import pm
+
         if not watch:
             raise Exception("Watch no longer exists.")
 
@@ -198,6 +200,25 @@ class perform_site_check(difference_detection_processor):
             update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
             logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
 
+        # Ask any "changedetectionio.restock_price_scraper" namespace plugins if they can add something
+        # (Should return an updated 'update_obj')
+        plugin_price_scraping = pm.hook.scrape_price_restock(watch=watch,
+                                                             html_content=self.fetcher.content,
+                                                             screenshot=self.fetcher.screenshot,
+                                                             update_obj=update_obj)
+        if plugin_price_scraping:
+            for plugin_result in plugin_price_scraping:
+                plugin_restock = plugin_result.get('restock')
+                # Merge 'restock' separately: a plain update_obj.update(plugin_result) would replace the
+                # whole 'restock' entry and drop the values gathered before the plugins ran.
+                update_obj.update({k: v for k, v in plugin_result.items() if k != 'restock'})
+                if plugin_restock:
+                    if update_obj.get('restock') is None:
+                        update_obj['restock'] = plugin_restock
+                    else:
+                        update_obj['restock'].update(plugin_restock)
+
+
         # What we store in the snapshot
         price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
         snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"
diff --git a/changedetectionio/templates/watch-overview.html b/changedetectionio/templates/watch-overview.html
index 736e19da..bdb48406 100644
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@@ -168,7 +168,7 @@
                     {% if watch.get('restock') and watch['restock']['price'] != None %}
                         {% if watch['restock']['price'] != None %}
 
-                            {{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
+                            {{ watch['restock']['price']|format_number_locale }} {% if watch['restock']['currency'] %} {{ watch['restock']['currency'] }}{% endif %}
 
                         {% endif %}
                     {% elif not watch.has_restock_info %}
diff --git a/requirements.txt b/requirements.txt
index 2e085cf6..cbaca1c9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -92,3 +92,6 @@ babel
 
 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
 greenlet >= 3.0.3
+
+# Our own plugins
+changedetection.io-amazon-price-scraper>=0.21