Ability to use our own plugins to scrape extra data

restock-plugin
dgtlmoon 6 months ago
parent 8a35d62e02
commit 321ab19ffb

@ -0,0 +1,23 @@
from typing import Dict, Optional

import pluggy

from changedetectionio.model import Watch as Watch
# Project name handed to pluggy for the hook specification marker below.
# The plugin manager module creates its PluginManager with this same string.
plugin_namespace = "changedetectionio.restock_price_scraper"
# Decorator that marks HookSpec methods as hook specifications for this namespace.
hookspec = pluggy.HookspecMarker(plugin_namespace)
class HookSpec:
    """Hook specifications for the restock/price scraper plugin namespace."""

    @hookspec
    def scrape_price_restock(self, watch: Watch.model, html_content: str, screenshot: bytes, update_obj: Dict) -> Optional[Dict]:
        """
        Scrape price and restock data from html_content and/or screenshot and return via update_obj

        This method is a specification only: it has no body, implementations
        are supplied by registered plugins.

        Args:
            watch (Watch.model): The watch object containing watch configuration.
            html_content (str): The HTML content to scrape.
            screenshot (bytes): The screenshot data.
            update_obj (Dict): The dictionary to update with scraped data.

        Returns:
            Optional[Dict]: The updated dictionary with the scraped price data, or None if no update is made.
        """

@ -0,0 +1,17 @@
import pluggy
from .hookspecs import HookSpec
import importlib.metadata
# Define the plugin namespace
plugin_namespace = "changedetectionio.restock_price_scraper"

# Create a pluggy.PluginManager instance
pm = pluggy.PluginManager(plugin_namespace)

# Register the hook specifications
pm.add_hookspecs(HookSpec)


def _discover_entry_points(group):
    """Return the entry points registered under *group*.

    Works across Python versions: the keyword selection form is used where it
    exists, and the dict-style lookup is only used as a fallback.
    """
    try:
        # Python 3.10+: select by group. The dict-style ``.get()`` on the
        # result of ``entry_points()`` was removed in Python 3.12.
        return importlib.metadata.entry_points(group=group)
    except TypeError:
        # Python 3.8/3.9: entry_points() accepts no arguments and returns a
        # plain dict keyed by group name.
        return importlib.metadata.entry_points().get(group, [])


# Automatically discover and register plugins using entry points
for entry_point in _discover_entry_points(plugin_namespace):
    # Each entry point resolves to a plugin class; register an instance of it
    plugin = entry_point.load()
    pm.register(plugin())

@ -119,6 +119,8 @@ class perform_site_check(difference_detection_processor):
xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True):
from .plugin_manager import pm
if not watch:
raise Exception("Watch no longer exists.")
@ -198,6 +200,19 @@ class perform_site_check(difference_detection_processor):
update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
# Ask any "changedetectionio.restock_price_scraper" namespace plugins if they can add something
# (Should return an updated 'update_obj')
plugin_price_scraping = pm.hook.scrape_price_restock(watch=watch,
html_content=self.fetcher.content,
screenshot=self.fetcher.screenshot,
update_obj=update_obj)
if plugin_price_scraping:
for plugin_result in plugin_price_scraping:
update_obj.update(plugin_result)
if plugin_result.get('restock'):
update_obj['restock'].update(plugin_result.get('restock'))
# What we store in the snapshot
price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"

@ -168,7 +168,7 @@
{% if watch.get('restock') and watch['restock']['price'] != None %}
{% if watch['restock']['price'] != None %}
<span class="restock-label price" title="Price">
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
{{ watch['restock']['price']|format_number_locale }} {% if watch['restock']['currency'] %} {{ watch['restock']['currency'] }}{% endif %}
</span>
{% endif %}
{% elif not watch.has_restock_info %}

@ -92,3 +92,6 @@ babel
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3
# Our own plugins
changedetection.io-amazon-price-scraper>=0.21
Loading…
Cancel
Save