From c1a0481ec0625f64bdac66caa96ceffbb17324a4 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 23 May 2024 09:59:22 +0200
Subject: [PATCH] skip rdfa?

---
 changedetectionio/model/Watch.py             | 1 +
 changedetectionio/processors/restock_diff.py | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index a88a220d..178187ee 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -197,6 +197,7 @@ class model(watch_base):
                 return True
 
         return False
+    
     # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
     @property
     def newest_history_key(self):
diff --git a/changedetectionio/processors/restock_diff.py b/changedetectionio/processors/restock_diff.py
index f7e7401d..0005b5b2 100644
--- a/changedetectionio/processors/restock_diff.py
+++ b/changedetectionio/processors/restock_diff.py
@@ -38,11 +38,13 @@ def get_itemprop_availability(html_content) -> Restock:
     import extruct
     logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
 
-
     value = {}
     now = time.time()
-    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7)
-    data = extruct.extract(html_content)
+    # Extruct is very slow (~800ms on my i7) and 'rdfa' seems to be the heaviest syntax, so it is left out of the list below. Some ML approach might still be faster.
+
+    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
+
+    data = extruct.extract(html_content, syntaxes=syntaxes)
     logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")
 
     # First phase, dead simple scanning of anything that looks useful