From 343e359b391a76b0a5ee94567a393d1fb2b64a89 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sun, 23 Jun 2024 09:19:32 +0200
Subject: [PATCH] Now saving last two HTML snapshots for future reference,
 refactor, dont write screenshots and xpath to disk when no change detected
 (saves disk IO) (#2431)

---
 changedetectionio/api/api_v1.py               |  20 ++-
 .../blueprint/browser_steps/__init__.py       |   6 +-
 changedetectionio/model/Watch.py              |  91 ++++++++++--
 changedetectionio/processors/__init__.py      |   3 +-
 changedetectionio/processors/restock_diff.py  |  10 +-
 .../processors/text_json_diff.py              |  25 ++--
 changedetectionio/store.py                    |  41 ------
 changedetectionio/tests/test_api.py           |   9 ++
 .../tests/test_history_consistency.py         |  28 ++--
 changedetectionio/update_worker.py            | 130 ++++++++++--------
 10 files changed, 212 insertions(+), 151 deletions(-)

diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py
index 85e2b30e..19d83612 100644
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -170,23 +170,33 @@ class WatchSingleHistory(Resource):
         curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
         @apiName Get single snapshot content
         @apiGroup Watch History
+        @apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
         @apiSuccess (200) {String} OK
         @apiSuccess (404) {String} ERR Not found
         """
         watch = self.datastore.data['watching'].get(uuid)
         if not watch:
-            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
+            abort(404, message=f"No watch exists with the UUID of {uuid}")
 
         if not len(watch.history):
-            abort(404, message='Watch found but no history exists for the UUID {}'.format(uuid))
+            abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
 
         if timestamp == 'latest':
             timestamp = list(watch.history.keys())[-1]
 
-        content = watch.get_history_snapshot(timestamp)
+        if request.args.get('html'):
+            content = watch.get_fetched_html(timestamp)
+            if content:
+                response = make_response(content, 200)
+                response.mimetype = "text/html"
+            else:
+                response = make_response("No content found", 404)
+                response.mimetype = "text/plain"
+        else:
+            content = watch.get_history_snapshot(timestamp)
+            response = make_response(content, 200)
+            response.mimetype = "text/plain"
 
-        response = make_response(content, 200)
-        response.mimetype = "text/plain"
         return response

diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py
index 30797099..f92bf9f8 100644
--- a/changedetectionio/blueprint/browser_steps/__init__.py
+++ b/changedetectionio/blueprint/browser_steps/__init__.py
@@ -187,8 +187,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
             u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
             if is_last_step and u:
                 (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
-                datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
-                datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
+                watch = datastore.data['watching'].get(uuid)
+                if watch:
+                    watch.save_screenshot(screenshot=screenshot)
+                    watch.save_xpath_data(data=xpath_data)
 
 #            if not this_session.page:
 #                cleanup_playwright_session()
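
The new `html` query parameter above changes what the single-history endpoint can return. A minimal client sketch, assuming a default local instance and reusing the placeholder API key and UUID from the apidoc example; everything here apart from the route and the `html`/`latest` semantics is illustrative:

    import requests

    BASE = "http://localhost:5000/api/v1"
    HEADERS = {"x-api-key": "813031b16330fe25e3780cf0325daa45"}  # placeholder key from the apidoc example
    uuid = "cc0cfffa-f449-477b-83ea-0caafd1dc091"                # placeholder watch UUID

    # Default behaviour: the processed text snapshot, served as text/plain
    r = requests.get(f"{BASE}/watch/{uuid}/history/latest", headers=HEADERS)
    print(r.headers.get("content-type"))

    # New behaviour: the raw fetched HTML, served as text/html. Only the last
    # two HTML snapshots are kept on disk, so 'latest' is the safe timestamp.
    r = requests.get(f"{BASE}/watch/{uuid}/history/latest", params={"html": "1"}, headers=HEADERS)
    print(r.text if r.ok else "no stored HTML for that timestamp")  # 404 when pruned or never saved
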
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 83bfb2e3..44157268 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -328,14 +328,9 @@ class model(dict):
     def save_history_text(self, contents, timestamp, snapshot_id):
         import brotli
 
-        self.ensure_data_dir_exists()
+        logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
 
-        # Small hack so that we sleep just enough to allow 1 second between history snapshots
-        # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
-        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
-            logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
-            timestamp = str(int(timestamp) + 1)
-            time.sleep(1)
+        self.ensure_data_dir_exists()
 
         threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
         skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
@@ -528,8 +523,42 @@ class model(dict):
         # None is set
         return False
 
+    def save_error_text(self, contents):
+        self.ensure_data_dir_exists()
+        target_path = os.path.join(self.watch_data_dir, "last-error.txt")
+        with open(target_path, 'w') as f:
+            f.write(contents)
+
+    def save_xpath_data(self, data, as_error=False):
+        import json
+
+        if as_error:
+            target_path = os.path.join(self.watch_data_dir, "elements-error.json")
+        else:
+            target_path = os.path.join(self.watch_data_dir, "elements.json")
+
+        self.ensure_data_dir_exists()
+
+        with open(target_path, 'w') as f:
+            f.write(json.dumps(data))
+            f.close()
+
+    # Save as PNG, PNG is larger but better for doing visual diff in the future
+    def save_screenshot(self, screenshot: bytes, as_error=False):
+
+        if as_error:
+            target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
+        else:
+            target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
+
+        self.ensure_data_dir_exists()
+
+        with open(target_path, 'wb') as f:
+            f.write(screenshot)
+            f.close()
+
 
-    def get_last_fetched_before_filters(self):
+    def get_last_fetched_text_before_filters(self):
         import brotli
 
         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
@@ -544,12 +573,56 @@ class model(dict):
         with open(filepath, 'rb') as f:
             return(brotli.decompress(f.read()).decode('utf-8'))
 
-    def save_last_fetched_before_filters(self, contents):
+    def save_last_text_fetched_before_filters(self, contents):
         import brotli
         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
         with open(filepath, 'wb') as f:
             f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
 
+    def save_last_fetched_html(self, timestamp, contents):
+        import brotli
+
+        self.ensure_data_dir_exists()
+        snapshot_fname = f"{timestamp}.html.br"
+        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+
+        with open(filepath, 'wb') as f:
+            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
+            try:
+                f.write(brotli.compress(contents))
+            except Exception as e:
+                logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
+                logger.warning(e)
+                f.write(contents)
+
+        self._prune_last_fetched_html_snapshots()
+
+    def get_fetched_html(self, timestamp):
+        import brotli
+
+        snapshot_fname = f"{timestamp}.html.br"
+        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+        if os.path.isfile(filepath):
+            with open(filepath, 'rb') as f:
+                return (brotli.decompress(f.read()).decode('utf-8'))
+
+        return False
+
+
+    def _prune_last_fetched_html_snapshots(self):
+
+        dates = list(self.history.keys())
+        dates.reverse()
+
+        for index, timestamp in enumerate(dates):
+            snapshot_fname = f"{timestamp}.html.br"
+            filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+
+            # Keep only the first 2
+            if index > 1 and os.path.isfile(filepath):
+                os.remove(filepath)
+
 
     @property
     def get_browsersteps_available_screenshots(self):
         "For knowing which screenshots are available to show the user in BrowserSteps UI"
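
Taken together, save_last_fetched_html(), get_fetched_html() and _prune_last_fetched_html_snapshots() form a tiny two-deep snapshot cache: one brotli-compressed `<epoch>.html.br` file per check, with everything older than the newest two removed. A standalone sketch of the same pattern; the class name and the directory-scan pruning are illustrative, the committed code walks the watch's history index rather than the directory listing:

    import os
    import brotli  # pip install Brotli

    class HtmlSnapshotCache:
        def __init__(self, data_dir, keep=2):
            self.data_dir = data_dir
            self.keep = keep
            os.makedirs(data_dir, exist_ok=True)

        def save(self, timestamp, html: str):
            path = os.path.join(self.data_dir, f"{timestamp}.html.br")
            with open(path, 'wb') as f:
                f.write(brotli.compress(html.encode('utf-8')))
            self._prune()

        def _prune(self):
            # Newest first by the epoch prefix in the filename, drop the rest
            snapshots = sorted((f for f in os.listdir(self.data_dir) if f.endswith('.html.br')),
                               key=lambda f: int(f.split('.')[0]), reverse=True)
            for fname in snapshots[self.keep:]:
                os.remove(os.path.join(self.data_dir, fname))

        def load(self, timestamp):
            path = os.path.join(self.data_dir, f"{timestamp}.html.br")
            if not os.path.isfile(path):
                return None
            with open(path, 'rb') as f:
                return brotli.decompress(f.read()).decode('utf-8')

Keeping only two snapshots bounds disk usage per watch while still leaving the previous and current raw HTML available for the API and for future visual/diff work.
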
diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py
index 9e4ce6b1..d24c9a9d 100644
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -1,5 +1,6 @@
 from abc import abstractmethod
 from changedetectionio.strtobool import strtobool
+from changedetectionio.model import Watch
 from copy import deepcopy
 from loguru import logger
 import hashlib
@@ -138,7 +139,7 @@ class difference_detection_processor():
 
     # After init, call run_changedetection() which will do the actual change-detection
     @abstractmethod
-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, watch: Watch, skip_when_checksum_same=True):
         update_obj = {'last_notification_error': False, 'last_error': False}
         some_data = 'xxxxx'
         update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

diff --git a/changedetectionio/processors/restock_diff.py b/changedetectionio/processors/restock_diff.py
index e692e7cb..a948eb0d 100644
--- a/changedetectionio/processors/restock_diff.py
+++ b/changedetectionio/processors/restock_diff.py
@@ -1,6 +1,5 @@
 
 from . import difference_detection_processor
-from copy import deepcopy
 from loguru import logger
 import hashlib
 import urllib3
@@ -20,10 +19,7 @@ class perform_site_check(difference_detection_processor):
     screenshot = None
     xpath_data = None
 
-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
-
-        # DeepCopy so we can be sure we don't accidently change anything by reference
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
 
         if not watch:
             raise Exception("Watch no longer exists.")
@@ -44,13 +40,13 @@ class perform_site_check(difference_detection_processor):
             fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
             # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
             update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
-            logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
+            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
         else:
             raise UnableToExtractRestockData(status_code=self.fetcher.status_code)
 
         # The main thing that all this at the moment comes down to :)
         changed_detected = False
-        logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
+        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
 
         if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
             # Yes if we only care about it going to instock, AND we are in stock
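
The signature change from run_changedetection(uuid) to run_changedetection(watch) removes a datastore lookup plus a deepcopy from every processor. A stripped-down, illustrative-only sketch of the shape of the new call path; the names here are hypothetical, not the project's API:

    class Processor:
        def run_changedetection(self, watch, skip_when_checksum_same=True):
            if not watch:
                raise Exception("Watch no longer exists.")
            # The processor reads settings straight off the model it was handed
            return {'previous_md5': watch.get('previous_md5')}

    class Worker:
        def __init__(self, watching):
            self.watching = watching  # uuid -> watch dict/model

        def check(self, uuid):
            # Resolve the watch once, up front; the same object is then shared
            # by the processor and by artifact writers like save_screenshot()
            watch = self.watching.get(uuid)
            return Processor().run_changedetection(watch=watch)

    print(Worker({'abc': {'uuid': 'abc', 'previous_md5': None}}).check('abc'))

Because the worker and processor now share one object, per-watch artifacts (screenshots, xpath data, HTML snapshots) can live as methods on the model rather than on the store.
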
diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py
index 1d60be63..e793de89 100644
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -10,8 +10,6 @@ from . import difference_detection_processor
 from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
-import changedetectionio.content_fetchers
-from copy import deepcopy
 from loguru import logger
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -21,7 +19,8 @@ description = 'Detects all text changes where possible'
 
 json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
 
 class FilterNotFoundInResponse(ValueError):
-    def __init__(self, msg):
+    def __init__(self, msg, screenshot=None):
+        self.screenshot = screenshot
         ValueError.__init__(self, msg)
 
@@ -34,14 +33,12 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):
 
-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
         changed_detected = False
         html_content = ""
         screenshot = False  # as bytes
         stripped_text_from_html = ""
 
-        # DeepCopy so we can be sure we don't accidently change anything by reference
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
         if not watch:
             raise Exception("Watch no longer exists.")
 
@@ -116,12 +113,12 @@ class perform_site_check(difference_detection_processor):
         # Better would be if Watch.model could access the global data also
         # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
         # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
-        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters')
+        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
 
         # 1845 - remove duplicated filters in both group and watch include filter
         include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
 
-        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'),
+        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
                                  *watch.get("subtractive_selectors", []),
                                  *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
                                  ]
@@ -188,7 +185,7 @@ class perform_site_check(difference_detection_processor):
                                                                 append_pretty_line_formatting=not watch.is_source_type_url)
 
                 if not html_content.strip():
-                    raise FilterNotFoundInResponse(include_filters_rule)
+                    raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot)
 
             if has_subtractive_selectors:
                 html_content = html_tools.element_removal(subtractive_selectors, html_content)
@@ -222,7 +219,7 @@ class perform_site_check(difference_detection_processor):
             from .. import diff
             # needs to not include (added) etc or it may get used twice
             # Replace the processed text with the preferred result
-            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
+            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
                                              newest_version_file_contents=stripped_text_from_html,
                                              include_equal=False,  # not the same lines
                                              include_added=watch.get('filter_text_added', True),
@@ -231,7 +228,7 @@ class perform_site_check(difference_detection_processor):
                                              line_feed_sep="\n",
                                              include_change_type_prefix=False)
 
-            watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
+            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)
 
             if not rendered_diff and stripped_text_from_html:
                 # We had some content, but no differences were found
@@ -344,17 +341,17 @@ class perform_site_check(difference_detection_processor):
             if not watch['title'] or not len(watch['title']):
                 update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content)
 
-        logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
+        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
 
         if changed_detected:
            if watch.get('check_unique_lines', False):
                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
                # One or more lines? unsure?
                if not has_unique_lines:
-                   logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False")
+                   logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
                    changed_detected = False
                else:
-                   logger.debug(f"check_unique_lines: UUID {uuid} had unique content")
+                   logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
 
         # Always record the new checksum
         update_obj["previous_md5"] = fetched_md5
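
FilterNotFoundInResponse gaining a screenshot argument is the piece that lets the worker persist an error screenshot even though the fetch itself succeeded: the exception carries fetch-time context up to the handler. A minimal reproduction of the pattern, with hypothetical names:

    class FilterNotFound(ValueError):
        def __init__(self, msg, screenshot=None):
            self.screenshot = screenshot  # bytes captured before the filter failed
            super().__init__(msg)

    def apply_filters(html, selectors, screenshot=None):
        matched = ""  # pretend none of the CSS/xpath selectors matched anything
        if not matched.strip():
            raise FilterNotFound(msg=selectors, screenshot=screenshot)
        return matched

    try:
        apply_filters("<html></html>", ["#price"], screenshot=b"\x89PNG...")
    except FilterNotFound as e:
        if e.screenshot:
            print(f"persisting {len(e.screenshot)} bytes as last-error-screenshot.png")
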
"elements-error.json") - else: - target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json") - self.data['watching'][watch_uuid].ensure_data_dir_exists() - with open(target_path, 'w') as f: - f.write(json.dumps(data)) - f.close() - - def sync_to_json(self): logger.info("Saving JSON..") try: diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index 5be55ec2..2f66489a 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -149,6 +149,15 @@ def test_api_simple(client, live_server): headers={'x-api-key': api_key}, ) assert b'which has this one new line' in res.data + assert b'
diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py
index 5be55ec2..2f66489a 100644
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -149,6 +149,15 @@ def test_api_simple(client, live_server):
        headers={'x-api-key': api_key},
    )
    assert b'which has this one new line' in res.data
+    assert b'
0 and c >= threshold:
-                                if not self.datastore.data['watching'][uuid].get('notification_muted'):
+                                if not watch.get('notification_muted'):
                                     self.send_filter_failure_notification(uuid)
                                 c = 0
 
@@ -400,15 +407,15 @@ class update_worker(threading.Thread):
                                 }
                             )
 
-                        if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
-                            c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
+                        if watch.get('filter_failure_notification_send', False):
+                            c = watch.get('consecutive_filter_failures', 5)
                             c += 1
                             # Send notification if we reached the threshold?
                             threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
                             logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
                             if threshold > 0 and c >= threshold:
-                                if not self.datastore.data['watching'][uuid].get('notification_muted'):
+                                if not watch.get('notification_muted'):
                                     self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
                                 c = 0
 
@@ -430,7 +437,7 @@ class update_worker(threading.Thread):
                     except content_fetchers.exceptions.JSActionExceptions as e:
                         err_text = "Error running JS Actions - Page request - "+e.message
                         if e.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
+                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                            'last_check_status': e.status_code})
                         process_changedetection_results = False
@@ -440,7 +447,7 @@ class update_worker(threading.Thread):
                             err_text = "{} - {}".format(err_text, e.message)
 
                         if e.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
+                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
 
                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                            'last_check_status': e.status_code,
@@ -464,8 +471,6 @@ class update_worker(threading.Thread):
                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
                         # Other serious error
                         process_changedetection_results = False
-#                        import traceback
-#                        print(traceback.format_exc())
 
                     else:
                         # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
@@ -473,7 +478,7 @@ class update_worker(threading.Thread):
                             continue
 
                         # Mark that we never had any failures
-                        if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
+                        if not watch.get('ignore_status_codes'):
                             update_obj['consecutive_filter_failures'] = 0
 
                         # Everything ran OK, clean off any previous error
@@ -481,25 +486,48 @@ class update_worker(threading.Thread):
 
                         self.cleanup_error_artifacts(uuid)
 
+                    if not self.datastore.data['watching'].get(uuid):
+                        continue
+
                     #
                     # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                     if process_changedetection_results:
+                        # Always save the screenshot if it's available
+
+                        if update_handler.screenshot:
+                            watch.save_screenshot(screenshot=update_handler.screenshot)
+
+                        if update_handler.xpath_data:
+                            watch.save_xpath_data(data=update_handler.xpath_data)
+
                         try:
-                            watch = self.datastore.data['watching'].get(uuid)
                             self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
 
                             # Also save the snapshot on the first time checked
-                            if changed_detected or not watch['last_checked']:
+                            if changed_detected or not watch.get('last_checked'):
+                                timestamp = round(time.time())
+
+                                # Small hack so that we sleep just enough to allow 1 second between history snapshots
+                                # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
+
+                                if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
+                                    logger.warning(
+                                        f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
+                                    timestamp = str(int(timestamp) + 1)
+                                    time.sleep(1)
+
                                 watch.save_history_text(contents=contents,
-                                                        timestamp=str(round(time.time())),
+                                                        timestamp=timestamp,
                                                         snapshot_id=update_obj.get('previous_md5', 'none'))
 
+                                if update_handler.fetcher.content:
+                                    watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)
+
                             # A change was detected
                             if changed_detected:
                                 # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                                 if watch.history_n >= 2:
                                     logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
-                                    if not self.datastore.data['watching'][uuid].get('notification_muted'):
+                                    if not watch.get('notification_muted'):
                                         self.send_content_changed_notification(watch_uuid=uuid)
                                 else:
                                     logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}")
 
@@ -510,29 +538,23 @@ class update_worker(threading.Thread):
                             logger.critical(str(e))
                             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
 
-                    if self.datastore.data['watching'].get(uuid):
-                        # Always record that we atleast tried
-                        count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
+                    # Always record that we atleast tried
+                    count = watch.get('check_count', 0) + 1
 
-                        # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
-                        try:
-                            server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
-                            self.datastore.update_watch(uuid=uuid,
-                                                        update_obj={'remote_server_reply': server_header}
-                                                        )
-                        except Exception as e:
-                            pass
+                    # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
+                    try:
+                        server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
+                        self.datastore.update_watch(uuid=uuid,
+                                                    update_obj={'remote_server_reply': server_header}
+                                                    )
+                    except Exception as e:
+                        pass
 
-                        # Always save the screenshot if it's available
-                        if update_handler.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
-                        if update_handler.xpath_data:
-                            self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
+                    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
+                                                                       'last_checked': round(time.time()),
+                                                                       'check_count': count
+                                                                       })
 
             self.current_uuid = None  # Done
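
The relocated "small hack" above is what keeps history.txt keys unique: snapshots are keyed by epoch seconds, so a second check landing inside the same second must bump the key and wait out the remainder of that second. Extracted as a self-contained sketch; the helper name is illustrative:

    import time

    def unique_history_timestamp(newest_history_key):
        timestamp = round(time.time())
        if newest_history_key and int(timestamp) == int(newest_history_key):
            # Same second as the newest snapshot: take the next key and sleep so
            # the following snapshot cannot collide either
            timestamp = int(timestamp) + 1
            time.sleep(1)
        return str(timestamp)

    print(unique_history_timestamp(None))                     # normal path
    print(unique_history_timestamp(str(round(time.time()))))  # collision path, returns the bumped key

Moving this guard into the worker, next to the new save_last_fetched_html() call, also means screenshots, xpath data and HTML snapshots are written only inside the process_changedetection_results branch rather than on every loop, which is the disk-IO saving named in the subject line.
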