From cf3f3e44977f72c12d73b5807c4998993d236332 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sun, 7 May 2023 13:06:11 +0200
Subject: [PATCH 1/4] BrowserSteps - BrowserSteps was not always following
 proxy information

---
 changedetectionio/blueprint/browser_steps/__init__.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py
index 381b2e59..37f18a81 100644
--- a/changedetectionio/blueprint/browser_steps/__init__.py
+++ b/changedetectionio/blueprint/browser_steps/__init__.py
@@ -169,7 +169,18 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         if proxy_id:
             proxy_url = datastore.proxy_list.get(proxy_id).get('url')
             if proxy_url:
+
+                # Playwright needs separate username and password values
+                from urllib.parse import urlparse
+                parsed = urlparse(proxy_url)
                 proxy = {'server': proxy_url}
+
+                if parsed.username:
+                    proxy['username'] = parsed.username
+
+                if parsed.password:
+                    proxy['password'] = parsed.password
+
                 print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
 
         # Begin the new "Playwright Context" that re-uses the playwright interface

From d814535dc6f13d22d8f33ce443ed0c9c02374238 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sun, 7 May 2023 13:10:56 +0200
Subject: [PATCH 2/4] Element scraper - wrap offset detection in try/catch

---
 changedetectionio/res/xpath_element_scraper.js | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/changedetectionio/res/xpath_element_scraper.js b/changedetectionio/res/xpath_element_scraper.js
index e363d2a1..27a54c2c 100644
--- a/changedetectionio/res/xpath_element_scraper.js
+++ b/changedetectionio/res/xpath_element_scraper.js
@@ -8,8 +8,15 @@
 // Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
 // will automatically force a scroll somewhere, so include the position offset
 // Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
+var scroll_y = 0;
+try {
+    scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
+} catch (e) {
+    console.log(e);
+}
+
+
 
-var scroll_y=+document.documentElement.scrollTop || document.body.scrollTop
 
 // Include the getXpath script directly, easier than fetching
 function getxpath(e) {

From 3801d339f5f803de84ff18c36d8436e09c6011b8 Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sun, 7 May 2023 13:47:17 +0200
Subject: [PATCH 3/4] UI - Adding shortcut list select button for "clear/reset
 history"

---
 changedetectionio/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py
index aeb6f555..1afddfd1 100644
--- a/changedetectionio/__init__.py
+++ b/changedetectionio/__init__.py
@@ -1313,6 +1313,13 @@ def changedetection_app(config=None, datastore_o=None):
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            flash("{} watches queued for rechecking".format(len(uuids)))
 
+        elif (op == 'clear-history'):
+            for uuid in uuids:
+                uuid = uuid.strip()
+                if datastore.data['watching'].get(uuid):
+                    datastore.clear_watch_history(uuid)
+            flash("{} watches cleared/reset.".format(len(uuids)))
+
        elif (op == 'notification-default'):
            from changedetectionio.notification import (
                default_notification_format_for_watch
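
A quick aside before the final patch in the series: PATCH 1/4 leans on urllib.parse.urlparse() to split the credentials out of a single proxy URL, because Playwright wants username and password as separate fields alongside the server entry. The sketch below is only a minimal, standalone illustration of that split; the helper name build_playwright_proxy and the example proxy URL are made up for this note and are not part of the patch.

from urllib.parse import urlparse

def build_playwright_proxy(proxy_url):
    # Mirror of the logic added in PATCH 1/4: keep the full URL as 'server',
    # and only add 'username'/'password' when they are present in the URL.
    proxy = {'server': proxy_url}
    parsed = urlparse(proxy_url)
    if parsed.username:
        proxy['username'] = parsed.username
    if parsed.password:
        proxy['password'] = parsed.password
    return proxy

if __name__ == '__main__':
    # Prints: {'server': 'http://user:pass@squid.example:3128', 'username': 'user', 'password': 'pass'}
    print(build_playwright_proxy('http://user:pass@squid.example:3128'))
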
From 316f28a0f2d4081455b2740d050155bca30cd44e Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Sun, 7 May 2023 13:49:53 +0200
Subject: [PATCH 4/4] Fetcher - Experimental fetcher fixes, now only enabled
 with 'USE_EXPERIMENTAL_PUPPETEER_FETCH' env var (default off) (#1561)

---
 changedetectionio/content_fetcher.py | 115 +++++++++++++++------------
 changedetectionio/update_worker.py   |   3 +-
 2 files changed, 66 insertions(+), 52 deletions(-)

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 2d8a66ab..262ac2b4 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -277,7 +277,7 @@ class base_html_playwright(Fetcher):
         with open(destination, 'w') as f:
             f.write(content)
 
-    def run(self,
+    def run_fetch_browserless_puppeteer(self,
             url,
             timeout,
             request_headers,
@@ -287,63 +287,63 @@ class base_html_playwright(Fetcher):
             current_include_filters=None,
             is_binary=False):
 
-        # Fallback for now to the old way if browsersteps
-        # @todo - need to figure out how to get browsersteps with images on each step working
-        if self.browser_steps:
-            for step in self.browser_steps:
-                if step.get('operation'):
-                    return self.run_playwright(
-                        url,
-                        timeout,
-                        request_headers,
-                        request_body,
-                        request_method,
-                        ignore_status_codes,
-                        current_include_filters,
-                        is_binary)
-        elif os.getenv('FORCE_PLAYWRIGHT_FETCH'):
-            # Temporary backup solution until we rewrite the playwright code
-            return self.run_playwright(
-                url,
-                timeout,
-                request_headers,
-                request_body,
-                request_method,
-                ignore_status_codes,
-                current_include_filters,
-                is_binary)
-
         extra_wait_ms = (int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) * 1000
 
         xpath_element_js = self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
         code = f"""module.exports = async ({{ page, context }}) => {{
 
-          var {{ url, execute_js, user_agent, extra_wait_ms, req_headers, include_filters, xpath_element_js, screenshot_quality, proxy}} = context;
+          var {{ url, execute_js, user_agent, extra_wait_ms, req_headers, include_filters, xpath_element_js, screenshot_quality, proxy_username, proxy_password}} = context;
 
           await page.setBypassCSP(true)
           await page.setExtraHTTPHeaders(req_headers);
           await page.setUserAgent(user_agent);
+
           // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
-          if(proxy) {{
+          await page.setDefaultNavigationTimeout(0);
+
+          if(proxy_username) {{
             await page.authenticate({{
-              username: proxy['username'],
-              password: proxy['password'],
+              username: proxy_username,
+              password: proxy_password
             }});
           }}
-
-          const r = await page.goto(url, wait_until='commit');
-          await page.waitForTimeout(extra_wait_ms)
+
+          await page.setViewport({{
+            width: 1024,
+            height: 768,
+            deviceScaleFactor: 1,
+          }});
+
+          const r = await page.goto(url, {{
+            waitUntil: 'load'
+          }});
+
+          await page.waitForTimeout(1000);
+          await page.waitForTimeout(extra_wait_ms);
 
           if(execute_js) {{
             await page.evaluate(execute_js);
             await page.waitForTimeout(200);
           }}
 
-          const xpath_data = await page.evaluate((include_filters) => {{ {xpath_element_js} }}, include_filters);
-          const instock_data = await page.evaluate(() => {{ {self.instock_data_js} }});
-
-          const html = await page.content();
-          const b64s = await page.screenshot({{ encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg' }});
+          var html = await page.content();
+          var xpath_data;
+          var instock_data;
+          try {{
+            xpath_data = await page.evaluate((include_filters) => {{ {xpath_element_js} }}, include_filters);
+            instock_data = await page.evaluate(() => {{ {self.instock_data_js} }});
+          }} catch (e) {{
+            console.log(e);
+          }}
+
+          // Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
+          // Wrap it here (for now)
+          var b64s;
+          try {{
+            b64s = await page.screenshot({{ encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg' }});
+          }} catch (e) {{
+            console.log(e);
+          }}
+
           return {{
             data: {{
               'content': html,
@@ -387,15 +387,16 @@ class base_html_playwright(Fetcher):
                         'execute_js': self.webdriver_js_execute_code,
                         'extra_wait_ms': extra_wait_ms,
                         'include_filters': current_include_filters,
-                        'proxy': self.proxy,
                         'req_headers': request_headers,
                         'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
                         'url': url,
                         'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'),
+                        'proxy_username': self.proxy.get('username','') if self.proxy else False,
+                        'proxy_password': self.proxy.get('password','') if self.proxy else False,
                     }
                 },
                 # @todo /function needs adding ws:// to http:// rebuild this
-                url=browserless_function_url,
+                url=browserless_function_url+"&--disable-features=AudioServiceOutOfProcess&dumpio=true",
                 timeout=wait_browserless_seconds)
 
         except ReadTimeout:
@@ -427,15 +428,27 @@ class base_html_playwright(Fetcher):
             # Some other error from browserless
             raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))
 
-    def run_playwright(self,
-            url,
-            timeout,
-            request_headers,
-            request_body,
-            request_method,
-            ignore_status_codes=False,
-            current_include_filters=None,
-            is_binary=False):
+    def run(self,
+            url,
+            timeout,
+            request_headers,
+            request_body,
+            request_method,
+            ignore_status_codes=False,
+            current_include_filters=None,
+            is_binary=False):
+
+        if os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'):
+            # Temporary backup solution until we rewrite the playwright code
+            return self.run_fetch_browserless_puppeteer(
+                url,
+                timeout,
+                request_headers,
+                request_body,
+                request_method,
+                ignore_status_codes,
+                current_include_filters,
+                is_binary)
 
         from playwright.sync_api import sync_playwright
         import playwright._impl._api_types

diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py
index 9fb49c45..96fd5b1e 100644
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -315,7 +315,8 @@ class update_worker(threading.Thread):
                     self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
                 self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
-                                                                   'last_check_status': e.status_code})
+                                                                   'last_check_status': e.status_code,
+                                                                   'has_ldjson_price_data': None})
                 process_changedetection_results = False
 
             except Exception as e:
                 self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
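
A closing note on PATCH 4/4: the new gate in run() is plain os.getenv() truthiness, so any non-empty value of USE_EXPERIMENTAL_PUPPETEER_FETCH switches fetching to the browserless/puppeteer path, while leaving it unset (the default) keeps the existing Playwright path. The following is only a rough standalone sketch of that dispatch; fetch_with_puppeteer() and fetch_with_playwright() are hypothetical stand-ins for the real methods.

import os

def fetch_with_puppeteer(url):
    # Hypothetical stand-in for run_fetch_browserless_puppeteer()
    return 'puppeteer fetch: ' + url

def fetch_with_playwright(url):
    # Hypothetical stand-in for the pre-existing Playwright body of run()
    return 'playwright fetch: ' + url

def run(url):
    # Same shape as the dispatch added in PATCH 4/4: the experimental path is
    # only taken when the env var is set to a non-empty value (default: off).
    if os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'):
        return fetch_with_puppeteer(url)
    return fetch_with_playwright(url)

if __name__ == '__main__':
    # e.g. USE_EXPERIMENTAL_PUPPETEER_FETCH=1 python sketch.py
    print(run('https://example.com'))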