From 2d9ff7821c46cb63faf4d85c3ab563f99ffffd4a Mon Sep 17 00:00:00 2001
From: dgtlmoon
Date: Wed, 31 Jan 2024 11:15:19 +0100
Subject: [PATCH] Puppeteer experimental fetcher should always make headers available

Decode the browserless JSON reply before testing the transport status so
that self.headers is populated for every reply, not only when the status
code is 200.

response.status_code is read before the try block: the except handler
passes status_code to PageUnloadable, so the name must already be bound
when response.json() raises. Only the JSON decode stays inside the try,
and the original exception is chained with "from e" so the decode error
is kept in the traceback.
---
 changedetectionio/content_fetcher.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 4568831c..1de270cb 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -389,10 +389,17 @@ class base_html_playwright(Fetcher):
             raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..")
         else:
             # 200 Here means that the communication to browserless worked only, not the page state
-            if response.status_code == 200:
+            status_code = response.status_code
+            try:
+                x = response.json()
+            except Exception as e:
+                raise PageUnloadable(url=url, status_code=status_code, message="Error reading JSON response from browserless") from e
+
+            self.headers = x.get('headers')
+
+            if status_code == 200:
                 import base64
 
-                x = response.json()
                 if not x.get('screenshot'):
                     # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips
                     # https://github.com/puppeteer/puppeteer/issues/1834
@@ -407,7 +414,6 @@ class base_html_playwright(Fetcher):
                     raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content'])
 
                 self.content = x.get('content')
-                self.headers = x.get('headers')
                 self.instock_data = x.get('instock_data')
                 self.screenshot = base64.b64decode(x.get('screenshot'))
                 self.status_code = x.get('status_code')