From a7d4af52ca057b75ce925f154018a7001f03d174 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 31 Jan 2024 14:42:15 +0100 Subject: [PATCH] tidy up status code handling in experimental fetcher --- changedetectionio/content_fetcher.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 124eeae1..ce051920 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -51,6 +51,7 @@ class BrowserStepsStepException(Exception): return +# @todo - make base Exception class that announces via logger() class PageUnloadable(Exception): def __init__(self, status_code, url, message, screenshot=False): # Set this so we can use it in other parts of the app @@ -391,13 +392,20 @@ class base_html_playwright(Fetcher): # 200 Here means that the communication to browserless worked only, not the page state try: x = response.json() - status_code = response.status_code except Exception as e: - raise PageUnloadable(url=url, status_code=status_code, message="Error reading JSON response from browserless") + raise PageUnloadable(url=url, message="Error reading JSON response from browserless") + + try: + self.status_code = response.status_code + except Exception as e: + raise PageUnloadable(url=url, message="Error reading status_code code response from browserless") self.headers = x.get('headers') - if status_code == 200: + if self.status_code != 200 and not ignore_status_codes: + raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content','')) + + if self.status_code == 200: import base64 if not x.get('screenshot'): @@ -410,15 +418,10 @@ class base_html_playwright(Fetcher): if not x.get('content', '').strip(): raise EmptyReply(url=url, status_code=None) - if x.get('status_code', 200) != 200 and not ignore_status_codes: - raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content']) - self.content = x.get('content') self.instock_data = x.get('instock_data') self.screenshot = base64.b64decode(x.get('screenshot')) - self.status_code = x.get('status_code') self.xpath_data = x.get('xpath_data') - else: # Some other error from browserless raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))