tidy up status code handling in experimental fetcher

pull/2143/head
dgtlmoon 11 months ago
parent 88973b7408
commit a7d4af52ca

@ -51,6 +51,7 @@ class BrowserStepsStepException(Exception):
return return
# @todo - make base Exception class that announces via logger()
class PageUnloadable(Exception): class PageUnloadable(Exception):
def __init__(self, status_code, url, message, screenshot=False): def __init__(self, status_code, url, message, screenshot=False):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
@ -391,13 +392,20 @@ class base_html_playwright(Fetcher):
# 200 Here means that the communication to browserless worked only, not the page state # 200 Here means that the communication to browserless worked only, not the page state
try: try:
x = response.json() x = response.json()
status_code = response.status_code
except Exception as e: except Exception as e:
raise PageUnloadable(url=url, status_code=status_code, message="Error reading JSON response from browserless") raise PageUnloadable(url=url, message="Error reading JSON response from browserless")
try:
self.status_code = response.status_code
except Exception as e:
raise PageUnloadable(url=url, message="Error reading status_code code response from browserless")
self.headers = x.get('headers') self.headers = x.get('headers')
if status_code == 200: if self.status_code != 200 and not ignore_status_codes:
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content',''))
if self.status_code == 200:
import base64 import base64
if not x.get('screenshot'): if not x.get('screenshot'):
@ -410,15 +418,10 @@ class base_html_playwright(Fetcher):
if not x.get('content', '').strip(): if not x.get('content', '').strip():
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
if x.get('status_code', 200) != 200 and not ignore_status_codes:
raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content'])
self.content = x.get('content') self.content = x.get('content')
self.instock_data = x.get('instock_data') self.instock_data = x.get('instock_data')
self.screenshot = base64.b64decode(x.get('screenshot')) self.screenshot = base64.b64decode(x.get('screenshot'))
self.status_code = x.get('status_code')
self.xpath_data = x.get('xpath_data') self.xpath_data = x.get('xpath_data')
else: else:
# Some other error from browserless # Some other error from browserless
raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8')) raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))

Loading…
Cancel
Save