|
|
@ -51,6 +51,7 @@ class BrowserStepsStepException(Exception):
|
|
|
|
return
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# @todo - make base Exception class that announces via logger()
|
|
|
|
class PageUnloadable(Exception):
|
|
|
|
class PageUnloadable(Exception):
|
|
|
|
def __init__(self, status_code, url, message, screenshot=False):
|
|
|
|
def __init__(self, status_code, url, message, screenshot=False):
|
|
|
|
# Set this so we can use it in other parts of the app
|
|
|
|
# Set this so we can use it in other parts of the app
|
|
|
@ -391,13 +392,20 @@ class base_html_playwright(Fetcher):
|
|
|
|
# 200 Here means that the communication to browserless worked only, not the page state
|
|
|
|
# 200 Here means that the communication to browserless worked only, not the page state
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
x = response.json()
|
|
|
|
x = response.json()
|
|
|
|
status_code = response.status_code
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
|
raise PageUnloadable(url=url, status_code=status_code, message="Error reading JSON response from browserless")
|
|
|
|
raise PageUnloadable(url=url, message="Error reading JSON response from browserless")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
self.status_code = response.status_code
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
raise PageUnloadable(url=url, message="Error reading status_code code response from browserless")
|
|
|
|
|
|
|
|
|
|
|
|
self.headers = x.get('headers')
|
|
|
|
self.headers = x.get('headers')
|
|
|
|
|
|
|
|
|
|
|
|
if status_code == 200:
|
|
|
|
if self.status_code != 200 and not ignore_status_codes:
|
|
|
|
|
|
|
|
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content',''))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if self.status_code == 200:
|
|
|
|
import base64
|
|
|
|
import base64
|
|
|
|
|
|
|
|
|
|
|
|
if not x.get('screenshot'):
|
|
|
|
if not x.get('screenshot'):
|
|
|
@ -410,15 +418,10 @@ class base_html_playwright(Fetcher):
|
|
|
|
if not x.get('content', '').strip():
|
|
|
|
if not x.get('content', '').strip():
|
|
|
|
raise EmptyReply(url=url, status_code=None)
|
|
|
|
raise EmptyReply(url=url, status_code=None)
|
|
|
|
|
|
|
|
|
|
|
|
if x.get('status_code', 200) != 200 and not ignore_status_codes:
|
|
|
|
|
|
|
|
raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.content = x.get('content')
|
|
|
|
self.content = x.get('content')
|
|
|
|
self.instock_data = x.get('instock_data')
|
|
|
|
self.instock_data = x.get('instock_data')
|
|
|
|
self.screenshot = base64.b64decode(x.get('screenshot'))
|
|
|
|
self.screenshot = base64.b64decode(x.get('screenshot'))
|
|
|
|
self.status_code = x.get('status_code')
|
|
|
|
|
|
|
|
self.xpath_data = x.get('xpath_data')
|
|
|
|
self.xpath_data = x.get('xpath_data')
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
# Some other error from browserless
|
|
|
|
# Some other error from browserless
|
|
|
|
raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))
|
|
|
|
raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))
|
|
|
|