Playwright content fetcher - Fixes for status codes and screenshot info (#2168)

pull/2111/merge
dgtlmoon 11 months ago committed by GitHub
parent c5a4e0aaa3
commit 3287283065
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -396,6 +396,8 @@ class base_html_playwright(Fetcher):
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.") logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
logger.critical(response) logger.critical(response)
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e)) raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes: if self.status_code != 200 and not ignore_status_codes:
@ -436,14 +438,15 @@ class base_html_playwright(Fetcher):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here # acceptable screenshot quality here
try: try:
# The actual screenshot # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, self.screenshot = self.page.screenshot(type='jpeg',
quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) full_page=True,
quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
)
except Exception as e: except Exception as e:
context.close() # It's likely the screenshot was too long/big and something crashed
browser.close() raise ScreenshotUnavailable(url=url, status_code=self.status_code)
raise ScreenshotUnavailable(url=url, status_code=response.status_code) finally:
context.close() context.close()
browser.close() browser.close()

@ -411,7 +411,7 @@ class update_worker(threading.Thread):
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
process_changedetection_results = False process_changedetection_results = False
except content_fetcher.ScreenshotUnavailable as e: except content_fetcher.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'" err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code}) 'last_check_status': e.status_code})
process_changedetection_results = False process_changedetection_results = False

Loading…
Cancel
Save