minor cleanups

pull/489/head
dgtlmoon 3 years ago
parent 8134242b38
commit 22dda97a65

@ -6,6 +6,7 @@ import time
import urllib3.exceptions import urllib3.exceptions
import sys import sys
class EmptyReply(Exception): class EmptyReply(Exception):
def __init__(self, status_code, url): def __init__(self, status_code, url):
# Set this so we can use it in other parts of the app # Set this so we can use it in other parts of the app
@ -15,6 +16,7 @@ class EmptyReply(Exception):
pass pass
class Fetcher(): class Fetcher():
error = None error = None
status_code = None status_code = None
@ -56,25 +58,25 @@ class Fetcher():
def is_ready(self): def is_ready(self):
return True return True
# Maybe for the future, each fetcher provides its own diff output, could be used for text, image # Maybe for the future, each fetcher provides its own diff output, could be used for text, image
# the current one would return javascript output (as we use JS to generate the diff) # the current one would return javascript output (as we use JS to generate the diff)
# #
def available_fetchers(): def available_fetchers():
# See the if statement at the bottom of this file for how we switch between playwright and webdriver # See the if statement at the bottom of this file for how we switch between playwright and webdriver
import inspect import inspect
p=[] p = []
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass): for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
if inspect.isclass(obj): if inspect.isclass(obj):
# @todo html_ is maybe better as fetcher_ or something # @todo html_ is maybe better as fetcher_ or something
# In this case, make sure to edit the default one in store.py and fetch_site_status.py # In this case, make sure to edit the default one in store.py and fetch_site_status.py
if name.startswith('html_'): if name.startswith('html_'):
t=tuple([name,obj.fetcher_description]) t = tuple([name, obj.fetcher_description])
p.append(t) p.append(t)
return p return p
class base_html_playwright(Fetcher): class base_html_playwright(Fetcher):
fetcher_description = "Playwright {}/Javascript".format( fetcher_description = "Playwright {}/Javascript".format(
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
@ -89,7 +91,7 @@ class base_html_playwright(Fetcher):
# In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server" # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
playwright_proxy_settings_mappings = ['server', 'bypass', 'username', 'password'] playwright_proxy_settings_mappings = ['server', 'bypass', 'username', 'password']
proxy=None proxy = None
def __init__(self): def __init__(self):
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
@ -123,7 +125,7 @@ class base_html_playwright(Fetcher):
browser_type = getattr(p, self.browser_type) browser_type = getattr(p, self.browser_type)
# Seemed to cause a connection Exception even though I can see it connect # Seemed to cause a connection Exception even though I can see it connect
#self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000) browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
# Set user agent to prevent Cloudflare from blocking the browser # Set user agent to prevent Cloudflare from blocking the browser
@ -133,7 +135,7 @@ class base_html_playwright(Fetcher):
) )
page = context.new_page() page = context.new_page()
page.set_viewport_size({"width": 1280, "height": 1024}) page.set_viewport_size({"width": 1280, "height": 1024})
response = page.goto(url, timeout=timeout*1000) response = page.goto(url, timeout=timeout * 1000)
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
page.wait_for_timeout(extra_wait * 1000) page.wait_for_timeout(extra_wait * 1000)
@ -166,10 +168,7 @@ class base_html_webdriver(Fetcher):
selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy', selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
'proxyAutoconfigUrl', 'sslProxy', 'autodetect', 'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword'] 'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
proxy = None
proxy=None
def __init__(self): def __init__(self):
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
@ -177,7 +176,6 @@ class base_html_webdriver(Fetcher):
# .strip('"') is going to save someone a lot of time when they accidentally wrap the env value # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"') self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
# If any proxy settings are enabled, then we should setup the proxy object # If any proxy settings are enabled, then we should setup the proxy object
proxy_args = {} proxy_args = {}
for k in self.selenium_proxy_settings_mappings: for k in self.selenium_proxy_settings_mappings:
@ -247,6 +245,7 @@ class base_html_webdriver(Fetcher):
except Exception as e: except Exception as e:
print("Exception in chrome shutdown/quit" + str(e)) print("Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py! # "html_requests" is listed as the default fetcher in store.py!
class html_requests(Fetcher): class html_requests(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client" fetcher_description = "Basic fast Plaintext/HTTP Client"
@ -260,11 +259,11 @@ class html_requests(Fetcher):
ignore_status_codes=False): ignore_status_codes=False):
r = requests.request(method=request_method, r = requests.request(method=request_method,
data=request_body, data=request_body,
url=url, url=url,
headers=request_headers, headers=request_headers,
timeout=timeout, timeout=timeout,
verify=False) verify=False)
# If the response did not tell us what encoding format to expect, then use chardet to override what `requests` thinks. # If the response did not tell us what encoding format to expect, then use chardet to override what `requests` thinks.
# For example - some sites don't tell us it's utf-8, but return utf-8 content # For example - some sites don't tell us it's utf-8, but return utf-8 content
@ -287,9 +286,8 @@ class html_requests(Fetcher):
# Decide which is the 'real' HTML webdriver, this is more a system wide config # Decide which is the 'real' HTML webdriver, this is more a system wide config
# rather than site-specific. # rather than site-specific.
use_playwright_as_chrome_fetcher= os.getenv('PLAYWRIGHT_DRIVER_URL', False) use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
if use_playwright_as_chrome_fetcher: if use_playwright_as_chrome_fetcher:
html_webdriver = base_html_playwright html_webdriver = base_html_playwright
else: else:
html_webdriver = base_html_webdriver html_webdriver = base_html_webdriver

@ -55,7 +55,6 @@
<a href="{{url_for('api_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a> <a href="{{url_for('api_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %} {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
{%if watch.fetch_backend == "html_playwright" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Playwright-icon.png')}}" />{% endif %}
{% if watch.last_error is defined and watch.last_error != False %} {% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}</div> <div class="fetch-error">{{ watch.last_error }}</div>

Loading…
Cancel
Save