Merge branch 'master' into 550-visual-selector

3 years ago · 695fcc4566
parent d7c5a53315 0e385b1c22
commit 695fcc4566
19 changed files with 582 additions and 117 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@ -40,7 +40,7 @@ from flask_wtf import CSRFProtect
 from changedetectionio import html_tools
-__version__ = '0.39.12'
+__version__ = '0.39.13'
 datastore = None
@ -518,10 +518,31 @@ def changedetection_app(config=None, datastore_o=None):
        if all(value == 0 or value == None for value in datastore.data['watching'][uuid]['time_between_check'].values()):
            default['time_between_check'] = deepcopy(datastore.data['settings']['requests']['time_between_check'])
        # Defaults for proxy choice
        if datastore.proxy_list is not None:  # When enabled
            system_proxy = datastore.data['settings']['requests']['proxy']
            if default['proxy'] is None:
                default['proxy'] = system_proxy
            else:
                # Does the chosen one exist?
                if not any(default['proxy'] in tup for tup in datastore.proxy_list):
                    default['proxy'] = datastore.proxy_list[0][0]
            # Used by the form handler to keep or remove the proxy settings
            default['proxy_list'] = datastore.proxy_list
        # proxy_override set to the json/text list of the items
        form = forms.watchForm(formdata=request.form if request.method == 'POST' else None,
-                                        data=default
+                               data=default,
-                                        )
+                               )
        if datastore.proxy_list is None:
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.proxy
        else:
            form.proxy.choices = datastore.proxy_list
            if default['proxy'] is None:
                form.proxy.default='http://hello'
        if request.method == 'POST' and form.validate():
            extra_update_obj = {}
@ -601,10 +622,28 @@ def changedetection_app(config=None, datastore_o=None):
    def settings_page():
        from changedetectionio import content_fetcher, forms
        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
            # When enabled
            system_proxy = datastore.data['settings']['requests']['proxy']
            # In the case it doesnt exist anymore
            if not any([system_proxy in tup for tup in datastore.proxy_list]):
                system_proxy = None
            default['requests']['proxy'] = system_proxy if system_proxy is not None else datastore.proxy_list[0][0]
            # Used by the form handler to keep or remove the proxy settings
            default['proxy_list'] = datastore.proxy_list
        # Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
        form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
-                                        data=datastore.data['settings']
+                                        data=default
                                        )
        if datastore.proxy_list is None:
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.requests.form.proxy
        else:
            form.requests.form.proxy.choices = datastore.proxy_list
        if request.method == 'POST':
            # Password unset is a GET, but we can lock the session to a salted env password to always need the password
@ -644,44 +683,37 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/import", methods=['GET', "POST"])
    @login_required
    def import_page():
        import validators
        remaining_urls = []
        if request.method == 'POST':
            from .importer import import_url_list, import_distill_io_json
            # URL List import
            if request.values.get('urls') and len(request.values.get('urls').strip()):
                # Import and push into the queue for immediate update check
                importer = import_url_list()
                importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
                for uuid in importer.new_uuids:
                    update_q.put(uuid)
                if len(importer.remaining_data) == 0:
                    return redirect(url_for('index'))
                else:
                    remaining_urls = importer.remaining_data
            # Distill.io import
            if request.values.get('distill-io') and len(request.values.get('distill-io').strip()):
                # Import and push into the queue for immediate update check
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
                    update_q.put(uuid)
        good = 0
        if request.method == 'POST':
            now=time.time()
            urls = request.values.get('urls').split("\n")
            if (len(urls) > 5000):
                flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
            for url in urls:
                url = url.strip()
                url, *tags = url.split(" ")
                # Flask wtform validators wont work with basic auth, use validators package
                # Up to 5000 per batch so we dont flood the server
                if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
                    new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags), write_to_disk_now=False)
                    if new_uuid:
                        # Straight into the queue.
                        update_q.put(new_uuid)
                        good += 1
                        continue
                if len(url.strip()):
                    remaining_urls.append(url)
            flash("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time()-now,len(remaining_urls)))
            datastore.needs_write = True
            if len(remaining_urls) == 0:
                # Looking good, redirect to index.
                return redirect(url_for('index'))
        # Could be some remaining, or we could be on GET
        output = render_template("import.html",
-                                 remaining="\n".join(remaining_urls)
+                                 import_url_list_remaining="\n".join(remaining_urls),
                                 original_distill_json=''
                                 )
        return output
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@ -128,6 +128,9 @@ class Fetcher():
    # Will be needed in the future by the VisualSelector, always get this where possible.
    screenshot = False
    fetcher_description = "No description"
    system_http_proxy = os.getenv('HTTP_PROXY')
    system_https_proxy = os.getenv('HTTPS_PROXY')
    @abstractmethod
    def get_error(self):
@ -184,21 +187,17 @@ class base_html_playwright(Fetcher):
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
    #    try:
    #        from playwright.sync_api import sync_playwright
    #    except ModuleNotFoundError:
    #        fetcher_enabled = False
    browser_type = ''
    command_executor = ''
    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
-    playwright_proxy_settings_mappings = ['server', 'bypass', 'username', 'password']
+    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
    proxy = None
-    def __init__(self):
+    def __init__(self, proxy_override=None):
        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
        self.command_executor = os.getenv(
@ -216,6 +215,10 @@ class base_html_playwright(Fetcher):
        if proxy_args:
            self.proxy = proxy_args
        # allow per-watch proxy selection override
        if proxy_override:
            self.proxy = {'server': proxy_override}
    def run(self,
            url,
            timeout,
@ -226,6 +229,8 @@ class base_html_playwright(Fetcher):
            current_css_filter=None):
        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
        from playwright._impl._api_types import Error, TimeoutError
        with sync_playwright() as p:
            browser_type = getattr(p, self.browser_type)
@ -235,17 +240,23 @@ class base_html_playwright(Fetcher):
            browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
            # Set user agent to prevent Cloudflare from blocking the browser
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
            context = browser.new_context(
-                user_agent="Mozilla/5.0",
+                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
                proxy=self.proxy
            )
            page = context.new_page()
            response = page.goto(url, timeout=timeout * 1000)
            # set size after visiting page, otherwise it wont work (seems to default to 800x)
            page.set_viewport_size({"width": 1280, "height": 1024})
-
+            try:
-            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
+                response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
-            page.wait_for_timeout(extra_wait * 1000)
+                # Wait_until = commit
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
                # This seemed to solve nearly all 'TimeoutErrors'
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
                page.wait_for_timeout(extra_wait * 1000)
            except playwright._impl._api_types.TimeoutError as e:
                raise EmptyReply(url=url, status_code=None)
            if response is None:
                raise EmptyReply(url=url, status_code=None)
@ -283,7 +294,7 @@ class base_html_webdriver(Fetcher):
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
    proxy = None
-    def __init__(self):
+    def __init__(self, proxy_override=None):
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
@ -296,6 +307,16 @@ class base_html_webdriver(Fetcher):
            if v:
                proxy_args[k] = v.strip('"')
        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
            proxy_args['httpProxy'] = self.system_http_proxy
        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
            proxy_args['httpsProxy'] = self.system_https_proxy
        # Allows override the proxy on a per-request basis
        if proxy_override is not None:
            proxy_args['httpProxy'] = proxy_override
        if proxy_args:
            self.proxy = SeleniumProxy(raw=proxy_args)
@ -366,6 +387,9 @@ class base_html_webdriver(Fetcher):
 class html_requests(Fetcher):
    fetcher_description = "Basic fast Plaintext/HTTP Client"
    def __init__(self, proxy_override=None):
        self.proxy_override = proxy_override
    def run(self,
            url,
            timeout,
@ -375,11 +399,23 @@ class html_requests(Fetcher):
            ignore_status_codes=False,
            current_css_filter=None):
        proxies={}
        # Allows override the proxy on a per-request basis
        if self.proxy_override:
            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
        else:
            if self.system_http_proxy:
                proxies['http'] = self.system_http_proxy
            if self.system_https_proxy:
                proxies['https'] = self.system_https_proxy
        r = requests.request(method=request_method,
                             data=request_body,
                             url=url,
                             headers=request_headers,
                             timeout=timeout,
                             proxies=proxies,
                             verify=False)
        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@ -16,6 +16,34 @@ class perform_site_check():
        super().__init__(*args, **kwargs)
        self.datastore = datastore
    # If there was a proxy list enabled, figure out what proxy_args/which proxy to use
    # if watch.proxy use that
    # fetcher.proxy_override = watch.proxy or main config proxy
    # Allows override the proxy on a per-request basis
    # ALWAYS use the first one is nothing selected
    def set_proxy_from_list(self, watch):
        proxy_args = None
        if self.datastore.proxy_list is None:
            return None
        # If its a valid one
        if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
            proxy_args = watch['proxy']
        # not valid (including None), try the system one
        else:
            system_proxy = self.datastore.data['settings']['requests']['proxy']
            # Is not None and exists
            if any([system_proxy in p for p in self.datastore.proxy_list]):
                proxy_args = system_proxy
        # Fallback - Did not resolve anything, use the first available
        if proxy_args is None:
            proxy_args = self.datastore.proxy_list[0][0]
        return proxy_args
    def run(self, uuid):
        timestamp = int(time.time())  # used for storage etc too
@ -66,10 +94,15 @@ class perform_site_check():
            # If the klass doesnt exist, just use a default
            klass = getattr(content_fetcher, "html_requests")
-        fetcher = klass()
+
        proxy_args = self.set_proxy_from_list(watch)
        fetcher = klass(proxy_override=proxy_args)
        # Proxy List support
        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter'])
        fetcher.quit()
        # Fetching complete, now filters
        # @todo move to class / maybe inside of fetcher abstract base?
@ -119,11 +152,13 @@ class perform_site_check():
                # Then we assume HTML
                if has_filter_rule:
                    # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
-                    if css_filter_rule[0] == '/':
+                    if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
-                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule, html_content=fetcher.content)
+                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
                                                               html_content=fetcher.content)
                    else:
                        # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                        html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
                if has_subtractive_selectors:
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)
@ -143,7 +178,6 @@ class perform_site_check():
            # Re #340 - return the content before the 'ignore text' was applied
            text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -337,9 +337,9 @@ class watchForm(commonSettingsForm):
    method = SelectField('Request method', choices=valid_method, default=default_method)
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
    save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
    proxy = RadioField('Proxy')
    def validate(self, **kwargs):
        if not super().validate():
@ -358,6 +358,7 @@ class watchForm(commonSettingsForm):
 # datastore.data['settings']['requests']..
 class globalSettingsRequestForm(Form):
    time_between_check = FormField(TimeBetweenCheckForm)
    proxy = RadioField('Proxy')
 # datastore.data['settings']['application']..
@ -382,4 +383,3 @@ class globalSettingsForm(Form):
    requests = FormField(globalSettingsRequestForm)
    application = FormField(globalSettingsApplicationForm)
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
--- a/changedetectionio/importer.py
+++ b/changedetectionio/importer.py
@ -0,0 +1,133 @@
 from abc import ABC, abstractmethod
 import time
 import validators
 class Importer():
    remaining_data = []
    new_uuids = []
    good = 0
    def __init__(self):
        self.new_uuids = []
        self.good = 0
        self.remaining_data = []
    @abstractmethod
    def run(self,
            data,
            flash,
            datastore):
        pass
 class import_url_list(Importer):
    """
    Imports a list, can be in <code>https://example.com tag1, tag2, last tag</code> format
    """
    def run(self,
            data,
            flash,
            datastore,
            ):
        urls = data.split("\n")
        good = 0
        now = time.time()
        if (len(urls) > 5000):
            flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
        for url in urls:
            url = url.strip()
            if not len(url):
                continue
            tags = ""
            # 'tags' should be a csv list after the URL
            if ' ' in url:
                url, tags = url.split(" ", 1)
            # Flask wtform validators wont work with basic auth, use validators package
            # Up to 5000 per batch so we dont flood the server
            if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
                new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False)
                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
                    good += 1
                    continue
            # Worked past the 'continue' above, append it to the bad list
            if self.remaining_data is None:
                self.remaining_data = []
            self.remaining_data.append(url)
        flash("{} Imported from list in {:.2f}s, {} Skipped.".format(good, time.time() - now, len(self.remaining_data)))
 class import_distill_io_json(Importer):
    def run(self,
            data,
            flash,
            datastore,
            ):
        import json
        good = 0
        now = time.time()
        self.new_uuids=[]
        try:
            data = json.loads(data.strip())
        except json.decoder.JSONDecodeError:
            flash("Unable to read JSON file, was it broken?", 'error')
            return
        if not data.get('data'):
            flash("JSON structure looks invalid, was it broken?", 'error')
            return
        for d in data.get('data'):
            d_config = json.loads(d['config'])
            extras = {'title': d['name']}
            if len(d['uri']) and good < 5000:
                try:
                    # @todo we only support CSS ones at the moment
                    if d_config['selections'][0]['frames'][0]['excludes'][0]['type'] == 'css':
                        extras['subtractive_selectors'] = d_config['selections'][0]['frames'][0]['excludes'][0]['expr']
                except KeyError:
                    pass
                except IndexError:
                    pass
                try:
                    extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
                    if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
                        extras['css_filter'] = 'xpath:' + extras['css_filter']
                except KeyError:
                    pass
                except IndexError:
                    pass
                try:
                    extras['tag'] = ", ".join(d['tags'])
                except KeyError:
                    pass
                except IndexError:
                    pass
                new_uuid = datastore.add_watch(url=d['uri'].strip(),
                                               extras=extras,
                                               write_to_disk_now=False)
                if new_uuid:
                    # Straight into the queue.
                    self.new_uuids.append(new_uuid)
                    good += 1
        flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@ -23,7 +23,8 @@ class model(dict):
                'requests': {
                    'timeout': 15,  # Default 15 seconds
                    'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
-                    'workers': 10  # Number of threads, lower is better for slow connections
+                    'workers': 10,  # Number of threads, lower is better for slow connections
                    'proxy': None # Preferred proxy connection
                },
                'application': {
                    'password': False,
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@ -39,6 +39,7 @@ class model(dict):
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'fetch_backend': None,
            'extract_title_as_title': False,
            'proxy': None, # Preferred proxy connection
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
--- a/changedetectionio/static/js/settings.js
+++ b/changedetectionio/static/js/settings.js
@ -1,13 +0,0 @@
 window.addEventListener("load", (event) => {
  // just an example for now
  function toggleVisible(elem) {
    // theres better ways todo this
    var x = document.getElementById(elem);
    if (x.style.display === "block") {
      x.style.display = "none";
    } else {
      x.style.display = "block";
    }
  }
 });
--- a/changedetectionio/static/js/watch-settings.js
+++ b/changedetectionio/static/js/watch-settings.js
@ -0,0 +1,14 @@
 $(document).ready(function() {
    function toggle() {
        if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') {
            $('#requests-override-options').hide();
        } else {
            $('#requests-override-options').show();
        }
    }
    $('input[name="fetch_backend"]').click(function (e) {
        toggle();
    });
    toggle();
 });
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@ -309,10 +309,10 @@ footer {
    font-weight: bold; }
  .pure-form textarea {
    width: 100%; }
-  .pure-form ul.fetch-backend {
+  .pure-form .inline-radio ul {
    margin: 0px;
    list-style: none; }
-    .pure-form ul.fetch-backend li > * {
+    .pure-form .inline-radio ul li > * {
      display: inline-block; }
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
--- a/changedetectionio/static/styles/styles.scss
+++ b/changedetectionio/static/styles/styles.scss
@ -418,14 +418,16 @@ footer {
  textarea {
    width: 100%;
  }
-  ul.fetch-backend {
+  .inline-radio {
-    margin: 0px;
+      ul {
-    list-style: none;
+        margin: 0px;
-    li {
+        list-style: none;
-        > * {
+        li {
-            display: inline-block;
+            > * {
                display: inline-block;
            }
        }
-    }
+      }
  }
 }
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@ -33,6 +33,7 @@ class ChangeDetectionStore:
        self.needs_write = False
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        self.proxy_list = None
        self.stop_thread = False
        self.__data = App.model()
@ -111,6 +112,14 @@ class ChangeDetectionStore:
            secret = secrets.token_hex(16)
            self.__data['settings']['application']['rss_access_token'] = secret
        # Proxy list support - available as a selection in settings when text file is imported
        # CSV list
        # "name, address", or just "name"
        proxy_list_file = "{}/proxies.txt".format(self.datastore_path)
        if path.isfile(proxy_list_file):
            self.import_proxy_list(proxy_list_file)
        # Bump the update version by running updates
        self.run_updates()
@ -435,6 +444,21 @@ class ChangeDetectionStore:
                    print ("Removing",item)
                    unlink(item)
    def import_proxy_list(self, filename):
        import csv
        with open(filename, newline='') as f:
            reader = csv.reader(f, skipinitialspace=True)
            # @todo This loop can could be improved
            l = []
            for row in reader:
                if len(row):
                    if len(row)>=2:
                        l.append(tuple(row[:2]))
                    else:
                        l.append(tuple([row[0], row[0]]))
            self.proxy_list = l if len(l) else None
    # Run all updates
    # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
    #             So therefor - each `update_n` should be very careful about checking if it needs to actually run
--- a/changedetectionio/templates/_common_fields.jinja
+++ b/changedetectionio/templates/_common_fields.jinja
@ -2,7 +2,6 @@
 {% from '_helpers.jinja' import render_field %}
 {% macro render_common_settings_form(form, current_base_url, emailprefix) %}
                        <div class="pure-control-group">
                            {{ render_field(form.notification_urls, rows=5, placeholder="Examples:
    Gitter - gitter://token/room
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@ -11,6 +11,7 @@
 {% endif %}
    const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
 </script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
@ -62,20 +63,25 @@
            </div>
            <div class="tab-pane-inner" id="request">
-                    <div class="pure-control-group">
+                    <div class="pure-control-group inline-radio">
                        {{ render_field(form.fetch_backend, class="fetch-backend") }}
                        <span class="pure-form-message-inline">
                            <p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
                            <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                        </span>
                    </div>
-
+                {% if form.proxy %}
-                <hr/>
+                    <div class="pure-control-group inline-radio">
-                <fieldset class="pure-group">
+                        {{ render_field(form.proxy, class="fetch-backend-proxy") }}
-
+                        <span class="pure-form-message-inline">
-                    <span class="pure-form-message-inline">
+                        Choose a proxy for this watch
                        </span>
                    </div>
                {% endif %}
                <fieldset class="pure-group" id="requests-override-options">
                    <div class="pure-form-message-inline">
                        <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
-                    </span>
+                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.method) }}
                    </div>
@ -130,7 +136,7 @@ User-Agent: wonderbra 1.0") }}
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required,  <a
                                href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
-                        <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example  <code>//*[contains(@class, 'sametext')]</code>, <a
+                        <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
                                href="http://xpather.com/" target="new">test your XPath here</a></li>
                    </ul>
                    Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
--- a/changedetectionio/templates/import.html
+++ b/changedetectionio/templates/import.html
@ -1,30 +1,86 @@
 {% extends 'base.html' %}
 {% block content %}
-<div class="edit-form">
+<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
-     <div class="inner">
+<div class="edit-form monospaced-textarea">
    <div class="tabs collapsable">
        <ul>
            <li class="tab" id="default-tab"><a href="#url-list">URL List</a></li>
            <li class="tab"><a href="#distill-io">Distill.io</a></li>
        </ul>
    </div>
    <div class="box-wrap inner">
        <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
-            <fieldset class="pure-group">
+            <div class="tab-pane-inner" id="url-list">
-              <legend>
+                <fieldset class="pure-group">
-                Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):
+                    <legend>
-                <br>
+                        Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
-                <code>https://example.com tag1, tag2, last tag</code>
+                        (,):
-                <br>
+                        <br>
-                URLs which do not pass validation will stay in the textarea.
+                        <code>https://example.com tag1, tag2, last tag</code>
-              </legend>
+                        <br>
-              
+                        URLs which do not pass validation will stay in the textarea.
-
+                    </legend>
-                <textarea name="urls" class="pure-input-1-2" placeholder="https://"
+
-                          style="width: 100%;
+
                    <textarea name="urls" class="pure-input-1-2" placeholder="https://"
                              style="width: 100%;
                                font-family:monospace;
                                white-space: pre;
                                overflow-wrap: normal;
-                                overflow-x: scroll;" rows="25">{{ remaining }}</textarea>
+                                overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
-            </fieldset>
+                </fieldset>
            </div>
            <div class="tab-pane-inner" id="distill-io">
                <fieldset class="pure-group">
                    <legend>
                        Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.</br>
                        This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
                        <br/>
                        <p>
                        How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br/>
                        Be sure to set your default fetcher to Chrome if required.</br>
                        </p>
                    </legend>
                    <textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
                                font-family:monospace;
                                white-space: pre;
                                overflow-wrap: normal;
                                overflow-x: scroll;" placeholder="Example Distill.io JSON export file
 {
    &quot;client&quot;: {
        &quot;local&quot;: 1
    },
    &quot;data&quot;: [
        {
            &quot;name&quot;: &quot;Unraid | News&quot;,
            &quot;uri&quot;: &quot;https://unraid.net/blog&quot;,
            &quot;config&quot;: &quot;{\&quot;selections\&quot;:[{\&quot;frames\&quot;:[{\&quot;index\&quot;:0,\&quot;excludes\&quot;:[],\&quot;includes\&quot;:[{\&quot;type\&quot;:\&quot;xpath\&quot;,\&quot;expr\&quot;:\&quot;(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]\&quot;}]}],\&quot;dynamic\&quot;:true,\&quot;delay\&quot;:2}],\&quot;ignoreEmptyText\&quot;:true,\&quot;includeStyle\&quot;:false,\&quot;dataAttr\&quot;:\&quot;text\&quot;}&quot;,
            &quot;tags&quot;: [],
            &quot;content_type&quot;: 2,
            &quot;state&quot;: 40,
            &quot;schedule&quot;: &quot;{\&quot;type\&quot;:\&quot;INTERVAL\&quot;,\&quot;params\&quot;:{\&quot;interval\&quot;:4447}}&quot;,
            &quot;ts&quot;: &quot;2022-03-27T15:51:15.667Z&quot;
        }
    ]
 }
 " rows="25">{{ original_distill_json }}</textarea>
                </fieldset>
            </div>
            <button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
        </form>
-     </div>
+
    </div>
 </div>
 {% endblock %}
--- a/changedetectionio/templates/settings.html
+++ b/changedetectionio/templates/settings.html
@ -9,7 +9,6 @@
    const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
 {% endif %}
 </script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
@ -61,7 +60,14 @@
                        {{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
                        <span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
                    </div>
-
+                {% if form.requests.proxy %}
                    <div class="pure-control-group inline-radio">
                        {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
                        <span class="pure-form-message-inline">
                        Choose a default proxy for all watches
                        </span>
                    </div>
                {% endif %}
                </fieldset>
            </div>
@ -74,7 +80,7 @@
            </div>
            <div class="tab-pane-inner" id="fetching">
-                <div class="pure-control-group">
+                <div class="pure-control-group inline-radio">
                    {{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
                    <span class="pure-form-message-inline">
                        <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
--- a/changedetectionio/tests/test_import.py
+++ b/changedetectionio/tests/test_import.py
@ -5,18 +5,17 @@ import time
 from flask import url_for
 from .util import live_server_setup
-
+def test_setup(client, live_server):
 def test_import(client, live_server):
    live_server_setup(live_server)
 def test_import(client, live_server):
    # Give the endpoint time to spin up
    time.sleep(1)
    res = client.post(
        url_for("import_page"),
        data={
            "distill-io": "",
            "urls": """https://example.com
 https://example.com tag1
 https://example.com tag1, other tag"""
@ -26,3 +25,96 @@ https://example.com tag1, other tag"""
    assert b"3 Imported" in res.data
    assert b"tag1" in res.data
    assert b"other tag" in res.data
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    # Clear flask alerts
    res = client.get( url_for("index"))
    res = client.get( url_for("index"))
 def xtest_import_skip_url(client, live_server):
    # Give the endpoint time to spin up
    time.sleep(1)
    res = client.post(
        url_for("import_page"),
        data={
            "distill-io": "",
            "urls": """https://example.com
 :ht000000broken
 """
        },
        follow_redirects=True,
    )
    assert b"1 Imported" in res.data
    assert b"ht000000broken" in res.data
    assert b"1 Skipped" in res.data
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    # Clear flask alerts
    res = client.get( url_for("index"))
 def test_import_distillio(client, live_server):
    distill_data='''
 {
    "client": {
        "local": 1
    },
    "data": [
        {
            "name": "Unraid | News",
            "uri": "https://unraid.net/blog",
            "config": "{\\"selections\\":[{\\"frames\\":[{\\"index\\":0,\\"excludes\\":[],\\"includes\\":[{\\"type\\":\\"xpath\\",\\"expr\\":\\"(//div[@id='App']/div[contains(@class,'flex')]/main[contains(@class,'relative')]/section[contains(@class,'relative')]/div[@class='container']/div[contains(@class,'flex')]/div[contains(@class,'w-full')])[1]\\"}]}],\\"dynamic\\":true,\\"delay\\":2}],\\"ignoreEmptyText\\":true,\\"includeStyle\\":false,\\"dataAttr\\":\\"text\\"}",
            "tags": ["nice stuff", "nerd-news"],
            "content_type": 2,
            "state": 40,
            "schedule": "{\\"type\\":\\"INTERVAL\\",\\"params\\":{\\"interval\\":4447}}",
            "ts": "2022-03-27T15:51:15.667Z"
        }
    ]
 }		   
 '''
    # Give the endpoint time to spin up
    time.sleep(1)
    client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    res = client.post(
        url_for("import_page"),
        data={
            "distill-io": distill_data,
            "urls" : ''
        },
        follow_redirects=True,
    )
    assert b"Unable to read JSON file, was it broken?" not in res.data
    assert b"1 Imported from Distill.io" in res.data
    res = client.get( url_for("edit_page", uuid="first"))
    assert b"https://unraid.net/blog" in res.data
    assert b"Unraid | News" in res.data
    # flask/wtforms should recode this, check we see it
    # wtforms encodes it like id=&#39 ,but html.escape makes it like id=&#x27
    # - so just check it manually :(
    #import json
    #import html
    #d = json.loads(distill_data)
    # embedded_d=json.loads(d['data'][0]['config'])
    # x=html.escape(embedded_d['selections'][0]['frames'][0]['includes'][0]['expr']).encode('utf-8')
    assert b"xpath:(//div[@id=&#39;App&#39;]/div[contains(@class,&#39;flex&#39;)]/main[contains(@class,&#39;relative&#39;)]/section[contains(@class,&#39;relative&#39;)]/div[@class=&#39;container&#39;]/div[contains(@class,&#39;flex&#39;)]/div[contains(@class,&#39;w-full&#39;)])[1]" in res.data
    # did the tags work?
    res = client.get( url_for("index"))
    assert b"nice stuff" in res.data
    assert b"nerd-news" in res.data
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    # Clear flask alerts
    res = client.get(url_for("index"))
--- a/changedetectionio/tests/test_xpath_selector.py
+++ b/changedetectionio/tests/test_xpath_selector.py
@ -116,4 +116,46 @@ def test_xpath_validation(client, live_server):
        data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
-    assert b"is not a valid XPath expression" in res.data
+    assert b"is not a valid XPath expression" in res.data
 # actually only really used by the distll.io importer, but could be handy too
 def test_check_with_prefix_css_filter(client, live_server):
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
    # Give the endpoint time to spin up
    time.sleep(1)
    set_original_response()
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(3)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    time.sleep(3)
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    with open('/tmp/fuck.html', 'wb') as f:
        f.write(res.data)
    assert b"Some text thats the same" in res.data #in selector
    assert b"Some text that will change" not in res.data #not in selector
    client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -17,14 +17,14 @@ services:
  #       Alternative WebDriver/selenium URL, do not use "'s or 's!
  #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
  #
-  #       WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_httpProxy, webdriver_noProxy,
+  #       WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
-  #                                webdriver_proxyAutoconfigUrl, webdriver_sslProxy, webdriver_autodetect,
+  #                                webdriver_proxyAutoconfigUrl, webdriver_autodetect,
  #                                webdriver_socksProxy, webdriver_socksUsername, webdriver_socksVersion, webdriver_socksPassword
  #
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
  #
  #       Alternative Playwright URL, do not use "'s or 's!
-  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/playwright
+  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/
  #
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
  #