diff --git a/backend/__init__.py b/backend/__init__.py index 96c26040..07fdcdb1 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -386,6 +386,17 @@ def changedetection_app(conig=None, datastore_o=None): if len(datastore.data['watching'][uuid]['history']): update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) + + # CSS Filter + css_filter = request.form.get('css_filter') + if css_filter: + datastore.data['watching'][uuid]['css_filter'] = css_filter.strip() + + # Reset the previous_md5 so we process a new snapshot including stripping ignore text. + if len(datastore.data['watching'][uuid]['history']): + update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) + + validators.url(url) # @todo switch to prop/attr/observer datastore.data['watching'][uuid].update(update_obj) datastore.needs_write = True @@ -876,7 +887,7 @@ def ticker_thread_check_time_launch_checks(): if not uuid in running_uuids and uuid not in update_q.queue: update_q.put(uuid) - time.sleep(1) + time.sleep(0.1) # Should be low so we can break this out in testing app.config.exit.wait(1) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 9fb52c75..8705201d 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -66,25 +66,36 @@ class perform_site_check(): timeout=timeout, verify=False) - stripped_text_from_html = get_text(r.text) + # CSS Filter + css_filter = self.datastore.data['watching'][uuid]['css_filter'] + if css_filter and len(css_filter.strip()): + from bs4 import BeautifulSoup + soup = BeautifulSoup(r.content, "html.parser") + stripped_text_from_html = "" + for item in soup.select(css_filter): + text = str(item.get_text())+"\n" + stripped_text_from_html += text + + else: + stripped_text_from_html = get_text(r.text) # Usually from networkIO/requests level except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e: update_obj["last_error"] = str(e) - print(str(e)) except requests.exceptions.MissingSchema: print("Skipping {} due to missing schema/bad url".format(uuid)) # Usually from html2text level - except UnicodeDecodeError as e: - + except Exception as e: + # except UnicodeDecodeError as e: update_obj["last_error"] = str(e) print(str(e)) # figure out how to deal with this cleaner.. # 'utf-8' codec can't decode byte 0xe9 in position 480: invalid continuation byte + else: # We rely on the actual text in the html output.. many sites have random script vars etc, # in the future we'll implement other mechanisms. diff --git a/backend/store.py b/backend/store.py index 6ae6223d..ecb87088 100644 --- a/backend/store.py +++ b/backend/store.py @@ -61,7 +61,8 @@ class ChangeDetectionStore: 'headers': {}, # Extra headers to send 'history': {}, # Dict of timestamp and output stripped filename 'ignore_text': [], # List of text to ignore when calculating the comparison checksum - 'notification_urls': [] # List of URLs to add to the notification Queue (Usually AppRise) + 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) + 'css_filter': "", } if path.isfile('backend/source.txt'): diff --git a/backend/templates/edit.html b/backend/templates/edit.html index 14e109d7..d97cc8bb 100644 --- a/backend/templates/edit.html +++ b/backend/templates/edit.html @@ -24,7 +24,13 @@ size="5"/>
- + +