Abstract out the fetch handlers for different fetch types

pull/897/head
dgtlmoon 2 years ago
parent fefc39427b
commit 425f8ea632

@ -500,7 +500,7 @@ def changedetection_app(config=None, datastore_o=None):
import hashlib import hashlib
from changedetectionio import fetch_site_status from changedetectionio.fetch_processor import json_html_plaintext
# Get the most recent one # Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key') newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
@ -514,7 +514,7 @@ def changedetection_app(config=None, datastore_o=None):
encoding='utf-8') as file: encoding='utf-8') as file:
raw_content = file.read() raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore) handler = json_html_plaintext.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content, stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text']) datastore.data['watching'][uuid]['ignore_text'])

@ -0,0 +1,37 @@
class fetch_processor():
    """
    Base class for all fetch processors
    - json_html_plaintext
    - image (future)

    Keeps a reference to the datastore and provides the proxy-selection
    logic shared by the concrete processors.
    """

    def __init__(self, *args, datastore, **kwargs):
        """Store the datastore; any other arguments are passed up the MRO.

        `datastore` is keyword-only.
        """
        super().__init__(*args, **kwargs)
        self.datastore = datastore

    # If there was a proxy list enabled, figure out what proxy_args/which proxy to use
    # if watch.proxy use that
    # fetcher.proxy_override = watch.proxy or main config proxy
    # Allows override the proxy on a per-request basis
    # ALWAYS use the first one if nothing is selected
    def set_proxy_from_list(self, watch):
        """Pick the proxy to use for `watch`.

        Resolution order:
        1. the watch's own 'proxy' value, when it appears in an entry of
           the datastore's proxy list;
        2. the system-wide proxy from settings -> requests -> proxy, when
           that appears in an entry of the proxy list;
        3. the first element of the first proxy list entry.

        Returns None when the datastore has no proxy list (None or empty).
        """
        proxy_list = self.datastore.proxy_list
        # No proxy list configured, or configured but empty: nothing to
        # choose from (an empty list would otherwise fail on the fallback
        # lookup at the end of this method).
        if not proxy_list:
            return None

        def is_listed(candidate):
            # Each entry is tested with `in`, exactly like the original
            # check; presumably entries are tuples/lists whose first
            # element is the proxy identifier - confirm against the loader.
            return any(candidate in entry for entry in proxy_list)

        proxy_args = None

        # .get() so a watch without a 'proxy' key behaves like proxy=None
        watch_proxy = watch.get('proxy')

        # If its a valid one
        if is_listed(watch_proxy):
            proxy_args = watch_proxy
        # not valid (including None), try the system one
        else:
            system_proxy = self.datastore.data['settings']['requests']['proxy']
            # Is not None and exists
            if is_listed(system_proxy):
                proxy_args = system_proxy

        # Fallback - Did not resolve anything, use the first available
        if proxy_args is None:
            proxy_args = proxy_list[0][0]

        return proxy_args

@ -9,45 +9,14 @@ from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from .fetch_processor import fetch_processor
# Some common stuff here that can be moved to a base class # Some common stuff here that can be moved to a base class
# (set_proxy_from_list) # (set_proxy_from_list)
class perform_site_check(): class perform_site_check(fetch_processor):
screenshot = None screenshot = None
xpath_data = None xpath_data = None
def __init__(self, *args, datastore, **kwargs):
super().__init__(*args, **kwargs)
self.datastore = datastore
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
# if watch.proxy use that
# fetcher.proxy_override = watch.proxy or main config proxy
# Allows override the proxy on a per-request basis
# ALWAYS use the first one is nothing selected
def set_proxy_from_list(self, watch):
proxy_args = None
if self.datastore.proxy_list is None:
return None
# If its a valid one
if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
proxy_args = watch['proxy']
# not valid (including None), try the system one
else:
system_proxy = self.datastore.data['settings']['requests']['proxy']
# Is not None and exists
if any([system_proxy in p for p in self.datastore.proxy_list]):
proxy_args = system_proxy
# Fallback - Did not resolve anything, use the first available
if proxy_args is None:
proxy_args = self.datastore.proxy_list[0][0]
return proxy_args
# Doesn't look like python supports forward slash auto enclosure in re.findall # Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "foobar(?i)" type configuration # So convert it to inline flag "foobar(?i)" type configuration
def forward_slash_enclosed_regex_to_options(self, regex): def forward_slash_enclosed_regex_to_options(self, regex):
@ -315,4 +284,5 @@ class perform_site_check():
if not watch.get('previous_md5'): if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5 watch['previous_md5'] = fetched_md5
# @todo text_content_before_ignored_filter can be removed? save it here?
return changed_detected, update_obj, text_content_before_ignored_filter return changed_detected, update_obj, text_content_before_ignored_filter

@ -47,7 +47,6 @@ def set_modified_response():
# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's # Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
def test_css_filter_output(): def test_css_filter_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text from inscriptis import get_text
# Check text with sub-parts renders correctly # Check text with sub-parts renders correctly

@ -71,7 +71,6 @@ def set_modified_response():
def test_element_removal_output(): def test_element_removal_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text from inscriptis import get_text
# Check text with sub-parts renders correctly # Check text with sub-parts renders correctly

@ -1,7 +1,5 @@
#!/usr/bin/python3 #!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup from . util import live_server_setup
from changedetectionio import html_tools from changedetectionio import html_tools
@ -11,7 +9,7 @@ def test_setup(live_server):
# Unit test of the stripper # Unit test of the stripper
# Always we are dealing in utf-8 # Always we are dealing in utf-8
def test_strip_regex_text_func(): def test_strip_regex_text_func():
from changedetectionio import fetch_site_status from ..fetch_processor import json_html_plaintext
test_content = """ test_content = """
but sometimes we want to remove the lines. but sometimes we want to remove the lines.
@ -23,7 +21,7 @@ def test_strip_regex_text_func():
ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"] ignore_lines = ["sometimes", "/\s\d{2,3}\s/", "/ignore-case text/"]
fetcher = fetch_site_status.perform_site_check(datastore=False) fetcher = json_html_plaintext.perform_site_check(datastore=False)
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines) stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert b"but 1 lines" in stripped_content assert b"but 1 lines" in stripped_content

@ -11,7 +11,7 @@ def test_setup(live_server):
# Unit test of the stripper # Unit test of the stripper
# Always we are dealing in utf-8 # Always we are dealing in utf-8
def test_strip_text_func(): def test_strip_text_func():
from changedetectionio import fetch_site_status from ..fetch_processor import json_html_plaintext
test_content = """ test_content = """
Some content Some content

@ -117,9 +117,9 @@ class update_worker(threading.Thread):
os.unlink(full_path) os.unlink(full_path)
def run(self): def run(self):
from changedetectionio import fetch_site_status from changedetectionio.fetch_processor import json_html_plaintext
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore) update_handler = json_html_plaintext.perform_site_check(datastore=self.datastore)
while not self.app.config.exit.is_set(): while not self.app.config.exit.is_set():

Loading…
Cancel
Save