pull/938/head
dgtlmoon 2 years ago
parent c5b0c19836
commit 0e0bd93234

@ -396,18 +396,20 @@ def changedetection_app(config=None, datastore_o=None):
existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form)
# The fetch-processor selector in the overview template is only rendered when a
# browser backend is configured. The original expression tested
# PLAYWRIGHT_DRIVER_URL twice, so a Selenium-only setup was never detected.
# NOTE(review): WEBDRIVER_URL is assumed to be the Selenium endpoint variable —
# confirm against content_fetcher before merging.
webdriver_enabled = bool(os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv('WEBDRIVER_URL'))
output = render_template("watch-overview.html",
form=form,
watches=sorted_watches,
tags=existing_tags,
active_tag=limit_tag,
app_rss_token=datastore.data['settings']['application']['rss_access_token'],
form=form,
guid=datastore.data['app_guid'],
has_unviewed=datastore.has_unviewed,
# Don't link to hosting when we're on the hosting environment
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
guid=datastore.data['app_guid'],
queued_uuids=[uuid for p,uuid in update_q.queue])
queued_uuids=[uuid for p, uuid in update_q.queue],
tags=existing_tags,
watches=sorted_watches,
webdriver_enabled=webdriver_enabled
)
if session.get('share-link'):
del(session['share-link'])
@ -1228,15 +1230,23 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
url = request.form.get('url').strip()
fetch_processor =request.form.get('fetch_processor').strip()
if datastore.url_exists(url):
flash('The URL {} already exists'.format(url), "error")
return redirect(url_for('index'))
add_paused = request.form.get('edit_and_watch_submit_button') != None
fetch_processor = request.form.get('fetch_processor')
extras = {'paused': add_paused}
if fetch_processor:
extras['fetch_processor']=fetch_processor
if fetch_processor == 'image':
extras['fetch_backend'] = 'html_webdriver'
new_uuid = datastore.add_watch(url=url,
tag=request.form.get('tag').strip(),
extras={'paused': add_paused, 'fetch_processor': fetch_processor}
extras=extras
)

@ -1,4 +1,4 @@
available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Static Image'), ('rendered_webpage', 'Screenshot of page or element')]
available_fetchers = [('json_html_plaintext', 'JSON/HTML/Text'), ('image', 'Graphically by image or web-page')]
class fetch_processor():
contents = b''

@ -34,6 +34,12 @@ class perform_site_check(fetch_processor):
watch = self.datastore.data['watching'].get(uuid)
if watch.get('fetch_backend') != 'html_webdriver':
raise Exception(
"Requires a Chrome compatible fetcher enabled."
)
# Protect against file:// access
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
raise Exception(
@ -80,9 +86,12 @@ class perform_site_check(fetch_processor):
update_obj["last_check_status"] = fetcher.get_last_status_code()
self.contents = fetcher.raw_content
if 'image' in fetcher.headers['content-type']:
self.contents = fetcher.raw_content
else:
self.contents = fetcher.screenshot
image = Image.open(io.BytesIO(fetcher.raw_content))
image = Image.open(io.BytesIO(self.contents))
# @todo different choice?
# https://github.com/JohannesBuchner/imagehash#references

@ -1,105 +0,0 @@
import hashlib
import imagehash
from PIL import Image
import io
import logging
import os
import re
import time
import urllib3
# fetch processor for requesting and comparing a single image
# can use both requests and playwright/selenium
# - imagehash for change detection (or https://github.com/dgtlmoon/changedetection.io/pull/419/files#diff-7d3854710a6c0faead783f75850100a4c4b69409309200d3a83692dc9783bf6eR17 ?)
# - skimage.metrics import structural_similarity for viewing the diff
from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from . import fetch_processor
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(fetch_processor):
    """Fetch processor that detects changes by hashing a screenshot of the watched URL.

    The page is fetched through the watch's configured fetcher, the fetcher's
    screenshot is loaded with PIL, and an ``imagehash.average_hash`` of it is
    compared against the hash stored on the watch as ``previous_md5``.
    """

    xpath_data = None

    @staticmethod
    def _strip_brotli(accept_encoding):
        """Return *accept_encoding* with every ``br`` content-coding removed.

        Tokens are split on commas and compared case-insensitively, ignoring
        any ``;q=`` parameter, so ``"br, gzip"``, ``"gzip,br"`` and
        ``"gzip, br;q=0.9"`` are all handled. The remaining tokens are joined
        with ``", "``; an empty string is returned when nothing is left.
        """
        kept = []
        for token in accept_encoding.split(','):
            token = token.strip()
            if not token:
                continue
            coding = token.split(';', 1)[0].strip().lower()
            if coding != 'br':
                kept.append(token)
        return ', '.join(kept)

    def run(self, uuid):
        """Fetch the watch identified by *uuid* and report whether its screenshot hash changed.

        Returns:
            A ``(changed_detected, update_obj)`` tuple. ``update_obj`` is a dict
            of fields for the caller to store on the watch: the cleared error
            flags, ``last_check_status`` and the new ``previous_md5`` hash.

        Raises:
            Exception: if the URL starts with ``file`` while ``ALLOW_FILE_URI``
                is not set, or if the watch's ``fetch_backend`` is not
                ``'html_webdriver'``.
        """
        changed_detected = False
        watch = self.datastore.data['watching'].get(uuid)

        # Protect against file:// access
        if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
            raise Exception(
                "file:// type access is denied for security reasons."
            )

        # A screenshot is needed, so only the browser-based backend is accepted.
        if watch.get('fetch_backend') != 'html_webdriver':
            raise Exception(
                "Requires a Chrome compatible fetcher enabled."
            )

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

        # Tweak the base config with the per-watch headers. A watch without a
        # 'headers' entry yields None, which dict.update() would reject.
        request_headers = self.datastore.data['settings']['headers'].copy()
        request_headers.update(watch.get('headers') or {})

        # https://github.com/psf/requests/issues/4525
        # Requests doesn't yet support brotli encoding, so never send 'br'; be totally sure
        # that the user cannot do this by accident, whatever the header's formatting.
        accept_encoding = request_headers.get('Accept-Encoding')
        if accept_encoding and 'br' in accept_encoding.lower():
            cleaned = self._strip_brotli(accept_encoding)
            if cleaned:
                request_headers['Accept-Encoding'] = cleaned
            else:
                # Nothing but brotli was requested: drop the header rather than send it empty.
                del request_headers['Accept-Encoding']

        timeout = self.datastore.data['settings']['requests']['timeout']
        url = watch.get('url')
        request_body = watch.get('body')
        request_method = watch.get('method')
        ignore_status_codes = watch.get('ignore_status_codes', False)

        prefer_backend = watch['fetch_backend']
        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        else:
            # If the klass doesn't exist, just use a default
            klass = getattr(content_fetcher, "html_requests")

        # NOTE(review): set_proxy_from_list is not defined in this class; presumably
        # it comes from the fetch_processor base — confirm.
        proxy_args = self.set_proxy_from_list(watch)
        fetcher = klass(proxy_override=proxy_args)
        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes)
        fetcher.quit()

        # @todo if the mimetype is not image/*, raise content_fetcher.NotAnImage(mimetype)?
        #       Or is it better to try loading with PIL and catch the exception?
        update_obj["last_check_status"] = fetcher.get_last_status_code()

        self.contents = fetcher.screenshot
        image = Image.open(io.BytesIO(fetcher.screenshot))

        # @todo different choice?
        # https://github.com/JohannesBuchner/imagehash#references
        fetched_hash = str(imagehash.average_hash(image))

        # The main thing that all this at the moment comes down to :)
        if watch['previous_md5'] != fetched_hash:
            changed_detected = True

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_hash

        # On the first run of a site, watch['previous_md5'] will be None, set it to the current one.
        if not watch.get('previous_md5'):
            watch['previous_md5'] = fetched_hash

        return changed_detected, update_obj

@ -15,8 +15,10 @@
<div>
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
{% if webdriver_enabled %}
<br/>
{{ render_field(form.fetch_processor) }}
{% endif %}
</div>
<div>
{{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }}

Loading…
Cancel
Save