diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 9788db3c..6b202a5f 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -838,6 +838,7 @@ def changedetection_app(config=None, datastore_o=None): logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"]) return output + @app.route("/api//snapshot/current", methods=['GET']) @login_required def api_snapshot(uuid): diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py index f0f91961..35ef2a01 100644 --- a/changedetectionio/fetch_site_status.py +++ b/changedetectionio/fetch_site_status.py @@ -86,8 +86,13 @@ class perform_site_check(): if is_html: # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text html_content = fetcher.content - if not fetcher.headers.get('Content-Type', '') == 'text/plain': + # If not JSON, and if it's not text/plain.. + if 'text/plain' in fetcher.headers.get('Content-Type', '').lower(): + # Don't run get_text or xpath/css filters on plaintext + stripped_text_from_html = html_content + else: + # Then we assume HTML if has_filter_rule: # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." if css_filter_rule[0] == '/': @@ -98,9 +103,7 @@ class perform_site_check(): # get_text() via inscriptis stripped_text_from_html = get_text(html_content) - else: - # Don't run get_text or xpath/css filters on plaintext - stripped_text_from_html = html_content + # Re #340 - return the content before the 'ignore text' was applied text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index b4f9e5f2..370fd10a 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -14,7 +14,6 @@ def set_response_data(test_return_data): def test_snapshot_api_detects_change(client, live_server): - test_return_data = "Some initial text" test_return_data_modified = "Some NEW nice initial text" @@ -27,7 +26,7 @@ def test_snapshot_api_detects_change(client, live_server): time.sleep(1) # Add our URL to the import page - test_url = url_for('test_endpoint', _external=True) + test_url = url_for('test_endpoint', content_type="text/plain", _external=True) res = client.post( url_for("import_page"), data={"urls": test_url}, diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index 92b81891..b279251f 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -7,6 +7,13 @@ from . util import set_original_response, set_modified_response, live_server_set sleep_time_for_fetch_thread = 3 +# Basic test to check inscriptus is not adding return line chars, basically works etc +def test_inscriptus(): + from inscriptis import get_text + html_content="test!
ok man" + stripped_text_from_html = get_text(html_content) + assert stripped_text_from_html == 'test!\nok man' + def test_check_basic_change_detection_functionality(client, live_server): set_original_response() diff --git a/changedetectionio/tests/test_jsonpath_selector.py b/changedetectionio/tests/test_jsonpath_selector.py index 5a4b7959..a5329b67 100644 --- a/changedetectionio/tests/test_jsonpath_selector.py +++ b/changedetectionio/tests/test_jsonpath_selector.py @@ -162,7 +162,7 @@ def test_check_json_without_filter(client, live_server): time.sleep(1) # Add our URL to the import page - test_url = url_for('test_endpoint_json', _external=True) + test_url = url_for('test_endpoint', content_type="application/json", _external=True) client.post( url_for("import_page"), data={"urls": test_url}, @@ -193,7 +193,7 @@ def test_check_json_filter(client, live_server): time.sleep(1) # Add our URL to the import page - test_url = url_for('test_endpoint', _external=True) + test_url = url_for('test_endpoint', content_type="application/json", _external=True) res = client.post( url_for("import_page"), data={"urls": test_url}, @@ -258,7 +258,7 @@ def test_check_json_filter_bool_val(client, live_server): # Give the endpoint time to spin up time.sleep(1) - test_url = url_for('test_endpoint', _external=True) + test_url = url_for('test_endpoint', content_type="application/json", _external=True) res = client.post( url_for("import_page"), @@ -313,7 +313,7 @@ def test_check_json_ext_filter(client, live_server): time.sleep(1) # Add our URL to the import page - test_url = url_for('test_endpoint', _external=True) + test_url = url_for('test_endpoint', content_type="application/json", _external=True) res = client.post( url_for("import_page"), data={"urls": test_url}, diff --git a/changedetectionio/tests/util.py b/changedetectionio/tests/util.py index 86b78767..12aefb6e 100644 --- a/changedetectionio/tests/util.py +++ b/changedetectionio/tests/util.py @@ -1,5 +1,6 @@ #!/usr/bin/python3 +from flask import make_response, request def set_original_response(): test_return_data = """ @@ -40,24 +41,16 @@ def live_server_setup(live_server): @live_server.app.route('/test-endpoint') def test_endpoint(): - # Tried using a global var here but didn't seem to work, so reading from a file instead. - with open("test-datastore/endpoint-content.txt", "r") as f: - return f.read() - - @live_server.app.route('/test-endpoint-json') - def test_endpoint_json(): - - from flask import make_response + ctype = request.args.get('content_type') + # Tried using a global var here but didn't seem to work, so reading from a file instead. with open("test-datastore/endpoint-content.txt", "r") as f: resp = make_response(f.read()) - resp.headers['Content-Type'] = 'application/json' + resp.headers['Content-Type'] = ctype if ctype else 'text/html' return resp @live_server.app.route('/test-403') def test_endpoint_403_error(): - - from flask import make_response resp = make_response('', 403) return resp @@ -65,7 +58,6 @@ def live_server_setup(live_server): @live_server.app.route('/test-headers') def test_headers(): - from flask import request output= [] for header in request.headers: @@ -76,24 +68,16 @@ def live_server_setup(live_server): # Just return the body in the request @live_server.app.route('/test-body', methods=['POST', 'GET']) def test_body(): - - from flask import request - return request.data # Just return the verb in the request @live_server.app.route('/test-method', methods=['POST', 'GET', 'PATCH']) def test_method(): - - from flask import request - return request.method # Where we POST to as a notification @live_server.app.route('/test_notification_endpoint', methods=['POST', 'GET']) def test_notification_endpoint(): - from flask import request - with open("test-datastore/notification.txt", "wb") as f: # Debug method, dump all POST to file also, used to prove #65 data = request.stream.read() @@ -107,8 +91,6 @@ def live_server_setup(live_server): # Just return the verb in the request @live_server.app.route('/test-basicauth', methods=['GET']) def test_basicauth_method(): - - from flask import request auth = request.authorization ret = " ".join([auth.username, auth.password, auth.type]) return ret