From 1b931fef20050f809e8ca3638a6481dd31d31b12 Mon Sep 17 00:00:00 2001 From: Leigh Date: Sun, 25 Jul 2021 13:55:28 +0200 Subject: [PATCH] Re #154 - Handle missing JSON better --- backend/html_tools.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/html_tools.py b/backend/html_tools.py index 8a0ff3c5..16fbf8e8 100644 --- a/backend/html_tools.py +++ b/backend/html_tools.py @@ -70,6 +70,10 @@ def extract_json_as_string(content, jsonpath_filter): raise JSONNotFound("No parsable JSON found in this document") for result in bs_result: + # Skip empty tags, and things that don't even look like JSON + if not result.string or not '{' in result.string: + continue + try: json_data = json.loads(result.string) except json.JSONDecodeError: @@ -80,4 +84,7 @@ def extract_json_as_string(content, jsonpath_filter): if stripped_text_from_html: break + if not stripped_text_from_html: + raise JSONNotFound("No JSON matching the rule '%s' found" % jsonpath_filter.replace('json:','')) + return stripped_text_from_html