From af25b824a044484bfa544abaef4d7b108dff9e60 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Sat, 12 Feb 2022 22:13:53 +0100
Subject: [PATCH] Small tidy-up of content-type handling in fetch_site_status

---
 changedetectionio/fetch_site_status.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/changedetectionio/fetch_site_status.py b/changedetectionio/fetch_site_status.py
index 48cfd457..909e933a 100644
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -109,9 +109,8 @@ class perform_site_check():
             
             # Could be 'application/json; charset=utf-8' etc
             is_json = 'application/json' in update_obj['content-type']
-
-            is_text_or_html = 'text' in update_obj['content-type']
-            is_binary = content_fetcher.supported_binary_type(update_obj['content-type'])
+            is_text_or_html = 'text/' in update_obj['content-type'] # text/plain , text/html etc
+            is_binary = not is_text_or_html and content_fetcher.supported_binary_type(update_obj['content-type'])
             css_filter_rule = watch['css_filter']
             has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
 
@@ -134,7 +133,11 @@ class perform_site_check():
             if is_text_or_html:
                 # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                 html_content = fetcher.content
-                if not fetcher.headers.get('Content-Type', '') == 'text/plain':
+                if 'text/plain' in update_obj['content-type']:
+                    stripped_text_from_html = html_content
+
+                # Assume it's HTML if it's not text/plain
+                if not 'text/plain' in update_obj['content-type']:
                     if has_filter_rule:
                         # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
                         if css_filter_rule[0] == '/':
@@ -145,14 +148,10 @@ class perform_site_check():
                     # get_text() via inscriptis
                     stripped_text_from_html = get_text(html_content)
 
-                # Extract title as title
-                if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
-                    if not watch['title'] or not len(watch['title']):
-                        update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
-
-                else:
-                    # Don't run get_text or xpath/css filters on plaintext
-                    stripped_text_from_html = html_content
+                    # Extract title as title
+                    if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
+                        if not watch['title'] or not len(watch['title']):
+                            update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
 
                 # Re #340 - return the content before the 'ignore text' was applied
                 original_content_before_filters = stripped_text_from_html.encode('utf-8')