PDF Fetching - Handle when the PDF is given as inline content without a proper mime header (#1875)

1 year ago · 1c0fe4c23e
parent 4f6b0eb8a5
commit 1c0fe4c23e
1 changed file with 2 additions and 1 deletion
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -167,7 +167,8 @@ class perform_site_check(difference_detection_processor):
            is_html = False
            is_json = False

-        if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower():
+        inline_pdf = fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in fetcher.content[:10]
+        if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
            from shutil import which
            tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
            if not which(tool):