PDF Fetching - Handle when the PDF is given as inline content without a proper mime header (#1875)

pull/1876/head
dgtlmoon 7 months ago committed by GitHub
parent 4f6b0eb8a5
commit 1c0fe4c23e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -167,7 +167,8 @@ class perform_site_check(difference_detection_processor):
is_html = False
is_json = False
-if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower():
+inline_pdf = fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in fetcher.content[:10]
+if watch.is_pdf or 'application/pdf' in fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
from shutil import which
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
if not which(tool):

Loading…
Cancel
Save