From 599291645d616375e8eef109ed959365daa77921 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 30 Nov 2023 20:01:14 +0100
Subject: [PATCH] PDF Fetcher for change detection - Always use plain requests
 for PDF because otherwise we cant access the embed PDF in the browser (#2020)

---
 changedetectionio/processors/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py
index 10c9138c..efccea49 100644
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -52,6 +52,11 @@ class difference_detection_processor():
                 prefer_fetch_backend = 'base_html_playwright'
                 browser_connection_url = connection[0].get('browser_connection_url')
 
+        # PDF should use html_requests because playwright will (so far) serve it up in an embedded page
+        # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
+        # @todo needs a test or a fix
+        if self.watch.is_pdf:
+           prefer_fetch_backend = "html_requests"
 
         # Grab the right kind of 'fetcher', (playwright, requests, etc)
         if hasattr(content_fetcher, prefer_fetch_backend):