diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 90988179..da5e0ca5 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -547,6 +547,43 @@ class html_requests(Fetcher): self.headers = r.headers +# "html_requests" is listed as the default fetcher in store.py! +class html_fetcher_with_weird_memory_leak(Fetcher): + fetcher_description = "HTTP Fetcher with unexplainable memory leak" + + def __init__(self, proxy_override=None): + self.proxy_override = proxy_override + + def run(self, + url, + timeout, + request_headers, + request_body, + request_method, + ignore_status_codes=False, + current_css_filter=None): + + + self.status_code = 200 + + # Does nothing to help + # with open('memory-leak.html', 'r', encoding="utf-8") as f: + # with open('memory-leak.html', 'r') as f: + + # Works but is binary (no good for me) + with open('memory-leak.html', 'r') as f: + wtf = f.read() + + # just to prove gc.collect() doesn't help; I don't even use 'wtf' + del wtf + wtf="not much" + import gc + gc.collect() + + self.content = "foobar" + self.headers = {} + self.xpath_data = '{}' + # Decide which is the 'real' HTML webdriver, this is more a system wide config # rather than site-specific. 
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 6e74d483..abd03f7b 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -31,7 +31,7 @@ class model(dict): 'base_url' : None, 'extract_title_as_title': False, 'empty_pages_are_a_change': False, - 'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), + 'fetch_backend': 'html_fetcher_with_weird_memory_leak', 'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, 'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum 'global_subtractive_selectors': [], diff --git a/changedetectionio/store.py b/changedetectionio/store.py index da7835f1..5194419e 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -82,9 +82,8 @@ class ChangeDetectionStore: if include_default_watches: print("Creating JSON store at", self.datastore_path) - self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') - self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') - self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io') + for i in range(50): + self.add_watch(url='https://changedetection.io/CHANGELOG.txt?x='+str(i), tag='test') self.__data['version_tag'] = version_tag diff --git a/memory-leak.html b/memory-leak.html new file mode 100644 index 00000000..fb34a76a --- /dev/null +++ b/memory-leak.html @@ -0,0 +1,231 @@ + + + + + + + + + + + + + + + + +CNN International - Breaking News, US News, World News and Video + + + + + + + +
+
+ +
+
+ +
\ No newline at end of file