Small memory allocation fixes (#2625)

2 months ago · 7f2fa20318
parent e16814e40b
commit 7f2fa20318
7 changed files with 103 additions and 91 deletions
--- a/changedetectionio/apprise/init.py
+++ b/changedetectionio/apprise/init.py
@ -0,0 +1,79 @@
 # include the decorator
 from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    """Send the notification body as a plain HTTP request via ``requests``.

    Registered with Apprise for the ``get(s)://``, ``post(s)://``,
    ``put(s)://`` and ``delete(s)://`` schemas.  The schema selects the HTTP
    verb, and a trailing ``s`` selects ``https`` instead of ``http``.

    Query arguments that Apprise collects under ``qsd+`` (for example
    ``/foobar?+some-header=hello``) are sent as request headers; the
    remaining query arguments are sent as request parameters.

    Args:
        body: Payload to send.  A ``str`` is UTF-8 encoded, anything else is
            handed to ``requests`` unchanged.
        title: Unused by this wrapper.
        notify_type: Unused by this wrapper.
        *args: Unused by this wrapper.
        **kwargs: Must contain ``meta``, a mapping with a ``url`` entry.

    Raises:
        ValueError: If the URL has no supported schema or cannot be parsed.
    """
    import json
    import requests
    from apprise.utils import parse_url as apprise_parse_url
    from apprise import URLBase

    # Custom schema -> (requests callable, real transport scheme)
    schema_map = {
        'delete': (requests.delete, 'http'),
        'deletes': (requests.delete, 'https'),
        'get': (requests.get, 'http'),
        'gets': (requests.get, 'https'),
        'post': (requests.post, 'http'),
        'posts': (requests.post, 'https'),
        'put': (requests.put, 'http'),
        'puts': (requests.put, 'https'),
    }

    url = kwargs['meta'].get('url')
    schema, separator, remainder = url.partition('://')
    if not separator or schema.lower() not in schema_map:
        # Message deliberately omits the URL, it may embed credentials
        raise ValueError('Unsupported or missing schema in custom notification URL')

    send, transport = schema_map[schema.lower()]

    # Only the leading schema is rewritten; a chain of str.replace() calls
    # would also rewrite a matching substring further along in the URL
    # (e.g. "input://" contains "put://").
    url = transport + '://' + remainder

    # Convert /foobar?+some-header=hello to proper header dictionary
    results = apprise_parse_url(url)
    if not results:
        raise ValueError('Could not parse custom notification URL')

    # Add our headers that the user can potentially over-ride if they wish
    # to our returned result set and tidy entries by unquoting them
    header_args = results['qsd+']
    headers = {URLBase.unquote(name): URLBase.unquote(value)
               for name, value in header_args.items()}

    # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
    # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
    # but here we are making straight requests, so we need to convert this against apprise's logic
    params = {URLBase.unquote(name): URLBase.unquote(value)
              for name, value in results['qsd'].items()
              if name.strip('+-') not in header_args}

    # Determine Authentication ('' is kept as the "no credentials" value)
    auth = ''
    user = results.get('user')
    password = results.get('password')
    if user and password:
        auth = (URLBase.unquote(user), URLBase.unquote(password))
    elif user:
        # A bare "(user)" is just a string, not a tuple; send the user name
        # with an empty password instead.
        auth = (URLBase.unquote(user), '')

    # Try to auto-guess if it's JSON
    try:
        json.loads(body)
    except (TypeError, ValueError):
        # Not JSON (or not text/bytes at all): leave the headers as they are
        pass
    else:
        headers['Content-Type'] = 'application/json; charset=utf-8'

    send(results.get('url'),
         auth=auth,
         data=body.encode('utf-8') if isinstance(body, str) else body,
         headers=headers,
         params=params
         )
--- a/changedetectionio/content_fetchers/requests.py
+++ b/changedetectionio/content_fetchers/requests.py
@ -1,8 +1,6 @@
 from loguru import logger
 import chardet
 import hashlib
 import os
 import requests
 from changedetectionio import strtobool
 from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 from changedetectionio.content_fetchers.base import Fetcher
@ -28,6 +26,9 @@ class fetcher(Fetcher):
            is_binary=False,
            empty_pages_are_a_change=False):
        import chardet
        import requests
        if self.browser_steps_get_valid_steps():
            raise BrowserStepsInUnsupportedFetcher(url=url)
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@ -537,7 +537,8 @@ def changedetection_app(config=None, datastore_o=None):
        import random
        from .apprise_asset import asset
        apobj = apprise.Apprise(asset=asset)
-
+        # so that the custom endpoints are registered
        from changedetectionio.apprise import apprise_custom_api_call_wrapper
        is_global_settings_form = request.args.get('mode', '') == 'global-settings'
        is_group_settings_form = request.args.get('mode', '') == 'group-settings'
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
    def __call__(self, form, field):
        import apprise
        apobj = apprise.Apprise()
-
+        # so that the custom endpoints are registered
        from changedetectionio.apprise import apprise_custom_api_call_wrapper
        for server_url in field.data:
            if not apobj.add(server_url):
                message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@ -1,10 +1,4 @@
 from bs4 import BeautifulSoup
 from inscriptis import get_text
 from jsonpath_ng.ext import parse
 from typing import List
 from inscriptis.model.config import ParserConfig
 from xml.sax.saxutils import escape as xml_escape
 import json
 import re
@ -39,6 +33,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):
 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html_content, "html.parser")
    html_block = ""
    r = soup.select(include_filters, separator="")
@ -56,6 +51,7 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
    return html_block
 def subtractive_css_selector(css_selector, html_content):
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html_content, "html.parser")
    for item in soup.select(css_selector):
        item.decompose()
@ -181,6 +177,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
 # Extract/find element
 def extract_element(find='title', html_content=''):
    from bs4 import BeautifulSoup
    # Re #106, be sure to handle when it's not found
    element_text = None
@ -194,6 +191,8 @@ def extract_element(find='title', html_content=''):
 #
 def _parse_json(json_data, json_filter):
    from jsonpath_ng.ext import parse
    if json_filter.startswith("json:"):
        jsonpath_expression = parse(json_filter.replace('json:', ''))
        match = jsonpath_expression.find(json_data)
@ -242,6 +241,8 @@ def _get_stripped_text_from_json_match(match):
 # json_filter - ie json:$..price
 # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I don't know how to do that as a json selector)
 def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
    from bs4 import BeautifulSoup
    stripped_text_from_html = False
 # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@ -352,6 +353,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
    return "\n".encode('utf8').join(output)
 def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    from xml.sax.saxutils import escape as xml_escape
    pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
    def repl(m):
        text = m.group(1)
@ -360,6 +362,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
    return re.sub(pattern, repl, html_content)
 def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
    from inscriptis import get_text
    from inscriptis.model.config import ParserConfig
    """Converts html string to a string with just the text. If ignoring
    rendering anchor tag content is enable, anchor tag content are also
    included in the text
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@ -1,9 +1,10 @@
-import apprise
+
 import time
 from apprise import NotifyFormat
-import json
+import apprise
 from loguru import logger
 valid_tokens = {
    'base_url': '',
    'current_snapshot': '',
@ -34,87 +35,11 @@ valid_notification_formats = {
    default_notification_format_for_watch: default_notification_format_for_watch
 }
 # include the decorator
 from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    """Send the notification body as a plain HTTP request via ``requests``.

    Registered with Apprise for the ``get(s)://``, ``post(s)://``,
    ``put(s)://`` and ``delete(s)://`` schemas.  The schema selects the HTTP
    verb, and a trailing ``s`` selects ``https`` instead of ``http``.

    Query arguments that Apprise collects under ``qsd+`` (for example
    ``/foobar?+some-header=hello``) are sent as request headers; the
    remaining query arguments are sent as request parameters.

    Args:
        body: Payload to send.  A ``str`` is UTF-8 encoded, anything else is
            handed to ``requests`` unchanged.
        title: Unused by this wrapper.
        notify_type: Unused by this wrapper.
        *args: Unused by this wrapper.
        **kwargs: Must contain ``meta``, a mapping with a ``url`` entry.

    Raises:
        ValueError: If the URL has no supported schema or cannot be parsed.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import json`` being present.
    import json
    import requests
    from apprise.utils import parse_url as apprise_parse_url
    from apprise import URLBase

    # Custom schema -> (requests callable, real transport scheme)
    verb_table = {
        'delete': (requests.delete, 'http'),
        'deletes': (requests.delete, 'https'),
        'get': (requests.get, 'http'),
        'gets': (requests.get, 'https'),
        'post': (requests.post, 'http'),
        'posts': (requests.post, 'https'),
        'put': (requests.put, 'http'),
        'puts': (requests.put, 'https'),
    }

    url = kwargs['meta'].get('url')
    schema, separator, remainder = url.partition('://')
    if not separator or schema.lower() not in verb_table:
        # Message deliberately omits the URL, it may embed credentials
        raise ValueError('Unsupported or missing schema in custom notification URL')

    do_request, transport = verb_table[schema.lower()]

    # Rewrite only the leading schema; replacing substrings anywhere in the
    # URL could corrupt a later part of it (e.g. "input://" contains "put://").
    url = transport + '://' + remainder

    # Convert /foobar?+some-header=hello to proper header dictionary
    results = apprise_parse_url(url)
    if not results:
        raise ValueError('Could not parse custom notification URL')

    # Add our headers that the user can potentially over-ride if they wish
    # to our returned result set and tidy entries by unquoting them
    header_args = results['qsd+']
    headers = {URLBase.unquote(name): URLBase.unquote(value)
               for name, value in header_args.items()}

    # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
    # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
    # but here we are making straight requests, so we need to convert this against apprise's logic
    params = {URLBase.unquote(name): URLBase.unquote(value)
              for name, value in results['qsd'].items()
              if name.strip('+-') not in header_args}

    # Determine Authentication ('' is kept as the "no credentials" value)
    auth = ''
    user = results.get('user')
    password = results.get('password')
    if user and password:
        auth = (URLBase.unquote(user), URLBase.unquote(password))
    elif user:
        # A bare "(user)" is just a string, not a tuple; send the user name
        # with an empty password instead.
        auth = (URLBase.unquote(user), '')

    # Try to auto-guess if it's JSON
    try:
        json.loads(body)
    except (TypeError, ValueError):
        # Not JSON (or not text/bytes at all): leave the headers as they are
        pass
    else:
        headers['Content-Type'] = 'application/json; charset=utf-8'

    do_request(results.get('url'),
               auth=auth,
               data=body.encode('utf-8') if isinstance(body, str) else body,
               headers=headers,
               params=params
               )
 def process_notification(n_object, datastore):
-
+    # so that the custom endpoints are registered
    from changedetectionio.apprise import apprise_custom_api_call_wrapper
    from .safe_jinja import render as jinja_render
    now = time.time()
    if n_object.get('notification_timestamp'):
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@ -11,7 +11,6 @@ from threading import Lock
 import json
 import os
 import re
 import requests
 import secrets
 import threading
 import time
@ -270,6 +269,7 @@ class ChangeDetectionStore:
        self.needs_write_urgent = True
    def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
        import requests
        if extras is None:
            extras = {}