Small memory allocation fixes (#2625)

pull/2629/head
dgtlmoon 2 months ago committed by GitHub
parent e16814e40b
commit 7f2fa20318
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,79 @@
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
import json
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
url = kwargs['meta'].get('url')
if url.startswith('post'):
r = requests.post
elif url.startswith('get'):
r = requests.get
elif url.startswith('put'):
r = requests.put
elif url.startswith('delete'):
r = requests.delete
url = url.replace('post://', 'http://')
url = url.replace('posts://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('get://', 'http://')
url = url.replace('gets://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('delete://', 'http://')
url = url.replace('deletes://', 'https://')
headers = {}
params = {}
auth = None
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
# Try to auto-guess if it's JSON
try:
json.loads(body)
headers['Content-Type'] = 'application/json; charset=utf-8'
except ValueError as e:
pass
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)

@ -1,8 +1,6 @@
from loguru import logger from loguru import logger
import chardet
import hashlib import hashlib
import os import os
import requests
from changedetectionio import strtobool from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher from changedetectionio.content_fetchers.base import Fetcher
@ -28,6 +26,9 @@ class fetcher(Fetcher):
is_binary=False, is_binary=False,
empty_pages_are_a_change=False): empty_pages_are_a_change=False):
import chardet
import requests
if self.browser_steps_get_valid_steps(): if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url) raise BrowserStepsInUnsupportedFetcher(url=url)

@ -537,7 +537,8 @@ def changedetection_app(config=None, datastore_o=None):
import random import random
from .apprise_asset import asset from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset) apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
from changedetectionio.apprise import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings' is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings' is_group_settings_form = request.args.get('mode', '') == 'group-settings'

@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
def __call__(self, form, field): def __call__(self, form, field):
import apprise import apprise
apobj = apprise.Apprise() apobj = apprise.Apprise()
# so that the custom endpoints are registered
from changedetectionio.apprise import apprise_custom_api_call_wrapper
for server_url in field.data: for server_url in field.data:
if not apobj.add(server_url): if not apobj.add(server_url):
message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url)) message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))

@ -1,10 +1,4 @@
from bs4 import BeautifulSoup
from inscriptis import get_text
from jsonpath_ng.ext import parse
from typing import List from typing import List
from inscriptis.model.config import ParserConfig
from xml.sax.saxutils import escape as xml_escape
import json import json
import re import re
@ -39,6 +33,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False): def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
html_block = "" html_block = ""
r = soup.select(include_filters, separator="") r = soup.select(include_filters, separator="")
@ -56,6 +51,7 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
return html_block return html_block
def subtractive_css_selector(css_selector, html_content): def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector): for item in soup.select(css_selector):
item.decompose() item.decompose()
@ -181,6 +177,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
# Extract/find element # Extract/find element
def extract_element(find='title', html_content=''): def extract_element(find='title', html_content=''):
from bs4 import BeautifulSoup
#Re #106, be sure to handle when its not found #Re #106, be sure to handle when its not found
element_text = None element_text = None
@ -194,6 +191,8 @@ def extract_element(find='title', html_content=''):
# #
def _parse_json(json_data, json_filter): def _parse_json(json_data, json_filter):
from jsonpath_ng.ext import parse
if json_filter.startswith("json:"): if json_filter.startswith("json:"):
jsonpath_expression = parse(json_filter.replace('json:', '')) jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data) match = jsonpath_expression.find(json_data)
@ -242,6 +241,8 @@ def _get_stripped_text_from_json_match(match):
# json_filter - ie json:$..price # json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
from bs4 import BeautifulSoup
stripped_text_from_html = False stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@ -352,6 +353,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
return "\n".encode('utf8').join(output) return "\n".encode('utf8').join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>' pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
def repl(m): def repl(m):
text = m.group(1) text = m.group(1)
@ -360,6 +362,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content) return re.sub(pattern, repl, html_content)
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str: def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
"""Converts html string to a string with just the text. If ignoring """Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also rendering anchor tag content is enable, anchor tag content are also
included in the text included in the text

@ -1,9 +1,10 @@
import apprise
import time import time
from apprise import NotifyFormat from apprise import NotifyFormat
import json import apprise
from loguru import logger from loguru import logger
valid_tokens = { valid_tokens = {
'base_url': '', 'base_url': '',
'current_snapshot': '', 'current_snapshot': '',
@ -34,87 +35,11 @@ valid_notification_formats = {
default_notification_format_for_watch: default_notification_format_for_watch default_notification_format_for_watch: default_notification_format_for_watch
} }
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
url = kwargs['meta'].get('url')
if url.startswith('post'):
r = requests.post
elif url.startswith('get'):
r = requests.get
elif url.startswith('put'):
r = requests.put
elif url.startswith('delete'):
r = requests.delete
url = url.replace('post://', 'http://')
url = url.replace('posts://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('get://', 'http://')
url = url.replace('gets://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('delete://', 'http://')
url = url.replace('deletes://', 'https://')
headers = {}
params = {}
auth = None
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
# Try to auto-guess if it's JSON
try:
json.loads(body)
headers['Content-Type'] = 'application/json; charset=utf-8'
except ValueError as e:
pass
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)
def process_notification(n_object, datastore): def process_notification(n_object, datastore):
# so that the custom endpoints are registered
from changedetectionio.apprise import apprise_custom_api_call_wrapper
from .safe_jinja import render as jinja_render from .safe_jinja import render as jinja_render
now = time.time() now = time.time()
if n_object.get('notification_timestamp'): if n_object.get('notification_timestamp'):

@ -11,7 +11,6 @@ from threading import Lock
import json import json
import os import os
import re import re
import requests
import secrets import secrets
import threading import threading
import time import time
@ -270,6 +269,7 @@ class ChangeDetectionStore:
self.needs_write_urgent = True self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True): def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
import requests
if extras is None: if extras is None:
extras = {} extras = {}

Loading…
Cancel
Save