From 4be0fafa93ba0abd7954359dd33a9751bef84eb1 Mon Sep 17 00:00:00 2001
From: Constantin Hong
Date: Tue, 16 Jan 2024 17:48:16 +0900
Subject: [PATCH] Support Loguru as a logger (#2036)

---
 .github/workflows/test-only.yml               | 23 ++++++--
 Dockerfile                                    |  5 ++
 changedetectionio/__init__.py                 | 53 ++++++++++++++----
 .../blueprint/browser_steps/__init__.py       | 12 ++--
 .../blueprint/browser_steps/browser_steps.py  | 17 +++---
 changedetectionio/content_fetcher.py          | 20 +++----
 changedetectionio/flask_app.py                | 35 ++++++------
 changedetectionio/importer.py                 |  9 +--
 changedetectionio/model/Watch.py              |  6 +-
 changedetectionio/notification.py             |  3 +-
 changedetectionio/processors/__init__.py      |  3 +-
 .../processors/text_json_diff.py              |  6 +-
 changedetectionio/store.py                    | 56 +++++++++++--------
 changedetectionio/tests/conftest.py           | 23 ++++++++
 .../tests/fetchers/test_content.py            |  2 +-
 changedetectionio/update_worker.py            | 25 +++++----
 docker-compose.yml                            |  4 ++
 requirements.txt                              |  2 +
 18 files changed, 199 insertions(+), 105 deletions(-)

diff --git a/.github/workflows/test-only.yml b/.github/workflows/test-only.yml
index 666fe051..8655964f 100644
--- a/.github/workflows/test-only.yml
+++ b/.github/workflows/test-only.yml
@@ -38,7 +38,7 @@ jobs:
       - name: Build changedetection.io container for testing
         run: |
           # Build a changedetection.io container and start testing inside
-          docker build . -t test-changedetectionio
+          docker build --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
           # Debug info
           docker run test-changedetectionio bash -c 'pip list'

@@ -50,10 +50,15 @@ jobs:
       - name: Test built container with pytest
         run: |
           # Unit tests
+          echo "run test with unittest"
           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'

           # All tests
+          echo "run test with pytest"
+          # The default pytest logger_level is TRACE
+          # To change logger_level for pytest (tests/conftest.py),
+          # append the docker run option, e.g. '-e LOGGER_LEVEL=DEBUG'
           docker run --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'

       - name: Test built container selenium+browserless/playwright
@@ -105,6 +110,13 @@ jobs:
           # and IPv6
           curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid

+          # Check whether TRACE log is enabled.
+          # Also, check whether TRACE comes from STDERR
+          docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1
+          # Check whether DEBUG comes from STDOUT
+          docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
+
           docker kill test-changedetectionio

       - name: Test changedetection.io SIGTERM and SIGINT signal shutdown
@@ -118,8 +130,9 @@ jobs:
           sleep 3
           # invert the check (it should be not 0/not running)
           docker ps
-          # check signal catch(STDOUT) log
-          docker logs sig-test | grep 'Shutdown: Got Signal - SIGINT' || exit 1
+          # check the signal catch (STDERR) log. Because of
+          # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
+          docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
           test -z "`docker ps|grep sig-test`"
           if [ $? -ne 0 ]
           then
@@ -139,7 +152,9 @@ jobs:
           sleep 3
           # invert the check (it should be not 0/not running)
           docker ps
-          docker logs sig-test | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
+          # check the signal catch (STDERR) log. Because of
+          # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
+          docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
           test -z "`docker ps|grep sig-test`"
           if [ $? -ne 0 ]
           then
diff --git a/Dockerfile b/Dockerfile
index 9c85abc8..68994e89 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -58,6 +58,11 @@ COPY changedetectionio /app/changedetectionio
 # Starting wrapper
 COPY changedetection.py /app/changedetection.py

+# For GitHub Actions test purposes (test-only.yml).
+# In production it is effectively LOGGER_LEVEL=''.
+ARG LOGGER_LEVEL=''
+ENV LOGGER_LEVEL "$LOGGER_LEVEL"
+
 WORKDIR /app
 CMD ["python", "./changedetection.py", "-d", "/datastore"]

diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py
index c88c0eab..46ae3ea7 100644
--- a/changedetectionio/__init__.py
+++ b/changedetectionio/__init__.py
@@ -17,6 +17,7 @@ import sys

 from changedetectionio import store
 from changedetectionio.flask_app import changedetection_app
+from loguru import logger


 # Only global so we can access it in the signal handler
@@ -28,9 +29,9 @@ def sigshutdown_handler(_signo, _stack_frame):
     global app
     global datastore
     name = signal.Signals(_signo).name
-    print(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
+    logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
     datastore.sync_to_json()
-    print(f'Sync JSON to disk complete.')
+    logger.success('Sync JSON to disk complete.')
     # This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it.
     # Solution: move to gevent or other server in the future (#2014)
     datastore.stop_thread = True
@@ -57,13 +58,22 @@ def main():
     datastore_path = os.path.join(os.getcwd(), "../datastore")

     try:
-        opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:", "port")
+        opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
     except getopt.GetoptError:
-        print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
+        print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [logger level]')
         sys.exit(2)

     create_datastore_dir = False

+    # Set a default logger level
+    logger_level = 'DEBUG'
+    # Set the logger level via shell env variable
+    # Used by the Dockerfile for CICD
+    # To set the logger level for pytest, see the app function in tests/conftest.py
+    if os.getenv("LOGGER_LEVEL"):
+        level = os.getenv("LOGGER_LEVEL")
+        logger_level = int(level) if level.isdigit() else level.upper()
+
     for opt, arg in opts:
         if opt == '-s':
             ssl_mode = True
@@ -78,7 +88,7 @@ def main():
             datastore_path = arg

         if opt == '-6':
-            print ("Enabling IPv6 listen support")
+            logger.success("Enabling IPv6 listen support")
             ipv6_enabled = True

         # Cleanup (remove text files that arent in the index)
@@ -89,6 +99,25 @@ def main():
         if opt == '-C':
             create_datastore_dir = True

+        if opt == '-l':
+            logger_level = int(arg) if arg.isdigit() else arg.upper()
+
+    # Without this, Loguru's default handler would duplicate the log output
+    logger.remove()
+    try:
+        log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
+        logger.configure(handlers=[
+            {"sink": sys.stdout, "level": logger_level,
+             "filter" : lambda record: record['level'].name in log_level_for_stdout},
+            {"sink": sys.stderr, "level": logger_level,
+             "filter": lambda record: record['level'].name not in log_level_for_stdout},
+            ])
+    # Catch a negative number or an unknown log level name
+    except ValueError:
+        print("Available log level names: TRACE, DEBUG(default), INFO, SUCCESS,"
+              " WARNING, ERROR, CRITICAL")
+        sys.exit(2)
+
     # isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
     app_config = {'datastore_path': datastore_path}

@@ -96,17 +125,19 @@ def main():
         if create_datastore_dir:
             os.mkdir(app_config['datastore_path'])
         else:
-            print(
-                "ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists or specify a directory with the -d option.\n"
-                "Or use the -C parameter to create the directory.".format(app_config['datastore_path']), file=sys.stderr)
+            logger.critical(
+                f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
+                f" does not exist, cannot start, please make sure the"
+                f" directory exists or specify a directory with the -d option.\n"
+                f"Or use the -C parameter to create the directory.")
             sys.exit(2)

     try:
         datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
     except JSONDecodeError as e:
         # Dont' start if the JSON DB looks corrupt
-        print ("ERROR: JSON DB or Proxy List JSON at '{}' appears to be corrupt, aborting".format(app_config['datastore_path']))
-        print(str(e))
+        logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
+        logger.critical(str(e))
         return

     app = changedetection_app(app_config, datastore)
@@ -145,7 +176,7 @@ def main():
     #  proxy_set_header X-Forwarded-Prefix /app;

     if os.getenv('USE_X_SETTINGS'):
-        print ("USE_X_SETTINGS is ENABLED\n")
+        logger.info("USE_X_SETTINGS is ENABLED")
         from werkzeug.middleware.proxy_fix import ProxyFix
         app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
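The handler split above is the heart of the patch: DEBUG and SUCCESS records go to STDOUT, every other level goes to STDERR, which is what the workflow's `docker logs` stream checks rely on. A minimal standalone sketch of that behaviour (the demo messages are illustrative, not part of the patch):

```python
import sys
from loguru import logger

# Mirrors changedetectionio/__init__.py: DEBUG and SUCCESS -> STDOUT,
# every other level -> STDERR.
log_level_for_stdout = {'DEBUG', 'SUCCESS'}
logger.remove()  # drop Loguru's default stderr handler so nothing is emitted twice
logger.configure(handlers=[
    {"sink": sys.stdout, "level": "TRACE",
     "filter": lambda record: record['level'].name in log_level_for_stdout},
    {"sink": sys.stderr, "level": "TRACE",
     "filter": lambda record: record['level'].name not in log_level_for_stdout},
])

logger.debug("goes to STDOUT")     # visible via: docker logs <container> 2>/dev/null
logger.success("goes to STDOUT")
logger.trace("goes to STDERR")     # visible via: docker logs <container> 2>&1 1>/dev/null
logger.critical("goes to STDERR")
```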
diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py
index 227c8ec2..999b81bc 100644
--- a/changedetectionio/blueprint/browser_steps/__init__.py
+++ b/changedetectionio/blueprint/browser_steps/__init__.py
@@ -23,11 +23,11 @@

 from distutils.util import strtobool
 from flask import Blueprint, request, make_response
-import logging
 import os

 from changedetectionio.store import ChangeDetectionStore
 from changedetectionio.flask_app import login_optionally_required
+from loguru import logger

 browsersteps_sessions = {}
 io_interface_context = None
@@ -88,7 +88,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
             if parsed.password:
                 proxy['password'] = parsed.password

-            print("Browser Steps: UUID {} selected proxy {}".format(watch_uuid, proxy_url))
+            logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")

         # Tell Playwright to connect to Chrome and setup a new session via our stepper interface
         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
@@ -115,10 +115,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         if not watch_uuid:
             return make_response('No Watch UUID specified', 500)

-        print("Starting connection with playwright")
-        logging.debug("browser_steps.py connecting")
+        logger.debug("Starting connection with playwright")
+        logger.debug("browser_steps.py connecting")
         browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
-        print("Starting connection with playwright - done")
+        logger.debug("Starting connection with playwright - done")
         return {'browsersteps_session_id': browsersteps_session_id}

     @login_optionally_required
@@ -189,7 +189,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                               optional_value=step_optional_value)

         except Exception as e:
-            print("Exception when calling step operation", step_operation, str(e))
+            logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
             # Try to find something of value to give back to the user
             return make_response(str(e).splitlines()[0], 401)

diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py
index 81f7f160..40925a3f 100644
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -4,6 +4,7 @@ import os
 import time
 import re
 from random import randint
+from loguru import logger

 # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
 # 0- off, 1- on
@@ -53,7 +54,7 @@ class steppable_browser_interface():
         if call_action_name == 'choose_one':
             return

-        print("> action calling", call_action_name)
+        logger.debug(f"> Action calling '{call_action_name}'")
         # https://playwright.dev/python/docs/selectors#xpath-selectors
         if selector and selector.startswith('/') and not selector.startswith('//'):
             selector = "xpath=" + selector
@@ -72,7 +73,7 @@ class steppable_browser_interface():
         action_handler(selector, optional_value)
         self.page.wait_for_timeout(1.5 * 1000)

-        print("Call action done in", time.time() - now)
+        logger.debug(f"Call action done in {time.time()-now:.2f}s")

     def action_goto_url(self, selector=None, value=None):
         # self.page.set_viewport_size({"width": 1280, "height": 5000})
@@ -82,7 +83,7 @@ class steppable_browser_interface():
         #and also wait for seconds ?
         #await page.waitForTimeout(1000);
         #await page.waitForTimeout(extra_wait_ms);
-        print("Time to goto URL ", time.time() - now)
+        logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
         return response

     def action_click_element_containing_text(self, selector=None, value=''):
@@ -103,7 +104,7 @@ class steppable_browser_interface():
         return response

     def action_click_element(self, selector, value):
-        print("Clicking element")
+        logger.debug("Clicking element")
         if not len(selector.strip()):
             return

@@ -111,7 +112,7 @@ class steppable_browser_interface():

     def action_click_element_if_exists(self, selector, value):
         import playwright._impl._errors as _api_types
-        print("Clicking element if exists")
+        logger.debug("Clicking element if exists")
         if not len(selector.strip()):
             return
         try:
@@ -227,11 +228,11 @@ class browsersteps_live_ui(steppable_browser_interface):
         # Listen for all console events and handle errors
         self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))

-        print("Time to browser setup", time.time() - now)
+        logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
         self.page.wait_for_timeout(1 * 1000)

     def mark_as_closed(self):
-        print("Page closed, cleaning up..")
+        logger.debug("Page closed, cleaning up..")

     @property
     def has_expired(self):
@@ -257,7 +258,7 @@ class browsersteps_live_ui(steppable_browser_interface):
             xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
             # So the JS will find the smallest one first
             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-            print("Time to complete get_current_state of browser", time.time() - now)
+            logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s")
             # except
             # playwright._impl._api_types.Error: Browser closed.
             # @todo show some countdown timer?
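The browser-steps changes repeat one pattern: capture `now = time.time()` and log the elapsed seconds with `:.2f`. If that pattern keeps spreading, a small context manager could centralise it; a hedged sketch (the `timed` helper is mine, not part of this patch):

```python
import time
from contextlib import contextmanager
from loguru import logger

@contextmanager
def timed(label):
    # Logs how long the wrapped block took, in the same "{:.2f}s"
    # style the browser_steps methods use.
    start = time.time()
    try:
        yield
    finally:
        logger.debug(f"{label} {time.time() - start:.2f}s")

# Usage, roughly equivalent to action_goto_url's manual timing:
with timed("Time to goto URL"):
    time.sleep(0.1)  # stand-in for self.page.goto(url)
```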
diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py
index 4c8a382c..00da86c9 100644
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -4,12 +4,12 @@ from urllib.parse import urlparse
 import chardet
 import hashlib
 import json
-import logging
 import os
 import requests
 import sys
 import time
 import urllib.parse
+from loguru import logger

 visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary'

@@ -47,7 +47,7 @@ class BrowserStepsStepException(Exception):
     def __init__(self, step_n, original_e):
         self.step_n = step_n
         self.original_e = original_e
-        print(f"Browser Steps exception at step {self.step_n}", str(original_e))
+        logger.debug(f"Browser Steps exception at step {self.step_n} {str(original_e)}")
         return


@@ -188,7 +188,7 @@ class Fetcher():

         for step in valid_steps:
             step_n += 1
-            print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
+            logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
             self.screenshot_step("before-" + str(step_n))
             self.save_step_html("before-" + str(step_n))
             try:
@@ -205,8 +205,8 @@ class Fetcher():
                                     optional_value=optional_value)
                 self.screenshot_step(step_n)
                 self.save_step_html(step_n)
-
             except (Error, TimeoutError) as e:
+                logger.debug(str(e))
                 # Stop processing here
                 raise BrowserStepsStepException(step_n=step_n, original_e=e)

@@ -295,14 +295,14 @@ class base_html_playwright(Fetcher):
         if self.browser_steps_screenshot_path is not None:
             destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
-            logging.debug("Saving step screenshot to {}".format(destination))
+            logger.debug(f"Saving step screenshot to {destination}")
             with open(destination, 'wb') as f:
                 f.write(screenshot)

     def save_step_html(self, step_n):
         content = self.page.content()
         destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
-        logging.debug("Saving step HTML to {}".format(destination))
+        logger.debug(f"Saving step HTML to {destination}")
         with open(destination, 'w') as f:
             f.write(content)

@@ -491,7 +491,7 @@ class base_html_playwright(Fetcher):
             if response is None:
                 context.close()
                 browser.close()
-                print("Content Fetcher > Response object was none")
+                logger.debug("Content Fetcher > Response object was none")
                 raise EmptyReply(url=url, status_code=None)

             try:
@@ -503,7 +503,7 @@ class base_html_playwright(Fetcher):
                 # This can be ok, we will try to grab what we could retrieve
                 pass
             except Exception as e:
-                print("Content Fetcher > Other exception when executing custom JS code", str(e))
+                logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
                 context.close()
                 browser.close()
                 raise PageUnloadable(url=url, status_code=None, message=str(e))
@@ -524,7 +524,7 @@ class base_html_playwright(Fetcher):
             if len(self.page.content().strip()) == 0:
                 context.close()
                 browser.close()
-                print("Content Fetcher > Content was empty")
+                logger.debug("Content Fetcher > Content was empty")
                 raise EmptyReply(url=url, status_code=response.status)

             # Run Browser Steps here
@@ -676,7 +676,7 @@ class base_html_webdriver(Fetcher):
         try:
             self.driver.quit()
         except Exception as e:
-            print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
+            logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")


# "html_requests" is listed as the default fetcher in store.py!
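Most fetcher errors above are logged with `logger.debug(str(e))`, which records the message but drops the traceback. Loguru can attach the stack automatically; a sketch of that alternative (`fetch` here is a stand-in, not a real fetcher method):

```python
from loguru import logger

def fetch(url):
    # Stand-in for a fetcher call that can fail.
    raise TimeoutError(f"timed out fetching {url}")

try:
    fetch("http://example.com")
except Exception:
    # logger.exception() logs at ERROR level and appends the full traceback;
    # logger.opt(exception=True).debug(...) does the same at DEBUG level.
    logger.exception("Content Fetcher > fetch failed")
```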
diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py
index f077e688..360eff0e 100644
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -12,7 +12,8 @@ from functools import wraps
 from threading import Event
 import datetime
 import flask_login
-import logging
+from loguru import logger
+import sys
 import os
 import pytz
 import queue
@@ -210,6 +211,8 @@ def login_optionally_required(func):
     return decorated_view

 def changedetection_app(config=None, datastore_o=None):
+    logger.trace("TRACE log is enabled")
+
     global datastore
     datastore = datastore_o

@@ -1492,7 +1495,7 @@ def changedetection_app(config=None, datastore_o=None):

         except Exception as e:
-            logging.error("Error sharing -{}".format(str(e)))
+            logger.error(f"Error sharing -{str(e)}")
             flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')

     # https://changedetection.io/share/VrMv05wpXyQa
@@ -1605,7 +1608,7 @@ def notification_runner():
                 sent_obj = notification.process_notification(n_object, datastore)

             except Exception as e:
-                logging.error("Watch URL: {}  Error {}".format(n_object['watch_url'], str(e)))
+                logger.error(f"Watch URL: {n_object['watch_url']}  Error {str(e)}")

                 # UUID wont be present when we submit a 'test' from the global settings
                 if 'uuid' in n_object:
@@ -1628,7 +1631,7 @@ def ticker_thread_check_time_launch_checks():
     proxy_last_called_time = {}

     recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
-    print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
+    logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")

     # Spin up Workers that do the fetching
     # Can be overriden by ENV or use the default settings
@@ -1673,7 +1676,7 @@ def ticker_thread_check_time_launch_checks():
                 now = time.time()
                 watch = datastore.data['watching'].get(uuid)
                 if not watch:
-                    logging.error("Watch: {} no longer present.".format(uuid))
+                    logger.error(f"Watch: {uuid} no longer present.")
                     continue

                 # No need todo further processing if it's paused
@@ -1706,10 +1709,10 @@ def ticker_thread_check_time_launch_checks():
                             time_since_proxy_used = int(time.time() - proxy_last_used_time)
                             if time_since_proxy_used < proxy_list_reuse_time_minimum:
                                 # Not enough time difference reached, skip this watch
-                                print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
-                                                                                                                                  watch_proxy,
-                                                                                                                                  time_since_proxy_used,
-                                                                                                                                  proxy_list_reuse_time_minimum))
+                                logger.debug(f"> Skipped UUID {uuid} "
+                                             f"using proxy '{watch_proxy}', not "
+                                             f"enough time between proxy requests "
+                                             f"{time_since_proxy_used}s/{proxy_list_reuse_time_minimum}s")
                                 continue
                             else:
                                 # Record the last used time
@@ -1717,14 +1720,12 @@ def ticker_thread_check_time_launch_checks():

                     # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
                     priority = int(time.time())
-                    print(
-                        "> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
-                            uuid,
-                            watch['last_checked'],
-                            now,
-                            priority,
-                            watch.jitter_seconds,
-                            now - watch['last_checked']))
+                    logger.debug(
+                        f"> Queued watch UUID {uuid} "
+                        f"last checked at {watch['last_checked']} "
+                        f"queued at {now:0.2f} priority {priority} "
+                        f"jitter {watch.jitter_seconds:0.2f}s, "
+                        f"{now - watch['last_checked']:0.2f}s since last checked")

                     # Into the queue with you
                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))
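`logger.trace("TRACE log is enabled")` is exactly what the workflow greps for: the record is only emitted when the configured handler level is TRACE or lower. Loguru's built-in severities are TRACE=5, DEBUG=10, INFO=20, SUCCESS=25, WARNING=30, ERROR=40 and CRITICAL=50; a small sketch of the thresholding:

```python
import sys
from loguru import logger

logger.remove()
logger.add(sys.stderr, level="DEBUG")
logger.trace("TRACE log is enabled")   # suppressed: TRACE(5) < DEBUG(10)

logger.remove()
logger.add(sys.stderr, level="TRACE")
logger.trace("TRACE log is enabled")   # emitted, so the CI grep succeeds
```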
diff --git a/changedetectionio/importer.py b/changedetectionio/importer.py
index c1eda44d..ba7b1139 100644
--- a/changedetectionio/importer.py
+++ b/changedetectionio/importer.py
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
 import time
 import validators
 from wtforms import ValidationError
+from loguru import logger

 from changedetectionio.forms import validate_url

@@ -195,7 +196,7 @@ class import_xlsx_wachete(Importer):
                 try:
                     validate_url(data.get('url'))
                 except ValidationError as e:
-                    print(">> import URL error", data.get('url'), str(e))
+                    logger.error(f">> Import URL error {data.get('url')} {str(e)}")
                     flash(f"Error processing row number {row_id}, URL value was incorrect, row was skipped.", 'error')
                     # Don't bother processing anything else on this row
                     continue
@@ -209,7 +210,7 @@ class import_xlsx_wachete(Importer):
                     self.new_uuids.append(new_uuid)
                     good += 1
             except Exception as e:
-                print(e)
+                logger.error(e)
                 flash(f"Error processing row number {row_id}, check all cell data types are correct, row was skipped.", 'error')
             else:
                 row_id += 1
@@ -264,7 +265,7 @@ class import_xlsx_custom(Importer):
                         try:
                             validate_url(url)
                         except ValidationError as e:
-                            print(">> Import URL error", url, str(e))
+                            logger.error(f">> Import URL error {url} {str(e)}")
                             flash(f"Error processing row number {row_i}, URL value was incorrect, row was skipped.", 'error')
                             # Don't bother processing anything else on this row
                             url = None
@@ -293,7 +294,7 @@ class import_xlsx_custom(Importer):
                     self.new_uuids.append(new_uuid)
                     good += 1
             except Exception as e:
-                print(e)
+                logger.error(e)
                 flash(f"Error processing row number {row_i}, check all cell data types are correct, row was skipped.", 'error')
             else:
                 row_i += 1
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index f785c815..d019532c 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -1,10 +1,10 @@
 from distutils.util import strtobool
-import logging
 import os
 import re
 import time
 import uuid
 from pathlib import Path
+from loguru import logger

 # Allowable protocols, protects against javascript: etc
 # file:// is further checked by ALLOW_FILE_URI
@@ -122,7 +122,7 @@ class model(dict):

     def ensure_data_dir_exists(self):
         if not os.path.isdir(self.watch_data_dir):
-            print ("> Creating data dir {}".format(self.watch_data_dir))
+            logger.debug(f"> Creating data dir {self.watch_data_dir}")
             os.mkdir(self.watch_data_dir)

     @property
@@ -211,7 +211,7 @@ class model(dict):
         # Read the history file as a dict
         fname = os.path.join(self.watch_data_dir, "history.txt")
         if os.path.isfile(fname):
-            logging.debug("Reading history index " + str(time.time()))
+            logger.debug("Reading watch history index")
             with open(fname, "r") as f:
                 for i in f.readlines():
                     if ',' in i:
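The old `logging.debug` call appended `str(time.time())` by hand; Loguru stamps every record itself, and the handler format decides how the time is rendered, so the patch can drop the manual timestamp. A sketch (this custom format string is an example, not what the patch configures):

```python
import sys
from loguru import logger

logger.remove()
logger.add(sys.stderr,
           format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {message}")
logger.debug("Reading watch history index")  # timestamp is added automatically
```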
diff --git a/changedetectionio/notification.py b/changedetectionio/notification.py
index 436d08c6..7d143d5b 100644
--- a/changedetectionio/notification.py
+++ b/changedetectionio/notification.py
@@ -3,6 +3,7 @@ import time
 from jinja2 import Environment, BaseLoader
 from apprise import NotifyFormat
 import json
+from loguru import logger

 valid_tokens = {
     'base_url': '',
@@ -146,7 +147,7 @@ def process_notification(n_object, datastore):
     with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
         for url in n_object['notification_urls']:
             url = url.strip()
-            print(">> Process Notification: AppRise notifying {}".format(url))
+            logger.info(">> Process Notification: AppRise notifying {}".format(url))
             url = jinja2_env.from_string(url).render(**notification_parameters)

             # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py
index 7aa8994a..3239e61d 100644
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -5,6 +5,7 @@ import re
 from changedetectionio import content_fetcher
 from copy import deepcopy
 from distutils.util import strtobool
+from loguru import logger

 class difference_detection_processor():

@@ -69,7 +70,7 @@ class difference_detection_processor():
         proxy_url = None
         if preferred_proxy_id:
             proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
-            print(f"Using proxy Key: {preferred_proxy_id} as Proxy URL {proxy_url}")
+            logger.debug(f"Using proxy Key: {preferred_proxy_id} as Proxy URL {proxy_url}")

         # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
         # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py
index b503c5be..94fea913 100644
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -2,7 +2,6 @@

 import hashlib
 import json
-import logging
 import os
 import re
 import urllib3
@@ -12,6 +11,7 @@ from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACC
 from copy import deepcopy
 from . import difference_detection_processor
 from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
+from loguru import logger

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -340,10 +340,10 @@ class perform_site_check(difference_detection_processor):
                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
                 # One or more lines? unsure?
                 if not has_unique_lines:
-                    logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
+                    logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False")
                     changed_detected = False
                 else:
-                    logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
+                    logger.debug(f"check_unique_lines: UUID {uuid} had unique content")

         # Always record the new checksum
         update_obj["previous_md5"] = fetched_md5
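Messages like the `check_unique_lines` ones interpolate the watch UUID into every string. Loguru's `bind()` offers an alternative the patch doesn't use: carry the UUID as structured context and surface it via the handler format. A hedged sketch (the UUID value is made up):

```python
import sys
from loguru import logger

logger.remove()
logger.add(sys.stderr, format="{message} | {extra}")

# bind() returns a logger that attaches the given key/values to every record.
watch_logger = logger.bind(watch_uuid="hypothetical-uuid")
watch_logger.debug("check_unique_lines: had unique content")
```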
diff --git a/changedetectionio/store.py b/changedetectionio/store.py
index d4214184..3868dfa7 100644
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -9,7 +9,6 @@ from copy import deepcopy, copy
 from os import path, unlink
 from threading import Lock
 import json
-import logging
 import os
 import re
 import requests
@@ -17,6 +16,7 @@ import secrets
 import threading
 import time
 import uuid as uuid_builder
+from loguru import logger

 # Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
 BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -42,7 +42,7 @@ class ChangeDetectionStore:
         self.__data = App.model()
         self.datastore_path = datastore_path
         self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
-        print(">>> Datastore path is ", self.json_store_path)
+        logger.info(f"Datastore path is '{self.json_store_path}'")
         self.needs_write = False
         self.start_time = time.time()
         self.stop_thread = False
@@ -83,12 +83,12 @@ class ChangeDetectionStore:
                     for uuid, watch in self.__data['watching'].items():
                         watch['uuid']=uuid
                         self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
-                        print("Watching:", uuid, self.__data['watching'][uuid]['url'])
+                        logger.debug(f"Watching: {uuid} {self.__data['watching'][uuid]['url']}")

         # First time ran, Create the datastore.
         except (FileNotFoundError):
             if include_default_watches:
-                print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
+                logger.critical(f"No JSON DB found at {self.json_store_path}, creating JSON store at {self.datastore_path}")
                 self.add_watch(url='https://news.ycombinator.com/',
                                tag='Tech news',
                                extras={'fetch_backend': 'html_requests'})
@@ -139,7 +139,7 @@ class ChangeDetectionStore:
         save_data_thread = threading.Thread(target=self.save_datastore).start()

     def set_last_viewed(self, uuid, timestamp):
-        logging.debug("Setting watch UUID: {} last viewed to {}".format(uuid, int(timestamp)))
+        logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
         self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
         self.needs_write = True

@@ -316,7 +316,7 @@ class ChangeDetectionStore:
                 apply_extras['include_filters'] = [res['css_filter']]

         except Exception as e:
-            logging.error("Error fetching metadata for shared watch link", url, str(e))
+            logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}")
             flash("Error fetching metadata for {}".format(url), 'error')
             return False
         from .model.Watch import is_safe_url
@@ -345,7 +345,7 @@ class ChangeDetectionStore:

         new_uuid = new_watch.get('uuid')

-        logging.debug("Added URL {} - {}".format(url, new_uuid))
+        logger.debug(f"Adding URL {url} - {new_uuid}")

         for k in ['uuid', 'history', 'last_checked', 'last_changed', 'newest_history_key', 'previous_md5', 'viewed']:
             if k in apply_extras:
@@ -362,7 +362,7 @@ class ChangeDetectionStore:
         if write_to_disk_now:
             self.sync_to_json()

-        print("added ", url)
+        logger.debug(f"Added '{url}'")

         return new_uuid

@@ -416,14 +416,13 @@ class ChangeDetectionStore:

     def sync_to_json(self):
-        logging.info("Saving JSON..")
-        print("Saving JSON..")
+        logger.info("Saving JSON..")
         try:
             data = deepcopy(self.__data)
         except RuntimeError as e:
             # Try again in 15 seconds
             time.sleep(15)
-            logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
+            logger.error(f"! Data changed when writing to JSON, trying again.. {str(e)}")
             self.sync_to_json()
             return
         else:
@@ -436,7 +435,7 @@ class ChangeDetectionStore:
                     json.dump(data, json_file, indent=4)
                 os.replace(self.json_store_path+".tmp", self.json_store_path)
             except Exception as e:
-                logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
+                logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")

             self.needs_write = False
             self.needs_write_urgent = False
@@ -447,7 +446,16 @@ class ChangeDetectionStore:

         while True:
             if self.stop_thread:
-                print("Shutting down datastore thread")
+                # Suppress "Logging error in Loguru Handler #0" during CICD.
+                # Not a meaningful difference for a real use-case, just for CICD.
+                # The side effect is a "Shutting down datastore thread" message
+                # at the end of each test.
+                # But it still looks better than the logging error.
+                import sys
+                logger.remove()
+                logger.add(sys.stderr)
+
+                logger.critical("Shutting down datastore thread")
                 return

             if self.needs_write or self.needs_write_urgent:
@@ -463,7 +471,7 @@ class ChangeDetectionStore:
     # Go through the datastore path and remove any snapshots that are not mentioned in the index
     # This usually is not used, but can be handy.
     def remove_unused_snapshots(self):
-        print ("Removing snapshots from datastore that are not in the index..")
+        logger.info("Removing snapshots from datastore that are not in the index..")

         index=[]
         for uuid in self.data['watching']:
@@ -476,7 +484,7 @@ class ChangeDetectionStore:
         for uuid in self.data['watching']:
             for item in pathlib.Path(self.datastore_path).rglob(uuid+"/*.txt"):
                 if not str(item) in index:
-                    print ("Removing",item)
+                    logger.info(f"Removing {item}")
                     unlink(item)

     @property
@@ -562,7 +570,7 @@ class ChangeDetectionStore:
                 if os.path.isfile(filepath):
                     headers.update(parse_headers_from_text_file(filepath))
             except Exception as e:
-                print(f"ERROR reading headers.txt at {filepath}", str(e))
+                logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")

         watch = self.data['watching'].get(uuid)
         if watch:
@@ -573,7 +581,7 @@ class ChangeDetectionStore:
                 if os.path.isfile(filepath):
                     headers.update(parse_headers_from_text_file(filepath))
             except Exception as e:
-                print(f"ERROR reading headers.txt at {filepath}", str(e))
+                logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")

             # In /datastore/tag-name.txt
             tags = self.get_all_tags_for_watch(uuid=uuid)
@@ -584,7 +592,7 @@ class ChangeDetectionStore:
                     if os.path.isfile(filepath):
                         headers.update(parse_headers_from_text_file(filepath))
                 except Exception as e:
-                    print(f"ERROR reading headers.txt at {filepath}", str(e))
+                    logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")

         return headers

@@ -602,13 +610,13 @@ class ChangeDetectionStore:
     def add_tag(self, name):
         # If name exists, return that
         n = name.strip().lower()
-        print (f">>> Adding new tag - '{n}'")
+        logger.debug(f">>> Adding new tag - '{n}'")
         if not n:
             return False

         for uuid, tag in self.__data['settings']['application'].get('tags', {}).items():
             if n == tag.get('title', '').lower().strip():
-                print (f">>> Tag {name} already exists")
+                logger.error(f">>> Tag {name} already exists")
                 return uuid

         # Eventually almost everything todo with a watch will apply as a Tag
@@ -670,7 +678,7 @@ class ChangeDetectionStore:
         updates_available = self.get_updates_available()
         for update_n in updates_available:
             if update_n > self.__data['settings']['application']['schema_version']:
-                print ("Applying update_{}".format((update_n)))
+                logger.critical(f"Applying update_{update_n}")
                 # Wont exist on fresh installs
                 if os.path.exists(self.json_store_path):
                     shutil.copyfile(self.json_store_path, self.datastore_path+"/url-watches-before-{}.json".format(update_n))
@@ -678,8 +686,8 @@ class ChangeDetectionStore:
                 try:
                     update_method = getattr(self, "update_{}".format(update_n))()
                 except Exception as e:
-                    print("Error while trying update_{}".format((update_n)))
-                    print(e)
+                    logger.error(f"Error while trying update_{update_n}")
+                    logger.error(e)
                     # Don't run any more updates
                     return
                 else:
@@ -717,7 +725,7 @@ class ChangeDetectionStore:
                     with open(os.path.join(target_path, "history.txt"), "w") as f:
                         f.writelines(history)
                 else:
-                    logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
+                    logger.warning(f"Datastore history directory {target_path} does not exist, skipping history import.")

         # No longer needed, dynamically pulled from the disk when needed.
         # But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
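My reading of the `save_datastore` workaround above: pytest's capture machinery replaces and later closes `sys.stdout`/`sys.stderr`, so handlers registered against the old streams raise "Logging error in Loguru Handler #0" at teardown. Re-binding to the current `sys.stderr` just before the final message sidesteps that; condensed into a sketch:

```python
import sys
from loguru import logger

def reset_logger_sinks():
    # Drop handlers that may point at a closed (captured) stream and
    # re-attach to whatever sys.stderr currently is.
    logger.remove()
    logger.add(sys.stderr)

reset_logger_sinks()
logger.critical("Shutting down datastore thread")
```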
diff --git a/changedetectionio/tests/conftest.py b/changedetectionio/tests/conftest.py
index 79ea5bc8..7a328823 100644
--- a/changedetectionio/tests/conftest.py
+++ b/changedetectionio/tests/conftest.py
@@ -4,6 +4,8 @@ import pytest
 from changedetectionio import changedetection_app
 from changedetectionio import store
 import os
+import sys
+from loguru import logger

 # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
 # Much better boilerplate than the docs
@@ -11,6 +13,15 @@ import os

 global app

+# https://loguru.readthedocs.io/en/latest/resources/migration.html#replacing-caplog-fixture-from-pytest-library
+# Show loguru logs only if CICD pytest fails.
+from loguru import logger
+@pytest.fixture
+def reportlog(pytestconfig):
+    logging_plugin = pytestconfig.pluginmanager.getplugin("logging-plugin")
+    handler_id = logger.add(logging_plugin.report_handler, format="{message}")
+    yield
+    logger.remove(handler_id)

 def cleanup(datastore_path):
     import glob
@@ -41,6 +52,18 @@ def app(request):

     app_config = {'datastore_path': datastore_path, 'disable_checkver' : True}
     cleanup(app_config['datastore_path'])
+
+    logger_level = 'TRACE'
+
+    logger.remove()
+    log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
+    logger.configure(handlers=[
+        {"sink": sys.stdout, "level": logger_level,
+         "filter" : lambda record: record['level'].name in log_level_for_stdout},
+        {"sink": sys.stderr, "level": logger_level,
+         "filter": lambda record: record['level'].name not in log_level_for_stdout},
+        ])
+
     datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
     app = changedetection_app(app_config, datastore)
diff --git a/changedetectionio/tests/fetchers/test_content.py b/changedetectionio/tests/fetchers/test_content.py
index 76b78ce8..bbabe5f1 100644
--- a/changedetectionio/tests/fetchers/test_content.py
+++ b/changedetectionio/tests/fetchers/test_content.py
@@ -37,4 +37,4 @@ def test_fetch_webdriver_content(client, live_server):
     )

     logging.getLogger().info("Looking for correct fetched HTML (text) from server")
-    assert b'cool it works' in res.data
\ No newline at end of file
+    assert b'cool it works' in res.data
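The `reportlog` fixture follows the Loguru migration guide linked in the diff: it routes Loguru records into pytest's logging plugin so they appear in the captured-log section of failing tests. A test opts in simply by requesting the fixture by name (this example test is hypothetical):

```python
from loguru import logger

def test_watch_check_logs_something(client, live_server, reportlog):
    # Anything logged here is attached to pytest's report and shown
    # only when the test fails.
    logger.debug("checking watch fetch behaviour")
    assert client is not None
```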
{watch_uuid}") def send_step_failure_notification(self, watch_uuid, step_n): watch = self.datastore.data['watching'].get(watch_uuid, False) @@ -207,7 +206,7 @@ class update_worker(threading.Thread): 'uuid': watch_uuid }) self.notification_q.put(n_object) - print("Sent step not found notification for {}".format(watch_uuid)) + logger.error(f"Sent step not found notification for {watch_uuid}") def cleanup_error_artifacts(self, uuid): @@ -239,8 +238,9 @@ class update_worker(threading.Thread): contents = b'' process_changedetection_results = True update_obj = {} - print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, - self.datastore.data['watching'][uuid]['url'])) + logger.debug(f"> Processing UUID {uuid} " + f"Priority {queued_item_data.priority} " + f"URL {self.datastore.data['watching'][uuid]['url']}") now = time.time() try: @@ -338,7 +338,7 @@ class update_worker(threading.Thread): # Send notification if we reached the threshold? threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) - print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) + logger.error(f"Filter for {uuid} not found, consecutive_filter_failures: {c}") if threshold > 0 and c >= threshold: if not self.datastore.data['watching'][uuid].get('notification_muted'): self.send_filter_failure_notification(uuid) @@ -372,7 +372,7 @@ class update_worker(threading.Thread): # Other Error, more info is good. err_text += " " + str(e.original_e).splitlines()[0] - print(f"BrowserSteps exception at step {error_step}", str(e.original_e)) + logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}") self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, @@ -386,7 +386,7 @@ class update_worker(threading.Thread): # Send notification if we reached the threshold? threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) - print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c)) + logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}") if threshold > 0 and c >= threshold: if not self.datastore.data['watching'][uuid].get('notification_muted'): self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n) @@ -468,7 +468,7 @@ class update_worker(threading.Thread): # A change was detected if changed_detected: - print (">> Change detected in UUID {} - {}".format(uuid, watch['url'])) + logger.debug(f">> Change detected in UUID {uuid} - {watch['url']}") # Notifications should only trigger on the second time (first time, we gather the initial snapshot) if watch.history_n >= 2: @@ -478,7 +478,8 @@ class update_worker(threading.Thread): except Exception as e: # Catch everything possible here, so that if a worker crashes, we don't lose it until restart! - print("!!!! Exception in update_worker !!!\n", e) + logger.critical("!!!! Exception in update_worker !!!") + logger.critical(str(e)) self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) diff --git a/docker-compose.yml b/docker-compose.yml index cc7c2f52..017a9c59 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,10 @@ services: # - PUID=1000 # - PGID=1000 # + # Log levels are in descending order. 
diff --git a/docker-compose.yml b/docker-compose.yml
index cc7c2f52..017a9c59 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -16,6 +16,10 @@ services:
     #  - PUID=1000
     #  - PGID=1000
     #
+    # Log levels are in descending order (TRACE is the most detailed one)
+    # Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
+    # - LOGGER_LEVEL=DEBUG
+    #
     # Alternative WebDriver/selenium URL, do not use "'s or 's!
     # - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
     #
diff --git a/requirements.txt b/requirements.txt
index cbd1e365..5bc269c9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -72,3 +72,5 @@ pytest-flask ~=1.2

 # Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
 jsonschema==4.17.3
+
+loguru
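For completeness, how the new `LOGGER_LEVEL` variable (and the `-l` flag) is interpreted end to end: digits pass through as an integer severity, anything else is upper-cased and must be a known level name. A condensed sketch of the parsing and failure path used in `main()`:

```python
import os
import sys
from loguru import logger

level = os.getenv("LOGGER_LEVEL", "DEBUG")
logger_level = int(level) if level.isdigit() else level.upper()

logger.remove()
try:
    # Loguru accepts either a level name or a number (TRACE=5 ... CRITICAL=50)
    logger.add(sys.stderr, level=logger_level)
except ValueError:
    # Raised for a negative number or an unknown level name
    print("Available log level names: TRACE, DEBUG(default), INFO, SUCCESS,"
          " WARNING, ERROR, CRITICAL")
    sys.exit(2)

logger.info(f"Logger configured at level {logger_level}")
```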