Installation via pip (#186)

Builder for https://pypi.org/project/changedetection.io/
pull/184/head 0.39
dgtlmoon 3 years ago committed by GitHub
parent 642ad5660d
commit a11b6daa2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -20,6 +20,7 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8
run: |
@ -30,14 +31,14 @@ jobs:
- name: Create release metadata
run: |
# COPY'ed by Dockerfile into backend/ of the image, then read by the server in store.py
echo ${{ github.sha }} > backend/source.txt
echo ${{ github.ref }} > backend/tag.txt
# COPY'ed by Dockerfile into changedetectionio/ of the image, then read by the server in store.py
echo ${{ github.sha }} > changedetectionio/source.txt
echo ${{ github.ref }} > changedetectionio/tag.txt
- name: Test with pytest
run: |
# Each test is totally isolated and performs its own cleanup/reset
cd backend; ./run_all_tests.sh
cd changedetectionio; ./run_all_tests.sh
- name: Set up QEMU
uses: docker/setup-qemu-action@v1

@ -29,6 +29,7 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8
run: |
@ -39,14 +40,14 @@ jobs:
- name: Create release metadata
run: |
# COPY'ed by Dockerfile into backend/ of the image, then read by the server in store.py
echo ${{ github.sha }} > backend/source.txt
echo ${{ github.ref }} > backend/tag.txt
# COPY'ed by Dockerfile into changedetectionio/ of the image, then read by the server in store.py
echo ${{ github.sha }} > changedetectionio/source.txt
echo ${{ github.ref }} > changedetectionio/tag.txt
- name: Test with pytest
run: |
# Each test is totally isolated and performs its own cleanup/reset
cd backend; ./run_all_tests.sh
cd changedetectionio; ./run_all_tests.sh
- name: Set up QEMU
uses: docker/setup-qemu-action@v1

@ -20,6 +20,7 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8
run: |
@ -30,14 +31,14 @@ jobs:
- name: Create release metadata
run: |
# COPY'ed by Dockerfile into backend/ of the image, then read by the server in store.py
echo ${{ github.sha }} > backend/source.txt
echo ${{ github.ref }} > backend/tag.txt
# COPY'ed by Dockerfile into changedetectionio/ of the image, then read by the server in store.py
echo ${{ github.sha }} > changedetectionio/source.txt
echo ${{ github.ref }} > changedetectionio/tag.txt
- name: Test with pytest
run: |
# Each test is totally isolated and performs its own cleanup/reset
cd backend; ./run_all_tests.sh
cd changedetectionio; ./run_all_tests.sh
- name: Set up QEMU
uses: docker/setup-qemu-action@v1

@ -19,15 +19,28 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
# Each test is totally isolated and performs its own cleanup/reset
cd backend; ./run_all_tests.sh
cd changedetectionio; ./run_all_tests.sh
- name: Test that pip builds without error
run: |
pip3 --version
python3 -m pip install wheel
python3 setup.py bdist_wheel
python3 -m pip install dist/changedetection.io-*-none-any.whl --force
changedetection.io -d /tmp -p 10000 &
sleep 3
curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
killall -9 changedetection.io

2
.gitignore vendored

@ -5,3 +5,5 @@ datastore/url-watches.json
datastore/*
__pycache__
.pytest_cache
build
dist

@ -47,7 +47,7 @@ COPY --from=builder /dependencies /usr/local
ENV PYTHONPATH=/usr/local
# The actual flask app
COPY backend /app/backend
COPY changedetectionio /app/changedetectionio
# The eventlet server wrapper
COPY changedetection.py /app/changedetection.py

@ -0,0 +1,4 @@
# Extra, non-Python files to ship with the package.
# Everything under the package's templates and static asset directories.
recursive-include changedetectionio/templates *
recursive-include changedetectionio/static *
# The top-level changedetection.py script.
include changedetection.py
# Never ship compiled bytecode files.
global-exclude *.pyc

@ -0,0 +1,71 @@
# changedetection.io
![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master)
<a href="https://hub.docker.com/r/dgtlmoon/changedetection.io" target="_blank" title="Change detection docker hub">
<img src="https://img.shields.io/docker/pulls/dgtlmoon/changedetection.io" alt="Docker Pulls"/>
</a>
<a href="https://hub.docker.com/r/dgtlmoon/changedetection.io" target="_blank" title="Change detection docker hub">
<img src="https://img.shields.io/github/v/release/dgtlmoon/changedetection.io" alt="Change detection latest tag version"/>
</a>
## Self-hosted open source change monitoring of web pages.
_Know when web pages change! Stay on top of new information!_
Live your data-life *pro-actively* instead of *re-actively*, do not rely on manipulative social media for consuming important information.
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />
#### Example use cases
Know when ...
- Government department updates (changes are often only on their websites)
- Local government news (changes are often only on their websites)
- New software releases, security advisories when you're not on their mailing list.
- Festivals with changes
- Real estate listing changes
- COVID related news from government websites
- Detect and monitor changes in JSON API responses
- API monitoring and alerting
**Get monitoring now!**
```bash
$ pip3 install changedetection.io
```
Specify a target for the *datastore path* with `-d` (required) and a *listening port* with `-p` (defaults to `5000`)
```bash
$ changedetection.io -d /path/to/empty/data/dir -p 5000
```
Then visit http://127.0.0.1:5000 and you should be able to access the UI.
### Features
- Website monitoring
- Change detection of content and analyses
- Filters on change (Select by CSS or JSON)
- Triggers (Wait for text, wait for regex)
- Notification support
- JSON API Monitoring
- Parse JSON embedded in HTML
- (Reverse) Proxy support
- Javascript support via WebDriver
- Raspberry Pi (arm v6/v7/64 support)
See https://github.com/dgtlmoon/changedetection.io for more information.
### Support us
Do you use changedetection.io to make money? Does it save you time or money? Does it make your life easier? Less stressful? Remember, we write this software when we should be doing actual paid work; we have to buy food and pay rent just like you.
Please support us, even small amounts help a LOT.
BTC `1PLFN327GyUarpJd7nVe7Reqg9qHx5frNn`
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/btc-support.png" style="max-width:50%;" alt="Support us!" />

@ -13,6 +13,8 @@ _Know when web pages change! Stay ontop of new information!_
Live your data-life *pro-actively* instead of *re-actively*, do not rely on manipulative social media for consuming important information.
Open source web page monitoring, notification and change detection.
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />
@ -29,32 +31,34 @@ Know when ...
- Detect and monitor changes in JSON API responses
- API monitoring and alerting
_Need an actual Chrome runner with Javascript support? see the experimental <a href="https://github.com/dgtlmoon/changedetection.io/tree/javascript-browser">Javascript/Chrome support changedetection.io branch!</a>_
_Need an actual Chrome runner with Javascript support? We support fetching via WebDriver!_
**Get monitoring now! super simple, one command!**
Run the python code on your own machine by cloning this repository, or with <a href="https://docs.docker.com/get-docker/">docker</a> and/or <a href="https://www.digitalocean.com/community/tutorial_collections/how-to-install-docker-compose">docker-compose</a>
With one docker-compose command
**Docker**
With Docker Compose, just clone this repository and run:
```bash
$ docker-compose up -d
```
Docker standalone
```bash
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
```
**Python PIP**
```bash
docker-compose up -d
$ pip3 install changedetection.io
$ changedetection.io -d /path/to/empty/data/dir -p 5000
```
Then visit http://127.0.0.1:5000 and you should be able to access the UI.
_Now with per-site configurable support for using a fast built in HTTP fetcher or use a Chrome based fetcher for monitoring of JavaScript websites!_
#### Updating to the latest version
Highly recommended :)
```bash
docker pull dgtlmoon/changedetection.io
docker-compose up -d
```
### Screenshots
Examining differences in content.
@ -131,15 +135,6 @@ For more information see https://docs.python-requests.org/en/master/user/advance
This proxy support also extends to the notifications https://github.com/caronc/apprise/issues/387#issuecomment-841718867
### Notes
- ~~Does not yet support Javascript~~
- ~~Wont work with Cloudfare type "Please turn on javascript" protected pages~~
- You can use the 'headers' section to monitor password protected web page changes
See the experimental <a href="https://github.com/dgtlmoon/changedetection.io/tree/javascript-browser">Javascript/Chrome browser support!</a>
### RaspberriPi support?
RaspberriPi and linux/arm/v6 linux/arm/v7 arm64 devices are supported!

@ -8,11 +8,11 @@ import sys
import eventlet
import eventlet.wsgi
import backend
import changedetectionio
from backend import store
from changedetectionio import store
def main(argv):
def main():
ssl_mode = False
port = os.environ.get('PORT') or 5000
do_cleanup = False
@ -21,7 +21,7 @@ def main(argv):
datastore_path = os.path.join(os.getcwd(), "datastore")
try:
opts, args = getopt.getopt(argv, "csd:p:", "port")
opts, args = getopt.getopt(sys.argv[1:], "csd:p:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -p [port] -d [datastore path]')
sys.exit(2)
@ -48,8 +48,13 @@ def main(argv):
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
app_config = {'datastore_path': datastore_path}
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'])
app = backend.changedetection_app(app_config, datastore)
if not os.path.isdir(app_config['datastore_path']):
print ("ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists.\n"
"Alternatively, use the -d parameter.".format(app_config['datastore_path']),file=sys.stderr)
sys.exit(2)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=changedetectionio.__version__)
app = changedetectionio.changedetection_app(app_config, datastore)
# Go into cleanup mode
if do_cleanup:
@ -89,4 +94,4 @@ def main(argv):
if __name__ == '__main__':
main(sys.argv[1:])
main()

@ -29,6 +29,8 @@ from flask import make_response
import datetime
import pytz
__version__ = '0.39'
datastore = None
# Local
@ -41,7 +43,11 @@ update_q = queue.Queue()
notification_q = queue.Queue()
app = Flask(__name__, static_url_path="/var/www/change-detection/backend/static")
# Needs to be set this way because we also build and publish via pip
base_path = os.path.dirname(os.path.realpath(__file__))
app = Flask(__name__,
static_url_path="{}/static".format(base_path),
template_folder="{}/templates".format(base_path))
# Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
@ -157,7 +163,6 @@ def changedetection_app(config=None, datastore_o=None):
global datastore
datastore = datastore_o
app.config.update(dict(DEBUG=True))
#app.config.update(config or {})
login_manager = flask_login.LoginManager(app)
@ -278,7 +283,7 @@ def changedetection_app(config=None, datastore_o=None):
return response
else:
from backend import forms
from changedetectionio import forms
form = forms.quickWatchForm(request.form)
output = render_template("watch-overview.html",
@ -344,7 +349,7 @@ def changedetection_app(config=None, datastore_o=None):
def get_current_checksum_include_ignore_text(uuid):
import hashlib
from backend import fetch_site_status
from changedetectionio import fetch_site_status
# Get the most recent one
newest_history_key = datastore.get_val(uuid, 'newest_history_key')
@ -371,7 +376,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_required
def edit_page(uuid):
from backend import forms
from changedetectionio import forms
form = forms.watchForm(request.form)
# More for testing, possible to return the first/only
@ -473,8 +478,8 @@ def changedetection_app(config=None, datastore_o=None):
@login_required
def settings_page():
from backend import forms
from backend import content_fetcher
from changedetectionio import forms
from changedetectionio import content_fetcher
form = forms.globalSettingsForm(request.form)
@ -722,18 +727,15 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
def static_content(group, filename):
# These files should be in our subdirectory
full_path = os.path.realpath(__file__)
p = os.path.dirname(full_path)
try:
return send_from_directory("{}/static/{}".format(p, group), filename=filename)
return send_from_directory("static/{}".format(group), filename=filename)
except FileNotFoundError:
abort(404)
@app.route("/api/add", methods=['POST'])
@login_required
def api_watch_add():
from backend import forms
from changedetectionio import forms
form = forms.quickWatchForm(request.form)
if form.validate():
@ -821,7 +823,7 @@ def check_for_new_version():
while not app.config.exit.is_set():
try:
r = requests.post("https://changedetection.io/check-ver.php",
data={'version': datastore.data['version_tag'],
data={'version': __version__,
'app_guid': datastore.data['app_guid'],
'watch_count': len(datastore.data['watching'])
},
@ -850,7 +852,7 @@ def notification_runner():
else:
# Process notifications
try:
from backend import notification
from changedetectionio import notification
notification.process_notification(n_object, datastore)
except Exception as e:
@ -860,7 +862,7 @@ def notification_runner():
# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
from backend import update_worker
from changedetectionio import update_worker
# Spin up Workers.
for _ in range(datastore.data['settings']['requests']['workers']):

@ -45,7 +45,7 @@ class Fetcher():
def available_fetchers():
import inspect
from backend import content_fetcher
from changedetectionio import content_fetcher
p=[]
for name, obj in inspect.getmembers(content_fetcher):
if inspect.isclass(obj):

@ -1,5 +1,5 @@
import time
from backend import content_fetcher
from changedetectionio import content_fetcher
import hashlib
from inscriptis import get_text
import urllib3

@ -3,7 +3,7 @@ from wtforms import Form, SelectField, RadioField, BooleanField, StringField, Pa
from wtforms import widgets
from wtforms.validators import ValidationError
from wtforms.fields import html5
from backend import content_fetcher
from changedetectionio import content_fetcher
import re
class StringListField(StringField):
@ -91,7 +91,7 @@ class ValidateContentFetcherIsReady(object):
self.message = message
def __call__(self, form, field):
from backend import content_fetcher
from changedetectionio import content_fetcher
import urllib3.exceptions
# Better would be a radiohandler that keeps a reference to each class

Before

Width:  |  Height:  |  Size: 569 B

After

Width:  |  Height:  |  Size: 569 B

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Before

Width:  |  Height:  |  Size: 31 KiB

After

Width:  |  Height:  |  Size: 31 KiB

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 43 KiB

Before

Width:  |  Height:  |  Size: 2.9 KiB

After

Width:  |  Height:  |  Size: 2.9 KiB

@ -15,7 +15,7 @@ import threading
class ChangeDetectionStore:
lock = Lock()
def __init__(self, datastore_path="/datastore", include_default_watches=True):
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
self.needs_write = False
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
@ -72,8 +72,8 @@ class ChangeDetectionStore:
'fetch_backend': None,
}
if path.isfile('backend/source.txt'):
with open('backend/source.txt') as f:
if path.isfile('changedetectionio/source.txt'):
with open('changedetectionio/source.txt') as f:
# Should be set in Dockerfile to look for /source.txt , this will give us the git commit #
# So when someone gives us a backup file to examine, we know exactly what code they were running.
self.__data['build_sha'] = f.read()
@ -120,7 +120,7 @@ class ChangeDetectionStore:
self.add_watch(url='https://www.gov.uk/coronavirus', tag='Covid')
self.add_watch(url='https://changedetection.io', tag='Tech news')
self.__data['version_tag'] = "0.38.2"
self.__data['version_tag'] = version_tag
# Helper to remove password protection
password_reset_lockfile = "{}/removepassword.lock".format(self.datastore_path)

@ -1,8 +1,8 @@
#!/usr/bin/python3
import pytest
from backend import changedetection_app
from backend import store
from changedetectionio import changedetection_app
from changedetectionio import store
import os
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py

@ -47,7 +47,7 @@ def set_modified_response():
# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
def test_css_filter_output():
from backend import fetch_site_status
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly

@ -10,7 +10,7 @@ def test_setup(live_server):
# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_regex_text_func():
from backend import fetch_site_status
from changedetectionio import fetch_site_status
test_content = """
but sometimes we want to remove the lines.

@ -10,7 +10,7 @@ def test_setup(live_server):
# Unit test of the stripper
# Always we are dealing in utf-8
def test_strip_text_func():
from backend import fetch_site_status
from changedetectionio import fetch_site_status
test_content = """
Some content

@ -14,7 +14,7 @@ class update_worker(threading.Thread):
super().__init__(*args, **kwargs)
def run(self):
from backend import fetch_site_status
from changedetectionio import fetch_site_status
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
@ -27,7 +27,7 @@ class update_worker(threading.Thread):
else:
self.current_uuid = uuid
from backend import content_fetcher
from changedetectionio import content_fetcher
if uuid in list(self.datastore.data['watching'].keys()):

@ -0,0 +1,2 @@
pytest ~=6.2
pytest-flask ~=1.2

@ -1,7 +1,6 @@
chardet==2.3.0
flask~= 1.0
pytest ~=6.2
pytest-flask ~=1.2
eventlet>=0.31.0
requests[socks] ~= 2.15
validators
@ -20,4 +19,4 @@ apprise ~= 0.9
# Used for CSS filtering, replace with soupsieve and lxml for xpath
bs4
selenium ~= 3.141
selenium ~= 3.141

@ -0,0 +1,72 @@
#!/usr/bin/env python
import codecs
import os.path
import re
import sys
from setuptools import setup, find_packages
# Directory containing this setup script; used as the base for relative reads.
here = os.path.abspath(os.path.dirname(__file__))


def read(*parts):
    """Return the text content of a file addressed relative to this script.

    Args:
        *parts: Path components joined onto ``here`` (the directory that
            contains this file) with ``os.path.join``.

    Returns:
        The full content of the file as a string.
    """
    # Use a context manager so the handle is closed as soon as the content
    # has been read, instead of lingering until garbage collection.
    with codecs.open(os.path.join(here, *parts), 'r') as handle:
        return handle.read()
def find_version(*file_paths):
    """Extract the ``__version__`` string from the file at ``file_paths``.

    The file is loaded through ``read()`` and scanned for a line that starts
    with ``__version__ = '<value>'`` (single or double quotes accepted).

    Args:
        *file_paths: Path components forwarded unchanged to ``read()``.

    Returns:
        The text between the quotes of the first matching assignment.

    Raises:
        RuntimeError: If no ``__version__`` assignment is found.
    """
    pattern = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]", re.M)
    found = pattern.search(read(*file_paths))
    if not found:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
# Runtime dependencies are read from requirements.txt.
# A context manager guarantees the file handle is closed after reading.
with open('requirements.txt') as requirements_file:
    install_requires = requirements_file.readlines()

# README-pip.md is used as the long description passed to setup().
# It is read explicitly as UTF-8 so the build does not depend on the
# platform's default encoding.
with open('README-pip.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='changedetection.io',
    # Version is taken from __version__ in changedetectionio/__init__.py.
    version=find_version("changedetectionio", "__init__.py"),
    description='Website change detection and monitoring service',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords='website change monitor for changes notification change detection '
             'alerts tracking website tracker change alert website and monitoring',
    zip_safe=False,
    # Installs a `changedetection.io` command that calls main() in changedetection.py.
    entry_points={"console_scripts": ["changedetection.io=changedetection:main"]},
    author='dgtlmoon',
    url='https://changedetection.io',
    scripts=['changedetection.py'],
    packages=['changedetectionio'],
    include_package_data=True,
    install_requires=install_requires,
    license="Apache License 2.0",
    python_requires=">= 3.6",
    classifiers=['Intended Audience :: Customer Service',
                 'Intended Audience :: Developers',
                 'Intended Audience :: Education',
                 'Intended Audience :: End Users/Desktop',
                 'Intended Audience :: Financial and Insurance Industry',
                 'Intended Audience :: Healthcare Industry',
                 'Intended Audience :: Information Technology',
                 'Intended Audience :: Legal Industry',
                 'Intended Audience :: Manufacturing',
                 'Intended Audience :: Other Audience',
                 'Intended Audience :: Religion',
                 'Intended Audience :: Science/Research',
                 'Intended Audience :: System Administrators',
                 'Intended Audience :: Telecommunications Industry',
                 'Topic :: Education',
                 'Topic :: Internet',
                 'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
                 'Topic :: Internet :: WWW/HTTP :: Site Management',
                 'Topic :: Internet :: WWW/HTTP :: Site Management :: Link Checking',
                 'Topic :: Internet :: WWW/HTTP :: Browsers',
                 'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
                 'Topic :: Office/Business',
                 'Topic :: Other/Nonlisted Topic',
                 'Topic :: Scientific/Engineering :: Information Analysis',
                 'Topic :: Text Processing :: Markup :: HTML',
                 'Topic :: Utilities'
                 ],
)
Loading…
Cancel
Save