Import speed improvements, and adding an import URL batch size of 5,000 to stop accidental CPU overload (#549)

pull/552/head
dgtlmoon 3 years ago committed by GitHub
parent 6c3c5dc28a
commit 2a9fb12451
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -661,13 +661,19 @@ def changedetection_app(config=None, datastore_o=None):
good = 0
if request.method == 'POST':
now=time.time()
urls = request.values.get('urls').split("\n")
if (len(urls) > 5000):
flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
for url in urls:
url = url.strip()
url, *tags = url.split(" ")
# Flask wtform validators won't work with basic auth, use validators package
if len(url) and validators.url(url.replace('source:', '')):
new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags))
# Up to 5000 per batch so we don't flood the server
if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags), write_to_disk_now=False)
# Straight into the queue.
update_q.put(new_uuid)
good += 1
@ -675,7 +681,8 @@ def changedetection_app(config=None, datastore_o=None):
if len(url):
remaining_urls.append(url)
flash("{} Imported, {} Skipped.".format(good, len(remaining_urls)))
flash("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time()-now,len(remaining_urls)))
datastore.needs_write = True
if len(remaining_urls) == 0:
# Looking good, redirect to index.

@ -13,7 +13,6 @@ from changedetectionio.notification import (
class model(dict):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)
self.update({
'url': None,
'tag': None,
@ -45,6 +44,9 @@ class model(dict):
# Should be all None by default, so we use the system default in this case.
'minutes_between_check': None
})
# goes at the end so we update the default object with the initialiser
super(model, self).__init__(*arg, **kw)
@property
def has_empty_checktime(self):

@ -272,15 +272,14 @@ class ChangeDetectionStore:
self.needs_write = True
return changes_removed
def add_watch(self, url, tag="", extras=None):
def add_watch(self, url, tag="", extras=None, write_to_disk_now=True):
if extras is None:
extras = {}
with self.lock:
# @todo use a common generic version of this
new_uuid = str(uuid_builder.uuid4())
_blank = deepcopy(self.generic_definition)
_blank.update({
new_watch = Watch.model({
'url': url,
'tag': tag
})
@ -291,9 +290,8 @@ class ChangeDetectionStore:
if k in apply_extras:
del apply_extras[k]
_blank.update(apply_extras)
self.data['watching'][new_uuid] = _blank
new_watch.update(apply_extras)
self.__data['watching'][new_uuid]=new_watch
# Get the directory ready
output_path = "{}/{}".format(self.datastore_path, new_uuid)
@ -302,6 +300,7 @@ class ChangeDetectionStore:
except FileExistsError:
print(output_path, "already exists.")
if write_to_disk_now:
self.sync_to_json()
return new_uuid

@ -2,6 +2,7 @@ import threading
import queue
import time
from changedetectionio import content_fetcher
# A single update worker
#
# Requests for checking on a single site(watch) from a queue of watches
@ -32,7 +33,6 @@ class update_worker(threading.Thread):
else:
self.current_uuid = uuid
from changedetectionio import content_fetcher
if uuid in list(self.datastore.data['watching'].keys()):

Loading…
Cancel
Save