Import speed improvements; add an import URL batch size of 5,000 to stop accidental CPU overload (#549)

Branch: show-which-items-in-queue
Author: dgtlmoon, committed 3 years ago via GitHub
Parent: 6c3c5dc28a
Commit: 2a9fb12451

@@ -661,13 +661,19 @@ def changedetection_app(config=None, datastore_o=None):
         good = 0
         if request.method == 'POST':
+            now=time.time()
             urls = request.values.get('urls').split("\n")
+            if (len(urls) > 5000):
+                flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
             for url in urls:
                 url = url.strip()
                 url, *tags = url.split(" ")
                 # Flask wtform validators wont work with basic auth, use validators package
-                if len(url) and validators.url(url.replace('source:', '')):
-                    new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags))
+                # Up to 5000 per batch so we dont flood the server
+                if len(url) and validators.url(url.replace('source:', '')) and good < 5000:
+                    new_uuid = datastore.add_watch(url=url.strip(), tag=" ".join(tags), write_to_disk_now=False)
                     # Straight into the queue.
                     update_q.put(new_uuid)
                     good += 1
@@ -675,7 +681,8 @@ def changedetection_app(config=None, datastore_o=None):
                 if len(url):
                     remaining_urls.append(url)
 
-            flash("{} Imported, {} Skipped.".format(good, len(remaining_urls)))
+            flash("{} Imported in {:.2f}s, {} Skipped.".format(good, time.time()-now,len(remaining_urls)))
+            datastore.needs_write = True
 
         if len(remaining_urls) == 0:
             # Looking good, redirect to index.
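
The two hunks above cap a single import POST at 5,000 URLs and switch each add_watch() call to write_to_disk_now=False, marking the datastore dirty once at the end instead of syncing JSON per URL. Below is a minimal sketch of that batching/deferred-write pattern; FakeStore, MAX_BATCH and the sample URLs are illustrative stand-ins, not code from this commit.

# Sketch of the batch-limited, write-deferred import loop; names are illustrative.
import time
import uuid

MAX_BATCH = 5000

class FakeStore:
    def __init__(self):
        self.watching = {}
        self.needs_write = False

    def add_watch(self, url, tag="", write_to_disk_now=True):
        new_uuid = str(uuid.uuid4())
        self.watching[new_uuid] = {'url': url, 'tag': tag}
        if write_to_disk_now:
            pass  # a real store would sync its JSON to disk here (slow per item)
        return new_uuid

def import_urls(raw_text, store):
    now = time.time()
    good, remaining = 0, []
    for line in raw_text.split("\n"):
        url = line.strip()
        if url and good < MAX_BATCH:
            # Defer the per-item disk write; flush once at the end instead.
            store.add_watch(url=url, write_to_disk_now=False)
            good += 1
        elif url:
            remaining.append(url)
    store.needs_write = True  # single deferred flush marker
    return good, remaining, time.time() - now

store = FakeStore()
print(import_urls("https://example.com\nhttps://example.org\n", store))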

@@ -13,7 +13,6 @@ from changedetectionio.notification import (
 class model(dict):
     def __init__(self, *arg, **kw):
-        super(model, self).__init__(*arg, **kw)
         self.update({
             'url': None,
             'tag': None,
@@ -45,6 +44,9 @@ class model(dict):
             # Should be all None by default, so we use the system default in this case.
             'minutes_between_check': None
         })
+        # goes at the end so we update the default object with the initialiser
+        super(model, self).__init__(*arg, **kw)
 
     @property
     def has_empty_checktime(self):
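
Moving super(model, self).__init__(*arg, **kw) from the top of __init__ to after the self.update({...}) defaults means values passed to the constructor now override the defaults, instead of being clobbered by them. A tiny sketch of that ordering in a dict subclass, with illustrative class names:

# Whichever update runs last wins in a dict subclass; class names are illustrative.
class DefaultsFirst(dict):
    def __init__(self, *arg, **kw):
        self.update({'url': None, 'tag': None})   # defaults first...
        super().__init__(*arg, **kw)              # ...then caller values override

class DefaultsLast(dict):
    def __init__(self, *arg, **kw):
        super().__init__(*arg, **kw)              # caller values first...
        self.update({'url': None, 'tag': None})   # ...then defaults clobber them

print(DefaultsFirst({'url': 'https://example.com'}))  # {'url': 'https://example.com', 'tag': None}
print(DefaultsLast({'url': 'https://example.com'}))   # {'url': None, 'tag': None}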

@@ -272,15 +272,14 @@ class ChangeDetectionStore:
         self.needs_write = True
         return changes_removed
 
-    def add_watch(self, url, tag="", extras=None):
+    def add_watch(self, url, tag="", extras=None, write_to_disk_now=True):
         if extras is None:
             extras = {}
 
         with self.lock:
             # @todo use a common generic version of this
             new_uuid = str(uuid_builder.uuid4())
-            _blank = deepcopy(self.generic_definition)
-            _blank.update({
+            new_watch = Watch.model({
                 'url': url,
                 'tag': tag
             })
@@ -291,9 +290,8 @@ class ChangeDetectionStore:
                 if k in apply_extras:
                     del apply_extras[k]
 
-            _blank.update(apply_extras)
-
-            self.data['watching'][new_uuid] = _blank
+            new_watch.update(apply_extras)
+            self.__data['watching'][new_uuid]=new_watch
 
             # Get the directory ready
             output_path = "{}/{}".format(self.datastore_path, new_uuid)
@@ -302,7 +300,8 @@ class ChangeDetectionStore:
             except FileExistsError:
                 print(output_path, "already exists.")
 
-            self.sync_to_json()
+            if write_to_disk_now:
+                self.sync_to_json()
 
         return new_uuid
 
     # Save some text file to the appropriate path and bump the history
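
On the store side, add_watch() now builds the record as a Watch.model (which carries its own defaults) and only calls sync_to_json() when write_to_disk_now is true, so bulk importers can skip the per-watch flush. The sketch below shows one way that deferral can pair with a later needs_write-driven flush; SketchStore and the periodic-saver check are assumptions for illustration, not code from this repository.

# Illustrative store-side deferral: sync only on request, flush once for a batch.
import json
import os
import tempfile
import threading
import uuid

class SketchStore:
    def __init__(self, path=None):
        self.path = path or os.path.join(tempfile.gettempdir(), 'sketch-watches.json')
        self.lock = threading.Lock()
        self.data = {'watching': {}}
        self.needs_write = False

    def add_watch(self, url, tag="", write_to_disk_now=True):
        with self.lock:
            new_uuid = str(uuid.uuid4())
            self.data['watching'][new_uuid] = {'url': url, 'tag': tag}
        if write_to_disk_now:
            self.sync_to_json()
        return new_uuid

    def sync_to_json(self):
        with self.lock:
            with open(self.path, 'w') as f:
                json.dump(self.data, f)
            self.needs_write = False

store = SketchStore()
for n in range(3):
    store.add_watch("https://example.com/page-{}".format(n), write_to_disk_now=False)
store.needs_write = True       # callers mark the store dirty instead of syncing
if store.needs_write:          # e.g. a periodic saver would perform this check
    store.sync_to_json()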

@@ -2,6 +2,7 @@ import threading
 import queue
 import time
+from changedetectionio import content_fetcher
 
 # A single update worker
 #
 # Requests for checking on a single site(watch) from a queue of watches
@@ -32,7 +33,6 @@ class update_worker(threading.Thread):
             else:
                 self.current_uuid = uuid
 
-                from changedetectionio import content_fetcher
                 if uuid in list(self.datastore.data['watching'].keys()):
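
The last two hunks move the content_fetcher import from inside the worker's per-UUID branch to module scope. After the first import Python only does a sys.modules lookup, so the per-iteration saving is small, but it still removes repeated work from the hot path and makes the dependency explicit. A rough, illustrative timing comparison (numbers will vary by machine):

# Re-running an import statement in a loop still costs a sys.modules lookup each time.
import timeit

per_loop_import = timeit.timeit("import json", number=100_000)
no_import = timeit.timeit("pass", number=100_000)
print("re-import inside the loop: {:.4f}s for 100k iterations".format(per_loop_import))
print("no per-loop import:        {:.4f}s for 100k iterations".format(no_import))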
