Refactor Excel / wachete import, extend tests (#1931)

pull/1933/head
dgtlmoon 1 year ago committed by GitHub
parent 03c671bfff
commit 47103d7f3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -137,6 +137,7 @@ class import_distill_io_json(Importer):
flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data))) flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
class import_xlsx_wachete(Importer): class import_xlsx_wachete(Importer):
def run(self, def run(self,
@ -144,6 +145,7 @@ class import_xlsx_wachete(Importer):
flash, flash,
datastore, datastore,
): ):
good = 0 good = 0
now = time.time() now = time.time()
self.new_uuids = [] self.new_uuids = []
@ -153,29 +155,32 @@ class import_xlsx_wachete(Importer):
try: try:
wb = load_workbook(data) wb = load_workbook(data)
except Exception as e: except Exception as e:
#@todo correct except # @todo correct except
flash("Unable to read export XLSX file, something wrong with the file?", 'error') flash("Unable to read export XLSX file, something wrong with the file?", 'error')
return return
sheet_obj = wb.active row_id = 2
for row in wb.active.iter_rows(min_row=row_id):
i = 1 try:
row = 2 extras = {}
while sheet_obj.cell(row=row, column=1).value:
data = {} data = {}
while sheet_obj.cell(row=row, column=i).value: for cell in row:
column_title = sheet_obj.cell(row=1, column=i).value.strip().lower() if not cell.value:
column_row_value = sheet_obj.cell(row=row, column=i).value continue
data[column_title] = column_row_value column_title = wb.active.cell(row=1, column=cell.column).value.strip().lower()
data[column_title] = cell.value
i += 1 # Forced switch to webdriver/playwright/etc
dynamic_wachet = str(data.get('dynamic wachet')).strip().lower() # Convert bool to str to cover all cases
# LibreOffice and others can store it as =FALSE() / =TRUE(), or as a real boolean
if 'true' in dynamic_wachet or dynamic_wachet == '1':
extras['fetch_backend'] = 'html_webdriver'
extras = {}
if data.get('xpath'): if data.get('xpath'):
#@todo split by || ? # @todo split by || ?
extras['include_filters'] = [data.get('xpath')] extras['include_filters'] = [data.get('xpath')]
if data.get('name'): if data.get('name'):
extras['title'] = [data.get('name').strip()] extras['title'] = data.get('name').strip()
if data.get('interval (min)'): if data.get('interval (min)'):
minutes = int(data.get('interval (min)')) minutes = int(data.get('interval (min)'))
hours, minutes = divmod(minutes, 60) hours, minutes = divmod(minutes, 60)
@ -183,13 +188,13 @@ class import_xlsx_wachete(Importer):
weeks, days = divmod(days, 7) weeks, days = divmod(days, 7)
extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0} extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
# At minimum a URL is required. # At minimum a URL is required.
if data.get('url'): if data.get('url'):
try: try:
validate_url(data.get('url')) validate_url(data.get('url'))
except ValidationError as e: except ValidationError as e:
print(">> import URL error", data.get('url'), str(e)) print(">> import URL error", data.get('url'), str(e))
flash(f"Error processing row number {row_id}, URL value was incorrect, row was skipped.", 'error')
# Don't bother processing anything else on this row # Don't bother processing anything else on this row
continue continue
@ -201,14 +206,16 @@ class import_xlsx_wachete(Importer):
# Straight into the queue. # Straight into the queue.
self.new_uuids.append(new_uuid) self.new_uuids.append(new_uuid)
good += 1 good += 1
except Exception as e:
row += 1 print(e)
i = 1 flash(f"Error processing row number {row_id}, check all cell data types are correct, row was skipped.", 'error')
else:
row_id += 1
flash( flash(
"{} imported from Wachete .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now)) "{} imported from Wachete .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
class import_xlsx_custom(Importer): class import_xlsx_custom(Importer):
def run(self, def run(self,
@ -216,6 +223,7 @@ class import_xlsx_custom(Importer):
flash, flash,
datastore, datastore,
): ):
good = 0 good = 0
now = time.time() now = time.time()
self.new_uuids = [] self.new_uuids = []
@ -225,43 +233,52 @@ class import_xlsx_custom(Importer):
try: try:
wb = load_workbook(data) wb = load_workbook(data)
except Exception as e: except Exception as e:
#@todo correct except # @todo correct except
flash("Unable to read export XLSX file, something wrong with the file?", 'error') flash("Unable to read export XLSX file, something wrong with the file?", 'error')
return return
# @todo check at least 2 rows, same in other method # @todo check at least 2 rows, same in other method
sheet_obj = wb.active
from .forms import validate_url from .forms import validate_url
row = 2 row_i = 1
while sheet_obj.cell(row=row, column=1).value:
try:
for row in wb.active.iter_rows():
url = None url = None
tags = None tags = None
extras = {} extras = {}
for col_i, cell_map in self.import_profile.items():
cell_val = sheet_obj.cell(row=row, column=col_i).value for cell in row:
if not self.import_profile.get(cell.col_idx):
continue
if not cell.value:
continue
cell_map = self.import_profile.get(cell.col_idx)
cell_val = str(cell.value).strip() # could be bool
if cell_map == 'url': if cell_map == 'url':
url = cell_val.strip() url = cell.value.strip()
try: try:
validate_url(url) validate_url(url)
except ValidationError as e: except ValidationError as e:
print (">> Import URL error",url, str(e)) print(">> Import URL error", url, str(e))
flash(f"Error processing row number {row_i}, URL value was incorrect, row was skipped.", 'error')
# Don't bother processing anything else on this row # Don't bother processing anything else on this row
url = None url = None
break break
elif cell_map == 'tag': elif cell_map == 'tag':
tags = cell_val.strip() tags = cell.value.strip()
elif cell_map == 'include_filters': elif cell_map == 'include_filters':
# @todo validate? # @todo validate?
extras['include_filters'] = [cell_val.strip()] extras['include_filters'] = [cell.value.strip()]
elif cell_map == 'interval_minutes': elif cell_map == 'interval_minutes':
hours, minutes = divmod(int(cell_val), 60) hours, minutes = divmod(int(cell_val), 60)
days, hours = divmod(hours, 24) days, hours = divmod(hours, 24)
weeks, days = divmod(days, 7) weeks, days = divmod(days, 7)
extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0} extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
else: else:
extras[cell_map] = cell_val.strip() extras[cell_map] = cell_val
# At minimum a URL is required. # At minimum a URL is required.
if url: if url:
@ -273,8 +290,11 @@ class import_xlsx_custom(Importer):
# Straight into the queue. # Straight into the queue.
self.new_uuids.append(new_uuid) self.new_uuids.append(new_uuid)
good += 1 good += 1
except Exception as e:
row += 1 print(e)
flash(f"Error processing row number {row_i}, check all cell data types are correct, row was skipped.", 'error')
else:
row_i += 1
flash( flash(
"{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now)) "{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))

@ -127,6 +127,7 @@ def test_import_custom_xlsx(client, live_server):
"""Test that an Excel spreadsheet can be uploaded and the watches are created correctly""" """Test that an Excel spreadsheet can be uploaded and the watches are created correctly"""
#live_server_setup(live_server) #live_server_setup(live_server)
dirname = os.path.dirname(__file__) dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'import/spreadsheet.xlsx') filename = os.path.join(dirname, 'import/spreadsheet.xlsx')
with open(filename, 'rb') as f: with open(filename, 'rb') as f:
@ -150,13 +151,14 @@ def test_import_custom_xlsx(client, live_server):
follow_redirects=True, follow_redirects=True,
) )
assert b'2 imported from custom .xlsx' in res.data assert b'3 imported from custom .xlsx' in res.data
# Because this row was actually just a header with no usable URL, we should get an error
assert b'Error processing row number 1' in res.data
res = client.get( res = client.get(
url_for("index") url_for("index")
) )
assert b'Somesite results ABC' in res.data assert b'Somesite results ABC' in res.data
assert b'City news results' in res.data assert b'City news results' in res.data
@ -167,6 +169,9 @@ def test_import_custom_xlsx(client, live_server):
assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]' assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0} assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_import_watchete_xlsx(client, live_server): def test_import_watchete_xlsx(client, live_server):
"""Test that an Excel spreadsheet can be uploaded and the watches are created correctly""" """Test that an Excel spreadsheet can be uploaded and the watches are created correctly"""
@ -186,7 +191,7 @@ def test_import_watchete_xlsx(client, live_server):
follow_redirects=True, follow_redirects=True,
) )
assert b'2 imported from Wachete .xlsx' in res.data assert b'3 imported from Wachete .xlsx' in res.data
res = client.get( res = client.get(
url_for("index") url_for("index")
@ -201,3 +206,10 @@ def test_import_watchete_xlsx(client, live_server):
filters = watch.get('include_filters') filters = watch.get('include_filters')
assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]' assert filters[0] == '/html[1]/body[1]/div[4]/div[1]/div[1]/div[1]||//*[@id=\'content\']/div[3]/div[1]/div[1]||//*[@id=\'content\']/div[1]'
assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0} assert watch.get('time_between_check') == {'weeks': 0, 'days': 1, 'hours': 6, 'minutes': 24, 'seconds': 0}
assert watch.get('fetch_backend') == 'system' # always uses default
if watch.get('title') == 'JS website':
assert watch.get('fetch_backend') == 'html_webdriver' # Has active 'dynamic wachet'
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

Loading…
Cancel
Save