Extract <title> as title should work on all processors

extract-title-all-processors
dgtlmoon 6 months ago
parent cffb6d748c
commit d06275a6b0

@ -338,12 +338,6 @@ class perform_site_check(difference_detection_processor):
if blocked:
changed_detected = False
# Extract title as title
if is_html:
if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content)
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
if changed_detected:

@ -505,6 +505,16 @@ class update_worker(threading.Thread):
if update_handler.xpath_data:
watch.save_xpath_data(data=update_handler.xpath_data)
# Extract <title> as title if possible/requested.
if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
if not watch['title'] or not len(watch['title']):
try:
update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
except Exception as e:
logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")
# Now update after running everything
try:
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

Loading…
Cancel
Save