increase tolerance for errors

3 years ago · 5821e23e13
parent 650d61a317
commit 5821e23e13
1 changed file with 4 additions and 1 deletion
--- a/scrape.py
+++ b/scrape.py
@@ -115,6 +115,7 @@ class Webdriver:
    open("docs/tds-fonts/3.x/woff2/GothamSSm-Medium_web.woff2", 'wb').write(r.content)
  def get_html(self):
    # Loop to get all the html pages, and store information about images to be downloaded later.
+    error_count = 0
    while upcoming_urls:
      for url in upcoming_urls:
        if len(visited_urls) % 50 == 0:
@@ -128,7 +129,9 @@ class Webdriver:
        source = self.driver.find_element_by_css_selector("html").get_attribute('outerHTML')
        if not check_source_validity(source):
-          self.restart_scrape()
+          error_count += 1
+          if error_count > 10:
+            self.restart_scrape()
        with open('docs/' + url, 'w', encoding='utf-8') as f:
          source = re.sub(mpulse_tracker, '', source)