Restart scrape if invalid source

3 years ago · b6f543ace6
parent a8cf18b0c2
commit b6f543ace6
1 changed file with 6 additions and 2 deletions
--- a/scrape.py
+++ b/scrape.py
@@ -38,8 +38,9 @@ class Webdriver:
      renderer="Intel Iris OpenGL Engine",
      fix_hairline=True,
    )
-  def quit(self):
+  def restart_scrape(self):
    self.driver.quit()
    run()
  def get_index(self):
    # Step 2: Login to Tesla
    driver = tesla_login(self.driver)
@@ -116,7 +117,7 @@ class Webdriver:
    # Step 6: Loop to get all the html pages, and store information about images to be downloaded later.
    while upcoming_urls:
      for url in upcoming_urls:
-        if len(visited_urls) % 5 == 0:
+        if len(visited_urls) % 50 == 0:
          save_session()
        if url.startswith('GUID') and url.endswith('.html'):
          self.driver.get(base_url + url)
@@ -125,6 +126,9 @@ class Webdriver:
          continue
        source = self.driver.find_element_by_css_selector("html").get_attribute('outerHTML')
        if not check_source_validity(source):
          self.restart_scrape()
        with open('docs/' + url, 'w', encoding='utf-8') as f:
          source = re.sub(mpulse_tracker, '', source)
          source = re.sub(google_tracker, '', source)