|
|
@ -5,7 +5,8 @@ import hashlib
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
import html2text
|
|
|
|
import html2text
|
|
|
|
from urlextract import URLExtract
|
|
|
|
# URLExtract is no longer needed because inscriptis does a much better job.
|
|
|
|
|
|
|
|
#from urlextract import URLExtract
|
|
|
|
from inscriptis import get_text
|
|
|
|
from inscriptis import get_text
|
|
|
|
|
|
|
|
|
|
|
|
# Hmm Polymorphism datastore, thread, etc
|
|
|
|
# Hmm Polymorphism datastore, thread, etc
|
|
|
|