@ -1,21 +1,28 @@
use std ::{
collections ::HashMap ,
net ::{ IpAddr , ToSocketAddrs } ,
sync ::{ Arc , RwLock } ,
net ::IpAddr ,
sync ::Arc ,
time ::{ Duration , SystemTime } ,
} ;
use bytes ::{ B uf, B ytes, BytesMut } ;
use bytes ::{ B ytes, BytesMut } ;
use futures ::{ stream ::StreamExt , TryFutureExt } ;
use once_cell ::sync ::Lazy ;
use regex ::Regex ;
use reqwest ::{ header , Client , Response } ;
use reqwest ::{
header ::{ self , HeaderMap , HeaderValue } ,
Client , Response ,
} ;
use rocket ::{ http ::ContentType , response ::Redirect , Route } ;
use tokio ::{
fs ::{ create_dir_all , remove_file , symlink_metadata , File } ,
io ::{ AsyncReadExt , AsyncWriteExt } ,
net ::lookup_host ,
sync ::RwLock ,
} ;
use html5gum ::{ Emitter , EndTag , InfallibleTokenizer , Readable , StartTag , StringReader , Tokenizer } ;
use crate ::{
error ::Error ,
util ::{ get_reqwest_client_builder , Cached } ,
@ -34,39 +41,50 @@ pub fn routes() -> Vec<Route> {
static CLIENT : Lazy < Client > = Lazy ::new ( | | {
// Generate the default headers
let mut default_headers = header ::HeaderMap ::new ( ) ;
default_headers
. insert ( header ::USER_AGENT , header ::HeaderValue ::from_static ( "Links (2.22; Linux X86_64; GNU C; text)" ) ) ;
default_headers
. insert ( header ::ACCEPT , header ::HeaderValue ::from_static ( "text/html, text/*;q=0.5, image/*, */*;q=0.1" ) ) ;
default_headers . insert ( header ::ACCEPT_LANGUAGE , header ::HeaderValue ::from_static ( "en,*;q=0.1" ) ) ;
default_headers . insert ( header ::CACHE_CONTROL , header ::HeaderValue ::from_static ( "no-cache" ) ) ;
default_headers . insert ( header ::PRAGMA , header ::HeaderValue ::from_static ( "no-cache" ) ) ;
let mut default_headers = HeaderMap ::new ( ) ;
default_headers . insert ( header ::USER_AGENT , HeaderValue ::from_static ( "Links (2.22; Linux X86_64; GNU C; text)" ) ) ;
default_headers . insert ( header ::ACCEPT , HeaderValue ::from_static ( "text/html, text/*;q=0.5, image/*, */*;q=0.1" ) ) ;
default_headers . insert ( header ::ACCEPT_LANGUAGE , HeaderValue ::from_static ( "en,*;q=0.1" ) ) ;
default_headers . insert ( header ::CACHE_CONTROL , HeaderValue ::from_static ( "no-cache" ) ) ;
default_headers . insert ( header ::PRAGMA , HeaderValue ::from_static ( "no-cache" ) ) ;
// Generate the cookie store
let cookie_store = Arc ::new ( Jar ::default ( ) ) ;
// Reuse the client between requests
let client = get_reqwest_client_builder ( )
. cookie_provider ( cookie_store . clone ( ) )
. timeout ( Duration ::from_secs ( CONFIG . icon_download_timeout ( ) ) )
. default_headers ( default_headers . clone ( ) ) ;
match client . build ( ) {
Ok ( client ) = > client ,
Err ( e ) = > {
error ! ( "Possible trust-dns error, trying with trust-dns disabled: '{e}'" ) ;
get_reqwest_client_builder ( )
. cookie_provider ( Arc ::new ( Jar ::default ( ) ) )
. cookie_provider ( cookie_store )
. timeout ( Duration ::from_secs ( CONFIG . icon_download_timeout ( ) ) )
. default_headers ( default_headers )
. trust_dns ( false )
. build ( )
. expect ( "Failed to build icon client" )
. expect ( "Failed to build client" )
}
}
} ) ;
// Build Regex only once since this takes a lot of time.
static ICON_REL_REGEX : Lazy < Regex > = Lazy ::new ( | | Regex ::new ( r"(?i)icon$|apple.*icon" ) . unwrap ( ) ) ;
static ICON_REL_BLACKLIST : Lazy < Regex > = Lazy ::new ( | | Regex ::new ( r"(?i)mask-icon" ) . unwrap ( ) ) ;
static ICON_SIZE_REGEX : Lazy < Regex > = Lazy ::new ( | | Regex ::new ( r"(?x)(\d+)\D*(\d+)" ) . unwrap ( ) ) ;
// Special HashMap which holds the user defined Regex to speedup matching the regex.
static ICON_BLACKLIST_REGEX : Lazy < RwLock < HashMap < String , Regex > > > = Lazy ::new ( | | RwLock ::new ( HashMap ::new ( ) ) ) ;
fn icon_redirect ( domain : & str , template : & str ) -> Option < Redirect > {
if ! is_valid_domain ( domain ) {
async fn icon_redirect ( domain : & str , template : & str ) -> Option < Redirect > {
if ! is_valid_domain ( domain ) . await {
warn ! ( "Invalid domain: {}" , domain ) ;
return None ;
}
if is_domain_blacklisted ( domain ) {
if is_domain_blacklisted ( domain ) . await {
return None ;
}
@ -84,30 +102,30 @@ fn icon_redirect(domain: &str, template: &str) -> Option<Redirect> {
}
#[ get( " /<domain>/icon.png " ) ]
fn icon_custom ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , & CONFIG . icon_service ( ) )
async fn icon_custom ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , & CONFIG . icon_service ( ) ) . await
}
#[ get( " /<domain>/icon.png " ) ]
fn icon_bitwarden ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , "https://icons.bitwarden.net/{}/icon.png" )
async fn icon_bitwarden ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , "https://icons.bitwarden.net/{}/icon.png" ) . await
}
#[ get( " /<domain>/icon.png " ) ]
fn icon_duckduckgo ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , "https://icons.duckduckgo.com/ip3/{}.ico" )
async fn icon_duckduckgo ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , "https://icons.duckduckgo.com/ip3/{}.ico" ) . await
}
#[ get( " /<domain>/icon.png " ) ]
fn icon_google ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , "https://www.google.com/s2/favicons?domain={}&sz=32" )
async fn icon_google ( domain : String ) -> Option < Redirect > {
icon_redirect ( & domain , "https://www.google.com/s2/favicons?domain={}&sz=32" ) . await
}
#[ get( " /<domain>/icon.png " ) ]
async fn icon_internal ( domain : String ) -> Cached < ( ContentType , Vec < u8 > ) > {
const FALLBACK_ICON : & [ u8 ] = include_bytes! ( "../static/images/fallback-icon.png" ) ;
if ! is_valid_domain ( & domain ) {
if ! is_valid_domain ( & domain ) . await {
warn ! ( "Invalid domain: {}" , domain ) ;
return Cached ::ttl (
( ContentType ::new ( "image" , "png" ) , FALLBACK_ICON . to_vec ( ) ) ,
@ -128,7 +146,7 @@ async fn icon_internal(domain: String) -> Cached<(ContentType, Vec<u8>)> {
///
/// This does some manual checks and makes use of Url to do some basic checking.
/// Domains can't be longer than 63 characters (not counting multiple subdomains) according to the RFCs, but we limit the total size to 255.
fn is_valid_domain ( domain : & str ) -> bool {
async fn is_valid_domain ( domain : & str ) -> bool {
const ALLOWED_CHARS : & str = "_-." ;
// If parsing the domain fails using Url, it will not work with reqwest.
@ -260,25 +278,22 @@ mod tests {
}
}
fn is_domain_blacklisted ( domain : & str ) -> bool {
let mut is_blacklisted = CONFIG . icon_blacklist_non_global_ips ( )
& & ( domain , 0 )
. to_socket_addrs ( )
. map ( | x | {
for ip_port in x {
if ! is_global ( ip_port . ip ( ) ) {
warn ! ( "IP {} for domain '{}' is not a global IP!" , ip_port . ip ( ) , domain ) ;
use cached ::proc_macro ::cached ;
#[ cached(key = " String " , convert = r# " { domain.to_string() } " #, size = 16, time = 60) ]
async fn is_domain_blacklisted ( domain : & str ) -> bool {
if CONFIG . icon_blacklist_non_global_ips ( ) {
if let Ok ( s ) = lookup_host ( ( domain , 0 ) ) . await {
for addr in s {
if ! is_global ( addr . ip ( ) ) {
debug ! ( "IP {} for domain '{}' is not a global IP!" , addr . ip ( ) , domain ) ;
return true ;
}
}
false
} )
. unwrap_or ( false ) ;
}
}
// Skip the regex check if the previous one is true already
if ! is_blacklisted {
if let Some ( blacklist ) = CONFIG . icon_blacklist_regex ( ) {
let mut regex_hashmap = ICON_BLACKLIST_REGEX . read ( ) . unwrap ( ) ;
let mut regex_hashmap = ICON_BLACKLIST_REGEX . read ( ) . await ;
// Use the pre-generated Regex stored in a Lazy HashMap if there's one, else generate it.
let regex = if let Some ( regex ) = regex_hashmap . get ( & blacklist ) {
@ -286,7 +301,7 @@ fn is_domain_blacklisted(domain: &str) -> bool {
} else {
drop ( regex_hashmap ) ;
let mut regex_hashmap_write = ICON_BLACKLIST_REGEX . write ( ) . unwrap ( ) ;
let mut regex_hashmap_write = ICON_BLACKLIST_REGEX . write ( ) . await ;
// Clear the current list if the previous key doesn't exist.
// To prevent growing of the HashMap after someone has changed it via the admin interface.
if regex_hashmap_write . len ( ) > = 1 {
@ -294,23 +309,21 @@ fn is_domain_blacklisted(domain: &str) -> bool {
}
// Generate the regex to store in the Lazy static HashMap.
let blacklist_regex = Regex ::new ( & blacklist ) . unwrap ( ) ;
regex_hashmap_write . insert ( blacklist . to_string ( ) , blacklist_regex ) ;
let blacklist_regex = Regex ::new ( & blacklist ) ;
regex_hashmap_write . insert ( blacklist . to_string ( ) , blacklist_regex . unwrap ( ) ) ;
drop ( regex_hashmap_write ) ;
regex_hashmap = ICON_BLACKLIST_REGEX . read ( ) . unwrap ( ) ;
regex_hashmap = ICON_BLACKLIST_REGEX . read ( ) . await ;
regex_hashmap . get ( & blacklist ) . unwrap ( )
} ;
// Use the pre-generate Regex stored in a Lazy HashMap.
if regex . is_match ( domain ) {
debug ! ( "Blacklisted domain: {} matched ICON_BLACKLIST_REGEX" , domain ) ;
is_blacklisted = true ;
}
return true ;
}
}
is_blacklisted
false
}
async fn get_icon ( domain : & str ) -> Option < ( Vec < u8 > , String ) > {
@ -322,7 +335,7 @@ async fn get_icon(domain: &str) -> Option<(Vec<u8>, String)> {
}
if let Some ( icon ) = get_cached_icon ( & path ) . await {
let icon_type = match get_icon_type ( & icon ) {
let icon_type = match get_icon_type ( & icon ) . await {
Some ( x ) = > x ,
_ = > "x-icon" ,
} ;
@ -412,91 +425,62 @@ impl Icon {
}
}
/// Iterates over the HTML document to find <base href="http://domain.tld">
/// When found it will stop the iteration and the found base href will be shared deref via `base_href`.
///
/// # Arguments
/// * `node` - A Parsed HTML document via html5ever::parse_document()
/// * `base_href` - a mutable url::Url which will be overwritten when a base href tag has been found.
///
fn get_base_href ( node : & std ::rc ::Rc < markup5ever_rcdom ::Node > , base_href : & mut url ::Url ) -> bool {
if let markup5ever_rcdom ::NodeData ::Element {
name ,
attrs ,
..
} = & node . data
async fn get_favicons_node (
dom : InfallibleTokenizer < StringReader < ' _ > , FaviconEmitter > ,
icons : & mut Vec < Icon > ,
url : & url ::Url ,
) {
const TAG_LINK : & [ u8 ] = b" link " ;
const TAG_BASE : & [ u8 ] = b" base " ;
const TAG_HEAD : & [ u8 ] = b" head " ;
const ATTR_REL : & [ u8 ] = b" rel " ;
const ATTR_HREF : & [ u8 ] = b" href " ;
const ATTR_SIZES : & [ u8 ] = b" sizes " ;
let mut base_url = url . clone ( ) ;
let mut icon_tags : Vec < StartTag > = Vec ::new ( ) ;
for token in dom {
match token {
FaviconToken ::StartTag ( tag ) = > {
if tag . name = = TAG_LINK
& & tag . attributes . contains_key ( ATTR_REL )
& & tag . attributes . contains_key ( ATTR_HREF )
{
if name . local . as_ref ( ) = = "base" {
let attrs = attrs . borrow ( ) ;
for attr in attrs . iter ( ) {
let attr_name = attr . name . local . as_ref ( ) ;
let attr_value = attr . value . as_ref ( ) ;
if attr_name = = "href" {
debug ! ( "Found base href: {}" , attr_value ) ;
* base_href = match base_href . join ( attr_value ) {
Ok ( href ) = > href ,
_ = > base_href . clone ( ) ,
let rel_value = std ::str ::from_utf8 ( tag . attributes . get ( ATTR_REL ) . unwrap ( ) )
. unwrap_or_default ( )
. to_ascii_lowercase ( ) ;
if rel_value . contains ( "icon" ) & & ! rel_value . contains ( "mask-icon" ) {
icon_tags . push ( tag ) ;
}
} else if tag . name = = TAG_BASE & & tag . attributes . contains_key ( ATTR_HREF ) {
let href = std ::str ::from_utf8 ( tag . attributes . get ( ATTR_HREF ) . unwrap ( ) ) . unwrap_or_default ( ) ;
debug ! ( "Found base href: {href}" ) ;
base_url = match base_url . join ( href ) {
Ok ( inner_url ) = > inner_url ,
_ = > url . clone ( ) ,
} ;
return true ;
}
}
return true ;
}
}
// TODO: Might want to limit the recursion depth?
for child in node . children . borrow ( ) . iter ( ) {
// Check if we got a true back and stop the iter.
// This means we found a <base> tag and can stop processing the html.
if get_base_href ( child , base_href ) {
return true ;
}
FaviconToken ::EndTag ( tag ) = > {
if tag . name = = TAG_HEAD {
break ;
}
false
}
fn get_favicons_node ( node : & std ::rc ::Rc < markup5ever_rcdom ::Node > , icons : & mut Vec < Icon > , url : & url ::Url ) {
if let markup5ever_rcdom ::NodeData ::Element {
name ,
attrs ,
..
} = & node . data
{
if name . local . as_ref ( ) = = "link" {
let mut has_rel = false ;
let mut href = None ;
let mut sizes = None ;
let attrs = attrs . borrow ( ) ;
for attr in attrs . iter ( ) {
let attr_name = attr . name . local . as_ref ( ) ;
let attr_value = attr . value . as_ref ( ) ;
if attr_name = = "rel" & & ICON_REL_REGEX . is_match ( attr_value ) & & ! ICON_REL_BLACKLIST . is_match ( attr_value )
{
has_rel = true ;
} else if attr_name = = "href" {
href = Some ( attr_value ) ;
} else if attr_name = = "sizes" {
sizes = Some ( attr_value ) ;
}
}
if has_rel {
if let Some ( inner_href ) = href {
if let Ok ( full_href ) = url . join ( inner_href ) . map ( String ::from ) {
let priority = get_icon_priority ( & full_href , sizes ) ;
icons . push ( Icon ::new ( priority , full_href ) ) ;
}
}
}
}
for icon_tag in icon_tags {
if let Some ( icon_href ) = icon_tag . attributes . get ( ATTR_HREF ) {
if let Ok ( full_href ) = base_url . join ( std ::str ::from_utf8 ( icon_href ) . unwrap_or_default ( ) ) {
let sizes = if let Some ( v ) = icon_tag . attributes . get ( ATTR_SIZES ) {
std ::str ::from_utf8 ( v ) . unwrap_or_default ( )
} else {
""
} ;
let priority = get_icon_priority ( full_href . as_str ( ) , sizes ) . await ;
icons . push ( Icon ::new ( priority , full_href . to_string ( ) ) ) ;
}
// TODO: Might want to limit the recursion depth?
for child in node . children . borrow ( ) . iter ( ) {
get_favicons_node ( child , icons , url ) ;
} ;
}
}
@ -514,13 +498,13 @@ struct IconUrlResult {
///
/// # Example
/// ```
/// let icon_result = get_icon_url("github.com") ?;
/// let icon_result = get_icon_url("vaultwarden.discourse.group") ?;
/// let icon_result = get_icon_url("github.com") .await ?;
/// let icon_result = get_icon_url("vaultwarden.discourse.group") .await ?;
/// ```
async fn get_icon_url ( domain : & str ) -> Result < IconUrlResult , Error > {
// Default URL with secure and insecure schemes
let ssldomain = format! ( "https://{ }", domain) ;
let httpdomain = format! ( "http://{ }", domain) ;
let ssldomain = format! ( "https://{ domain}" ) ;
let httpdomain = format! ( "http://{ domain}" ) ;
// First check the domain as given during the request for both HTTPS and HTTP.
let resp = match get_page ( & ssldomain ) . or_else ( | _ | get_page ( & httpdomain ) ) . await {
@ -537,26 +521,25 @@ async fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
tld = domain_parts . next_back ( ) . unwrap ( ) ,
base = domain_parts . next_back ( ) . unwrap ( )
) ;
if is_valid_domain ( & base_domain ) {
let sslbase = format! ( "https://{ }", base_domain) ;
let httpbase = format! ( "http://{ }", base_domain) ;
debug ! ( "[get_icon_url]: Trying without subdomains '{ }'", base_domain) ;
if is_valid_domain ( & base_domain ) . await {
let sslbase = format! ( "https://{ base_domain}" ) ;
let httpbase = format! ( "http://{ base_domain}" ) ;
debug ! ( "[get_icon_url]: Trying without subdomains '{ base_domain}'" ) ;
sub_resp = get_page ( & sslbase ) . or_else ( | _ | get_page ( & httpbase ) ) . await ;
}
// When the domain is not an IP and has fewer than 2 dots, try to add "www." in front of it.
} else if is_ip . is_err ( ) & & domain . matches ( '.' ) . count ( ) < 2 {
let www_domain = format! ( "www.{ }", domain) ;
if is_valid_domain ( & www_domain ) {
let sslwww = format! ( "https://{ }", www_domain) ;
let httpwww = format! ( "http://{ }", www_domain) ;
debug ! ( "[get_icon_url]: Trying with www. prefix '{ }'", www_domain) ;
let www_domain = format! ( "www.{ domain}" ) ;
if is_valid_domain ( & www_domain ) . await {
let sslwww = format! ( "https://{ www_domain}" ) ;
let httpwww = format! ( "http://{ www_domain}" ) ;
debug ! ( "[get_icon_url]: Trying with www. prefix '{ www_domain}'" ) ;
sub_resp = get_page ( & sslwww ) . or_else ( | _ | get_page ( & httpwww ) ) . await ;
}
}
sub_resp
}
} ;
@ -571,26 +554,23 @@ async fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// Set the referer to be used on the final request, some sites check this.
// Mostly used to prevent direct linking and for other security reasons.
referer = url . as_str( ) . to_string( ) ;
referer = url . to_string( ) ;
// Add the default favicon.ico to the list with the domain the content responded from.
// Add the fallback favicon.ico and apple-touch-icon.png to the list with the domain the content responded from.
iconlist . push ( Icon ::new ( 35 , String ::from ( url . join ( "/favicon.ico" ) . unwrap ( ) ) ) ) ;
iconlist . push ( Icon ::new ( 40 , String ::from ( url . join ( "/apple-touch-icon.png" ) . unwrap ( ) ) ) ) ;
// 384KB should be more than enough for the HTML, though as we only really need the HTML header.
let mut limited_reader = stream_to_bytes_limit ( content , 384 * 1024 ) . await ? . reader ( ) ;
use html5ever ::tendril ::TendrilSink ;
let dom = html5ever ::parse_document ( markup5ever_rcdom ::RcDom ::default ( ) , Default ::default ( ) )
. from_utf8 ( )
. read_from ( & mut limited_reader ) ? ;
let limited_reader = stream_to_bytes_limit ( content , 384 * 1024 ) . await ? . to_vec ( ) ;
let mut base_url : url ::Url = url ;
get_base_href ( & dom . document , & mut base_url ) ;
get_favicons_node ( & dom . document , & mut iconlist , & base_url ) ;
let dom = Tokenizer ::new_with_emitter ( limited_reader . to_reader ( ) , FaviconEmitter ::default ( ) ) . infallible ( ) ;
get_favicons_node ( dom , & mut iconlist , & url ) . await ;
} else {
// Add the default favicon.ico to the list with just the given domain
iconlist . push ( Icon ::new ( 35 , format! ( "{}/favicon.ico" , ssldomain ) ) ) ;
iconlist . push ( Icon ::new ( 35 , format! ( "{}/favicon.ico" , httpdomain ) ) ) ;
iconlist . push ( Icon ::new ( 35 , format! ( "{ssldomain}/favicon.ico" ) ) ) ;
iconlist . push ( Icon ::new ( 40 , format! ( "{ssldomain}/apple-touch-icon.png" ) ) ) ;
iconlist . push ( Icon ::new ( 35 , format! ( "{httpdomain}/favicon.ico" ) ) ) ;
iconlist . push ( Icon ::new ( 40 , format! ( "{httpdomain}/apple-touch-icon.png" ) ) ) ;
}
// Sort the iconlist by priority
@ -608,7 +588,7 @@ async fn get_page(url: &str) -> Result<Response, Error> {
}
async fn get_page_with_referer ( url : & str , referer : & str ) -> Result < Response , Error > {
if is_domain_blacklisted ( url ::Url ::parse ( url ) . unwrap ( ) . host_str ( ) . unwrap_or_default ( ) ) {
if is_domain_blacklisted ( url ::Url ::parse ( url ) . unwrap ( ) . host_str ( ) . unwrap_or_default ( ) ) . await {
warn ! ( "Favicon '{}' resolves to a blacklisted domain or IP!" , url ) ;
}
@ -632,12 +612,12 @@ async fn get_page_with_referer(url: &str, referer: &str) -> Result<Response, Err
///
/// # Example
/// ```
/// priority1 = get_icon_priority("http://example.com/path/to/a/favicon.png", "32x32") ;
/// priority2 = get_icon_priority("https://example.com/path/to/a/favicon.ico", "") ;
/// priority1 = get_icon_priority("http://example.com/path/to/a/favicon.png", "32x32") .await ;
/// priority2 = get_icon_priority("https://example.com/path/to/a/favicon.ico", "") .await ;
/// ```
fn get_icon_priority ( href : & str , sizes : Option < & str > ) -> u8 {
async fn get_icon_priority ( href : & str , sizes : & str ) -> u8 {
// Check if there is a dimension set
let ( width , height ) = parse_sizes ( sizes ) ;
let ( width , height ) = parse_sizes ( sizes ) .await ;
// Check if there is a size given
if width ! = 0 & & height ! = 0 {
@ -679,15 +659,15 @@ fn get_icon_priority(href: &str, sizes: Option<&str>) -> u8 {
///
/// # Example
/// ```
/// let (width, height) = parse_sizes("64x64") ; // (64, 64)
/// let (width, height) = parse_sizes("x128x128") ; // (128, 128)
/// let (width, height) = parse_sizes("32") ; // (0, 0)
/// let (width, height) = parse_sizes("64x64") .await ; // (64, 64)
/// let (width, height) = parse_sizes("x128x128") .await ; // (128, 128)
/// let (width, height) = parse_sizes("32") .await ; // (0, 0)
/// ```
fn parse_sizes ( sizes : Option < & str > ) -> ( u16 , u16 ) {
async fn parse_sizes ( sizes : & str ) -> ( u16 , u16 ) {
let mut width : u16 = 0 ;
let mut height : u16 = 0 ;
if let Some ( sizes ) = sizes {
if ! sizes . is_empty ( ) {
match ICON_SIZE_REGEX . captures ( sizes . trim ( ) ) {
None = > { }
Some ( dimensions ) = > {
@ -703,7 +683,7 @@ fn parse_sizes(sizes: Option<&str>) -> (u16, u16) {
}
async fn download_icon ( domain : & str ) -> Result < ( Bytes , Option < & str > ) , Error > {
if is_domain_blacklisted ( domain ) {
if is_domain_blacklisted ( domain ) . await {
err_silent ! ( "Domain is blacklisted" , domain )
}
@ -727,7 +707,7 @@ async fn download_icon(domain: &str) -> Result<(Bytes, Option<&str>), Error> {
// Also check if the size is at least 67 bytes, which seems to be the smallest PNG I could create.
if body . len ( ) > = 67 {
// Check if the icon type is allowed, else try an icon from the list.
icon_type = get_icon_type ( & body ) ;
icon_type = get_icon_type ( & body ) .await ;
if icon_type . is_none ( ) {
debug ! ( "Icon from {} data:image uri, is not a valid image type" , domain ) ;
continue ;
@ -742,10 +722,10 @@ async fn download_icon(domain: &str) -> Result<(Bytes, Option<&str>), Error> {
} else {
match get_page_with_referer ( & icon . href , & icon_result . referer ) . await {
Ok ( res ) = > {
buffer = stream_to_bytes_limit ( res , 512 * 1024 ) . await ? ; // 512 KB for each icon max
// Check if the icon type is allowed, else try an icon from the list.
icon_type = get_icon_type ( & buffer ) ;
buffer = stream_to_bytes_limit ( res , 5120 * 1024 ) . await ? ; // 5120KB/5MB for each icon max (Same as icons.bitwarden.net)
// Check if the icon type is allowed, else try an icon from the list.
icon_type = get_icon_type ( & buffer ) . await ;
if icon_type . is_none ( ) {
buffer . clear ( ) ;
debug ! ( "Icon from {}, is not a valid image type" , icon . href ) ;
@ -780,7 +760,7 @@ async fn save_icon(path: &str, icon: &[u8]) {
}
}
fn get_icon_type ( bytes : & [ u8 ] ) -> Option < & ' static str > {
async fn get_icon_type ( bytes : & [ u8 ] ) -> Option < & ' static str > {
match bytes {
[ 137 , 80 , 78 , 71 , .. ] = > Some ( "png" ) ,
[ 0 , 0 , 1 , 0 , .. ] = > Some ( "x-icon" ) ,
@ -792,13 +772,30 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
}
}
/// Minimize the amount of bytes to be parsed from a reqwest result.
/// This prevents very long parsing and memory usage.
async fn stream_to_bytes_limit ( res : Response , max_size : usize ) -> Result < Bytes , reqwest ::Error > {
let mut stream = res . bytes_stream ( ) . take ( max_size ) ;
let mut buf = BytesMut ::new ( ) ;
let mut size = 0 ;
while let Some ( chunk ) = stream . next ( ) . await {
let chunk = & chunk ? ;
size + = chunk . len ( ) ;
buf . extend ( chunk ) ;
if size > = max_size {
break ;
}
}
Ok ( buf . freeze ( ) )
}
/// This is an implementation of the default Cookie Jar from Reqwest and reqwest_cookie_store built by pfernie.
/// The default cookie jar used by Reqwest keeps all the cookies based upon the Max-Age or Expires which could be a long time.
/// That could be used for tracking, to prevent this we force the lifespan of the cookies to always be max two minutes.
/// A Cookie Jar is needed because some sites force a redirect with cookies to verify if a request uses cookies or not.
use cookie_store ::CookieStore ;
#[ derive(Default) ]
pub struct Jar ( RwLock < CookieStore > ) ;
pub struct Jar ( std::sync :: RwLock< CookieStore > ) ;
impl reqwest ::cookie ::CookieStore for Jar {
fn set_cookies ( & self , cookie_headers : & mut dyn Iterator < Item = & header ::HeaderValue > , url : & url ::Url ) {
@ -836,11 +833,136 @@ impl reqwest::cookie::CookieStore for Jar {
}
}
async fn stream_to_bytes_limit ( res : Response , max_size : usize ) -> Result < Bytes , reqwest ::Error > {
let mut stream = res . bytes_stream ( ) . take ( max_size ) ;
let mut buf = BytesMut ::new ( ) ;
while let Some ( chunk ) = stream . next ( ) . await {
buf . extend ( chunk ? ) ;
/// Custom FaviconEmitter for the html5gum parser.
/// The FaviconEmitter is using an almost 1:1 copy of the DefaultEmitter with some small changes.
/// This prevents emitting tags like comments, doctype and also strings between the tags.
/// Therefore parsing the HTML content is faster.
use std ::collections ::{ BTreeSet , VecDeque } ;
/// Token type produced by `FaviconEmitter`; only start tags and end tags are represented.
enum FaviconToken {
StartTag ( StartTag ) ,
EndTag ( EndTag ) ,
}
Ok ( buf . freeze ( ) )
/// State of the favicon-oriented html5gum emitter.
#[ derive(Default) ]
struct FaviconEmitter {
// Tag currently being built; set by `init_start_tag`/`init_end_tag` and taken by `emit_current_tag`.
current_token : Option < FaviconToken > ,
// Name of the most recently emitted start tag; compared with end tag names in `current_is_appropriate_end_tag_token`.
last_start_tag : Vec < u8 > ,
// (name, value) bytes of the attribute currently being built.
current_attribute : Option < ( Vec < u8 > , Vec < u8 > ) > ,
// Attribute names collected while the current token is an end tag.
seen_attributes : BTreeSet < Vec < u8 > > ,
// Finished tokens waiting to be handed out by `pop_token`.
emitted_tokens : VecDeque < FaviconToken > ,
}
impl FaviconEmitter {
fn emit_token ( & mut self , token : FaviconToken ) {
self . emitted_tokens . push_front ( token ) ;
}
fn flush_current_attribute ( & mut self ) {
if let Some ( ( k , v ) ) = self . current_attribute . take ( ) {
match self . current_token {
Some ( FaviconToken ::StartTag ( ref mut tag ) ) = > {
tag . attributes . entry ( k ) . and_modify ( | _ | { } ) . or_insert ( v ) ;
}
Some ( FaviconToken ::EndTag ( _ ) ) = > {
self . seen_attributes . insert ( k ) ;
}
_ = > {
debug_assert! ( false ) ;
}
}
}
}
}
impl Emitter for FaviconEmitter {
type Token = FaviconToken ;
fn set_last_start_tag ( & mut self , last_start_tag : Option < & [ u8 ] > ) {
self . last_start_tag . clear ( ) ;
self . last_start_tag . extend ( last_start_tag . unwrap_or_default ( ) ) ;
}
fn pop_token ( & mut self ) -> Option < Self ::Token > {
self . emitted_tokens . pop_back ( )
}
fn init_start_tag ( & mut self ) {
self . current_token = Some ( FaviconToken ::StartTag ( StartTag ::default ( ) ) ) ;
}
fn init_end_tag ( & mut self ) {
self . current_token = Some ( FaviconToken ::EndTag ( EndTag ::default ( ) ) ) ;
self . seen_attributes . clear ( ) ;
}
fn emit_current_tag ( & mut self ) {
self . flush_current_attribute ( ) ;
let mut token = self . current_token . take ( ) . unwrap ( ) ;
match token {
FaviconToken ::EndTag ( _ ) = > {
self . seen_attributes . clear ( ) ;
}
FaviconToken ::StartTag ( ref mut tag ) = > {
self . set_last_start_tag ( Some ( & tag . name ) ) ;
}
}
self . emit_token ( token ) ;
}
fn push_tag_name ( & mut self , s : & [ u8 ] ) {
match self . current_token {
Some (
FaviconToken ::StartTag ( StartTag {
ref mut name ,
..
} )
| FaviconToken ::EndTag ( EndTag {
ref mut name ,
..
} ) ,
) = > {
name . extend ( s ) ;
}
_ = > debug_assert! ( false ) ,
}
}
fn init_attribute ( & mut self ) {
self . flush_current_attribute ( ) ;
self . current_attribute = Some ( ( Vec ::new ( ) , Vec ::new ( ) ) ) ;
}
fn push_attribute_name ( & mut self , s : & [ u8 ] ) {
self . current_attribute . as_mut ( ) . unwrap ( ) . 0. extend ( s ) ;
}
fn push_attribute_value ( & mut self , s : & [ u8 ] ) {
self . current_attribute . as_mut ( ) . unwrap ( ) . 1. extend ( s ) ;
}
fn current_is_appropriate_end_tag_token ( & mut self ) -> bool {
match self . current_token {
Some ( FaviconToken ::EndTag ( ref tag ) ) = > ! self . last_start_tag . is_empty ( ) & & self . last_start_tag = = tag . name ,
_ = > false ,
}
}
// We do not want and need these parts of the HTML document
// These will be skipped and ignored during the tokenization and iteration.
fn emit_current_comment ( & mut self ) { }
fn emit_current_doctype ( & mut self ) { }
fn emit_eof ( & mut self ) { }
fn emit_error ( & mut self , _ : html5gum ::Error ) { }
fn emit_string ( & mut self , _ : & [ u8 ] ) { }
fn init_comment ( & mut self ) { }
fn init_doctype ( & mut self ) { }
fn push_comment ( & mut self , _ : & [ u8 ] ) { }
fn push_doctype_name ( & mut self , _ : & [ u8 ] ) { }
fn push_doctype_public_identifier ( & mut self , _ : & [ u8 ] ) { }
fn push_doctype_system_identifier ( & mut self , _ : & [ u8 ] ) { }
fn set_doctype_public_identifier ( & mut self , _ : & [ u8 ] ) { }
fn set_doctype_system_identifier ( & mut self , _ : & [ u8 ] ) { }
fn set_force_quirks ( & mut self ) { }
fn set_self_closing ( & mut self ) { }
}