@ -8,17 +8,11 @@ logger = logging.getLogger("Plex Meta Manager")
builders = [ " imdb_list " , " imdb_id " ]
base_url = " https://www.imdb.com "
urls = {
" list " : f " { base_url } /list/ls " ,
" search " : f " { base_url } /search/title/ " ,
" keyword " : f " { base_url } /search/keyword/ "
" lists " : f " { base_url } /list/ls " ,
" searches " : f " { base_url } /search/title/ " ,
" keyword_searches " : f " { base_url } /search/keyword/ " ,
" filmography_searches " : f " { base_url } /filmosearch/ "
}
xpath = {
" imdb_id " : " //div[contains(@class, ' lister-item-image ' )]//a/img//@data-tconst " ,
" list " : " //div[@class= ' desc lister-total-num-results ' ]/text() " ,
" search " : " //div[@class= ' desc ' ]/span/text() " ,
" keyword " : " //div[@class= ' desc ' ]/text() "
}
item_counts = { " list " : 100 , " search " : 250 , " keyword " : 50 }
class IMDb :
def __init__ ( self , config ) :
@ -31,22 +25,25 @@ class IMDb:
imdb_dict = { " url " : imdb_dict }
dict_methods = { dm . lower ( ) : dm for dm in imdb_dict }
imdb_url = util . parse ( " url " , imdb_dict , methods = dict_methods , parent = " imdb_list " ) . strip ( )
if not imdb_url . startswith ( ( urls [ " list " ] , urls [ " search " ] , urls [ " keyword " ] ) ) :
raise Failed ( f " IMDb Error: { imdb_url } must begin with either: \n { urls [ ' list ' ] } (For Lists) \n { urls [ ' search ' ] } (For Searches) \n { urls [ ' keyword ' ] } (For Keyword Searches) " )
if not imdb_url . startswith ( ( v for k , v in urls . items ( ) ) ) :
fails = " \n " . join ( [ f " { v } (For { k . replace ( ' _ ' , ' ' ) . title ( ) } ) " for k , v in urls . items ( ) ] )
raise Failed ( f " IMDb Error: { imdb_url } must begin with either: { fails } " )
self . _total ( imdb_url , language )
list_count = util . parse ( " limit " , imdb_dict , datatype = " int " , methods = dict_methods , default = 0 , parent = " imdb_list " , minimum = 0 ) if " limit " in dict_methods else 0
valid_lists . append ( { " url " : imdb_url , " limit " : list_count } )
return valid_lists
def _total ( self , imdb_url , language ) :
headers = util . header ( language )
if imdb_url . startswith ( urls [ " keyword " ] ) :
page_type = " keyword "
elif imdb_url . startswith ( urls [ " list " ] ) :
page_type = " list "
if imdb_url . startswith ( urls [ " lists " ] ) :
xpath_total = " //div[@class= ' desc lister-total-num-results ' ]/text() "
per_page = 100
elif imdb_url . startswith ( urls [ " searches " ] ) :
xpath_total = " //div[@class= ' desc ' ]/span/text() "
per_page = 250
else :
page_type = " search "
results = self . config . get_html ( imdb_url , headers = headers ) . xpath ( xpath [ page_type ] )
xpath_total = " //div[@class= ' desc ' ]/text() "
per_page = 50
results = self . config . get_html ( imdb_url , headers = util . header ( language ) ) . xpath ( xpath_total )
total = 0
for result in results :
if " title " in result :
@ -56,7 +53,7 @@ class IMDb:
except IndexError :
pass
if total > 0 :
return total , item_counts[ page_type ]
return total , per_page
raise Failed ( f " IMDb Error: Failed to parse URL: { imdb_url } " )
def _ids_from_url ( self , imdb_url , language , limit ) :
@ -72,7 +69,7 @@ class IMDb:
if self . config . trace_mode :
logger . debug ( f " URL: { imdb_base } " )
logger . debug ( f " Params: { params } " )
search_url = imdb_base . startswith ( urls [ " searches " ] )
if limit < 1 or total < limit :
limit = total
remainder = limit % item_count
@ -82,13 +79,14 @@ class IMDb:
for i in range ( 1 , num_of_pages + 1 ) :
start_num = ( i - 1 ) * item_count + 1
util . print_return ( f " Parsing Page { i } / { num_of_pages } { start_num } - { limit if i == num_of_pages else i * item_count } " )
if imdb_base . startswith ( ( urls [ " list " ] , urls [ " keyword " ] ) ) :
params [ " page " ] = i # noqa
else :
if search_url :
params [ " count " ] = remainder if i == num_of_pages else item_count # noqa
params [ " start " ] = start_num # noqa
ids_found = self . config . get_html ( imdb_base , headers = headers , params = params ) . xpath ( xpath [ " imdb_id " ] )
if imdb_base . startswith ( ( urls [ " list " ] , urls [ " keyword " ] ) ) and i == num_of_pages :
else :
params [ " page " ] = i # noqa
response = self . config . get_html ( imdb_base , headers = headers , params = params )
ids_found = response . xpath ( " //div[contains(@class, ' lister-item-image ' )]//a/img//@data-tconst " )
if not search_url and i == num_of_pages :
ids_found = ids_found [ : remainder ]
imdb_ids . extend ( ids_found )
time . sleep ( 2 )