@ -6,8 +6,11 @@ from modules.util import Failed
logger = util.logger  # shared project logger from modules.util
# Builder method names this module accepts from library configs.
builders = ["imdb_list", "imdb_id", "imdb_chart", "imdb_watchlist", "imdb_search", "imdb_award"]
# Chart keys valid for movie libraries (must be keys of charts/chart_urls).
movie_charts = [
    "box_office", "popular_movies", "top_movies", "top_english", "lowest_rated",
    "top_indian", "top_tamil", "top_telugu", "top_malayalam", "trending_india", "trending_tamil", "trending_telugu"
]
# Chart keys valid for show libraries.
show_charts = ["popular_shows", "top_shows", "trending_india"]
# Display name for each supported chart key (used when logging
# "Processing IMDb Chart: ..."). Keys must mirror chart_urls; the duplicate
# "lowest_rated" entry is collapsed to a single key.
charts = {
    "box_office": "Box Office",
    "popular_movies": "Most Popular Movies",
    # NOTE(review): the "popular_shows" entry fell inside a diff gap in this
    # view; display name assumed from IMDb's chart title — confirm against
    # the full file.
    "popular_shows": "Most Popular TV Shows",
    "top_movies": "Top 250 Movies",
    "top_shows": "Top 250 TV Shows",
    "top_english": "Top Rated English Movies",
    "top_tamil": "Top Rated Tamil Movies",
    "top_telugu": "Top Rated Telugu Movies",
    "top_malayalam": "Top Rated Malayalam Movies",
    "trending_india": "Trending Indian Movies & Shows",
    "trending_tamil": "Trending Tamil Movies",
    "trending_telugu": "Trending Telugu Movies",
    "top_indian": "Top Rated Indian Movies",
    "lowest_rated": "Lowest Rated Movies"
}
chart_urls = {
" box_office " : " chart/boxoffice " ,
" popular_movies " : " chart/moviemeter " ,
" popular_shows " : " chart/tvmeter " ,
" top_movies " : " chart/top " ,
" top_shows " : " chart/toptv " ,
" top_english " : " chart/top-english-movies " ,
" lowest_rated " : " chart/bottom " ,
" top_indian " : " india/top-rated-indian-movies " ,
" top_tamil " : " india/top-rated-tamil-movies " ,
" top_telugu " : " india/top-rated-telugu-movies " ,
" top_malayalam " : " india/top-rated-malayalam-movies " ,
" trending_india " : " india/upcoming " ,
" trending_tamil " : " india/tamil " ,
" trending_telugu " : " india/telugu " ,
}
imdb_search_attributes = [
" limit " , " sort_by " , " title " , " type " , " type.not " , " release.after " , " release.before " , " rating.gte " , " rating.lte " ,
@ -40,6 +65,17 @@ sort_by_options = {
" release " : " RELEASE_DATE " ,
}
# All valid imdb_search sort values, e.g. "popularity.asc" — one entry per
# (attribute, direction) pair from sort_by_options.
sort_options = [f"{a}.{d}" for a in sort_by_options for d in ["asc", "desc"]]
# Sort keys available to imdb_list (GraphQL list-paging operation). Unlike
# sort_by_options this includes the list-only "custom" (list order) and
# "added" (date added) sorts.
list_sort_by_options = {
    "custom": "LIST_ORDER",
    "popularity": "POPULARITY",
    "title": "TITLE_REGIONAL",
    "rating": "USER_RATING",
    "votes": "USER_RATING_COUNT",
    "runtime": "RUNTIME",
    "added": "DATE_ADDED",
    "release": "RELEASE_DATE",
}
# Fix: iterate list_sort_by_options, not the search-only sort_by_options.
# The original comprehension omitted "custom" and "added", so the imdb_list
# default "custom.asc" would never appear in the valid-options list used by
# validate_imdb_lists.
list_sort_options = [f"{a}.{d}" for a in list_sort_by_options for d in ["asc", "desc"]]
title_type_options = {
" movie " : " movie " , " tv_series " : " tvSeries " , " short " : " short " , " tv_episode " : " tvEpisode " , " tv_mini_series " : " tvMiniSeries " ,
" tv_movie " : " tvMovie " , " tv_special " : " tvSpecial " , " tv_short " : " tvShort " , " video_game " : " videoGame " , " video " : " video " ,
@ -89,7 +125,8 @@ event_options = {
}
base_url = "https://www.imdb.com"
git_base = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Awards/master"
# Persisted-query sha256 hashes, one per GraphQL operation: HASH for the
# title-search operation, LIST_HASH for the list-paging operation. The old
# single hash_url constant was pre-rename residue and is removed.
search_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/HASH"
list_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/LIST_HASH"
graphql_url = "https://api.graphql.imdb.com/"
list_url = f"{base_url}/list/ls"
@ -103,7 +140,8 @@ class IMDb:
self . _episode_ratings = None
self . _events_validation = None
self . _events = { }
self . _hash = None
self . _search_hash = None
self . _list_hash = None
self . event_url_validation = { }
def _request ( self , url , language = None , xpath = None , params = None ) :
@ -117,10 +155,16 @@ class IMDb:
return self . requests . post_json ( graphql_url , headers = { " content-type " : " application/json " } , json = json_data )
@property
def hash ( self ) :
if self . _hash is None :
self . _hash = self . requests . get ( hash_url ) . text . strip ( )
return self . _hash
def search_hash ( self ) :
if self . _search_hash is None :
self . _search_hash = self . requests . get ( search_hash_url ) . text . strip ( )
return self . _search_hash
@property
def list_hash ( self ) :
if self . _list_hash is None :
self . _list_hash = self . requests . get ( list_hash_url ) . text . strip ( )
return self . _list_hash
@property
def events_validation ( self ) :
@ -133,26 +177,29 @@ class IMDb:
self . _events [ event_id ] = self . requests . get_yaml ( f " { git_base } /events/ { event_id } .yml " ) . data
return self . _events [ event_id ]
def validate_imdb_lists ( self , err_type , imdb_lists , language ):
def validate_imdb_lists ( self , err_type , imdb_lists ):
valid_lists = [ ]
for imdb_dict in util . get_list ( imdb_lists , split = False ) :
if not isinstance ( imdb_dict , dict ) :
imdb_dict = { " url " : imdb_dict }
imdb_dict = { " list_id " : imdb_dict }
if " url " in imdb_dict and " list_id " not in imdb_dict :
imdb_dict [ " list_id " ] = imdb_dict [ " url " ]
dict_methods = { dm . lower ( ) : dm for dm in imdb_dict }
if " url " not in dict_methods :
raise Failed ( f " { err_type } Error: imdb_list ur l attribute not found" )
elif imdb_dict [ dict_methods [ " ur l" ] ] is None :
raise Failed ( f " { err_type } Error: imdb_list ur l attribute is blank" )
if " list_id " not in dict_methods :
raise Failed ( f " { err_type } Error: imdb_list list_id attribute not found" )
elif imdb_dict [ dict_methods [ " list_id " ] ] is None :
raise Failed ( f " { err_type } Error: imdb_list list_id attribute is blank" )
else :
imdb_url = imdb_dict [ dict_methods [ " url " ] ] . strip ( )
if imdb_url . startswith ( f " { base_url } /search/ " ) :
raise Failed ( " IMDb Error: URLs with https://www.imdb.com/search/ no longer works with imdb_list use imdb_search. " )
if imdb_url . startswith ( f " { base_url } /filmosearch/ " ) :
raise Failed ( " IMDb Error: URLs with https://www.imdb.com/filmosearch/ no longer works with imdb_list use imdb_search. " )
if not imdb_url . startswith ( list_url ) :
raise Failed ( f " IMDb Error: imdb_list URLs must begin with { list_url } " )
self . _total ( imdb_url , language )
list_count = None
imdb_url = imdb_dict [ dict_methods [ " list_id " ] ] . strip ( )
if imdb_url . startswith ( f " { base_url } /search/ " ) :
raise Failed ( " IMDb Error: URLs with https://www.imdb.com/search/ no longer works with imdb_list use imdb_search. " )
if imdb_url . startswith ( f " { base_url } /filmosearch/ " ) :
raise Failed ( " IMDb Error: URLs with https://www.imdb.com/filmosearch/ no longer works with imdb_list use imdb_search. " )
search = re . search ( r " (ls \ d+) " , imdb_url )
if not search :
raise Failed ( " IMDb Error: imdb_list list_id must begin with ls (ex. ls005526372) " )
new_dict = { " list_id " : search . group ( 1 ) }
if " limit " in dict_methods :
if imdb_dict [ dict_methods [ " limit " ] ] is None :
logger . warning ( f " { err_type } Warning: imdb_list limit attribute is blank using 0 as default " )
@ -160,14 +207,18 @@ class IMDb:
try :
value = int ( str ( imdb_dict [ dict_methods [ " limit " ] ] ) )
if 0 < = value :
list_count = value
new_dict[ " limit " ] = value
except ValueError :
pass
if list_count is None :
logger . warning ( f " { err_type } Warning: imdb_list limit attribute must be an integer 0 or greater using 0 as default " )
if list_count is None :
list_count = 0
valid_lists . append ( { " url " : imdb_url , " limit " : list_count } )
if " limit " not in new_dict :
logger . warning ( f " { err_type } Warning: imdb_list limit attribute: { imdb_dict [ dict_methods [ ' limit ' ] ] } must be an integer 0 or greater using 0 as default " )
if " limit " not in new_dict :
new_dict [ " limit " ] = 0
if " sort_by " in dict_methods :
new_dict [ " sort_by " ] = util . parse ( err_type , dict_methods , imdb_dict , parent = " imdb_list " , default = " custom.asc " , options = list_sort_options )
valid_lists . append ( new_dict )
return valid_lists
def validate_imdb_watchlists ( self , err_type , users , language ) :
@ -220,63 +271,12 @@ class IMDb:
return [ f for f in json . loads ( jsonline [ jsonline . find ( ' { ' ) : - 2 ] ) [ " starbars " ] ]
raise Failed ( f " IMDb Error: Failed to parse URL: { imdb_url } " )
    def _total(self, imdb_url, language):
        """Return (total result count, page size) for a legacy HTML lister page.

        Scrapes the "N titles" header text from the lister page and parses the
        integer out of it; raises Failed when no count can be parsed.
        NOTE(review): surrounding diff context suggests this legacy scraping
        path is superseded by the GraphQL pagination path — confirm it still
        has callers before relying on it.
        """
        xpath_total = "//div[@class='desc lister-total-num-results']/text()"
        per_page = 100  # lister pages serve at most 100 items per page
        results = self._request(imdb_url, language=language, xpath=xpath_total)
        total = 0
        for result in results:
            if "title" in result:
                try:
                    # e.g. "1,234 titles" -> 1234 (commas stripped first)
                    total = int(re.findall("(\\d+) title", result.replace(",", ""))[0])
                    break
                except IndexError:
                    pass
        if total > 0:
            return total, per_page
        raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}")
    def _ids_from_url(self, imdb_url, language, limit):
        """Scrape IMDb title IDs (tt...) from a legacy list/lister URL.

        Pages through results via the lister "page" query parameter, trimming
        the final page so at most `limit` IDs are returned (limit < 1 means
        "all"). Raises Failed when no IDs are found.
        NOTE(review): legacy HTML path — surrounding diff context suggests the
        GraphQL pagination path replaces this; confirm callers.
        """
        total, item_count = self._total(imdb_url, language)
        imdb_ids = []
        parsed_url = urlparse(imdb_url)
        params = parse_qs(parsed_url.query)
        imdb_base = parsed_url._replace(query=None).geturl()  # noqa
        # Strip any caller-supplied paging params; pagination is driven below.
        params.pop("start", None)  # noqa
        params.pop("count", None)  # noqa
        params.pop("page", None)  # noqa
        logger.trace(f"URL: {imdb_base}")
        logger.trace(f"Params: {params}")
        if limit < 1 or total < limit:
            limit = total
        # Size of the final (possibly partial) page.
        remainder = limit % item_count
        if remainder == 0:
            remainder = item_count
        num_of_pages = math.ceil(int(limit) / item_count)
        for i in range(1, num_of_pages + 1):
            start_num = (i - 1) * item_count + 1
            logger.ghost(f"Parsing Page {i}/{num_of_pages} {start_num}-{limit if i == num_of_pages else i * item_count}")
            params["page"] = i  # noqa
            ids_found = self._request(imdb_base, language=language, xpath="//div[contains(@class, 'lister-item-image')]//a/img//@data-tconst", params=params)
            if i == num_of_pages:
                ids_found = ids_found[:remainder]
            imdb_ids.extend(ids_found)
            time.sleep(2)  # throttle between page fetches
        logger.exorcise()
        if len(imdb_ids) > 0:
            return imdb_ids
        raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")
def _search_json ( self , data ) :
def _graphql_json ( self , data , search = True ) :
page_limit = 250 if search else 100
out = {
" locale " : " en-US " ,
" first " : data [ " limit " ] if " limit " in data and 0 < data [ " limit " ] < 250 else 250 ,
" titleTypeConstraint " : { " anyTitleTypeIds " : [ title_type_options [ t ] for t in data [ " type " ] ] if " type " in data else [ ] } ,
" first " : data [ " limit " ] if " limit " in data and 0 < data [ " limit " ] < page_limit else page_limit ,
}
sort = data [ " sort_by " ] if " sort_by " in data else " popularity.asc "
sort_by , sort_order = sort . split ( " . " )
out [ " sortBy " ] = sort_by_options [ sort_by ]
out [ " sortOrder " ] = sort_order . upper ( )
def check_constraint ( bases , mods , constraint , lower = " " , translation = None , range_name = None ) :
if not isinstance ( bases , list ) :
@ -302,84 +302,96 @@ class IMDb:
if range_data :
out [ constraint ] [ range_name [ i ] ] = range_data
check_constraint ( " type " , [ ( " not " , " excludeTitleTypeIds " ) ] , " titleTypeConstraint " , translation = title_type_options )
check_constraint ( " release " , [ ( " after " , " start " ) , ( " before " , " end " ) ] , " releaseDateConstraint " , range_name = " releaseDateRange " )
check_constraint ( " title " , [ ( " " , " searchTerm " ) ] , " titleTextConstraint " )
check_constraint ( [ " rating " , " votes " ] , [ ( " gte " , " min " ) , ( " lte " , " max " ) ] , " userRatingsConstraint " , range_name = [ " aggregateRatingRange " , " ratingsCountRange " ] )
check_constraint ( " genre " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) ] , " genreConstraint " , lower = " GenreIds " , translation = genre_options )
check_constraint ( " topic " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " no " ) ] , " withTitleDataConstraint " , lower = " DataAvailable " , translation = topic_options )
check_constraint ( " alternate_version " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " alternateVersionMatchingConstraint " , lower = " AlternateVersionTextTerms " )
check_constraint ( " crazy_credit " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " crazyCreditMatchingConstraint " , lower = " CrazyCreditTextTerms " )
check_constraint ( " location " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " filmingLocationConstraint " , lower = " Locations " )
check_constraint ( " goof " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " goofMatchingConstraint " , lower = " GoofTextTerms " )
check_constraint ( " plot " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " plotMatchingConstraint " , lower = " PlotTextTerms " )
check_constraint ( " quote " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " quoteMatchingConstraint " , lower = " QuoteTextTerms " )
check_constraint ( " soundtrack " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " soundtrackMatchingConstraint " , lower = " SoundtrackTextTerms " )
check_constraint ( " trivia " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " triviaMatchingConstraint " , lower = " TriviaTextTerms " )
if " event " in data or " event.winning " in data :
input_list = [ ]
if " event " in data :
input_list . extend ( [ event_options [ a ] if a in event_options else { " eventId " : a } for a in data [ " event " ] ] )
if " event.winning " in data :
for a in data [ " event.winning " ] :
award_dict = event_options [ a ] if a in event_options else { " eventId " : a }
award_dict [ " winnerFilter " ] = " WINNER_ONLY "
input_list . append ( award_dict )
out [ " awardConstraint " ] = { " allEventNominations " : input_list }
if any ( [ a in data for a in [ " imdb_top " , " imdb_bottom " , " popularity.gte " , " popularity.lte " ] ] ) :
ranges = [ ]
if " imdb_top " in data :
ranges . append ( { " rankRange " : { " max " : data [ " imdb_top " ] } , " rankedTitleListType " : " TOP_RATED_MOVIES " } )
if " imdb_bottom " in data :
ranges . append ( { " rankRange " : { " max " : data [ " imdb_bottom " ] } , " rankedTitleListType " : " LOWEST_RATED_MOVIES " } )
if " popularity.gte " in data or " popularity.lte " in data :
num_range = { }
if " popularity.lte " in data :
num_range [ " max " ] = data [ " popularity.lte " ]
if " popularity.gte " in data :
num_range [ " min " ] = data [ " popularity.gte " ]
ranges . append ( { " rankRange " : num_range , " rankedTitleListType " : " TITLE_METER " } )
out [ " rankedTitleListConstraint " ] = { " allRankedTitleLists " : ranges }
check_constraint ( " series " , [ ( " " , " any " ) , ( " not " , " exclude " ) ] , " episodicConstraint " , lower = " SeriesIds " )
check_constraint ( " list " , [ ( " " , " inAllLists " ) , ( " any " , " inAnyList " ) , ( " not " , " notInAnyList " ) ] , " listConstraint " )
if " company " in data :
company_ids = [ ]
for c in data [ " company " ] :
if c in company_options :
company_ids . extend ( company_options [ c ] )
else :
company_ids . append ( c )
out [ " creditedCompanyConstraint " ] = { " anyCompanyIds " : company_ids }
check_constraint ( " content_rating " , [ ( " " , " anyRegionCertificateRatings " ) ] , " certificateConstraint " )
check_constraint ( " country " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) , ( " origin " , " anyPrimary " ) ] , " originCountryConstraint " , lower = " Countries " )
check_constraint ( " keyword " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) ] , " keywordConstraint " , lower = " Keywords " , translation = ( " " , " - " ) )
check_constraint ( " language " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) , ( " primary " , " anyPrimary " ) ] , " languageConstraint " , lower = " Languages " )
check_constraint ( " cast " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) ] , " creditedNameConstraint " , lower = " NameIds " )
check_constraint ( " runtime " , [ ( " gte " , " min " ) , ( " lte " , " max " ) ] , " runtimeConstraint " , range_name = " runtimeRangeMinutes " )
sort = data [ " sort_by " ] if " sort_by " in data else " popularity.asc " if search else " custom.asc "
sort_by , sort_order = sort . split ( " . " )
if " adult " in data and data [ " adult " ] :
out [ " explicitContentConstraint " ] = { " explicitContentFilter " : " INCLUDE_ADULT " }
if search :
out [ " titleTypeConstraint " ] = { " anyTitleTypeIds " : [ title_type_options [ t ] for t in data [ " type " ] ] if " type " in data else [ ] }
out [ " sortBy " ] = sort_by_options [ sort_by ]
out [ " sortOrder " ] = sort_order . upper ( )
check_constraint ( " type " , [ ( " not " , " excludeTitleTypeIds " ) ] , " titleTypeConstraint " , translation = title_type_options )
check_constraint ( " release " , [ ( " after " , " start " ) , ( " before " , " end " ) ] , " releaseDateConstraint " , range_name = " releaseDateRange " )
check_constraint ( " title " , [ ( " " , " searchTerm " ) ] , " titleTextConstraint " )
check_constraint ( [ " rating " , " votes " ] , [ ( " gte " , " min " ) , ( " lte " , " max " ) ] , " userRatingsConstraint " , range_name = [ " aggregateRatingRange " , " ratingsCountRange " ] )
check_constraint ( " genre " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) ] , " genreConstraint " , lower = " GenreIds " , translation = genre_options )
check_constraint ( " topic " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " no " ) ] , " withTitleDataConstraint " , lower = " DataAvailable " , translation = topic_options )
check_constraint ( " alternate_version " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " alternateVersionMatchingConstraint " , lower = " AlternateVersionTextTerms " )
check_constraint ( " crazy_credit " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " crazyCreditMatchingConstraint " , lower = " CrazyCreditTextTerms " )
check_constraint ( " location " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " filmingLocationConstraint " , lower = " Locations " )
check_constraint ( " goof " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " goofMatchingConstraint " , lower = " GoofTextTerms " )
check_constraint ( " plot " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " plotMatchingConstraint " , lower = " PlotTextTerms " )
check_constraint ( " quote " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " quoteMatchingConstraint " , lower = " QuoteTextTerms " )
check_constraint ( " soundtrack " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " soundtrackMatchingConstraint " , lower = " SoundtrackTextTerms " )
check_constraint ( " trivia " , [ ( " " , " all " ) , ( " any " , " any " ) ] , " triviaMatchingConstraint " , lower = " TriviaTextTerms " )
if " event " in data or " event.winning " in data :
input_list = [ ]
if " event " in data :
input_list . extend ( [ event_options [ a ] if a in event_options else { " eventId " : a } for a in data [ " event " ] ] )
if " event.winning " in data :
for a in data [ " event.winning " ] :
award_dict = event_options [ a ] if a in event_options else { " eventId " : a }
award_dict [ " winnerFilter " ] = " WINNER_ONLY "
input_list . append ( award_dict )
out [ " awardConstraint " ] = { " allEventNominations " : input_list }
if any ( [ a in data for a in [ " imdb_top " , " imdb_bottom " , " popularity.gte " , " popularity.lte " ] ] ) :
ranges = [ ]
if " imdb_top " in data :
ranges . append ( { " rankRange " : { " max " : data [ " imdb_top " ] } , " rankedTitleListType " : " TOP_RATED_MOVIES " } )
if " imdb_bottom " in data :
ranges . append ( { " rankRange " : { " max " : data [ " imdb_bottom " ] } , " rankedTitleListType " : " LOWEST_RATED_MOVIES " } )
if " popularity.gte " in data or " popularity.lte " in data :
num_range = { }
if " popularity.lte " in data :
num_range [ " max " ] = data [ " popularity.lte " ]
if " popularity.gte " in data :
num_range [ " min " ] = data [ " popularity.gte " ]
ranges . append ( { " rankRange " : num_range , " rankedTitleListType " : " TITLE_METER " } )
out [ " rankedTitleListConstraint " ] = { " allRankedTitleLists " : ranges }
check_constraint ( " series " , [ ( " " , " any " ) , ( " not " , " exclude " ) ] , " episodicConstraint " , lower = " SeriesIds " )
check_constraint ( " list " , [ ( " " , " inAllLists " ) , ( " any " , " inAnyList " ) , ( " not " , " notInAnyList " ) ] , " listConstraint " )
if " company " in data :
company_ids = [ ]
for c in data [ " company " ] :
if c in company_options :
company_ids . extend ( company_options [ c ] )
else :
company_ids . append ( c )
out [ " creditedCompanyConstraint " ] = { " anyCompanyIds " : company_ids }
check_constraint ( " content_rating " , [ ( " " , " anyRegionCertificateRatings " ) ] , " certificateConstraint " )
check_constraint ( " country " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) , ( " origin " , " anyPrimary " ) ] , " originCountryConstraint " , lower = " Countries " )
check_constraint ( " keyword " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) ] , " keywordConstraint " , lower = " Keywords " , translation = ( " " , " - " ) )
check_constraint ( " language " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) , ( " primary " , " anyPrimary " ) ] , " languageConstraint " , lower = " Languages " )
check_constraint ( " cast " , [ ( " " , " all " ) , ( " any " , " any " ) , ( " not " , " exclude " ) ] , " creditedNameConstraint " , lower = " NameIds " )
check_constraint ( " runtime " , [ ( " gte " , " min " ) , ( " lte " , " max " ) ] , " runtimeConstraint " , range_name = " runtimeRangeMinutes " )
if " adult " in data and data [ " adult " ] :
out [ " explicitContentConstraint " ] = { " explicitContentFilter " : " INCLUDE_ADULT " }
else :
out [ " lsConst " ] = data [ " list_id " ]
out [ " sort " ] = { " by " : list_sort_by_options [ sort_by ] , " order " : sort_order . upper ( ) }
logger . trace ( out )
return {
" operationName " : " AdvancedTitleSearch " ,
" operationName " : " AdvancedTitleSearch " if search else " TitleListMainPage " ,
" variables " : out ,
" extensions " : { " persistedQuery " : { " version " : 1 , " sha256Hash " : self . hash } }
" extensions " : { " persistedQuery " : { " version " : 1 , " sha256Hash " : self . search_hash if search else self . list_ hash} }
}
def _search ( self , data ) :
json_obj = self . _search_json ( data )
item_count = 250
def _ pagination( self , data , search = True ) :
json_obj = self . _ graphql_json( data , search = search )
item_count = 250 if search else 100
imdb_ids = [ ]
logger . ghost ( " Parsing Page 1 " )
response_json = self . _graph_request ( json_obj )
try :
total = response_json [ " data " ] [ " advancedTitleSearch " ] [ " total " ]
search_data = response_json [ " data " ] [ " advancedTitleSearch " ] if search else response_json [ " data " ] [ " list " ] [ " titleListItemSearch " ]
total = search_data [ " total " ]
limit = data [ " limit " ]
if limit < 1 or total < limit :
limit = total
@ -387,16 +399,17 @@ class IMDb:
if remainder == 0 :
remainder = item_count
num_of_pages = math . ceil ( int ( limit ) / item_count )
end_cursor = respon se_json[ " data" ] [ " advancedTitleSearch " ] [ " pageInfo " ] [ " endCursor " ]
imdb_ids . extend ( [ n [ " node " ] [ " title " ] [ " id " ] for n in response_json [ " data " ] [ " advancedTitleSearch " ] [ " edges " ] ] )
end_cursor = search _data[ " pageInfo " ] [ " endCursor " ]
imdb_ids . extend ( [ n [ " node " ] [ " title " ] [ " id " ] if search else n [ " listItem " ] [ " id " ] for n in search_data [ " edges " ] ] )
if num_of_pages > 1 :
for i in range ( 2 , num_of_pages + 1 ) :
start_num = ( i - 1 ) * item_count + 1
logger . ghost ( f " Parsing Page { i } / { num_of_pages } { start_num } - { limit if i == num_of_pages else i * item_count } " )
json_obj [ " variables " ] [ " after " ] = end_cursor
response_json = self . _graph_request ( json_obj )
end_cursor = response_json [ " data " ] [ " advancedTitleSearch " ] [ " pageInfo " ] [ " endCursor " ]
ids_found = [ n [ " node " ] [ " title " ] [ " id " ] for n in response_json [ " data " ] [ " advancedTitleSearch " ] [ " edges " ] ]
search_data = response_json [ " data " ] [ " advancedTitleSearch " ] if search else response_json [ " data " ] [ " list " ] [ " titleListItemSearch " ]
end_cursor = search_data [ " pageInfo " ] [ " endCursor " ]
ids_found = [ n [ " node " ] [ " title " ] [ " id " ] if search else n [ " listItem " ] [ " id " ] for n in search_data [ " edges " ] ]
if i == num_of_pages :
ids_found = ids_found [ : remainder ]
imdb_ids . extend ( ids_found )
@ -489,35 +502,22 @@ class IMDb:
return parental_dict
def _ids_from_chart ( self , chart , language ) :
if chart == " box_office " :
url = " chart/boxoffice "
elif chart == " popular_movies " :
url = " chart/moviemeter "
elif chart == " popular_shows " :
url = " chart/tvmeter "
elif chart == " top_movies " :
url = " chart/top "
elif chart == " top_shows " :
url = " chart/toptv "
elif chart == " top_english " :
url = " chart/top-english-movies "
elif chart == " top_indian " :
url = " india/top-rated-indian-movies "
elif chart == " lowest_rated " :
url = " chart/bottom "
else :
if chart not in chart_urls :
raise Failed ( f " IMDb Error: chart: { chart } not " )
links = self . _request ( f " { base_url } / { url} " , language = language , xpath = " // li//a[@class=' ipc-title-link-wrapper ' ]/@href " )
return [ re. search ( " (tt \\ d+) " , link ) . group ( 1 ) for link in links ]
script_data = self . _request ( f " { base_url } / { chart_urls [ chart ] } " , language = language , xpath = " //script[@id= ' __NEXT_DATA__ ' ]/text() " ) [ 0 ]
return [ x . group ( 1 ) for x in re . finditer ( r ' " (tt \ d+) " ' , script_data ) ]
def get_imdb_ids ( self , method , data , language ) :
if method == " imdb_id " :
logger . info ( f " Processing IMDb ID: { data } " )
return [ ( data , " imdb " ) ]
elif method == " imdb_list " :
status = f " { data [ ' limit ' ] } Items at " if data [ ' limit ' ] > 0 else ' '
logger . info ( f " Processing IMDb List: { status } { data [ ' url ' ] } " )
return [ ( i , " imdb " ) for i in self . _ids_from_url ( data [ " url " ] , language , data [ " limit " ] ) ]
logger . info ( f " Processing IMDb List: { data [ ' list_id ' ] } " )
if data [ " limit " ] > 0 :
logger . info ( f " Limit: { data [ ' limit ' ] } " )
if " sort_by " in data :
logger . info ( f " Sort By: { data [ ' sort_by ' ] } " )
return [ ( i , " imdb " ) for i in self . _pagination ( data , search = False ) ]
elif method == " imdb_chart " :
logger . info ( f " Processing IMDb Chart: { charts [ data ] } " )
return [ ( _i , " imdb " ) for _i in self . _ids_from_chart ( data , language ) ]
@ -538,7 +538,7 @@ class IMDb:
logger . info ( f " Processing IMDb Search: " )
for k , v in data . items ( ) :
logger . info ( f " { k } : { v } " )
return [ ( _i , " imdb " ) for _i in self . _ search ( data ) ]
return [ ( _i , " imdb " ) for _i in self . _ pagination ( data ) ]
else :
raise Failed ( f " IMDb Error: Method { method } not supported " )