From 27a6c48b48935978ca12919c1560171fbe56c52f Mon Sep 17 00:00:00 2001 From: meisnate12 Date: Fri, 31 May 2024 15:30:14 -0400 Subject: [PATCH] [37] Fixes `imdb_list` and `imdb_chart` (#2092) --- .github/.wordlist.txt | 1 + CHANGELOG | 8 + VERSION | 2 +- docs/files/builders/imdb.md | 58 ++++-- docs/files/filters.md | 3 +- modules/builder.py | 7 +- modules/imdb.py | 356 ++++++++++++++++++------------------ modules/letterboxd.py | 2 +- 8 files changed, 233 insertions(+), 204 deletions(-) diff --git a/.github/.wordlist.txt b/.github/.wordlist.txt index 6e15b69e..3a51508f 100644 --- a/.github/.wordlist.txt +++ b/.github/.wordlist.txt @@ -252,6 +252,7 @@ SciFi ScudLee SDTV SemVer +setuptools ShawShank Skywalker Sohjiro diff --git a/CHANGELOG b/CHANGELOG index 51f78660..a44708ab 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,9 @@ # Requirements Update (requirements will need to be reinstalled) Updated PlexAPI requirement to 4.15.13 +Update lxml requirement to 5.2.2 +Update requests requirement to 2.32.3 +Update schedule requirement to 1.2.2 +Update setuptools requirement to 70.0.0 # Removed Features @@ -7,6 +11,8 @@ Updated PlexAPI requirement to 4.15.13 Checks requirement versions to print a message if one needs to be updated Added the `mass_added_at_update` operation to mass set the Added At field of Movies and Shows. Add automated Anime Aggregations for AniDB matching +Added `top_tamil`, `top_telugu`, `top_malayalam`, `trending_india`, `trending_tamil`, and `trending_telugu` as options for `imdb_chart` +Adds the `sort_by` attribute to `imdb_list` # Updates Changed the `overlay_artwork_filetype` Setting to accept `webp_lossy` and `webp_lossless` while the old attribute `webp` will be treated as `webp_lossy`. 
@@ -19,5 +25,7 @@ Fixes #2034 `anilist_userlist` `score` attribute wasn't being validated correctl Fixes #1367 Error when trying to symlink the logs folder Fixes #2028 TMDb IDs were being ignored on the report Fixes a bug when parsing a comma-separated string of ints +Fixes `imdb_chart` only getting 25 results +Fixes `imdb_list` not returning items Various other Minor Fixes \ No newline at end of file diff --git a/VERSION b/VERSION index e4a5d4c2..9db4b4be 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.1-build36 +2.0.1-build37 diff --git a/docs/files/builders/imdb.md b/docs/files/builders/imdb.md index e800e251..794bfd43 100644 --- a/docs/files/builders/imdb.md +++ b/docs/files/builders/imdb.md @@ -31,16 +31,22 @@ The expected input are the options below. Multiple values are supported as eithe The `sync_mode: sync` and `collection_order: custom` Setting are recommended since the lists are continuously updated and in a specific order. -| Name | Attribute | Works with Movies | Works with Shows | -|:-------------------------------------------------------------------------------|:-----------------|:------------------------------------------:|:------------------------------------------:| -| [Box Office](https://www.imdb.com/chart/boxoffice) | `box_office` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | -| [Most Popular Movies](https://www.imdb.com/chart/moviemeter) | `popular_movies` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | -| [Top 250 Movies](https://www.imdb.com/chart/top) | `top_movies` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | -| [Top Rated English Movies](https://www.imdb.com/chart/top-english-movies) | `top_english` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | -| [Most Popular TV Shows](https://www.imdb.com/chart/tvmeter) | `popular_shows` | :fontawesome-solid-circle-xmark:{ 
.red } | :fontawesome-solid-circle-check:{ .green } | -| [Top 250 TV Shows](https://www.imdb.com/chart/toptv) | `top_shows` | :fontawesome-solid-circle-xmark:{ .red } | :fontawesome-solid-circle-check:{ .green } | -| [Top Rated Indian Movies](https://www.imdb.com/india/top-rated-indian-movies/) | `top_indian` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | -| [Lowest Rated Movies](https://www.imdb.com/chart/bottom) | `lowest_rated` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| Name | Attribute | Works with Movies | Works with Shows | +|:-------------------------------------------------------------------------------------|:------------------|:------------------------------------------:|:------------------------------------------:| +| [Box Office](https://www.imdb.com/chart/boxoffice) | `box_office` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Most Popular Movies](https://www.imdb.com/chart/moviemeter) | `popular_movies` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Top 250 Movies](https://www.imdb.com/chart/top) | `top_movies` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Top Rated English Movies](https://www.imdb.com/chart/top-english-movies) | `top_english` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Most Popular TV Shows](https://www.imdb.com/chart/tvmeter) | `popular_shows` | :fontawesome-solid-circle-xmark:{ .red } | :fontawesome-solid-circle-check:{ .green } | +| [Top 250 TV Shows](https://www.imdb.com/chart/toptv) | `top_shows` | :fontawesome-solid-circle-xmark:{ .red } | :fontawesome-solid-circle-check:{ .green } | +| [Lowest Rated Movies](https://www.imdb.com/chart/bottom) | `lowest_rated` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | 
+| [Top Rated Indian Movies](https://www.imdb.com/india/top-rated-indian-movies/) | `top_indian` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Top Rated Tamil Movies](https://www.imdb.com/india/top-rated-tamil-movies/) | `top_tamil` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Top Rated Telugu Movies](https://www.imdb.com/india/top-rated-telugu-movies/) | `top_telugu` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Top Rated Malayalam Movies](https://www.imdb.com/india/top-rated-malayalam-movies/) | `top_malayalam` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Trending Indian Movies & Shows](https://www.imdb.com/india/upcoming/) | `trending_india` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-check:{ .green } | +| [Trending Tamil Movies](https://www.imdb.com/india/tamil/) | `trending_tamil` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | +| [Trending Telugu Movies](https://www.imdb.com/india/telugu/) | `trending_telugu` | :fontawesome-solid-circle-check:{ .green } | :fontawesome-solid-circle-xmark:{ .red } | ```yaml collections: @@ -62,34 +68,48 @@ collections: Finds every item in an IMDb List. -The expected input is an IMDb List URL. Multiple values are supported as a list only a comma-separated string will not work. +| List Parameter | Description | +|:---------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `list_id` | Specify the IMDb List ID. **This attribute is required.**
**Options:** The ID that starts with `ls` found in the URL of the list. (ex. `ls005526372`) | +| `limit` | Specify how many items you want returned by the query.
**Options:** Any Integer `0` or greater where `0` gets all items.
**Default:** `0` | +| `sort_by` | Choose from one of the many available sort options.
**Options:** `custom.asc`, `custom.desc`, `title.asc`, `title.desc`, `rating.asc`, `rating.desc`, `popularity.asc`, `popularity.desc`, `votes.asc`, `votes.desc`, `release.asc`, `release.desc`, `runtime.asc`, `runtime.desc`, `added.asc`, `added.desc`
**Default:** `custom.asc` | + +Multiple values are supported as a list only a comma-separated string will not work. The `sync_mode: sync` and `collection_order: custom` Setting are recommended since the lists are continuously updated and in a specific order. ```yaml collections: James Bonds: - imdb_list: https://www.imdb.com/list/ls006405458 + imdb_list: + list_id: ls006405458 + limit: 100 + sort_by: rating.asc collection_order: custom sync_mode: sync ``` -You can also limit the number of items to search for by using the `limit` and `url` parameters under `imdb_list`. +You can search multiple lists in one collection by using a list. ```yaml collections: Christmas: imdb_list: - - url: https://www.imdb.com/list/ls025976544/ + - list_id: ls025976544 limit: 10 - - url: https://www.imdb.com/list/ls003863000/ + sort_by: rating.asc + - list_id: ls003863000 limit: 10 - - url: https://www.imdb.com/list/ls027454200/ + sort_by: rating.asc + - list_id: ls027454200 limit: 10 - - url: https://www.imdb.com/list/ls027886673/ + sort_by: rating.asc + - list_id: ls027886673 limit: 10 - - url: https://www.imdb.com/list/ls097998599/ + sort_by: rating.asc + - list_id: ls097998599 limit: 10 + sort_by: rating.asc sync_mode: sync collection_order: alpha ``` @@ -184,7 +204,7 @@ The `sync_mode: sync` and `collection_order: custom` Setting are recommended sin | Search Parameter | Description | |:------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `limit` | Specify how items you want returned by the query.
**Options:** Any Integer greater than `0`
**Default:** `100` | +| `limit` | Specify how many items you want returned by the query.
**Options:** Any Integer `0` or greater where `0` gets all items.
**Default:** `100` | | `sort_by` | Choose from one of the many available sort options.
**Options:** `popularity.asc`, `popularity.desc`, `title.asc`, `title.desc`, `rating.asc`, `rating.desc`, `votes.asc`, `votes.desc`, `box_office.asc`, `box_office.desc`, `runtime.asc`, `runtime.desc`, `year.asc`, `year.desc`, `release.asc`, `release.desc`
**Default:** `popularity.asc` | | `title` | Search by title name.
**Options:** Any String | | `type` | Item must match at least one given type. Can be a comma-separated list.
**Options:** `movie`, `tv_series`, `short`, `tv_episode`, `tv_mini_series`, `tv_movie`, `tv_special`, `tv_short`, `video_game`, `video`, `music_video`, `podcast_series`, `podcast_episode` | diff --git a/docs/files/filters.md b/docs/files/filters.md index dd9f67de..38ce9dde 100644 --- a/docs/files/filters.md +++ b/docs/files/filters.md @@ -241,7 +241,8 @@ collections: ```yaml collections: Daniel Craig only James Bonds: - imdb_list: https://www.imdb.com/list/ls006405458/ + imdb_list: + list_id: ls006405458 filters: actor: Daniel Craig ``` diff --git a/modules/builder.py b/modules/builder.py index 9f102d45..8f41f0e8 100644 --- a/modules/builder.py +++ b/modules/builder.py @@ -1479,7 +1479,7 @@ class CollectionBuilder: raise Failed(f"{self.Type} Error: imdb_id {value} must begin with tt") elif method_name == "imdb_list": try: - for imdb_dict in self.config.IMDb.validate_imdb_lists(self.Type, method_data, self.language): + for imdb_dict in self.config.IMDb.validate_imdb_lists(self.Type, method_data): self.builders.append((method_name, imdb_dict)) except Failed as e: logger.error(e) @@ -1739,9 +1739,8 @@ class CollectionBuilder: final_attributes["letter"] = util.parse(self.Type, "prefix", dict_data, methods=dict_methods, parent=method_name) final_text += f"\nPrefix: {final_attributes['letter']}" if "type" in dict_methods: - type_list = util.parse(self.Type, "type", dict_data, datatype="commalist", methods=dict_methods, parent=method_name, options=mal.search_types) - final_attributes["type"] = ",".join(type_list) - final_text += f"\nType: {' or '.join(type_list)}" + final_attributes["type"] = util.parse(self.Type, "type", dict_data, methods=dict_methods, parent=method_name, options=mal.search_types) + final_text += f"\nType: {final_attributes['type']}" if "status" in dict_methods: final_attributes["status"] = util.parse(self.Type, "status", dict_data, methods=dict_methods, parent=method_name, options=mal.search_status) final_text += f"\nStatus: 
{final_attributes['status']}" diff --git a/modules/imdb.py b/modules/imdb.py index 2878900a..d833fd4e 100644 --- a/modules/imdb.py +++ b/modules/imdb.py @@ -6,8 +6,11 @@ from modules.util import Failed logger = util.logger builders = ["imdb_list", "imdb_id", "imdb_chart", "imdb_watchlist", "imdb_search", "imdb_award"] -movie_charts = ["box_office", "popular_movies", "top_movies", "top_english", "top_indian", "lowest_rated"] -show_charts = ["popular_shows", "top_shows"] +movie_charts = [ + "box_office", "popular_movies", "top_movies", "top_english", "lowest_rated", + "top_indian", "top_tamil", "top_telugu", "top_malayalam", "trending_india", "trending_tamil", "trending_telugu" +] +show_charts = ["popular_shows", "top_shows", "trending_india"] charts = { "box_office": "Box Office", "popular_movies": "Most Popular Movies", @@ -15,8 +18,30 @@ charts = { "top_movies": "Top 250 Movies", "top_shows": "Top 250 TV Shows", "top_english": "Top Rated English Movies", + "lowest_rated": "Lowest Rated Movies", + "top_tamil": "Top Rated Tamil Movies", + "top_telugu": "Top Rated Telugu Movies", + "top_malayalam": "Top Rated Malayalam Movies", + "trending_india": "Trending Indian Movies & Shows", + "trending_tamil": "Trending Tamil Movies", + "trending_telugu": "Trending Telugu Movies", "top_indian": "Top Rated Indian Movies", - "lowest_rated": "Lowest Rated Movies" +} +chart_urls = { + "box_office": "chart/boxoffice", + "popular_movies": "chart/moviemeter", + "popular_shows": "chart/tvmeter", + "top_movies": "chart/top", + "top_shows": "chart/toptv", + "top_english": "chart/top-english-movies", + "lowest_rated": "chart/bottom", + "top_indian": "india/top-rated-indian-movies", + "top_tamil": "india/top-rated-tamil-movies", + "top_telugu": "india/top-rated-telugu-movies", + "top_malayalam": "india/top-rated-malayalam-movies", + "trending_india": "india/upcoming", + "trending_tamil": "india/tamil", + "trending_telugu": "india/telugu", } imdb_search_attributes = [ "limit", "sort_by", 
"title", "type", "type.not", "release.after", "release.before", "rating.gte", "rating.lte", @@ -40,6 +65,17 @@ sort_by_options = { "release": "RELEASE_DATE", } sort_options = [f"{a}.{d}"for a in sort_by_options for d in ["asc", "desc"]] +list_sort_by_options = { + "custom": "LIST_ORDER", + "popularity": "POPULARITY", + "title": "TITLE_REGIONAL", + "rating": "USER_RATING", + "votes": "USER_RATING_COUNT", + "runtime": "RUNTIME", + "added": "DATE_ADDED", + "release": "RELEASE_DATE", +} +list_sort_options = [f"{a}.{d}"for a in sort_by_options for d in ["asc", "desc"]] title_type_options = { "movie": "movie", "tv_series": "tvSeries", "short": "short", "tv_episode": "tvEpisode", "tv_mini_series": "tvMiniSeries", "tv_movie": "tvMovie", "tv_special": "tvSpecial", "tv_short": "tvShort", "video_game": "videoGame", "video": "video", @@ -89,7 +125,8 @@ event_options = { } base_url = "https://www.imdb.com" git_base = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Awards/master" -hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/HASH" +search_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/HASH" +list_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/LIST_HASH" graphql_url = "https://api.graphql.imdb.com/" list_url = f"{base_url}/list/ls" @@ -103,7 +140,8 @@ class IMDb: self._episode_ratings = None self._events_validation = None self._events = {} - self._hash = None + self._search_hash = None + self._list_hash = None self.event_url_validation = {} def _request(self, url, language=None, xpath=None, params=None): @@ -117,10 +155,16 @@ class IMDb: return self.requests.post_json(graphql_url, headers={"content-type": "application/json"}, json=json_data) @property - def hash(self): - if self._hash is None: - self._hash = self.requests.get(hash_url).text.strip() - return self._hash + def search_hash(self): + if self._search_hash is None: + self._search_hash = 
self.requests.get(search_hash_url).text.strip() + return self._search_hash + + @property + def list_hash(self): + if self._list_hash is None: + self._list_hash = self.requests.get(list_hash_url).text.strip() + return self._list_hash @property def events_validation(self): @@ -133,26 +177,29 @@ class IMDb: self._events[event_id] = self.requests.get_yaml(f"{git_base}/events/{event_id}.yml").data return self._events[event_id] - def validate_imdb_lists(self, err_type, imdb_lists, language): + def validate_imdb_lists(self, err_type, imdb_lists): valid_lists = [] for imdb_dict in util.get_list(imdb_lists, split=False): if not isinstance(imdb_dict, dict): - imdb_dict = {"url": imdb_dict} + imdb_dict = {"list_id": imdb_dict} + if "url" in imdb_dict and "list_id" not in imdb_dict: + imdb_dict["list_id"] = imdb_dict["url"] dict_methods = {dm.lower(): dm for dm in imdb_dict} - if "url" not in dict_methods: - raise Failed(f"{err_type} Error: imdb_list url attribute not found") - elif imdb_dict[dict_methods["url"]] is None: - raise Failed(f"{err_type} Error: imdb_list url attribute is blank") + if "list_id" not in dict_methods: + raise Failed(f"{err_type} Error: imdb_list list_id attribute not found") + elif imdb_dict[dict_methods["list_id"]] is None: + raise Failed(f"{err_type} Error: imdb_list list_id attribute is blank") else: - imdb_url = imdb_dict[dict_methods["url"]].strip() - if imdb_url.startswith(f"{base_url}/search/"): - raise Failed("IMDb Error: URLs with https://www.imdb.com/search/ no longer works with imdb_list use imdb_search.") - if imdb_url.startswith(f"{base_url}/filmosearch/"): - raise Failed("IMDb Error: URLs with https://www.imdb.com/filmosearch/ no longer works with imdb_list use imdb_search.") - if not imdb_url.startswith(list_url): - raise Failed(f"IMDb Error: imdb_list URLs must begin with {list_url}") - self._total(imdb_url, language) - list_count = None + imdb_url = imdb_dict[dict_methods["list_id"]].strip() + if 
imdb_url.startswith(f"{base_url}/search/"): + raise Failed("IMDb Error: URLs with https://www.imdb.com/search/ no longer works with imdb_list use imdb_search.") + if imdb_url.startswith(f"{base_url}/filmosearch/"): + raise Failed("IMDb Error: URLs with https://www.imdb.com/filmosearch/ no longer works with imdb_list use imdb_search.") + search = re.search(r"(ls\d+)", imdb_url) + if not search: + raise Failed("IMDb Error: imdb_list list_id must begin with ls (ex. ls005526372)") + new_dict = {"list_id": search.group(1)} + if "limit" in dict_methods: if imdb_dict[dict_methods["limit"]] is None: logger.warning(f"{err_type} Warning: imdb_list limit attribute is blank using 0 as default") @@ -160,14 +207,18 @@ class IMDb: try: value = int(str(imdb_dict[dict_methods["limit"]])) if 0 <= value: - list_count = value + new_dict["limit"] = value except ValueError: pass - if list_count is None: - logger.warning(f"{err_type} Warning: imdb_list limit attribute must be an integer 0 or greater using 0 as default") - if list_count is None: - list_count = 0 - valid_lists.append({"url": imdb_url, "limit": list_count}) + if "limit" not in new_dict: + logger.warning(f"{err_type} Warning: imdb_list limit attribute: {imdb_dict[dict_methods['limit']]} must be an integer 0 or greater using 0 as default") + if "limit" not in new_dict: + new_dict["limit"] = 0 + + if "sort_by" in dict_methods: + new_dict["sort_by"] = util.parse(err_type, dict_methods, imdb_dict, parent="imdb_list", default="custom.asc", options=list_sort_options) + + valid_lists.append(new_dict) return valid_lists def validate_imdb_watchlists(self, err_type, users, language): @@ -220,63 +271,12 @@ class IMDb: return [f for f in json.loads(jsonline[jsonline.find('{'):-2])["starbars"]] raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}") - def _total(self, imdb_url, language): - xpath_total = "//div[@class='desc lister-total-num-results']/text()" - per_page = 100 - results = self._request(imdb_url, language=language, 
xpath=xpath_total) - total = 0 - for result in results: - if "title" in result: - try: - total = int(re.findall("(\\d+) title", result.replace(",", ""))[0]) - break - except IndexError: - pass - if total > 0: - return total, per_page - raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}") - - def _ids_from_url(self, imdb_url, language, limit): - total, item_count = self._total(imdb_url, language) - imdb_ids = [] - parsed_url = urlparse(imdb_url) - params = parse_qs(parsed_url.query) - imdb_base = parsed_url._replace(query=None).geturl() # noqa - params.pop("start", None) # noqa - params.pop("count", None) # noqa - params.pop("page", None) # noqa - logger.trace(f"URL: {imdb_base}") - logger.trace(f"Params: {params}") - if limit < 1 or total < limit: - limit = total - remainder = limit % item_count - if remainder == 0: - remainder = item_count - num_of_pages = math.ceil(int(limit) / item_count) - for i in range(1, num_of_pages + 1): - start_num = (i - 1) * item_count + 1 - logger.ghost(f"Parsing Page {i}/{num_of_pages} {start_num}-{limit if i == num_of_pages else i * item_count}") - params["page"] = i # noqa - ids_found = self._request(imdb_base, language=language, xpath="//div[contains(@class, 'lister-item-image')]//a/img//@data-tconst", params=params) - if i == num_of_pages: - ids_found = ids_found[:remainder] - imdb_ids.extend(ids_found) - time.sleep(2) - logger.exorcise() - if len(imdb_ids) > 0: - return imdb_ids - raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}") - - def _search_json(self, data): + def _graphql_json(self, data, search=True): + page_limit = 250 if search else 100 out = { "locale": "en-US", - "first": data["limit"] if "limit" in data and 0 < data["limit"] < 250 else 250, - "titleTypeConstraint": {"anyTitleTypeIds": [title_type_options[t] for t in data["type"]] if "type" in data else []}, + "first": data["limit"] if "limit" in data and 0 < data["limit"] < page_limit else page_limit, } - sort = data["sort_by"] if "sort_by" in data 
else "popularity.asc" - sort_by, sort_order = sort.split(".") - out["sortBy"] = sort_by_options[sort_by] - out["sortOrder"] = sort_order.upper() def check_constraint(bases, mods, constraint, lower="", translation=None, range_name=None): if not isinstance(bases, list): @@ -302,84 +302,96 @@ class IMDb: if range_data: out[constraint][range_name[i]] = range_data - check_constraint("type", [("not", "excludeTitleTypeIds")], "titleTypeConstraint", translation=title_type_options) - check_constraint("release", [("after", "start"), ("before", "end")], "releaseDateConstraint", range_name="releaseDateRange") - check_constraint("title", [("", "searchTerm")], "titleTextConstraint") - check_constraint(["rating", "votes"], [("gte", "min"), ("lte", "max")], "userRatingsConstraint", range_name=["aggregateRatingRange", "ratingsCountRange"]) - check_constraint("genre", [("", "all"), ("any", "any"), ("not", "exclude")], "genreConstraint", lower="GenreIds", translation=genre_options) - check_constraint("topic", [("", "all"), ("any", "any"), ("not", "no")], "withTitleDataConstraint", lower="DataAvailable", translation=topic_options) - check_constraint("alternate_version", [("", "all"), ("any", "any")], "alternateVersionMatchingConstraint", lower="AlternateVersionTextTerms") - check_constraint("crazy_credit", [("", "all"), ("any", "any")], "crazyCreditMatchingConstraint", lower="CrazyCreditTextTerms") - check_constraint("location", [("", "all"), ("any", "any")], "filmingLocationConstraint", lower="Locations") - check_constraint("goof", [("", "all"), ("any", "any")], "goofMatchingConstraint", lower="GoofTextTerms") - check_constraint("plot", [("", "all"), ("any", "any")], "plotMatchingConstraint", lower="PlotTextTerms") - check_constraint("quote", [("", "all"), ("any", "any")], "quoteMatchingConstraint", lower="QuoteTextTerms") - check_constraint("soundtrack", [("", "all"), ("any", "any")], "soundtrackMatchingConstraint", lower="SoundtrackTextTerms") - check_constraint("trivia", [("", 
"all"), ("any", "any")], "triviaMatchingConstraint", lower="TriviaTextTerms") - - if "event" in data or "event.winning" in data: - input_list = [] - if "event" in data: - input_list.extend([event_options[a] if a in event_options else {"eventId": a} for a in data["event"]]) - if "event.winning" in data: - for a in data["event.winning"]: - award_dict = event_options[a] if a in event_options else {"eventId": a} - award_dict["winnerFilter"] = "WINNER_ONLY" - input_list.append(award_dict) - out["awardConstraint"] = {"allEventNominations": input_list} - - if any([a in data for a in ["imdb_top", "imdb_bottom", "popularity.gte", "popularity.lte"]]): - ranges = [] - if "imdb_top" in data: - ranges.append({"rankRange": {"max": data["imdb_top"]}, "rankedTitleListType": "TOP_RATED_MOVIES"}) - if "imdb_bottom" in data: - ranges.append({"rankRange": {"max": data["imdb_bottom"]}, "rankedTitleListType": "LOWEST_RATED_MOVIES"}) - if "popularity.gte" in data or "popularity.lte" in data: - num_range = {} - if "popularity.lte" in data: - num_range["max"] = data["popularity.lte"] - if "popularity.gte" in data: - num_range["min"] = data["popularity.gte"] - ranges.append({"rankRange": num_range, "rankedTitleListType": "TITLE_METER"}) - out["rankedTitleListConstraint"] = {"allRankedTitleLists": ranges} - - check_constraint("series", [("", "any"), ("not", "exclude")], "episodicConstraint", lower="SeriesIds") - check_constraint("list", [("", "inAllLists"), ("any", "inAnyList"), ("not", "notInAnyList")], "listConstraint") - - if "company" in data: - company_ids = [] - for c in data["company"]: - if c in company_options: - company_ids.extend(company_options[c]) - else: - company_ids.append(c) - out["creditedCompanyConstraint"] = {"anyCompanyIds": company_ids} - - check_constraint("content_rating", [("", "anyRegionCertificateRatings")], "certificateConstraint") - check_constraint("country", [("", "all"), ("any", "any"), ("not", "exclude"), ("origin", "anyPrimary")], "originCountryConstraint", 
lower="Countries") - check_constraint("keyword", [("", "all"), ("any", "any"), ("not", "exclude")], "keywordConstraint", lower="Keywords", translation=(" ", "-")) - check_constraint("language", [("", "all"), ("any", "any"), ("not", "exclude"), ("primary", "anyPrimary")], "languageConstraint", lower="Languages") - check_constraint("cast", [("", "all"), ("any", "any"), ("not", "exclude")], "creditedNameConstraint", lower="NameIds") - check_constraint("runtime", [("gte", "min"), ("lte", "max")], "runtimeConstraint", range_name="runtimeRangeMinutes") + sort = data["sort_by"] if "sort_by" in data else "popularity.asc" if search else "custom.asc" + sort_by, sort_order = sort.split(".") - if "adult" in data and data["adult"]: - out["explicitContentConstraint"] = {"explicitContentFilter": "INCLUDE_ADULT"} + if search: + out["titleTypeConstraint"] = {"anyTitleTypeIds": [title_type_options[t] for t in data["type"]] if "type" in data else []} + out["sortBy"] = sort_by_options[sort_by] + out["sortOrder"] = sort_order.upper() + + check_constraint("type", [("not", "excludeTitleTypeIds")], "titleTypeConstraint", translation=title_type_options) + check_constraint("release", [("after", "start"), ("before", "end")], "releaseDateConstraint", range_name="releaseDateRange") + check_constraint("title", [("", "searchTerm")], "titleTextConstraint") + check_constraint(["rating", "votes"], [("gte", "min"), ("lte", "max")], "userRatingsConstraint", range_name=["aggregateRatingRange", "ratingsCountRange"]) + check_constraint("genre", [("", "all"), ("any", "any"), ("not", "exclude")], "genreConstraint", lower="GenreIds", translation=genre_options) + check_constraint("topic", [("", "all"), ("any", "any"), ("not", "no")], "withTitleDataConstraint", lower="DataAvailable", translation=topic_options) + check_constraint("alternate_version", [("", "all"), ("any", "any")], "alternateVersionMatchingConstraint", lower="AlternateVersionTextTerms") + check_constraint("crazy_credit", [("", "all"), ("any", 
"any")], "crazyCreditMatchingConstraint", lower="CrazyCreditTextTerms") + check_constraint("location", [("", "all"), ("any", "any")], "filmingLocationConstraint", lower="Locations") + check_constraint("goof", [("", "all"), ("any", "any")], "goofMatchingConstraint", lower="GoofTextTerms") + check_constraint("plot", [("", "all"), ("any", "any")], "plotMatchingConstraint", lower="PlotTextTerms") + check_constraint("quote", [("", "all"), ("any", "any")], "quoteMatchingConstraint", lower="QuoteTextTerms") + check_constraint("soundtrack", [("", "all"), ("any", "any")], "soundtrackMatchingConstraint", lower="SoundtrackTextTerms") + check_constraint("trivia", [("", "all"), ("any", "any")], "triviaMatchingConstraint", lower="TriviaTextTerms") + + if "event" in data or "event.winning" in data: + input_list = [] + if "event" in data: + input_list.extend([event_options[a] if a in event_options else {"eventId": a} for a in data["event"]]) + if "event.winning" in data: + for a in data["event.winning"]: + award_dict = event_options[a] if a in event_options else {"eventId": a} + award_dict["winnerFilter"] = "WINNER_ONLY" + input_list.append(award_dict) + out["awardConstraint"] = {"allEventNominations": input_list} + + if any([a in data for a in ["imdb_top", "imdb_bottom", "popularity.gte", "popularity.lte"]]): + ranges = [] + if "imdb_top" in data: + ranges.append({"rankRange": {"max": data["imdb_top"]}, "rankedTitleListType": "TOP_RATED_MOVIES"}) + if "imdb_bottom" in data: + ranges.append({"rankRange": {"max": data["imdb_bottom"]}, "rankedTitleListType": "LOWEST_RATED_MOVIES"}) + if "popularity.gte" in data or "popularity.lte" in data: + num_range = {} + if "popularity.lte" in data: + num_range["max"] = data["popularity.lte"] + if "popularity.gte" in data: + num_range["min"] = data["popularity.gte"] + ranges.append({"rankRange": num_range, "rankedTitleListType": "TITLE_METER"}) + out["rankedTitleListConstraint"] = {"allRankedTitleLists": ranges} + + check_constraint("series", 
[("", "any"), ("not", "exclude")], "episodicConstraint", lower="SeriesIds") + check_constraint("list", [("", "inAllLists"), ("any", "inAnyList"), ("not", "notInAnyList")], "listConstraint") + + if "company" in data: + company_ids = [] + for c in data["company"]: + if c in company_options: + company_ids.extend(company_options[c]) + else: + company_ids.append(c) + out["creditedCompanyConstraint"] = {"anyCompanyIds": company_ids} + + check_constraint("content_rating", [("", "anyRegionCertificateRatings")], "certificateConstraint") + check_constraint("country", [("", "all"), ("any", "any"), ("not", "exclude"), ("origin", "anyPrimary")], "originCountryConstraint", lower="Countries") + check_constraint("keyword", [("", "all"), ("any", "any"), ("not", "exclude")], "keywordConstraint", lower="Keywords", translation=(" ", "-")) + check_constraint("language", [("", "all"), ("any", "any"), ("not", "exclude"), ("primary", "anyPrimary")], "languageConstraint", lower="Languages") + check_constraint("cast", [("", "all"), ("any", "any"), ("not", "exclude")], "creditedNameConstraint", lower="NameIds") + check_constraint("runtime", [("gte", "min"), ("lte", "max")], "runtimeConstraint", range_name="runtimeRangeMinutes") + + if "adult" in data and data["adult"]: + out["explicitContentConstraint"] = {"explicitContentFilter": "INCLUDE_ADULT"} + else: + out["lsConst"] = data["list_id"] + out["sort"] = {"by": list_sort_by_options[sort_by], "order": sort_order.upper()} logger.trace(out) return { - "operationName": "AdvancedTitleSearch", + "operationName": "AdvancedTitleSearch" if search else "TitleListMainPage", "variables": out, - "extensions": {"persistedQuery": {"version": 1, "sha256Hash": self.hash}} + "extensions": {"persistedQuery": {"version": 1, "sha256Hash": self.search_hash if search else self.list_hash}} } - def _search(self, data): - json_obj = self._search_json(data) - item_count = 250 + def _pagination(self, data, search=True): + json_obj = self._graphql_json(data, 
search=search) + item_count = 250 if search else 100 imdb_ids = [] logger.ghost("Parsing Page 1") response_json = self._graph_request(json_obj) try: - total = response_json["data"]["advancedTitleSearch"]["total"] + search_data = response_json["data"]["advancedTitleSearch"] if search else response_json["data"]["list"]["titleListItemSearch"] + total = search_data["total"] limit = data["limit"] if limit < 1 or total < limit: limit = total @@ -387,16 +399,17 @@ class IMDb: if remainder == 0: remainder = item_count num_of_pages = math.ceil(int(limit) / item_count) - end_cursor = response_json["data"]["advancedTitleSearch"]["pageInfo"]["endCursor"] - imdb_ids.extend([n["node"]["title"]["id"] for n in response_json["data"]["advancedTitleSearch"]["edges"]]) + end_cursor = search_data["pageInfo"]["endCursor"] + imdb_ids.extend([n["node"]["title"]["id"] if search else n["listItem"]["id"] for n in search_data["edges"]]) if num_of_pages > 1: for i in range(2, num_of_pages + 1): start_num = (i - 1) * item_count + 1 logger.ghost(f"Parsing Page {i}/{num_of_pages} {start_num}-{limit if i == num_of_pages else i * item_count}") json_obj["variables"]["after"] = end_cursor response_json = self._graph_request(json_obj) - end_cursor = response_json["data"]["advancedTitleSearch"]["pageInfo"]["endCursor"] - ids_found = [n["node"]["title"]["id"] for n in response_json["data"]["advancedTitleSearch"]["edges"]] + search_data = response_json["data"]["advancedTitleSearch"] if search else response_json["data"]["list"]["titleListItemSearch"] + end_cursor = search_data["pageInfo"]["endCursor"] + ids_found = [n["node"]["title"]["id"] if search else n["listItem"]["id"] for n in search_data["edges"]] if i == num_of_pages: ids_found = ids_found[:remainder] imdb_ids.extend(ids_found) @@ -489,35 +502,22 @@ class IMDb: return parental_dict def _ids_from_chart(self, chart, language): - if chart == "box_office": - url = "chart/boxoffice" - elif chart == "popular_movies": - url = "chart/moviemeter" - elif 
chart == "popular_shows": - url = "chart/tvmeter" - elif chart == "top_movies": - url = "chart/top" - elif chart == "top_shows": - url = "chart/toptv" - elif chart == "top_english": - url = "chart/top-english-movies" - elif chart == "top_indian": - url = "india/top-rated-indian-movies" - elif chart == "lowest_rated": - url = "chart/bottom" - else: + if chart not in chart_urls: raise Failed(f"IMDb Error: chart: {chart} not ") - links = self._request(f"{base_url}/{url}", language=language, xpath="//li//a[@class='ipc-title-link-wrapper']/@href") - return [re.search("(tt\\d+)", link).group(1) for link in links] + script_data = self._request(f"{base_url}/{chart_urls[chart]}", language=language, xpath="//script[@id='__NEXT_DATA__']/text()")[0] + return [x.group(1) for x in re.finditer(r'"(tt\d+)"', script_data)] def get_imdb_ids(self, method, data, language): if method == "imdb_id": logger.info(f"Processing IMDb ID: {data}") return [(data, "imdb")] elif method == "imdb_list": - status = f"{data['limit']} Items at " if data['limit'] > 0 else '' - logger.info(f"Processing IMDb List: {status}{data['url']}") - return [(i, "imdb") for i in self._ids_from_url(data["url"], language, data["limit"])] + logger.info(f"Processing IMDb List: {data['list_id']}") + if data["limit"] > 0: + logger.info(f" Limit: {data['limit']}") + if "sort_by" in data: + logger.info(f" Sort By: {data['sort_by']}") + return [(i, "imdb") for i in self._pagination(data, search=False)] elif method == "imdb_chart": logger.info(f"Processing IMDb Chart: {charts[data]}") return [(_i, "imdb") for _i in self._ids_from_chart(data, language)] @@ -538,7 +538,7 @@ class IMDb: logger.info(f"Processing IMDb Search:") for k, v in data.items(): logger.info(f" {k}: {v}") - return [(_i, "imdb") for _i in self._search(data)] + return [(_i, "imdb") for _i in self._pagination(data)] else: raise Failed(f"IMDb Error: Method {method} not supported") diff --git a/modules/letterboxd.py b/modules/letterboxd.py index 
7410680f..17a711ff 100644 --- a/modules/letterboxd.py +++ b/modules/letterboxd.py @@ -56,7 +56,7 @@ class Letterboxd: def get_list_description(self, list_url, language): logger.trace(f"URL: {list_url}") response = self.requests.get_html(list_url, language=language) - descriptions = response.xpath("//meta[@property='og:description']/@content") + descriptions = response.xpath("//meta[@name='description']/@content") if len(descriptions) > 0 and len(descriptions[0]) > 0 and "About this list: " in descriptions[0]: return str(descriptions[0]).split("About this list: ")[1] return None