From 4c248fe2fec524ea10e43c40bc07eb2377a65034 Mon Sep 17 00:00:00 2001 From: meisnate12 Date: Fri, 27 Jan 2023 10:16:00 -0500 Subject: [PATCH] [34] add imdb_keyword filter --- VERSION | 2 +- docs/config/configuration.md | 3 ++ docs/metadata/filters.md | 28 +++++++------ modules/builder.py | 81 ++++++++++++++++++++++++++++++------ modules/cache.py | 32 ++++++++++++++ modules/imdb.py | 47 +++++++++++++++++++++ modules/library.py | 2 + modules/tmdb.py | 5 +++ 8 files changed, 174 insertions(+), 26 deletions(-) diff --git a/VERSION b/VERSION index d8129f39..2bd4e439 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.18.3-develop33 +1.18.3-develop34 diff --git a/docs/config/configuration.md b/docs/config/configuration.md index 0bc74569..73f97eb7 100644 --- a/docs/config/configuration.md +++ b/docs/config/configuration.md @@ -30,4 +30,7 @@ This table outlines the third-party services that Plex Meta Manager can make use This example outlines what a "standard" config.yml file might look like when in use. ```{literalinclude} ../../config/config.yml.template +--- +language: yaml +--- ``` \ No newline at end of file diff --git a/docs/metadata/filters.md b/docs/metadata/filters.md index 7e51040b..fd05224c 100644 --- a/docs/metadata/filters.md +++ b/docs/metadata/filters.md @@ -110,9 +110,10 @@ Tag filters can take multiple values as a **list or a comma-separated string**. 
| `resolution` | Uses the resolution tag to match | ✅ | ✅1 | ✅1 | ✅ | ❌ | ❌ | ❌ | | `audio_language` | Uses the audio language tags to match | ✅ | ✅1 | ✅1 | ✅ | ❌ | ❌ | ❌ | | `subtitle_language` | Uses the subtitle language tags to match | ✅ | ✅1 | ✅1 | ✅ | ❌ | ❌ | ❌ | -| `tmdb_genre`2 | Uses the genre from TMDb to match | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| `tmdb_keyword`2 | Uses the keyword from TMDb to match | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `tmdb_genre`2 | Uses the genres from TMDb to match | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `tmdb_keyword`2 | Uses the keywords from TMDb to match | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | `origin_country`2 | Uses TMDb origin country [ISO 3166-1 alpha-2 codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) to match
Example: `origin_country: us` | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `imdb_keyword`2 | Uses the keywords from IMDb to match See [Special](#special-filters) for more attributes | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | 1 Filters using the special `episodes` [filter](#special-filters) with the [default percent](details/definition). @@ -210,19 +211,22 @@ Special Filters each have their own set of rules for how they're used. ### Attribute -| Special Filters | Description | Movies | Shows | Seasons | Episodes | Artists | Albums | Track | -|:-----------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:| -| `history` | Uses the release date attribute (originally available) to match dates throughout history
`day`: Match the Day and Month to Today's Date
`month`: Match the Month to Today's Date
`1-30`: Match the Day and Month to Today's Date or `1-30` days before Today's Date | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | -| `episodes` | Uses the item's episodes attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items episodes that must match the sub-filter. | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | -| `seasons` | Uses the item's seasons attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items seasons that must match the sub-filter. | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| `tracks` | Uses the item's tracks attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items tracks that must match the sub-filter. | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | -| `albums` | Uses the item's albums attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items albums that must match the sub-filter. | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| `original_language`1
`original_language.not`1 | Uses TMDb original language [ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) to match
Example: `original_language: en, ko` | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| `tmdb_status`1
`tmdb_status.not`1 | Uses TMDb Status to match
**Values:** `returning`, `planned`, `production`, `ended`, `canceled`, `pilot` | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| `tmdb_type`1
`tmdb_type.not`1 | Uses TMDb Type to match
**Values:** `documentary`, `news`, `production`, `miniseries`, `reality`, `scripted`, `talk_show`, `video` | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Special Filters | Description | Movies | Shows | Seasons | Episodes | Artists | Albums | Track | +|:-----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:| +| `history` | Uses the release date attribute (originally available) to match dates throughout history
`day`: Match the Day and Month to Today's Date
`month`: Match the Month to Today's Date
`1-30`: Match the Day and Month to Today's Date or `1-30` days before Today's Date | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | +| `episodes` | Uses the item's episodes attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an item's episodes that must match the sub-filter. | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| `seasons` | Uses the item's seasons attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an item's seasons that must match the sub-filter. | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `tracks` | Uses the item's tracks attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an item's tracks that must match the sub-filter. | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | +| `albums` | Uses the item's albums attributes to match
Use the `percentage` attribute given a number between 0-100 to determine the percentage of an item's albums that must match the sub-filter. | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| `original_language`1
`original_language.not`1 | Uses TMDb original language [ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) to match
Example: `original_language: en, ko` | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `tmdb_status`1
`tmdb_status.not`1 | Uses TMDb Status to match
**Values:** `returning`, `planned`, `production`, `ended`, `canceled`, `pilot` | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `tmdb_type`1
`tmdb_type.not`1 | Uses TMDb Type to match
**Values:** `documentary`, `news`, `production`, `miniseries`, `reality`, `scripted`, `talk_show`, `video` | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| `imdb_keyword`12 | Uses the keywords from IMDb to match
`keyword`: list of keywords to match
`minimum_votes`: minimum number of votes keywords must have
`minimum_relevant`: minimum number of relevant votes keywords must have
`minimum_percentage`: minimum percentage of relevant votes keywords must have | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | 1 Also filters out missing movies/shows from being added to Radarr/Sonarr. +2 Also is a Tag Filter and can use all of those modifiers. + ## Collection Filter Examples A few examples are listed below: diff --git a/modules/builder.py b/modules/builder.py index 3ba41472..be7191c8 100644 --- a/modules/builder.py +++ b/modules/builder.py @@ -76,7 +76,7 @@ filters_by_type = { "movie_show_episode_track": ["duration"], "movie_show_artist_album": ["genre"], "movie_show_episode": ["actor", "content_rating", "audience_rating"], - "movie_show": ["studio", "original_language", "tmdb_vote_count", "tmdb_year", "tmdb_genre", "tmdb_title", "tmdb_keyword"], + "movie_show": ["studio", "original_language", "tmdb_vote_count", "tmdb_year", "tmdb_genre", "tmdb_title", "tmdb_keyword", "imdb_keyword"], "movie_episode": ["director", "producer", "writer"], "movie_artist": ["country"], "show_artist": ["folder"], @@ -100,6 +100,7 @@ tmdb_filters = [ "original_language", "origin_country", "tmdb_vote_count", "tmdb_year", "tmdb_keyword", "tmdb_genre", "first_episode_aired", "last_episode_aired", "last_episode_aired_or_never", "tmdb_status", "tmdb_type", "tmdb_title" ] +imdb_filters = ["imdb_keyword"] string_filters = [ "title", "summary", "studio", "edition", "record_label", "folder", "filepath", "audio_track_title", "tmdb_title", "audio_codec", "audio_profile", "video_codec", "video_profile" @@ -107,7 +108,7 @@ string_filters = [ string_modifiers = ["", ".not", ".is", ".isnot", ".begins", ".ends", ".regex"] tag_filters = [ "actor", "collection", "content_rating", "country", "director", "network", "genre", "label", "producer", "year", - "origin_country", "writer", "resolution", "audio_language", "subtitle_language", "tmdb_keyword", "tmdb_genre" + "origin_country", "writer", "resolution", "audio_language", "subtitle_language", "tmdb_keyword", "tmdb_genre", "imdb_keyword" ] tag_modifiers = 
["", ".not", ".regex", ".count_gt", ".count_gte", ".count_lt", ".count_lte"] boolean_filters = ["has_collection", "has_overlay", "has_dolby_vision"] @@ -1607,6 +1608,7 @@ class CollectionBuilder: if current_filters: self.filters.append(current_filters) self.has_tmdb_filters = any([k in tmdb_filters for f in self.filters for k, v in f]) + self.has_imdb_filters = any([k in imdb_filters for f in self.filters for k, v in f]) def gather_ids(self, method, value): expired = None @@ -2097,6 +2099,9 @@ class CollectionBuilder: return util.validate_regex(data, self.Type, validate=validate) elif attribute in string_attributes and modifier in ["", ".not", ".is", ".isnot", ".begins", ".ends"]: return smart_pair(util.get_list(data, split=False)) + elif (attribute in number_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]) \ + or (attribute in tag_attributes and modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]): + return util.parse(self.Type, final, data, datatype="int", minimum=0) elif attribute == "origin_country": return util.get_list(data, upper=True) elif attribute in ["original_language", "tmdb_keyword"]: @@ -2115,6 +2120,19 @@ class CollectionBuilder: return util.parse(self.Type, final, data, datatype="commalist", options=[v for k, v in tmdb.discover_types.items()]) elif attribute == "tmdb_status": return util.parse(self.Type, final, data, datatype="commalist", options=[v for k, v in tmdb.discover_status.items()]) + elif attribute == "imdb_keyword": + new_dictionary = {"minimum_votes": 0, "minimum_relevant": 0, "minimum_percentage": 0} + if isinstance(data, dict) and "keyword" not in data: + raise Failed(f"{self.Type} Error: imdb_keyword requires the keyword attribute") + elif isinstance(data, dict): + dict_methods = {dm.lower(): dm for dm in data} + new_dictionary["keywords"] = util.parse(self.Type, "keyword", data, methods=dict_methods, parent=attribute, datatype="commalist") + new_dictionary["minimum_votes"] = util.parse(self.Type, 
"minimum_votes", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0) + new_dictionary["minimum_relevant"] = util.parse(self.Type, "minimum_relevant", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0) + new_dictionary["minimum_percentage"] = util.parse(self.Type, "minimum_percentage", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0, maximum=100) + else: + new_dictionary["keywords"] = util.parse(self.Type, final, data, datatype="commalist") + return new_dictionary elif attribute in tag_attributes and modifier in ["", ".not"]: if attribute in plex.tmdb_attributes: final_values = [] @@ -2184,9 +2202,6 @@ class CollectionBuilder: data = str(data)[:-1] search_data = util.parse(self.Type, final, data, datatype="int", minimum=0) return f"{search_data}{search_mod}" if plex_search else search_data - elif (attribute in number_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]) \ - or (attribute in tag_attributes and modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]): - return util.parse(self.Type, final, data, datatype="int", minimum=0) elif attribute in float_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]: return util.parse(self.Type, final, data, datatype="float", minimum=0, maximum=None if attribute == "duration" else 10) elif attribute in boolean_attributes or (attribute in float_attributes and modifier in [".rated"]): @@ -2325,8 +2340,16 @@ class CollectionBuilder: return False return True + def check_imdb_filters(self, imdb_info, filters_in): + for filter_method, filter_data in filters_in: + filter_attr, modifier, filter_final = self.library.split(filter_method) + if self.config.IMDb.item_filter(imdb_info, filter_attr, modifier, filter_final, filter_data) is False: + return False + return True + def check_missing_filters(self, item_id, is_movie, tmdb_item=None, check_released=False): - if self.has_tmdb_filters or check_released: + imdb_info = 
None + if self.has_tmdb_filters or self.has_imdb_filters or check_released: try: if tmdb_item is None: if is_movie: @@ -2335,20 +2358,34 @@ class CollectionBuilder: tmdb_item = self.config.TMDb.get_show(self.config.Convert.tvdb_to_tmdb(item_id, fail=True), ignore_cache=True) except Failed: return False + if self.has_imdb_filters and tmdb_item and tmdb_item.imdb_id: + try: + imdb_info = self.config.IMDb.keywords(tmdb_item.imdb_id) + except Failed as e: + logger.error(e) + return False if check_released: date_to_check = tmdb_item.release_date if is_movie else tmdb_item.first_air_date if not date_to_check or date_to_check > self.current_time: return False final_return = True - if self.has_tmdb_filters: + if self.has_tmdb_filters or self.has_imdb_filters: final_return = False for filter_list in self.filters: - tmdb_f = [(k, v) for k, v in filter_list if k in tmdb_filters] - if not tmdb_f: - continue + tmdb_f = [] + imdb_f = [] + for k, v in filter_list: + if k.split(".")[0] in tmdb_filters: + tmdb_f.append((k, v)) + elif k.split(".")[0] in imdb_filters: + imdb_f.append((k, v)) or_result = True - if self.check_tmdb_filters(tmdb_item, tmdb_f, is_movie) is False: - or_result = False + if tmdb_f: + if not tmdb_item or self.check_tmdb_filters(tmdb_item, tmdb_f, is_movie) is False: + or_result = False + if imdb_f: + if not imdb_info and self.check_imdb_filters(imdb_info, imdb_f) is False: + or_result = False if or_result: final_return = True return final_return @@ -2360,12 +2397,16 @@ class CollectionBuilder: item = self.library.reload(item) final_return = False tmdb_item = None + imdb_info = None for filter_list in self.filters: tmdb_f = [] + imdb_f = [] plex_f = [] for k, v in filter_list: if k.split(".")[0] in tmdb_filters: tmdb_f.append((k, v)) + elif k.split(".")[0] in imdb_filters: + imdb_f.append((k, v)) else: plex_f.append((k, v)) or_result = True @@ -2380,10 +2421,24 @@ class CollectionBuilder: tmdb_item = 
self.config.TMDb.get_movie(self.library.movie_rating_key_map[item.ratingKey], ignore_cache=True) else: tmdb_item = self.config.TMDb.get_show(self.config.Convert.tvdb_to_tmdb(self.library.show_rating_key_map[item.ratingKey], fail=True), ignore_cache=True) - except Failed: + except Failed as e: + logger.error(e) or_result = False if not tmdb_item or self.check_tmdb_filters(tmdb_item, tmdb_f, item.ratingKey in self.library.movie_rating_key_map) is False: or_result = False + if imdb_f: + if not imdb_info and isinstance(item, (Movie, Show)): + if item.ratingKey not in self.library.imdb_rating_key_map: + logger.warning(f"Filter Error: No IMDb ID found for {item.title}") + or_result = False + else: + try: + imdb_info = self.config.IMDb.keywords(self.library.imdb_rating_key_map[item.ratingKey]) + except Failed as e: + logger.error(e) + or_result = False + if not imdb_info or self.check_imdb_filters(imdb_info, imdb_f) is False: + or_result = False if plex_f and self.library.check_filters(item, plex_f, self.current_time) is False: or_result = False if or_result: diff --git a/modules/cache.py b/modules/cache.py index a7097b74..f99017cd 100644 --- a/modules/cache.py +++ b/modules/cache.py @@ -268,6 +268,13 @@ class Cache: media_id TEXT, media_type TEXT)""" ) + cursor.execute( + """CREATE TABLE IF NOT EXISTS imdb_keywords ( + key INTEGER PRIMARY KEY, + imdb_id TEXT, + keywords TEXT, + expiration_date TEXT)""" + ) cursor.execute( """CREATE TABLE IF NOT EXISTS imdb_parental ( key INTEGER PRIMARY KEY, @@ -937,6 +944,31 @@ class Cache: with closing(connection.cursor()) as cursor: cursor.execute(f"DELETE FROM list_ids WHERE list_key = ?", (list_key,)) + def query_imdb_keywords(self, imdb_id, expiration): + imdb_dict = {} + expired = None + with sqlite3.connect(self.cache_path) as connection: + connection.row_factory = sqlite3.Row + with closing(connection.cursor()) as cursor: + cursor.execute("SELECT * FROM imdb_keywords WHERE imdb_id = ?", (imdb_id,)) + row = cursor.fetchone() + if 
row: + keywords = row["keywords"] if row["keywords"] else "" + imdb_dict = {k.split(":")[0]: (int(k.split(":")[1]), int(k.split(":")[2])) for k in keywords.split("|")} + datetime_object = datetime.strptime(row["expiration_date"], "%Y-%m-%d") + time_between_insertion = datetime.now() - datetime_object + expired = time_between_insertion.days > expiration + return imdb_dict, expired + + def update_imdb_keywords(self, expired, imdb_id, keywords, expiration): + expiration_date = datetime.now() if expired is True else (datetime.now() - timedelta(days=random.randint(1, expiration))) + with sqlite3.connect(self.cache_path) as connection: + connection.row_factory = sqlite3.Row + with closing(connection.cursor()) as cursor: + cursor.execute("INSERT OR IGNORE INTO imdb_keywords(imdb_id) VALUES(?)", (imdb_id,)) + update_sql = "UPDATE imdb_keywords SET keywords = ?, expiration_date = ? WHERE imdb_id = ?" + cursor.execute(update_sql, ("|".join([f"{k}:{u}:{v}" for k, (u, v) in keywords.items()]), expiration_date.strftime("%Y-%m-%d"), imdb_id)) + def query_imdb_parental(self, imdb_id, expiration): imdb_dict = {} expired = None diff --git a/modules/imdb.py b/modules/imdb.py index c77fa0f8..0c853286 100644 --- a/modules/imdb.py +++ b/modules/imdb.py @@ -157,6 +157,29 @@ class IMDb: return imdb_ids raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}") + def keywords(self, imdb_id, ignore_cache=False): + imdb_keywords = {} + expired = None + if self.config.Cache and not ignore_cache: + imdb_keywords, expired = self.config.Cache.query_imdb_keywords(imdb_id, self.config.Cache.expiration) + if imdb_keywords and expired is False: + return imdb_keywords + response = self.config.get_html(f"https://www.imdb.com/title/{imdb_id}/keywords") + keywords = response.xpath("//td[@class='soda sodavote']") + if not keywords: + raise Failed(f"IMDb Error: No Item Found for IMDb ID: {imdb_id}") + for k in keywords: + name = k.xpath("div[@class='sodatext']/a/text()")[0] + relevant = 
k.xpath("div[@class='did-you-know-actions']/div/a/text()")[0].strip() + if "of" in relevant: + result = re.search(r"(\d+) of (\d+).*", relevant) + imdb_keywords[name] = (int(result.group(1)), int(result.group(2))) + else: + imdb_keywords[name] = (0, 0) + if self.config.Cache and not ignore_cache: + self.config.Cache.update_imdb_keywords(expired, imdb_id, imdb_keywords, self.config.Cache.expiration) + return imdb_keywords + def parental_guide(self, imdb_id, ignore_cache=False): parental_dict = {} expired = None @@ -289,3 +312,27 @@ class IMDb: if imdb_id not in self.episode_ratings or season_num not in self.episode_ratings[imdb_id] or episode_num not in self.episode_ratings[imdb_id][season_num]: return None return self.episode_ratings[imdb_id][season_num][episode_num] + + def item_filter(self, imdb_info, filter_attr, modifier, filter_final, filter_data): + if filter_attr == "imdb_keyword": + mr = imdb_info["minimum_relevant"] + mv = imdb_info["minimum_votes"] + mp = imdb_info["minimum_percentage"] + attrs = [k for k, (r, v) in imdb_info.items() if r >= mr and v >= mv and (v == 0 or r / v >= mp)] + if modifier == ".regex": + has_match = False + for reg in filter_data: + for name in attrs: + if re.compile(reg).search(name): + has_match = True + if has_match is False: + return False + elif modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]: + test_number = len(attrs) if attrs else 0 + modifier = f".{modifier[7:]}" + if test_number is None or util.is_number_filter(test_number, modifier, filter_data): + return False + elif (not list(set(filter_data) & set(attrs)) and modifier == "") \ + or (list(set(filter_data) & set(attrs)) and modifier == ".not"): + return False + return True diff --git a/modules/library.py b/modules/library.py index 0a5fbc6f..60679d0e 100644 --- a/modules/library.py +++ b/modules/library.py @@ -29,6 +29,7 @@ class Library(ABC): self.mal_map = {} self.movie_rating_key_map = {} self.show_rating_key_map = {} + self.imdb_rating_key_map = 
{} self.cached_items = {} self.run_again = [] self.type = "" @@ -316,6 +317,7 @@ class Library(ABC): self.show_rating_key_map[key] = main_id[0] util.add_dict_list(main_id, key, self.show_map) if imdb_id: + self.imdb_rating_key_map[key] = imdb_id[0] util.add_dict_list(imdb_id, key, self.imdb_map) logger.info("") logger.info(f"Processed {len(items)} {self.type}s") diff --git a/modules/tmdb.py b/modules/tmdb.py index de300715..052d7d38 100644 --- a/modules/tmdb.py +++ b/modules/tmdb.py @@ -461,6 +461,11 @@ class TMDb: has_match = True if has_match is False: return False + elif modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]: + test_number = len(attrs) if attrs else 0 + modifier = f".{modifier[7:]}" + if test_number is None or util.is_number_filter(test_number, modifier, filter_data): + return False elif (not list(set(filter_data) & set(attrs)) and modifier == "") \ or (list(set(filter_data) & set(attrs)) and modifier == ".not"): return False