[34] add imdb_keyword filter

2 years ago · 4c248fe2fe
parent 93e6a3ad41
commit 4c248fe2fe
8 changed files with 174 additions and 26 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-1.18.3-develop33
+1.18.3-develop34
--- a/docs/config/configuration.md
+++ b/docs/config/configuration.md
@ -30,4 +30,7 @@ This table outlines the third-party services that Plex Meta Manager can make use
 This example outlines what a "standard" config.yml file might look like when in use.

 ```{literalinclude} ../../config/config.yml.template
+---
+language: yaml
+---
 ```
--- a/docs/metadata/filters.md
+++ b/docs/metadata/filters.md
@ -110,9 +110,10 @@ Tag filters can take multiple values as a **list or a comma-separated string**.
 | `resolution`                 | Uses the resolution tag to match                                                                                                                | &#9989;  | &#9989;<sup>1</sup> | &#9989;<sup>1</sup> | &#9989;  | &#10060; | &#10060; | &#10060; |
 | `audio_language`             | Uses the audio language tags to match                                                                                                           | &#9989;  | &#9989;<sup>1</sup> | &#9989;<sup>1</sup> | &#9989;  | &#10060; | &#10060; | &#10060; |
 | `subtitle_language`          | Uses the subtitle language tags to match                                                                                                        | &#9989;  | &#9989;<sup>1</sup> | &#9989;<sup>1</sup> | &#9989;  | &#10060; | &#10060; | &#10060; |
-| `tmdb_genre`<sup>2</sup>     | Uses the genre from TMDb to match                                                                                                               | &#9989;  |       &#9989;       |      &#10060;       | &#10060; | &#10060; | &#10060; | &#10060; |
-| `tmdb_keyword`<sup>2</sup>   | Uses the keyword from TMDb to match                                                                                                             | &#9989;  |       &#9989;       |      &#10060;       | &#10060; | &#10060; | &#10060; | &#10060; |
+| `tmdb_genre`<sup>2</sup>     | Uses the genres from TMDb to match                                                                                                              | &#9989;  |       &#9989;       |      &#10060;       | &#10060; | &#10060; | &#10060; | &#10060; |
+| `tmdb_keyword`<sup>2</sup>   | Uses the keywords from TMDb to match                                                                                                            | &#9989;  |       &#9989;       |      &#10060;       | &#10060; | &#10060; | &#10060; | &#10060; |
 | `origin_country`<sup>2</sup> | Uses TMDb origin country [ISO 3166-1 alpha-2 codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) to match<br>Example: `origin_country: us` | &#10060; |       &#9989;       |      &#10060;       | &#10060; | &#10060; | &#10060; | &#10060; |
+| `imdb_keyword`<sup>2</sup>   | Uses the keywords from IMDb to match. See [Special](#special-filters) for more attributes                                                       | &#9989;  |       &#9989;       |      &#10060;       | &#10060; | &#10060; | &#10060; | &#10060; |

 <sup>1</sup> Filters using the special `episodes` [filter](#special-filters) with the [default percent](details/definition).

@ -211,7 +212,7 @@ Special Filters each have their own set of rules for how they're used.
 ### Attribute

 | Special Filters                                                        | Description                                                                                                                                                                                                                                                                                              |  Movies  |  Shows   | Seasons  | Episodes | Artists  |  Albums  |  Track   |
-|:-----------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+|:-----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
 | `history`                                                              | Uses the release date attribute (originally available) to match dates throughout history<br>`day`: Match the Day and Month to Today's Date<br>`month`: Match the Month to Today's Date<br>`1-30`: Match the Day and Month to Today's Date or `1-30` days before Today's Date                             | &#9989;  | &#9989;  | &#10060; | &#9989;  | &#10060; | &#9989;  | &#10060; |
 | `episodes`                                                             | Uses the item's episodes attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items episodes that must match the sub-filter.                                                                                                           | &#10060; | &#9989;  | &#9989;  | &#10060; | &#10060; | &#10060; | &#10060; |
 | `seasons`                                                              | Uses the item's seasons attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items seasons that must match the sub-filter.                                                                                                             | &#10060; | &#9989;  | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
@ -220,9 +221,12 @@ Special Filters each have their own set of rules for how they're used.
 | `original_language`<sup>1</sup><br>`original_language.not`<sup>1</sup> | Uses TMDb original language [ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) to match<br>Example: `original_language: en, ko`                                                                                                                                                    | &#9989;  | &#9989;  | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
 | `tmdb_status`<sup>1</sup><br>`tmdb_status.not`<sup>1</sup>             | Uses TMDb Status to match<br>**Values:** `returning`, `planned`, `production`, `ended`, `canceled`, `pilot`                                                                                                                                                                                              | &#10060; | &#9989;  | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
 | `tmdb_type`<sup>1</sup><br>`tmdb_type.not`<sup>1</sup>                 | Uses TMDb Type to match<br>**Values:** `documentary`, `news`, `production`, `miniseries`, `reality`, `scripted`, `talk_show`, `video`                                                                                                                                                                    | &#10060; | &#9989;  | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
+| `imdb_keyword`<sup>1</sup><sup>2</sup>                                 | Uses the keywords from IMDb to match<br>`keyword`: list of keywords to match<br>`minimum_votes`: minimum number of votes keywords must have<br>`minimum_relevant`: minimum number of relevant votes keywords must have<br>`minimum_percentage`: minimum percentage of relevant votes keywords must have  | &#9989;  | &#9989;  | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |

 <sup>1</sup> Also filters out missing movies/shows from being added to Radarr/Sonarr.

+<sup>2</sup> Also is a Tag Filter and can use all of those modifiers.
+
 ## Collection Filter Examples

 A few examples are listed below:
--- a/modules/builder.py
+++ b/modules/builder.py
@ -76,7 +76,7 @@ filters_by_type = {
    "movie_show_episode_track": ["duration"],
    "movie_show_artist_album": ["genre"],
    "movie_show_episode": ["actor", "content_rating", "audience_rating"],
-    "movie_show": ["studio", "original_language", "tmdb_vote_count", "tmdb_year", "tmdb_genre", "tmdb_title", "tmdb_keyword"],
+    "movie_show": ["studio", "original_language", "tmdb_vote_count", "tmdb_year", "tmdb_genre", "tmdb_title", "tmdb_keyword", "imdb_keyword"],
    "movie_episode": ["director", "producer", "writer"],
    "movie_artist": ["country"],
    "show_artist": ["folder"],
@ -100,6 +100,7 @@ tmdb_filters = [
    "original_language", "origin_country", "tmdb_vote_count", "tmdb_year", "tmdb_keyword", "tmdb_genre",
    "first_episode_aired", "last_episode_aired", "last_episode_aired_or_never", "tmdb_status", "tmdb_type", "tmdb_title"
 ]
+imdb_filters = ["imdb_keyword"]
 string_filters = [
    "title", "summary", "studio", "edition", "record_label", "folder", "filepath", "audio_track_title", "tmdb_title",
    "audio_codec", "audio_profile", "video_codec", "video_profile"
@ -107,7 +108,7 @@ string_filters = [
 string_modifiers = ["", ".not", ".is", ".isnot", ".begins", ".ends", ".regex"]
 tag_filters = [
    "actor", "collection", "content_rating", "country", "director", "network", "genre", "label", "producer", "year",
-    "origin_country", "writer", "resolution", "audio_language", "subtitle_language", "tmdb_keyword", "tmdb_genre"
+    "origin_country", "writer", "resolution", "audio_language", "subtitle_language", "tmdb_keyword", "tmdb_genre", "imdb_keyword"
 ]
 tag_modifiers = ["", ".not", ".regex", ".count_gt", ".count_gte", ".count_lt", ".count_lte"]
 boolean_filters = ["has_collection", "has_overlay", "has_dolby_vision"]
@ -1607,6 +1608,7 @@ class CollectionBuilder:
            if current_filters:
                self.filters.append(current_filters)
        self.has_tmdb_filters = any([k in tmdb_filters for f in self.filters for k, v in f])
+        self.has_imdb_filters = any([k in imdb_filters for f in self.filters for k, v in f])

    def gather_ids(self, method, value):
        expired = None
@ -2097,6 +2099,9 @@ class CollectionBuilder:
            return util.validate_regex(data, self.Type, validate=validate)
        elif attribute in string_attributes and modifier in ["", ".not", ".is", ".isnot", ".begins", ".ends"]:
            return smart_pair(util.get_list(data, split=False))
+        elif (attribute in number_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]) \
+                or (attribute in tag_attributes and modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]):
+            return util.parse(self.Type, final, data, datatype="int", minimum=0)
        elif attribute == "origin_country":
            return util.get_list(data, upper=True)
        elif attribute in ["original_language", "tmdb_keyword"]:
@ -2115,6 +2120,19 @@ class CollectionBuilder:
            return util.parse(self.Type, final, data, datatype="commalist", options=[v for k, v in tmdb.discover_types.items()])
        elif attribute == "tmdb_status":
            return util.parse(self.Type, final, data, datatype="commalist", options=[v for k, v in tmdb.discover_status.items()])
+        elif attribute == "imdb_keyword":
+            new_dictionary = {"minimum_votes": 0, "minimum_relevant": 0, "minimum_percentage": 0}
+            if isinstance(data, dict) and "keyword" not in data:
+                raise Failed(f"{self.Type} Error: imdb_keyword requires the keyword attribute")
+            elif isinstance(data, dict):
+                dict_methods = {dm.lower(): dm for dm in data}
+                new_dictionary["keywords"] = util.parse(self.Type, "keyword", data, methods=dict_methods, parent=attribute, datatype="commalist")
+                new_dictionary["minimum_votes"] = util.parse(self.Type, "minimum_votes", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0)
+                new_dictionary["minimum_relevant"] = util.parse(self.Type, "minimum_relevant", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0)
+                new_dictionary["minimum_percentage"] = util.parse(self.Type, "minimum_percentage", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0, maximum=100)
+            else:
+                new_dictionary["keywords"] = util.parse(self.Type, final, data, datatype="commalist")
+            return new_dictionary
        elif attribute in tag_attributes and modifier in ["", ".not"]:
            if attribute in plex.tmdb_attributes:
                final_values = []
@ -2184,9 +2202,6 @@ class CollectionBuilder:
                data = str(data)[:-1]
            search_data = util.parse(self.Type, final, data, datatype="int", minimum=0)
            return f"{search_data}{search_mod}" if plex_search else search_data
-        elif (attribute in number_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]) \
-                or (attribute in tag_attributes and modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]):
-            return util.parse(self.Type, final, data, datatype="int", minimum=0)
        elif attribute in float_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]:
            return util.parse(self.Type, final, data, datatype="float", minimum=0, maximum=None if attribute == "duration" else 10)
        elif attribute in boolean_attributes or (attribute in float_attributes and modifier in [".rated"]):
@ -2325,8 +2340,16 @@ class CollectionBuilder:
                return False
        return True

+    def check_imdb_filters(self, imdb_info, filters_in):
+        """Return True only when every IMDb filter in ``filters_in`` accepts ``imdb_info``.
+
+        ``filters_in`` is an iterable of ``(filter_method, filter_data)`` pairs. Each method
+        name is broken up with ``self.library.split`` and the parts are passed to
+        ``self.config.IMDb.item_filter``. Evaluation stops at the first filter whose result
+        is exactly ``False``.
+        """
+        def accepts(method_name, method_data):
+            attr, mod, final = self.library.split(method_name)
+            outcome = self.config.IMDb.item_filter(imdb_info, attr, mod, final, method_data)
+            return outcome is not False
+
+        return all(accepts(name, data) for name, data in filters_in)
+
    def check_missing_filters(self, item_id, is_movie, tmdb_item=None, check_released=False):
-        if self.has_tmdb_filters or check_released:
+        imdb_info = None
+        if self.has_tmdb_filters or self.has_imdb_filters or check_released:
            try:
                if tmdb_item is None:
                    if is_movie:
@ -2335,19 +2358,33 @@ class CollectionBuilder:
                        tmdb_item = self.config.TMDb.get_show(self.config.Convert.tvdb_to_tmdb(item_id, fail=True), ignore_cache=True)
            except Failed:
                return False
+            if self.has_imdb_filters and tmdb_item and tmdb_item.imdb_id:
+                try:
+                    imdb_info = self.config.IMDb.keywords(tmdb_item.imdb_id)
+                except Failed as e:
+                    logger.error(e)
+                    return False
        if check_released:
            date_to_check = tmdb_item.release_date if is_movie else tmdb_item.first_air_date
            if not date_to_check or date_to_check > self.current_time:
                return False
        final_return = True
-        if self.has_tmdb_filters:
+        if self.has_tmdb_filters or self.has_imdb_filters:
            final_return = False
            for filter_list in self.filters:
-                tmdb_f = [(k, v) for k, v in filter_list if k in tmdb_filters]
-                if not tmdb_f:
-                    continue
+                tmdb_f = []
+                imdb_f = []
+                for k, v in filter_list:
+                    if k.split(".")[0] in tmdb_filters:
+                        tmdb_f.append((k, v))
+                    elif k.split(".")[0] in imdb_filters:
+                        imdb_f.append((k, v))
                or_result = True
-                if self.check_tmdb_filters(tmdb_item, tmdb_f, is_movie) is False:
+                if tmdb_f:
+                    if not tmdb_item or self.check_tmdb_filters(tmdb_item, tmdb_f, is_movie) is False:
+                        or_result = False
+                if imdb_f:
+                    # Reject this filter group when no IMDb data could be loaded, or when any IMDb filter fails.
+                    if not imdb_info or self.check_imdb_filters(imdb_info, imdb_f) is False:
                         or_result = False
                if or_result:
                    final_return = True
@ -2360,12 +2397,16 @@ class CollectionBuilder:
            item = self.library.reload(item)
            final_return = False
            tmdb_item = None
+            imdb_info = None
            for filter_list in self.filters:
                tmdb_f = []
+                imdb_f = []
                plex_f = []
                for k, v in filter_list:
                    if k.split(".")[0] in tmdb_filters:
                        tmdb_f.append((k, v))
+                    elif k.split(".")[0] in imdb_filters:
+                        imdb_f.append((k, v))
                    else:
                        plex_f.append((k, v))
                or_result = True
@ -2380,10 +2421,24 @@ class CollectionBuilder:
                                    tmdb_item = self.config.TMDb.get_movie(self.library.movie_rating_key_map[item.ratingKey], ignore_cache=True)
                                else:
                                    tmdb_item = self.config.TMDb.get_show(self.config.Convert.tvdb_to_tmdb(self.library.show_rating_key_map[item.ratingKey], fail=True), ignore_cache=True)
-                            except Failed:
+                            except Failed as e:
+                                logger.error(e)
                                or_result = False
                    if not tmdb_item or self.check_tmdb_filters(tmdb_item, tmdb_f, item.ratingKey in self.library.movie_rating_key_map) is False:
                        or_result = False
+                if imdb_f:
+                    if not imdb_info and isinstance(item, (Movie, Show)):
+                        if item.ratingKey not in self.library.imdb_rating_key_map:
+                            logger.warning(f"Filter Error: No IMDb ID found for {item.title}")
+                            or_result = False
+                        else:
+                            try:
+                                imdb_info = self.config.IMDb.keywords(self.library.imdb_rating_key_map[item.ratingKey])
+                            except Failed as e:
+                                logger.error(e)
+                                or_result = False
+                    if not imdb_info or self.check_imdb_filters(imdb_info, imdb_f) is False:
+                        or_result = False
                if plex_f and self.library.check_filters(item, plex_f, self.current_time) is False:
                    or_result = False
                if or_result:
--- a/modules/cache.py
+++ b/modules/cache.py
@ -268,6 +268,13 @@ class Cache:
                    media_id TEXT,
                    media_type TEXT)"""
                )
+                cursor.execute(
+                    """CREATE TABLE IF NOT EXISTS imdb_keywords (
+                    key INTEGER PRIMARY KEY,
+                    imdb_id TEXT,
+                    keywords TEXT,
+                    expiration_date TEXT)"""
+                )
                cursor.execute(
                    """CREATE TABLE IF NOT EXISTS imdb_parental (
                    key INTEGER PRIMARY KEY,
@ -937,6 +944,31 @@ class Cache:
            with closing(connection.cursor()) as cursor:
                cursor.execute(f"DELETE FROM list_ids WHERE list_key = ?", (list_key,))

+    def query_imdb_keywords(self, imdb_id, expiration):
+        """Look up the cached IMDb keywords for ``imdb_id``.
+
+        Returns an ``(imdb_dict, expired)`` tuple. ``imdb_dict`` maps each keyword name to the
+        pair of integers serialized with it; it is empty when no row exists or the row holds
+        no keywords. ``expired`` is ``None`` when no row exists, otherwise it is whether the
+        row's ``expiration_date`` lies more than ``expiration`` days in the past.
+        """
+        imdb_dict = {}
+        expired = None
+        with sqlite3.connect(self.cache_path) as connection:
+            connection.row_factory = sqlite3.Row
+            with closing(connection.cursor()) as cursor:
+                cursor.execute("SELECT * FROM imdb_keywords WHERE imdb_id = ?", (imdb_id,))
+                row = cursor.fetchone()
+                if row:
+                    # Entries are stored as "name:int:int" joined by "|". Skip empty entries so a
+                    # NULL/empty column yields an empty dict instead of an IndexError, and split
+                    # from the right so a colon inside a keyword name does not break parsing.
+                    for entry in (row["keywords"] or "").split("|"):
+                        if not entry:
+                            continue
+                        name, first, second = entry.rsplit(":", 2)
+                        imdb_dict[name] = (int(first), int(second))
+                    datetime_object = datetime.strptime(row["expiration_date"], "%Y-%m-%d")
+                    time_between_insertion = datetime.now() - datetime_object
+                    expired = time_between_insertion.days > expiration
+        return imdb_dict, expired
+
+    def update_imdb_keywords(self, expired, imdb_id, keywords, expiration):
+        """Store ``keywords`` for ``imdb_id`` in the ``imdb_keywords`` cache table.
+
+        ``keywords`` maps a keyword name to a pair of values; it is serialized as
+        ``name:first:second`` entries joined by ``|``. When ``expired`` is ``True`` the row is
+        stamped with today's date; otherwise the stamp is back-dated by a random 1..``expiration``
+        days (presumably to stagger when cache entries expire -- confirm against the other
+        cache tables).
+        """
+        expiration_date = datetime.now() if expired is True else (datetime.now() - timedelta(days=random.randint(1, expiration)))
+        serialized = "|".join([f"{k}:{u}:{v}" for k, (u, v) in keywords.items()])
+        stamp = expiration_date.strftime("%Y-%m-%d")
+        with sqlite3.connect(self.cache_path) as connection:
+            connection.row_factory = sqlite3.Row
+            with closing(connection.cursor()) as cursor:
+                update_sql = "UPDATE imdb_keywords SET keywords = ?, expiration_date = ? WHERE imdb_id = ?"
+                cursor.execute(update_sql, (serialized, stamp, imdb_id))
+                # The table declares no UNIQUE constraint on imdb_id, so "INSERT OR IGNORE" would
+                # add a duplicate row on every call; insert only when nothing was updated.
+                if cursor.rowcount == 0:
+                    insert_sql = "INSERT INTO imdb_keywords(imdb_id, keywords, expiration_date) VALUES(?, ?, ?)"
+                    cursor.execute(insert_sql, (imdb_id, serialized, stamp))
+
    def query_imdb_parental(self, imdb_id, expiration):
        imdb_dict = {}
        expired = None
--- a/modules/imdb.py
+++ b/modules/imdb.py
@ -157,6 +157,29 @@ class IMDb:
            return imdb_ids
        raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")

+    def keywords(self, imdb_id, ignore_cache=False):
+        """Return the IMDb keywords for ``imdb_id`` as ``{name: (first, second)}``.
+
+        The two integers are taken from the "<first> of <second>" vote text next to each
+        keyword on ``https://www.imdb.com/title/<imdb_id>/keywords``; keywords without such
+        text get ``(0, 0)``. Unless ``ignore_cache`` is set, a non-empty, unexpired entry from
+        ``self.config.Cache`` is returned without scraping, and freshly scraped results are
+        written back to the cache.
+
+        Raises ``Failed`` when the page contains no keyword cells.
+        """
+        expired = None
+        if self.config.Cache and not ignore_cache:
+            cached_keywords, expired = self.config.Cache.query_imdb_keywords(imdb_id, self.config.Cache.expiration)
+            if cached_keywords and expired is False:
+                return cached_keywords
+        response = self.config.get_html(f"https://www.imdb.com/title/{imdb_id}/keywords")
+        keywords = response.xpath("//td[@class='soda sodavote']")
+        if not keywords:
+            raise Failed(f"IMDb Error: No Item Found for IMDb ID: {imdb_id}")
+        # Start from an empty dict so keywords from an expired cache entry that are no longer
+        # on the page are not carried over into the refreshed result.
+        imdb_keywords = {}
+        for k in keywords:
+            names = k.xpath("div[@class='sodatext']/a/text()")
+            if not names:
+                # A cell without a keyword link carries nothing to record.
+                continue
+            votes = k.xpath("div[@class='did-you-know-actions']/div/a/text()")
+            # Only text of the form "<n> of <m> ..." carries vote counts; anything else means no votes.
+            result = re.search(r"(\d+) of (\d+)", votes[0]) if votes else None
+            imdb_keywords[names[0]] = (int(result.group(1)), int(result.group(2))) if result else (0, 0)
+        if self.config.Cache and not ignore_cache:
+            self.config.Cache.update_imdb_keywords(expired, imdb_id, imdb_keywords, self.config.Cache.expiration)
+        return imdb_keywords
+
    def parental_guide(self, imdb_id, ignore_cache=False):
        parental_dict = {}
        expired = None
@ -289,3 +312,27 @@ class IMDb:
        if imdb_id not in self.episode_ratings or season_num not in self.episode_ratings[imdb_id] or episode_num not in self.episode_ratings[imdb_id][season_num]:
            return None
        return self.episode_ratings[imdb_id][season_num][episode_num]
+
+    def item_filter(self, imdb_info, filter_attr, modifier, filter_final, filter_data):
+        """Evaluate one IMDb filter against an item's keyword data.
+
+        ``imdb_info`` maps keyword name to a ``(relevant, votes)`` pair. ``filter_data`` is
+        either the validated ``imdb_keyword`` dictionary (``keywords`` plus the
+        ``minimum_votes`` / ``minimum_relevant`` / ``minimum_percentage`` thresholds) or a
+        plain value: a list of keywords or regexes, or the number used by the ``.count_*``
+        modifiers. Plain values apply no thresholds.
+
+        Returns ``False`` when the filter rejects the item and ``True`` otherwise, including
+        for any ``filter_attr`` other than ``imdb_keyword``.
+        """
+        if filter_attr != "imdb_keyword":
+            return True
+        # The thresholds live in the filter definition, not in the scraped keyword map.
+        if isinstance(filter_data, dict):
+            mr = filter_data.get("minimum_relevant", 0)
+            mv = filter_data.get("minimum_votes", 0)
+            mp = filter_data.get("minimum_percentage", 0)
+            values = filter_data.get("keywords", [])
+        else:
+            mr = mv = mp = 0
+            values = filter_data
+        # minimum_percentage is expressed on a 0-100 scale, so compare r/v as a percentage
+        # (written without division); keywords with no votes skip the percentage check.
+        attrs = [k for k, (r, v) in imdb_info.items() if r >= mr and v >= mv and (v == 0 or r * 100 >= mp * v)]
+        if modifier == ".regex":
+            return any(re.compile(reg).search(name) for reg in values for name in attrs)
+        if modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]:
+            # Strip the ".count_" prefix to get the plain numeric modifier (".gt", ".lte", ...);
+            # a truthy result from util.is_number_filter rejects the item.
+            if util.is_number_filter(len(attrs), f".{modifier[7:]}", values):
+                return False
+            return True
+        matched = set(values) & set(attrs)
+        if (modifier == "" and not matched) or (modifier == ".not" and matched):
+            return False
+        return True
--- a/modules/library.py
+++ b/modules/library.py
@ -29,6 +29,7 @@ class Library(ABC):
        self.mal_map = {}
        self.movie_rating_key_map = {}
        self.show_rating_key_map = {}
+        self.imdb_rating_key_map = {}
        self.cached_items = {}
        self.run_again = []
        self.type = ""
@ -316,6 +317,7 @@ class Library(ABC):
                        self.show_rating_key_map[key] = main_id[0]
                        util.add_dict_list(main_id, key, self.show_map)
                if imdb_id:
+                    self.imdb_rating_key_map[key] = imdb_id[0]
                    util.add_dict_list(imdb_id, key, self.imdb_map)
        logger.info("")
        logger.info(f"Processed {len(items)} {self.type}s")
--- a/modules/tmdb.py
+++ b/modules/tmdb.py
@ -461,6 +461,11 @@ class TMDb:
                            has_match = True
                if has_match is False:
                    return False
+            elif modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]:
+                test_number = len(attrs) if attrs else 0
+                modifier = f".{modifier[7:]}"
+                if test_number is None or util.is_number_filter(test_number, modifier, filter_data):
+                    return False
            elif (not list(set(filter_data) & set(attrs)) and modifier == "") \
                    or (list(set(filter_data) & set(attrs)) and modifier == ".not"):
                return False