[34] add imdb_keyword filter

pull/1290/head
meisnate12 2 years ago
parent 93e6a3ad41
commit 4c248fe2fe

@ -1 +1 @@
1.18.3-develop33
1.18.3-develop34

@ -30,4 +30,7 @@ This table outlines the third-party services that Plex Meta Manager can make use
This example outlines what a "standard" config.yml file might look like when in use.
```{literalinclude} ../../config/config.yml.template
---
language: yaml
---
```

@ -110,9 +110,10 @@ Tag filters can take multiple values as a **list or a comma-separated string**.
| `resolution` | Uses the resolution tag to match | &#9989; | &#9989;<sup>1</sup> | &#9989;<sup>1</sup> | &#9989; | &#10060; | &#10060; | &#10060; |
| `audio_language` | Uses the audio language tags to match | &#9989; | &#9989;<sup>1</sup> | &#9989;<sup>1</sup> | &#9989; | &#10060; | &#10060; | &#10060; |
| `subtitle_language` | Uses the subtitle language tags to match | &#9989; | &#9989;<sup>1</sup> | &#9989;<sup>1</sup> | &#9989; | &#10060; | &#10060; | &#10060; |
| `tmdb_genre`<sup>2</sup> | Uses the genre from TMDb to match | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_keyword`<sup>2</sup> | Uses the keyword from TMDb to match | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_genre`<sup>2</sup> | Uses the genres from TMDb to match | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_keyword`<sup>2</sup> | Uses the keywords from TMDb to match | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `origin_country`<sup>2</sup> | Uses TMDb origin country [ISO 3166-1 alpha-2 codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) to match<br>Example: `origin_country: us` | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `imdb_keyword`<sup>2</sup> | Uses the keywords from IMDb to match See [Special](#special-filters) for more attributes | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
<sup>1</sup> Filters using the special `episodes` [filter](#special-filters) with the [default percent](details/definition).
@ -210,19 +211,22 @@ Special Filters each have their own set of rules for how they're used.
### Attribute
| Special Filters | Description | Movies | Shows | Seasons | Episodes | Artists | Albums | Track |
|:-----------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
| `history` | Uses the release date attribute (originally available) to match dates throughout history<br>`day`: Match the Day and Month to Today's Date<br>`month`: Match the Month to Today's Date<br>`1-30`: Match the Day and Month to Today's Date or `1-30` days before Today's Date | &#9989; | &#9989; | &#10060; | &#9989; | &#10060; | &#9989; | &#10060; |
| `episodes` | Uses the item's episodes attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items episodes that must match the sub-filter. | &#10060; | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; |
| `seasons` | Uses the item's seasons attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items seasons that must match the sub-filter. | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tracks` | Uses the item's tracks attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items tracks that must match the sub-filter. | &#10060; | &#10060; | &#10060; | &#10060; | &#9989; | &#9989; | &#10060; |
| `albums` | Uses the item's albums attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items albums that must match the sub-filter. | &#10060; | &#10060; | &#10060; | &#10060; | &#9989; | &#10060; | &#10060; |
| `original_language`<sup>1</sup><br>`original_language.not`<sup>1</sup> | Uses TMDb original language [ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) to match<br>Example: `original_language: en, ko` | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_status`<sup>1</sup><br>`tmdb_status.not`<sup>1</sup> | Uses TMDb Status to match<br>**Values:** `returning`, `planned`, `production`, `ended`, `canceled`, `pilot` | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_type`<sup>1</sup><br>`tmdb_type.not`<sup>1</sup> | Uses TMDb Type to match<br>**Values:** `documentary`, `news`, `production`, `miniseries`, `reality`, `scripted`, `talk_show`, `video` | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| Special Filters | Description | Movies | Shows | Seasons | Episodes | Artists | Albums | Track |
|:-----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
| `history` | Uses the release date attribute (originally available) to match dates throughout history<br>`day`: Match the Day and Month to Today's Date<br>`month`: Match the Month to Today's Date<br>`1-30`: Match the Day and Month to Today's Date or `1-30` days before Today's Date | &#9989; | &#9989; | &#10060; | &#9989; | &#10060; | &#9989; | &#10060; |
| `episodes` | Uses the item's episodes attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items episodes that must match the sub-filter. | &#10060; | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; |
| `seasons` | Uses the item's seasons attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items seasons that must match the sub-filter. | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tracks` | Uses the item's tracks attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items tracks that must match the sub-filter. | &#10060; | &#10060; | &#10060; | &#10060; | &#9989; | &#9989; | &#10060; |
| `albums` | Uses the item's albums attributes to match <br> Use the `percentage` attribute given a number between 0-100 to determine the percentage of an items albums that must match the sub-filter. | &#10060; | &#10060; | &#10060; | &#10060; | &#9989; | &#10060; | &#10060; |
| `original_language`<sup>1</sup><br>`original_language.not`<sup>1</sup> | Uses TMDb original language [ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) to match<br>Example: `original_language: en, ko` | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_status`<sup>1</sup><br>`tmdb_status.not`<sup>1</sup> | Uses TMDb Status to match<br>**Values:** `returning`, `planned`, `production`, `ended`, `canceled`, `pilot` | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `tmdb_type`<sup>1</sup><br>`tmdb_type.not`<sup>1</sup> | Uses TMDb Type to match<br>**Values:** `documentary`, `news`, `production`, `miniseries`, `reality`, `scripted`, `talk_show`, `video` | &#10060; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
| `imdb_keyword`<sup>1</sup><sup>2</sup> | Uses the keywords from IMDb to match<br>`keywords`: list of keywords to match<br>`minimum_votes`: minimum number of votes keywords must have<br>`minimum_relevant`: minimum number of relevant votes keywords must have<br>`minimum_percentage`: minimum percentage of relevant votes keywords must have | &#9989; | &#9989; | &#10060; | &#10060; | &#10060; | &#10060; | &#10060; |
<sup>1</sup> Also filters out missing movies/shows from being added to Radarr/Sonarr.
<sup>2</sup> Also is a Tag Filter and can use all of those modifiers.
## Collection Filter Examples
A few examples are listed below:

@ -76,7 +76,7 @@ filters_by_type = {
"movie_show_episode_track": ["duration"],
"movie_show_artist_album": ["genre"],
"movie_show_episode": ["actor", "content_rating", "audience_rating"],
"movie_show": ["studio", "original_language", "tmdb_vote_count", "tmdb_year", "tmdb_genre", "tmdb_title", "tmdb_keyword"],
"movie_show": ["studio", "original_language", "tmdb_vote_count", "tmdb_year", "tmdb_genre", "tmdb_title", "tmdb_keyword", "imdb_keyword"],
"movie_episode": ["director", "producer", "writer"],
"movie_artist": ["country"],
"show_artist": ["folder"],
@ -100,6 +100,7 @@ tmdb_filters = [
"original_language", "origin_country", "tmdb_vote_count", "tmdb_year", "tmdb_keyword", "tmdb_genre",
"first_episode_aired", "last_episode_aired", "last_episode_aired_or_never", "tmdb_status", "tmdb_type", "tmdb_title"
]
imdb_filters = ["imdb_keyword"]
string_filters = [
"title", "summary", "studio", "edition", "record_label", "folder", "filepath", "audio_track_title", "tmdb_title",
"audio_codec", "audio_profile", "video_codec", "video_profile"
@ -107,7 +108,7 @@ string_filters = [
string_modifiers = ["", ".not", ".is", ".isnot", ".begins", ".ends", ".regex"]
tag_filters = [
"actor", "collection", "content_rating", "country", "director", "network", "genre", "label", "producer", "year",
"origin_country", "writer", "resolution", "audio_language", "subtitle_language", "tmdb_keyword", "tmdb_genre"
"origin_country", "writer", "resolution", "audio_language", "subtitle_language", "tmdb_keyword", "tmdb_genre", "imdb_keyword"
]
tag_modifiers = ["", ".not", ".regex", ".count_gt", ".count_gte", ".count_lt", ".count_lte"]
boolean_filters = ["has_collection", "has_overlay", "has_dolby_vision"]
@ -1607,6 +1608,7 @@ class CollectionBuilder:
if current_filters:
self.filters.append(current_filters)
self.has_tmdb_filters = any([k in tmdb_filters for f in self.filters for k, v in f])
self.has_imdb_filters = any([k in imdb_filters for f in self.filters for k, v in f])
def gather_ids(self, method, value):
expired = None
@ -2097,6 +2099,9 @@ class CollectionBuilder:
return util.validate_regex(data, self.Type, validate=validate)
elif attribute in string_attributes and modifier in ["", ".not", ".is", ".isnot", ".begins", ".ends"]:
return smart_pair(util.get_list(data, split=False))
elif (attribute in number_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]) \
or (attribute in tag_attributes and modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]):
return util.parse(self.Type, final, data, datatype="int", minimum=0)
elif attribute == "origin_country":
return util.get_list(data, upper=True)
elif attribute in ["original_language", "tmdb_keyword"]:
@ -2115,6 +2120,19 @@ class CollectionBuilder:
return util.parse(self.Type, final, data, datatype="commalist", options=[v for k, v in tmdb.discover_types.items()])
elif attribute == "tmdb_status":
return util.parse(self.Type, final, data, datatype="commalist", options=[v for k, v in tmdb.discover_status.items()])
elif attribute == "imdb_keyword":
new_dictionary = {"minimum_votes": 0, "minimum_relevant": 0, "minimum_percentage": 0}
if isinstance(data, dict) and "keyword" not in data:
raise Failed(f"{self.Type} Error: imdb_keyword requires the keyword attribute")
elif isinstance(data, dict):
dict_methods = {dm.lower(): dm for dm in data}
new_dictionary["keywords"] = util.parse(self.Type, "keyword", data, methods=dict_methods, parent=attribute, datatype="commalist")
new_dictionary["minimum_votes"] = util.parse(self.Type, "minimum_votes", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0)
new_dictionary["minimum_relevant"] = util.parse(self.Type, "minimum_relevant", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0)
new_dictionary["minimum_percentage"] = util.parse(self.Type, "minimum_percentage", data, methods=dict_methods, parent=attribute, datatype="int", minimum=0, maximum=100)
else:
new_dictionary["keywords"] = util.parse(self.Type, final, data, datatype="commalist")
return new_dictionary
elif attribute in tag_attributes and modifier in ["", ".not"]:
if attribute in plex.tmdb_attributes:
final_values = []
@ -2184,9 +2202,6 @@ class CollectionBuilder:
data = str(data)[:-1]
search_data = util.parse(self.Type, final, data, datatype="int", minimum=0)
return f"{search_data}{search_mod}" if plex_search else search_data
elif (attribute in number_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]) \
or (attribute in tag_attributes and modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]):
return util.parse(self.Type, final, data, datatype="int", minimum=0)
elif attribute in float_attributes and modifier in ["", ".not", ".gt", ".gte", ".lt", ".lte"]:
return util.parse(self.Type, final, data, datatype="float", minimum=0, maximum=None if attribute == "duration" else 10)
elif attribute in boolean_attributes or (attribute in float_attributes and modifier in [".rated"]):
@ -2325,8 +2340,16 @@ class CollectionBuilder:
return False
return True
def check_imdb_filters(self, imdb_info, filters_in):
for filter_method, filter_data in filters_in:
filter_attr, modifier, filter_final = self.library.split(filter_method)
if self.config.IMDb.item_filter(imdb_info, filter_attr, modifier, filter_final, filter_data) is False:
return False
return True
def check_missing_filters(self, item_id, is_movie, tmdb_item=None, check_released=False):
if self.has_tmdb_filters or check_released:
imdb_info = None
if self.has_tmdb_filters or self.has_imdb_filters or check_released:
try:
if tmdb_item is None:
if is_movie:
@ -2335,20 +2358,34 @@ class CollectionBuilder:
tmdb_item = self.config.TMDb.get_show(self.config.Convert.tvdb_to_tmdb(item_id, fail=True), ignore_cache=True)
except Failed:
return False
if self.has_imdb_filters and tmdb_item and tmdb_item.imdb_id:
try:
imdb_info = self.config.IMDb.keywords(tmdb_item.imdb_id)
except Failed as e:
logger.error(e)
return False
if check_released:
date_to_check = tmdb_item.release_date if is_movie else tmdb_item.first_air_date
if not date_to_check or date_to_check > self.current_time:
return False
final_return = True
if self.has_tmdb_filters:
if self.has_tmdb_filters or self.has_imdb_filters:
final_return = False
for filter_list in self.filters:
tmdb_f = [(k, v) for k, v in filter_list if k in tmdb_filters]
if not tmdb_f:
continue
tmdb_f = []
imdb_f = []
for k, v in filter_list:
if k.split(".")[0] in tmdb_filters:
tmdb_f.append((k, v))
elif k.split(".")[0] in imdb_filters:
imdb_f.append((k, v))
or_result = True
if self.check_tmdb_filters(tmdb_item, tmdb_f, is_movie) is False:
or_result = False
if tmdb_f:
if not tmdb_item or self.check_tmdb_filters(tmdb_item, tmdb_f, is_movie) is False:
or_result = False
if imdb_f:
if not imdb_info and self.check_imdb_filters(imdb_info, imdb_f) is False:
or_result = False
if or_result:
final_return = True
return final_return
@ -2360,12 +2397,16 @@ class CollectionBuilder:
item = self.library.reload(item)
final_return = False
tmdb_item = None
imdb_info = None
for filter_list in self.filters:
tmdb_f = []
imdb_f = []
plex_f = []
for k, v in filter_list:
if k.split(".")[0] in tmdb_filters:
tmdb_f.append((k, v))
elif k.split(".")[0] in imdb_filters:
imdb_f.append((k, v))
else:
plex_f.append((k, v))
or_result = True
@ -2380,10 +2421,24 @@ class CollectionBuilder:
tmdb_item = self.config.TMDb.get_movie(self.library.movie_rating_key_map[item.ratingKey], ignore_cache=True)
else:
tmdb_item = self.config.TMDb.get_show(self.config.Convert.tvdb_to_tmdb(self.library.show_rating_key_map[item.ratingKey], fail=True), ignore_cache=True)
except Failed:
except Failed as e:
logger.error(e)
or_result = False
if not tmdb_item or self.check_tmdb_filters(tmdb_item, tmdb_f, item.ratingKey in self.library.movie_rating_key_map) is False:
or_result = False
if imdb_f:
if not imdb_info and isinstance(item, (Movie, Show)):
if item.ratingKey not in self.library.imdb_rating_key_map:
logger.warning(f"Filter Error: No IMDb ID found for {item.title}")
or_result = False
else:
try:
imdb_info = self.config.IMDb.keywords(self.library.imdb_rating_key_map[item.ratingKey])
except Failed as e:
logger.error(e)
or_result = False
if not imdb_info or self.check_imdb_filters(imdb_info, imdb_f) is False:
or_result = False
if plex_f and self.library.check_filters(item, plex_f, self.current_time) is False:
or_result = False
if or_result:

@ -268,6 +268,13 @@ class Cache:
media_id TEXT,
media_type TEXT)"""
)
cursor.execute(
"""CREATE TABLE IF NOT EXISTS imdb_keywords (
key INTEGER PRIMARY KEY,
imdb_id TEXT,
keywords TEXT,
expiration_date TEXT)"""
)
cursor.execute(
"""CREATE TABLE IF NOT EXISTS imdb_parental (
key INTEGER PRIMARY KEY,
@ -937,6 +944,31 @@ class Cache:
with closing(connection.cursor()) as cursor:
cursor.execute(f"DELETE FROM list_ids WHERE list_key = ?", (list_key,))
def query_imdb_keywords(self, imdb_id, expiration):
imdb_dict = {}
expired = None
with sqlite3.connect(self.cache_path) as connection:
connection.row_factory = sqlite3.Row
with closing(connection.cursor()) as cursor:
cursor.execute("SELECT * FROM imdb_keywords WHERE imdb_id = ?", (imdb_id,))
row = cursor.fetchone()
if row:
keywords = row["keywords"] if row["keywords"] else ""
imdb_dict = {k.split(":")[0]: (int(k.split(":")[1]), int(k.split(":")[2])) for k in keywords.split("|")}
datetime_object = datetime.strptime(row["expiration_date"], "%Y-%m-%d")
time_between_insertion = datetime.now() - datetime_object
expired = time_between_insertion.days > expiration
return imdb_dict, expired
def update_imdb_keywords(self, expired, imdb_id, keywords, expiration):
expiration_date = datetime.now() if expired is True else (datetime.now() - timedelta(days=random.randint(1, expiration)))
with sqlite3.connect(self.cache_path) as connection:
connection.row_factory = sqlite3.Row
with closing(connection.cursor()) as cursor:
cursor.execute("INSERT OR IGNORE INTO imdb_keywords(imdb_id) VALUES(?)", (imdb_id,))
update_sql = "UPDATE imdb_keywords SET keywords = ?, expiration_date = ? WHERE imdb_id = ?"
cursor.execute(update_sql, ("|".join([f"{k}:{u}:{v}" for k, (u, v) in keywords.items()]), expiration_date.strftime("%Y-%m-%d"), imdb_id))
def query_imdb_parental(self, imdb_id, expiration):
imdb_dict = {}
expired = None

@ -157,6 +157,29 @@ class IMDb:
return imdb_ids
raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")
def keywords(self, imdb_id, ignore_cache=False):
imdb_keywords = {}
expired = None
if self.config.Cache and not ignore_cache:
imdb_keywords, expired = self.config.Cache.query_imdb_keywords(imdb_id, self.config.Cache.expiration)
if imdb_keywords and expired is False:
return imdb_keywords
response = self.config.get_html(f"https://www.imdb.com/title/{imdb_id}/keywords")
keywords = response.xpath("//td[@class='soda sodavote']")
if not keywords:
raise Failed(f"IMDb Error: No Item Found for IMDb ID: {imdb_id}")
for k in keywords:
name = k.xpath("div[@class='sodatext']/a/text()")[0]
relevant = k.xpath("div[@class='did-you-know-actions']/div/a/text()")[0].strip()
if "of" in relevant:
result = re.search(r"(\d+) of (\d+).*", relevant)
imdb_keywords[name] = (int(result.group(1)), int(result.group(2)))
else:
imdb_keywords[name] = (0, 0)
if self.config.Cache and not ignore_cache:
self.config.Cache.update_imdb_keywords(expired, imdb_id, imdb_keywords, self.config.Cache.expiration)
return imdb_keywords
def parental_guide(self, imdb_id, ignore_cache=False):
parental_dict = {}
expired = None
@ -289,3 +312,27 @@ class IMDb:
if imdb_id not in self.episode_ratings or season_num not in self.episode_ratings[imdb_id] or episode_num not in self.episode_ratings[imdb_id][season_num]:
return None
return self.episode_ratings[imdb_id][season_num][episode_num]
def item_filter(self, imdb_info, filter_attr, modifier, filter_final, filter_data):
if filter_attr == "imdb_keyword":
mr = imdb_info["minimum_relevant"]
mv = imdb_info["minimum_votes"]
mp = imdb_info["minimum_percentage"]
attrs = [k for k, (r, v) in imdb_info.items() if r >= mr and v >= mv and (v == 0 or r / v >= mp)]
if modifier == ".regex":
has_match = False
for reg in filter_data:
for name in attrs:
if re.compile(reg).search(name):
has_match = True
if has_match is False:
return False
elif modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]:
test_number = len(attrs) if attrs else 0
modifier = f".{modifier[7:]}"
if test_number is None or util.is_number_filter(test_number, modifier, filter_data):
return False
elif (not list(set(filter_data) & set(attrs)) and modifier == "") \
or (list(set(filter_data) & set(attrs)) and modifier == ".not"):
return False
return True

@ -29,6 +29,7 @@ class Library(ABC):
self.mal_map = {}
self.movie_rating_key_map = {}
self.show_rating_key_map = {}
self.imdb_rating_key_map = {}
self.cached_items = {}
self.run_again = []
self.type = ""
@ -316,6 +317,7 @@ class Library(ABC):
self.show_rating_key_map[key] = main_id[0]
util.add_dict_list(main_id, key, self.show_map)
if imdb_id:
self.imdb_rating_key_map[key] = imdb_id[0]
util.add_dict_list(imdb_id, key, self.imdb_map)
logger.info("")
logger.info(f"Processed {len(items)} {self.type}s")

@ -461,6 +461,11 @@ class TMDb:
has_match = True
if has_match is False:
return False
elif modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]:
test_number = len(attrs) if attrs else 0
modifier = f".{modifier[7:]}"
if test_number is None or util.is_number_filter(test_number, modifier, filter_data):
return False
elif (not list(set(filter_data) & set(attrs)) and modifier == "") \
or (list(set(filter_data) & set(attrs)) and modifier == ".not"):
return False

Loading…
Cancel
Save