# Plex-Meta-Manager/modules/mojo.py

from datetime import datetime
from modules import util
from modules.request import parse_qs, urlparse
from modules.util import Failed
from num2words import num2words
# Logger object shared through modules.util.
logger = util.logger

# Builder method names; BoxOfficeMojo.get_imdb_ids dispatches on these values.
builders = [
    "mojo_world",
    "mojo_domestic",
    "mojo_international",
    "mojo_record",
    "mojo_all_time",
    "mojo_never",
]
# Record charts keyed by option name.
# Each value is (display title, URL path, query params), where the query
# params are either a dict or None when the chart needs no arguments.
top_options = {
    "second_weekend_drop": ("Biggest Second Weekend Drops", "/chart/biggest_second_weekend_gross_drop/", None),
    "post_thanksgiving_weekend_drop": ("Largest Post-Thanksgiving Weekend Drops", "/chart/post_thanksgiving_weekend_drop/", None),
    "top_opening_weekend": ("Top Opening Weekends", "/chart/top_opening_weekend/", None),
    "worst_opening_weekend_theater_avg": ("Worst Opening Weekend Per-Theater Averages", "/chart/btm_wide_opening_weekend_theater_avg/", None),
    "top_opening_weekend_theater_avg_all": ("Top Opening Theater Averages", "/chart/top_opening_weekend_theater_avg/", {"by_release_scale": "all"}),
    "top_opening_weekend_theater_avg_wide": ("Top Wide Opening Theater Averages", "/chart/top_opening_weekend_theater_avg/", {"by_release_scale": "wide"}),
    # Opening weekends by month.
    "january": ("Top Opening Weekend in January", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "january"}),
    "february": ("Top Opening Weekend in February", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "february"}),
    "march": ("Top Opening Weekend in March", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "march"}),
    "april": ("Top Opening Weekend in April", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "april"}),
    "may": ("Top Opening Weekend in May", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "may"}),
    "june": ("Top Opening Weekend in June", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "june"}),
    "july": ("Top Opening Weekend in July", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "july"}),
    "august": ("Top Opening Weekend in August", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "august"}),
    "september": ("Top Opening Weekend in September", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "september"}),
    "october": ("Top Opening Weekend in October", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "october"}),
    "november": ("Top Opening Weekend in November", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "november"}),
    "december": ("Top Opening Weekend in December", "/chart/release_top_opn_wkd_in_month/", {"in_occasion": "december"}),
    # Opening weekends by season.
    "spring": ("Top Opening Weekend in Spring", "/chart/release_top_opn_wkd_in_season/", {"in_occasion": "spring"}),
    "summer": ("Top Opening Weekend in Summer", "/chart/release_top_opn_wkd_in_season/", {"in_occasion": "summer"}),
    "fall": ("Top Opening Weekend in Fall", "/chart/release_top_opn_wkd_in_season/", {"in_occasion": "fall"}),
    "holiday_season": ("Top Opening Weekend in The Holiday Season", "/chart/release_top_opn_wkd_in_season/", {"in_occasion": "holiday_season"}),
    "winter": ("Top Opening Weekend in Winter", "/chart/release_top_opn_wkd_in_season/", {"in_occasion": "winter"}),
    # Opening weekends by content rating.
    "g": ("Top Opening Weekend for G Ratings", "/chart/top_opening_wknd_by_mpaa/", {"by_mpaa": "G"}),
    # NOTE(review): this value is already percent-encoded; confirm the request
    # layer does not encode it a second time.
    "g/pg": ("Top Opening Weekend for G/PG Ratings", "/chart/top_opening_wknd_by_mpaa/", {"by_mpaa": "G%2FPG"}),
    "pg": ("Top Opening Weekend for PG Ratings", "/chart/top_opening_wknd_by_mpaa/", {"by_mpaa": "PG"}),
    "pg-13": ("Top Opening Weekend for PG-13 Ratings", "/chart/top_opening_wknd_by_mpaa/", {"by_mpaa": "PG-13"}),
    "r": ("Top Opening Weekend for R Ratings", "/chart/top_opening_wknd_by_mpaa/", {"by_mpaa": "R"}),
    "nc-17": ("Top Opening Weekend for NC-17 Ratings", "/chart/top_opening_wknd_by_mpaa/", {"by_mpaa": "NC-17"}),
    # Holiday weekends. The params must be dicts ("key": "value"); a comma here
    # would silently build a set instead.
    "mlk": ("Top Weekend for MLK Day", "/chart/release_top_weekend_gross/", {"by_occasion": "us_mlkday_weekend"}),
    "easter": ("Top Weekend for Easter", "/chart/release_top_weekend_gross/", {"by_occasion": "easter_weekend"}),
    "4th": ("Top Weekend for the 4th of July", "/chart/release_top_weekend_gross/", {"by_occasion": "us_july4_weekend"}),
    "memorial": ("Top Weekend for Memorial Day", "/chart/release_top_weekend_gross/", {"by_occasion": "us_memorialday_weekend"}),
    "labor": ("Top Weekend for Labor Day", "/chart/release_top_weekend_gross/", {"by_occasion": "us_laborday_weekend"}),
    "president": ("Top Weekend for President's Day", "/chart/release_top_weekend_gross/", {"by_occasion": "us_presidentsday_weekend"}),
    "thanksgiving_3": ("Top 3 Day Weekend for Thanksgiving", "/chart/release_top_weekend_gross/", {"by_occasion": "us_thanksgiving_3"}),
    "thanksgiving_5": ("Top 5 Day Weekend for Thanksgiving", "/chart/release_top_weekend_gross/", {"by_occasion": "us_thanksgiving_5"}),
    "mlk_opening": ("Top Opening Weekend for MLK Day", "/chart/top_opening_holiday_weekends/", {"by_occasion": "us_mlkday_weekend"}),
    "easter_opening": ("Top Opening Weekend for Easter", "/chart/top_opening_holiday_weekends/", {"by_occasion": "easter_weekend"}),
    "memorial_opening": ("Top Opening Weekend for Memorial Day", "/chart/top_opening_holiday_weekends/", {"by_occasion": "us_memorialday_weekend"}),
    "labor_opening": ("Top Opening Weekend for Labor Day", "/chart/top_opening_holiday_weekends/", {"by_occasion": "us_laborday_weekend"}),
    "president_opening": ("Top Opening Weekend for President's Day", "/chart/top_opening_holiday_weekends/", {"by_occasion": "us_presidentsday_weekend"}),
    "thanksgiving_3_opening": ("Top 3 Day Opening Weekend for Thanksgiving", "/chart/top_thanksgiving_openings/", {"by_occasion": "us_thanksgiving_3"}),
    "thanksgiving_5_opening": ("Top 5 Day Opening Weekend for Thanksgiving", "/chart/top_thanksgiving_openings/", {"by_occasion": "us_thanksgiving_5"}),
    "opening_week": ("Top Opening Week", "/chart/top_opening_week/", None),
    "biggest_theater_drop": ("Biggest Theater Drops", "/chart/biggest_third_weekend_num_theaters_drop/", None),
    # Single-day grosses.
    "opening_day": ("Top Opening Day", "/chart/top_opening_day/", None),
    "single_day_grosses": ("Top Day", "/chart/release_top_daily_gross/", None),
    "christmas_day_gross": ("Top Christmas Day", "/chart/release_top_holiday_gross/", {"by_occasion": "christmas_day"}),
    "new_years_day_gross": ("Top New Years Day", "/chart/release_top_holiday_gross/", {"by_occasion": "newyearsday"}),
    "friday": ("Top Friday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "friday"}),
    "saturday": ("Top Saturday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "saturday"}),
    "sunday": ("Top Sunday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "sunday"}),
    "monday": ("Top Monday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "monday"}),
    "tuesday": ("Top Tuesday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "tuesday"}),
    "wednesday": ("Top Wednesday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "wednesday"}),
    "thursday": ("Top Thursday", "/chart/release_top_daily_gross_by_dow/", {"by_occasion": "thursday"}),
    "friday_non_opening": ("Top Friday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "friday"}),
    "saturday_non_opening": ("Top Saturday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "saturday"}),
    "sunday_non_opening": ("Top Sunday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "sunday"}),
    "monday_non_opening": ("Top Monday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "monday"}),
    "tuesday_non_opening": ("Top Tuesday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "tuesday"}),
    "wednesday_non_opening": ("Top Wednesday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "wednesday"}),
    "thursday_non_opening": ("Top Thursday Non-Opening", "/chart/top_non_opening_by_dow/", {"by_occasion": "thursday"}),
}
# Accepted values for the "chart" attribute.
chart_options = ["domestic", "worldwide"]

# Lower-cased rating name -> query value ("/" is sent percent-encoded as %2F).
content_rating_options = {
    rating.lower(): rating.replace("/", "%2F")
    for rating in ("G", "G/PG", "PG", "PG-13", "R", "NC-17")
}

# Rank threshold -> (phrase used in the chart title, "never_<rank>" key).
never_in_options = {
    str(rank): ("#1" if rank == 1 else f"the Top {rank}", f"never_{rank}")
    for rank in (1, 5, 10)
}

# Range types; the domestic list is the international list plus four more.
intl_range_options = ["weekend", "monthly", "quarterly", "yearly"]
dome_range_options = [*intl_range_options, "daily", "weekly", "season", "holiday"]

# "current" followed by every year from 1977 through the year at import time.
year_options = ["current", *map(str, range(1977, datetime.now().year + 1))]

# Quarter names and the month-number -> quarter lookup.
quarter_options = ["current"] + [f"q{number}" for number in range(1, 5)]
quarters = {month: f"q{(month + 2) // 3}" for month in range(1, 13)}

# Season names and the month-number -> season lookup.
season_options = ["current", "winter", "spring", "summer", "fall", "holiday"]
seasons = {
    month: season
    for season, months in (
        ("winter", (1, 2)),
        ("spring", (3, 4)),
        ("summer", (5, 6, 7, 8)),
        ("fall", (9, 10)),
        ("holiday", (11, 12)),
    )
    for month in months
}
# Holiday option name -> (display title, URL slug).
holiday_options = {
    "new_years_day": ("New Year's Day", "newyearsday"),
    "new_year_weekend": ("New Year Weekend", "us_newyear_weekend"),
    "mlk_day": ("MLK Day", "us_mlkday"),
    "mlk_day_weekend": ("MLK Day Weekend", "us_mlkday_weekend"),
    "presidents_day": ("President's Day", "us_presidentsday"),
    "presidents_day_weekend": ("President's Day Weekend", "us_presidentsday_weekend"),
    "easter": ("Easter", "easter_sunday"),
    "easter_weekend": ("Easter Weekend", "easter_weekend"),
    "memorial_day": ("Memorial Day", "us_memorialday"),
    "memorial_day_weekend": ("Memorial Day Weekend", "us_memorialday_weekend"),
    "independence_day": ("Independence Day", "us_july4"),
    "independence_day_weekend": ("Independence Day Weekend", "us_july4_weekend"),
    "labor_day": ("Labor Day", "us_laborday"),
    "labor_day_weekend": ("Labor Day Weekend", "us_laborday_weekend"),
    "indigenous_day": ("Indigenous People's Day", "us_indig_peoples_day"),
    # The title was previously empty, which left the chart title blank.
    "indigenous_day_weekend": ("Indigenous People's Day Weekend", "us_indig_peoples_day_weekend"),
    "halloween": ("Halloween", "halloween"),
    "thanksgiving": ("Thanksgiving", "us_thanksgiving"),
    "thanksgiving_3": ("Thanksgiving Weekend", "us_thanksgiving_3"),
    "thanksgiving_4": ("Thanksgiving 4-Day Weekend", "us_thanksgiving_4"),
    "thanksgiving_5": ("Thanksgiving 5-Day Weekend", "us_thanksgiving_5"),
    "post_thanksgiving_weekend": ("Post-Thanksgiving Weekend", "us_post_thanksgiving_weekend"),
    "christmas_day": ("Christmas Day", "christmas_day"),
    "christmas_weekend": ("Christmas Weekend", "us_christmas_weekend"),
    "new_years_eve": ("New Year's Eve", "newyearseve"),
}

# Site root; request paths are appended to this.
base_url = "https://www.boxofficemojo.com"
class BoxOfficeMojo:
    """Scrape Box Office Mojo chart pages and resolve their entries to IMDb IDs.

    ``requests`` must provide ``get_html(url, header=..., params=...)`` whose
    result exposes an ``xpath`` method. ``cache`` may be falsy; otherwise it
    must provide ``query_letterboxd_map`` and ``update_letterboxd_map``.
    """

    def __init__(self, requests, cache):
        self.requests = requests
        self.cache = cache
        # Filled on first access by the properties below.
        self._never_options = None
        self._intl_options = None
        self._year_options = None

    def _options(self, url, nav_type="area"):
        """Return ``{lower-cased option text: "area" query value}`` for a nav selector.

        Reads the ``<select id="{nav_type}-navSelector">`` element on ``url``.
        Options whose link has no ``area`` query argument map to ``""``.
        """
        output = {}
        options = self._request(url, xpath=f"//select[@id='{nav_type}-navSelector']/option")
        for option in options:
            query = parse_qs(urlparse(option.xpath("@value")[0]).query)
            output[option.xpath("text()")[0].lower()] = query["area"][0] if "area" in query else ""
        return output

    @property
    def never_options(self):
        """Area options of the "never in top" chart, fetched once and kept."""
        if self._never_options is None:
            self._never_options = self._options("/chart/never_in_top/")
        return self._never_options

    @property
    def intl_options(self):
        """Area options of the international page, fetched once and kept."""
        if self._intl_options is None:
            self._intl_options = self._options("/intl/")
        return self._intl_options

    @property
    def year_options(self):
        """Lower-cased option labels of the worldwide year selector, fetched once."""
        if self._year_options is None:
            self._year_options = list(self._options("/year/world/", nav_type="year"))
        return self._year_options

    def _request(self, url, xpath=None, params=None):
        """Fetch ``base_url + url``; return the ``xpath`` result, or the page if none given."""
        logger.trace(f"URL: {base_url}{url}")
        if params:
            logger.trace(f"Params: {params}")
        response = self.requests.get_html(f"{base_url}{url}", header=True, params=params)
        return response.xpath(xpath) if xpath else response

    def _parse_list(self, url, params, limit):
        """Return the link paths (query string removed) listed on a chart.

        Args:
            url: Chart path relative to ``base_url``.
            params: Query arguments selecting the chart variant, or ``None``.
            limit: Maximum number of entries to return; a value below 1 means
                every entry the chart reports.

        Returns:
            A list of href strings, at most ``limit`` long when a limit applies.
        """
        response = self._request(url, params=params)
        total_html = response.xpath("//li[contains(@class, 'mojo-pagination-button-center')]/a/text()")
        # The total is the third space-separated token of the pagination label.
        total = int(total_html[0].replace(",", "").split(" ")[2]) if total_html else 0
        if total and (limit < 1 or total < limit):
            limit = total
        # Follow-up requests advance the offset in steps of 200.
        pages = (limit - 1) // 200 + 1 if total else 0
        # Use the first link column that yields results. The trailing space in
        # "release " is deliberate text of the class match and must be kept.
        for field_name in ["release ", "title", "release_group"]:
            link_xpath = f"//td[contains(@class, 'mojo-field-type-{field_name}')]/a/@href"
            output = response.xpath(link_xpath)
            if output:
                break
        for i in range(1, pages):
            # Carry the chart's own query arguments onto every page; sending
            # only the offset would page through the unfiltered chart. A new
            # dict is built so the caller's params are never mutated.
            page_params = {**(params or {}), "offset": 200 * i}
            response = self._request(url, params=page_params)
            output.extend(response.xpath(link_xpath))
        if not limit or limit < 1 or len(output) < limit:
            limit = len(output)
        # split() rather than index() so an href without "?" is kept whole.
        return [href.split("?", 1)[0] for href in output[:limit]]

    def _imdb(self, url):
        """Return the IMDb ID linked from the "All Releases" option on ``url``.

        Raises:
            Failed: when the page has no such option.
        """
        response = self._request(url)
        imdb_url = response.xpath("//select[@id='releasegroup-picker-navSelector']/option[text()='All Releases']/@value")
        if not imdb_url:
            raise Failed(f"Mojo Error: IMDb ID not found at {base_url}{url}")
        # Drops the first seven characters and the last one; presumably the
        # "/title/" prefix and the trailing "/".
        return imdb_url[0][7:-1]

    @staticmethod
    def _range_chart(data):
        """Return ``(chart title, url path)`` for a date-range chart request."""
        range_type = data["range"]
        if range_type == "daily":
            day = datetime.strptime(data["range_data"], "%Y-%m-%d")
            day = day.strftime("%b {th}, %Y").replace("{th}", num2words(day.day, to='ordinal_num'))
            return f"{day}", f"/date/{data['range_data']}/"
        if range_type == "weekend":
            return f"Weekend {data['range_data']} {data['year']}", f"/weekend/{data['year']}W{data['range_data']:02}/"
        if range_type == "weekly":
            return f"Week {data['range_data']} {data['year']}", f"/weekly/{data['year']}W{data['range_data']:02}/"
        if range_type == "monthly":
            return f"{data['range_data'].capitalize()} {data['year']}", f"/month/{data['range_data']}/{data['year']}/"
        if range_type == "quarterly":
            return f"{data['range_data'].capitalize()} {data['year']}", f"/quarter/{data['range_data']}/{data['year']}/"
        if range_type == "season":
            return f"{data['range_data'].capitalize()} {data['year']}", f"/season/{data['range_data']}/{data['year']}/"
        if range_type == "holiday":
            title, slug = holiday_options[data["range_data"]]
            return f"{title} {data['year']}", f"/holiday/{slug}/{data['year']}/"
        # Any other range type is treated as a whole year.
        return f"{data['year']}", f"/year/{data['year']}/"

    def _chart_request(self, method, data):
        """Return ``(description, url path, query params or None)`` for a builder call."""
        params = None
        if method == "mojo_record":
            text, url, params = top_options[data["chart"]]
        elif method == "mojo_world":
            text = f"{data['year']} Worldwide Box Office"
            url = f"/year/world/{data['year']}/"
        elif method == "mojo_all_time":
            text = f"Top Lifetime {data['chart'].capitalize()}"
            if data["content_rating_filter"] is None:
                url = "/chart/top_lifetime_gross/" if data["chart"] == "domestic" else "/chart/ww_top_lifetime_gross/"
            else:
                text += f" {data['content_rating_filter'].upper()}"
                url = "/chart/mpaa_title_lifetime_gross/"
                params = {"by_mpaa": content_rating_options[data['content_rating_filter']]}
            text += " Grosses"
        elif method == "mojo_never":
            # NOTE(review): the second tuple element ("never_<rank>") is not
            # sent; the raw rank is. Confirm which value the site expects.
            pretty, _ = never_in_options[data["never"]]
            text = f"Top-Grossing Movies That Never Hit {pretty} {data['chart'].capitalize()}"
            url = "/chart/never_in_top/"
            params = {"by_rank_threshold": data["never"]}
            if data["chart"] != "domestic":
                params["area"] = self.never_options[data["chart"]]
        else:
            chart = data["chart"].capitalize() if "chart" in data else "Domestic"
            chart_title, url = self._range_chart(data)
            text = f"{chart} Box Office For {chart_title}"
        return text, url, params

    def get_imdb_ids(self, method, data):
        """Return ``(imdb_id, "imdb")`` tuples for the chart a builder describes.

        Args:
            method: Builder name, e.g. ``"mojo_record"`` or ``"mojo_world"``;
                any name without its own branch is handled as a date-range chart.
            data: Builder attributes; ``data["limit"]`` is always read, the
                other keys depend on ``method``.

        Raises:
            Failed: when the chart yields no items.

        Items whose IMDb ID cannot be resolved are logged and skipped.
        """
        text, url, params = self._chart_request(method, data)
        if data["limit"]:
            text += f" ({data['limit']})"
        logger.info(f"Processing {method.replace('_', ' ').title()}: {text}")
        items = self._parse_list(url, params, data["limit"])
        if not items:
            raise Failed(f"Mojo Error: No List Items found in {method}: {data}")
        ids = []
        total_items = len(items)
        for i, item in enumerate(items, 1):
            logger.ghost(f"Finding IMDb ID {i}/{total_items}")
            if "title" in item:
                # Title links embed the ID directly, so no lookup is needed.
                imdb_id = item[7:-1]
            else:
                imdb_id = None
                expired = None
                if self.cache:
                    imdb_id, expired = self.cache.query_letterboxd_map(item)
                # Fetch the page unless the cache returned an ID with
                # ``expired`` exactly False.
                if not imdb_id or expired is not False:
                    try:
                        imdb_id = self._imdb(item)
                    except Failed as e:
                        logger.error(e)
                        continue
                    if self.cache:
                        self.cache.update_letterboxd_map(expired, item, imdb_id)
            ids.append((imdb_id, "imdb"))
        # Count the IDs actually resolved; skipped items are excluded.
        logger.info(f"Processed {len(ids)} IMDb IDs")
        return ids