[24] Fixed `imdb_watchlist` builder (#2140)

5 months ago · a16bebd673
parent 279159c688
commit a16bebd673
8 changed files with 124 additions and 101 deletions
--- a/.github/workflows/increment-build.yml
+++ b/.github/workflows/increment-build.yml
@ -39,6 +39,7 @@ jobs:
  increment-build:
    runs-on: ubuntu-latest
    needs: [ verify-changes ]
+    if: needs.verify-changes.outputs.build == 'true'
    outputs:
      version: ${{ steps.update-version.outputs.version }}
      build-value: ${{ steps.update-version.outputs.build-value }}
--- a/.github/workflows/validate-pull.yml
+++ b/.github/workflows/validate-pull.yml
@ -8,6 +8,8 @@ jobs:

  validate-pull:
    runs-on: ubuntu-latest
+    outputs:
+      build: ${{ steps.list-changes.outputs.build }}
    steps:

      - name: Display Refs
@ -29,13 +31,29 @@ jobs:
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}

+      - name: Get changes
+        id: get-changes
+        run: echo "files=$(git diff --name-only HEAD^ | xargs)" >> $GITHUB_OUTPUT
+
+      - name: List changed files
+        id: list-changes
+        run: |
+            for file in ${{ steps.get-changes.outputs.files }}; do  # space-separated paths from the get-changes step
+                if [[ $file =~ ^(defaults|fonts|modules|kometa\.py|requirements\.txt|\.dockerignore|Dockerfile).*$ ]] ; then  # dots escaped so they only match a literal '.'
+                    echo "$file will trigger docker build"
+                    echo "build=true" >> $GITHUB_OUTPUT
+                else
+                    echo "$file will not trigger docker build"
+                fi
+            done
+
      - name: Run Spellcheck
        uses: rojopolis/spellcheck-github-actions@0.38.0

  docker-build-pull:
    runs-on: ubuntu-latest
    needs: [ validate-pull ]
-    if: contains(github.event.pull_request.labels.*.name, 'docker') || contains(github.event.pull_request.labels.*.name, 'tester')
+    if: needs.validate-pull.outputs.build == 'true' && (contains(github.event.pull_request.labels.*.name, 'docker') || contains(github.event.pull_request.labels.*.name, 'tester'))  # the build output is declared on validate-pull, the only job listed in needs
    outputs:
      commit-msg: ${{ steps.update-version.outputs.commit-msg }}
      version: ${{ steps.update-version.outputs.version }}
@ -53,6 +71,7 @@ jobs:
      - name: Check Out Repo
        uses: actions/checkout@v4
        with:
+          token: ${{ secrets.PAT }}
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}

@ -170,22 +189,22 @@ jobs:
      - name: Get Description
        id: get-description
        run: |
-          body="${{ github.event.pull_request.body }}"
-          body=$(echo "$body" | sed -n '/## Description/,/## Issues Fixed or Closed/{/## Description/b;/## Issues Fixed or Closed/b;p}')
+          body='${{ github.event.pull_request.body }}'
+          body=$(echo "$body" | sed -n '/## Description/,/##/{/## Description/b;/##/b;p}')
          body=$(echo $body|tr -d '\n')
-          echo "description=$body" >> $GITHUB_OUTPUT
+          printf 'description=%s\n' "$body" >> "$GITHUB_OUTPUT"  # expand $body; single-quoting the whole string would emit the literal text '$body'

      - name: Discord Testers Notification
        uses: Kometa-Team/discord-notifications@master
        with:
          webhook_id_token: ${{ secrets.TESTERS_WEBHOOK }}
-          message: "The Kometa team are requesting <@&917323027438510110> to assist with testing an upcoming feature/bug fix.
+          message: 'The Kometa team are requesting <@&917323027438510110> to assist with testing an upcoming feature/bug fix.

                    * For Local Git pull and checkout the `${{ github.event.pull_request.head.ref }}` branch${{ needs.docker-build-pull.outputs.extra-text }}

                    * For Docker use the `kometateam/kometa:${{ needs.docker-build-pull.outputs.tag-name }}` image to do your testing

-                    Please report back either here or on the original GitHub Pull Request"
+                    Please report back either here or on the original GitHub Pull Request'
          title: ${{ github.event.pull_request.title }}
          description: ${{ steps.get-description.outputs.description }}
          url: https://github.com/Kometa-Team/${{ vars.REPO_NAME }}/pull/${{ github.event.number }}
@ -198,14 +217,14 @@ jobs:
  update-testers:
    runs-on: ubuntu-latest
    needs: [ docker-build-pull ]
-    if: github.event.action == 'synchronize' && github.event.label.name == 'tester'
+    if: github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'tester')
    steps:

      - name: Discord Testers Notification
        uses: Kometa-Team/discord-notifications@master
        with:
          webhook_id_token: ${{ secrets.TESTERS_WEBHOOK }}
-          message: "New Commit Pushed to `${{ needs.docker-build-pull.outputs.tag-name }}`: ${{ needs.docker-build-pull.outputs.version }}"
+          message: 'New Commit Pushed to `${{ needs.docker-build-pull.outputs.tag-name }}`: ${{ needs.docker-build-pull.outputs.version }}'
          description: ${{ needs.docker-build-pull.outputs.commit-msg }}
          url: https://github.com/Kometa-Team/${{ vars.REPO_NAME }}/pull/${{ github.event.number }}
          color: ${{ vars.COLOR_SUCCESS }}
--- a/3
+++ b/3
@ -2,7 +2,7 @@
 Added tenacity requirement at 8.4.2
 Update PlexAPI requirement to 4.15.14
 Update psutil requirement to 6.0.0
-Update setuptools requirement to 70.1.0
+Update setuptools requirement to 70.1.1

 # Removed Features

@ -16,5 +16,6 @@ Added [`letterboxd_user_lists`](https://kometa.wiki/en/latest/files/dynamic_type
 # Bug Fixes
 Fixed multiple anime `int()` Errors
 Fixed #2100 `verify_ssl` wasn't working when downloading images
+Fixed `imdb_watchlist`

 Various other Minor Fixes
--- a/2
+++ b/2
@ -1 +1 @@
-2.0.2-build23
+2.0.2-build24
--- a/docs/files/builders/imdb.md
+++ b/docs/files/builders/imdb.md
@ -118,31 +118,36 @@ collections:

 Finds every item in an IMDb User's Watchlist.

-The expected input is an IMDb User ID (example: `ur12345678`). Multiple values are supported as a list or as a comma-separated string.
+| List Parameter | Description                                                                                                                                                                                                                                                                                                                                   |
+|:---------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `user_id`      | Specify the User ID for the IMDb Watchlist. **This attribute is required.**<br>**Options:** The ID that starts with `ur` found in the URL of the watchlist. (ex. `ur12345678`)                                                                                                                                                                |
+| `limit`        | Specify how items you want returned by the query.<br>**Options:** Any Integer `0` or greater where `0` get all items.<br>**Default:** `0`                                                                                                                                                                                                     |
+| `sort_by`      | Choose from one of the many available sort options.<br>**Options:** `custom.asc`, `custom.desc`, `title.asc`, `title.desc`, `rating.asc`, `rating.desc`, `popularity.asc`, `popularity.desc`, `votes.asc`, `votes.desc`, `release.asc`, `release.desc`, `runtime.asc`, `runtime.desc`, `added.asc`, `added.desc`<br>**Default:** `custom.asc` |
+
+Multiple values are supported only as a list; a comma-separated string will not work.

 The `sync_mode: sync` and `collection_order: custom` settings are recommended since the lists are continuously updated and in a specific order.

 ```yaml
 collections:
  My Watch Watchlist:
-    imdb_watchlist: ur64054558
-    collection_order: custom
-    sync_mode: sync
-```
-```yaml
-collections:
-  My Friends Watchlists:
-    imdb_watchlist: ur64054558, ur12345678
+    imdb_watchlist: 
+      user_id: ur64054558
+      sort_by: rating.asc
    collection_order: custom
    sync_mode: sync
 ```
+
 ```yaml
 collections:
  My Friends Watchlists:
    imdb_watchlist: 
-      - ur64054558
-      - ur12345678
-    collection_order: custom
+      - user_id: ur64054558
+        sort_by: rating.asc
+        limit: 100
+      - user_id: ur12345678
+        sort_by: rating.asc
+        limit: 100
    sync_mode: sync
 ```

--- a/modules/builder.py
+++ b/modules/builder.py
@ -1477,12 +1477,9 @@ class CollectionBuilder:
                    self.builders.append((method_name, value))
                else:
                    raise Failed(f"{self.Type} Error: imdb_id {value} must begin with tt")
-        elif method_name == "imdb_list":
-            try:
-                for imdb_dict in self.config.IMDb.validate_imdb_lists(self.Type, method_data):
+        elif method_name in ["imdb_list", "imdb_watchlist"]:
+            for imdb_dict in self.config.IMDb.validate_imdb(self.Type, method_name, method_data):
                self.builders.append((method_name, imdb_dict))
-            except Failed as e:
-                logger.error(e)
        elif method_name == "imdb_chart":
            for value in util.get_list(method_data):
                if value in imdb.movie_charts and not self.library.is_movie:
@ -1493,9 +1490,6 @@ class CollectionBuilder:
                    self.builders.append((method_name, value))
                else:
                    raise Failed(f"{self.Type} Error: chart: {value} is invalid options are {[i for i in imdb.charts]}")
-        elif method_name == "imdb_watchlist":
-            for imdb_user in self.config.IMDb.validate_imdb_watchlists(self.Type, method_data, self.language):
-                self.builders.append((method_name, imdb_user))
        elif method_name == "imdb_award":
            for dict_data in util.parse(self.Type, method_name, method_data, datatype="listdict"):
                dict_methods = {dm.lower(): dm for dm in dict_data}
--- a/modules/imdb.py
+++ b/modules/imdb.py
@ -1,6 +1,5 @@
-import csv, gzip, json, math, os, re, shutil, time
+import csv, gzip, json, math, os, re, shutil
 from modules import util
-from modules.request import parse_qs, urlparse
 from modules.util import Failed

 logger = util.logger
@ -127,6 +126,7 @@ base_url = "https://www.imdb.com"
 git_base = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Awards/master"
 search_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/HASH"
 list_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/LIST_HASH"
+watchlist_hash_url = "https://raw.githubusercontent.com/Kometa-Team/IMDb-Hash/master/WATCHLIST_HASH"
 graphql_url = "https://api.graphql.imdb.com/"
 list_url = f"{base_url}/list/ls"

@ -142,6 +142,7 @@ class IMDb:
        self._events = {}
        self._search_hash = None
        self._list_hash = None
+        self._watchlist_hash = None
        self.event_url_validation = {}

    def _request(self, url, language=None, xpath=None, params=None):
@ -166,6 +167,12 @@ class IMDb:
            self._list_hash = self.requests.get(list_hash_url).text.strip()
        return self._list_hash

+    @property
+    def watchlist_hash(self):
+        if self._watchlist_hash is None:
+            self._watchlist_hash = self.requests.get(watchlist_hash_url).text.strip()
+        return self._watchlist_hash
+
    @property
    def events_validation(self):
        if self._events_validation is None:
@ -177,32 +184,44 @@ class IMDb:
            self._events[event_id] = self.requests.get_yaml(f"{git_base}/events/{event_id}.yml").data
        return self._events[event_id]

-    def validate_imdb_lists(self, err_type, imdb_lists):
+    def validate_imdb(self, err_type, method, imdb_dicts):
        valid_lists = []
-        for imdb_dict in util.get_list(imdb_lists, split=False):
+        main = "list_id" if method == "imdb_list" else "user_id"
+        for imdb_dict in util.get_list(imdb_dicts, split=True if isinstance(imdb_dicts, str) else False):
            if not isinstance(imdb_dict, dict):
-                imdb_dict = {"list_id": imdb_dict}
-            if "url" in imdb_dict and "list_id" not in imdb_dict:
-                imdb_dict["list_id"] = imdb_dict["url"]
+                imdb_dict = {main: imdb_dict}
+            if "url" in imdb_dict and main not in imdb_dict:
+                imdb_dict[main] = imdb_dict["url"]
            dict_methods = {dm.lower(): dm for dm in imdb_dict}
-            if "list_id" not in dict_methods:
-                raise Failed(f"{err_type} Error: imdb_list list_id attribute not found")
-            elif imdb_dict[dict_methods["list_id"]] is None:
-                raise Failed(f"{err_type} Error: imdb_list list_id attribute is blank")
+            if main not in dict_methods:
+                raise Failed(f"{err_type} Error: {method} {main} attribute not found")
+            elif imdb_dict[dict_methods[main]] is None:
+                raise Failed(f"{err_type} Error: {method} {main} attribute is blank")
            else:
-                imdb_url = imdb_dict[dict_methods["list_id"]].strip()
-                if imdb_url.startswith(f"{base_url}/search/"):
-                    raise Failed("IMDb Error: URLs with https://www.imdb.com/search/ no longer works with imdb_list use imdb_search.")
-                if imdb_url.startswith(f"{base_url}/filmosearch/"):
-                    raise Failed("IMDb Error: URLs with https://www.imdb.com/filmosearch/ no longer works with imdb_list use imdb_search.")
-                search = re.search(r"(ls\d+)", imdb_url)
+                main_data = imdb_dict[dict_methods[main]].strip()
+                if method == "imdb_list":
+                    if main_data.startswith(f"{base_url}/search/"):
+                        raise Failed(f"IMDb Error: URLs with https://www.imdb.com/search/ no longer works with {method} use imdb_search.")
+                    if main_data.startswith(f"{base_url}/filmosearch/"):
+                        raise Failed(f"IMDb Error: URLs with https://www.imdb.com/filmosearch/ no longer works with {method} use imdb_search.")
+                    search = re.search(r"(ls\d+)", main_data)
                    if not search:
-                    raise Failed("IMDb Error: imdb_list list_id must begin with ls (ex. ls005526372)")
-                new_dict = {"list_id": search.group(1)}
+                        raise Failed(f"IMDb Error: {method} {main} must begin with ls (ex. ls005526372)")
+                    new_dict = {main: search.group(1)}
+                else:
+                    user_id = None
+                    if main_data.startswith("ur"):
+                        try:
+                            user_id = int(main_data[2:])
+                        except ValueError:
+                            pass
+                    if not user_id:
+                        raise Failed(f"{err_type} Error: {method} {main}: {main_data} not in the format of 'ur########'")
+                    new_dict = {main: main_data}

            if "limit" in dict_methods:
                if imdb_dict[dict_methods["limit"]] is None:
-                    logger.warning(f"{err_type} Warning: imdb_list limit attribute is blank using 0 as default")
+                    logger.warning(f"{err_type} Warning: {method} limit attribute is blank using 0 as default")
                else:
                    try:
                        value = int(str(imdb_dict[dict_methods["limit"]]))
@ -211,31 +230,16 @@ class IMDb:
                    except ValueError:
                        pass
                if "limit" not in new_dict:
-                    logger.warning(f"{err_type} Warning: imdb_list limit attribute: {imdb_dict[dict_methods['limit']]} must be an integer 0 or greater using 0 as default")
+                    logger.warning(f"{err_type} Warning: {method} limit attribute: {imdb_dict[dict_methods['limit']]} must be an integer 0 or greater using 0 as default")
            if "limit" not in new_dict:
                new_dict["limit"] = 0

            if "sort_by" in dict_methods:
-                new_dict["sort_by"] = util.parse(err_type, dict_methods, imdb_dict, parent="imdb_list", default="custom.asc", options=list_sort_options)
+                new_dict["sort_by"] = util.parse(err_type, dict_methods, imdb_dict, parent=method, default="custom.asc", options=list_sort_options)

            valid_lists.append(new_dict)
        return valid_lists

-    def validate_imdb_watchlists(self, err_type, users, language):
-        valid_users = []
-        for user in util.get_list(users):
-            user_id = None
-            if user.startswith("ur"):
-                try:
-                    user_id = int(user[2:])
-                except ValueError:
-                    pass
-            if not user_id:
-                raise Failed(f"{err_type} Error: User {user} not in the format of 'ur########'")
-            if self._watchlist(user, language):
-                valid_users.append(user)
-        return valid_users
-
    def get_event_years(self, event_id):
        if event_id in self.events_validation:
            return True, self.events_validation[event_id]["years"]
@ -263,16 +267,16 @@ class IMDb:
                break
        return award_names, category_names

-    def _watchlist(self, user, language):
-        imdb_url = f"{base_url}/user/{user}/watchlist"
-        for text in self._request(imdb_url, language=language, xpath="//div[@class='article']/script/text()")[0].split("\n"):
-            if text.strip().startswith("IMDbReactInitialState.push"):
-                jsonline = text.strip()
-                return [f for f in json.loads(jsonline[jsonline.find('{'):-2])["starbars"]]
-        raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}")
+    def _json_operation(self, list_type):
+        if list_type == "search":
+            return "AdvancedTitleSearch", self.search_hash
+        elif list_type == "list":
+            return "TitleListMainPage", self.list_hash
+        else:
+            return "WatchListPageRefiner", self.watchlist_hash

-    def _graphql_json(self, data, search=True):
-        page_limit = 250 if search else 100
+    def _graphql_json(self, data, list_type):
+        page_limit = 250 if list_type == "search" else 100
        out = {
            "locale": "en-US",
            "first": data["limit"] if "limit" in data and 0 < data["limit"] < page_limit else page_limit,
@ -302,10 +306,10 @@ class IMDb:
                    if range_data:
                        out[constraint][range_name[i]] = range_data

-        sort = data["sort_by"] if "sort_by" in data else "popularity.asc" if search else "custom.asc"
+        sort = data["sort_by"] if "sort_by" in data else "popularity.asc" if list_type == "search" else "custom.asc"
        sort_by, sort_order = sort.split(".")

-        if search:
+        if list_type == "search":
            out["titleTypeConstraint"] = {"anyTitleTypeIds": [title_type_options[t] for t in data["type"]] if "type" in data else []}
            out["sortBy"] = sort_by_options[sort_by]
            out["sortOrder"] = sort_order.upper()
@ -373,24 +377,26 @@ class IMDb:
            if "adult" in data and data["adult"]:
                out["explicitContentConstraint"] = {"explicitContentFilter": "INCLUDE_ADULT"}
        else:
+            if list_type == "list":
                out["lsConst"] = data["list_id"]
+            else:
+                out["urConst"] = data["user_id"]
            out["sort"] = {"by": list_sort_by_options[sort_by], "order": sort_order.upper()}

        logger.trace(out)
-        return {
-            "operationName": "AdvancedTitleSearch" if search else "TitleListMainPage",
-            "variables": out,
-            "extensions": {"persistedQuery": {"version": 1, "sha256Hash": self.search_hash if search else self.list_hash}}
-        }
+        op, sha = self._json_operation(list_type)
+        return {"operationName": op, "variables": out, "extensions": {"persistedQuery": {"version": 1, "sha256Hash": sha}}}

-    def _pagination(self, data, search=True):
-        json_obj = self._graphql_json(data, search=search)
-        item_count = 250 if search else 100
+    def _pagination(self, data, list_type):
+        is_list = list_type != "search"
+        json_obj = self._graphql_json(data, list_type)
+        item_count = 100 if is_list else 250
        imdb_ids = []
        logger.ghost("Parsing Page 1")
        response_json = self._graph_request(json_obj)
        try:
-            search_data = response_json["data"]["advancedTitleSearch"] if search else response_json["data"]["list"]["titleListItemSearch"]
+            step = "list" if list_type == "list" else "predefinedList"
+            search_data = response_json["data"][step]["titleListItemSearch"] if is_list else response_json["data"]["advancedTitleSearch"]
            total = search_data["total"]
            limit = data["limit"]
            if limit < 1 or total < limit:
@ -400,16 +406,16 @@ class IMDb:
                remainder = item_count
            num_of_pages = math.ceil(int(limit) / item_count)
            end_cursor = search_data["pageInfo"]["endCursor"]
-            imdb_ids.extend([n["node"]["title"]["id"] if search else n["listItem"]["id"] for n in search_data["edges"]])
+            imdb_ids.extend([n["listItem"]["id"] if is_list else n["node"]["title"]["id"] for n in search_data["edges"]])
            if num_of_pages > 1:
                for i in range(2, num_of_pages + 1):
                    start_num = (i - 1) * item_count + 1
                    logger.ghost(f"Parsing Page {i}/{num_of_pages} {start_num}-{limit if i == num_of_pages else i * item_count}")
                    json_obj["variables"]["after"] = end_cursor
                    response_json = self._graph_request(json_obj)
-                    search_data = response_json["data"]["advancedTitleSearch"] if search else response_json["data"]["list"]["titleListItemSearch"]
+                    search_data = response_json["data"][step]["titleListItemSearch"] if is_list else response_json["data"]["advancedTitleSearch"]
                    end_cursor = search_data["pageInfo"]["endCursor"]
-                    ids_found = [n["node"]["title"]["id"] if search else n["listItem"]["id"] for n in search_data["edges"]]
+                    ids_found = [n["listItem"]["id"] if is_list else n["node"]["title"]["id"] for n in search_data["edges"]]
                    if i == num_of_pages:
                        ids_found = ids_found[:remainder]
                    imdb_ids.extend(ids_found)
@ -511,19 +517,16 @@ class IMDb:
        if method == "imdb_id":
            logger.info(f"Processing IMDb ID: {data}")
            return [(data, "imdb")]
-        elif method == "imdb_list":
-            logger.info(f"Processing IMDb List: {data['list_id']}")
+        elif method in ["imdb_list", "imdb_watchlist"]:
+            logger.info(f"Processing IMDb {'List' if method == 'imdb_list' else 'Watchlist'}: {data['list_id' if method == 'imdb_list' else 'user_id']}")
            if data["limit"] > 0:
                logger.info(f"    Limit: {data['limit']}")
            if "sort_by" in data:
                logger.info(f"    Sort By: {data['sort_by']}")
-            return [(i, "imdb") for i in self._pagination(data, search=False)]
+            return [(i, "imdb") for i in self._pagination(data, "list" if method == "imdb_list" else "watchlist")]
        elif method == "imdb_chart":
            logger.info(f"Processing IMDb Chart: {charts[data]}")
            return [(_i, "imdb") for _i in self._ids_from_chart(data, language)]
-        elif method == "imdb_watchlist":
-            logger.info(f"Processing IMDb Watchlist: {data}")
-            return [(_i, "imdb") for _i in self._watchlist(data, language)]
        elif method == "imdb_award":
            if data["event_year"] not in ["all", "latest"] and len(data["event_year"]) == 1:
                event_slug = f"{data['event_year'][0]}/1" if "-" not in data["event_year"][0] else data["event_year"][0].replace("-", "/")
@ -538,7 +541,7 @@ class IMDb:
            logger.info(f"Processing IMDb Search:")
            for k, v in data.items():
                logger.info(f"    {k}: {v}")
-            return [(_i, "imdb") for _i in self._pagination(data)]
+            return [(_i, "imdb") for _i in self._pagination(data, "search")]
        else:
            raise Failed(f"IMDb Error: Method {method} not supported")

--- a/requirements.txt
+++ b/requirements.txt
@ -12,5 +12,5 @@ requests==2.32.3
 tenacity==8.4.2
 ruamel.yaml==0.18.6
 schedule==1.2.2
-setuptools==70.1.0
+setuptools==70.1.1
 tmdbapis==1.2.16