From ddd16e1e296920c8b16f6a7471109de7bf0348ca Mon Sep 17 00:00:00 2001 From: meisnate12 Date: Wed, 1 Jun 2022 01:33:05 -0400 Subject: [PATCH] [11] add IMDb Topic Search --- VERSION | 2 +- docs/metadata/builders/imdb.md | 2 +- modules/imdb.py | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index ef126b24..cc0d85f8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.17.0-develop10 +1.17.0-develop11 diff --git a/docs/metadata/builders/imdb.md b/docs/metadata/builders/imdb.md index 07e4e913..fb30152b 100644 --- a/docs/metadata/builders/imdb.md +++ b/docs/metadata/builders/imdb.md @@ -49,7 +49,7 @@ collections: ## IMDb List -Finds every item in an IMDb List, [IMDb Keyword Search](https://www.imdb.com/search/keyword/), or [IMDb Title Search](https://www.imdb.com/search/title/). +Finds every item in an IMDb List, [Keyword Search](https://www.imdb.com/search/keyword/), [Title Search](https://www.imdb.com/search/title/), or [Topic Search](https://www.imdb.com/search/title-text/). The expected input is an IMDb List URL or IMDb Search URL. Multiple values are supported as a list only a comma-separated string will not work. diff --git a/modules/imdb.py b/modules/imdb.py index 002e886b..76e97c8e 100644 --- a/modules/imdb.py +++ b/modules/imdb.py @@ -22,6 +22,7 @@ base_url = "https://www.imdb.com" urls = { "lists": f"{base_url}/list/ls", "searches": f"{base_url}/search/title/", + "title_text_searches": f"{base_url}/search/title-text/", "keyword_searches": f"{base_url}/search/keyword/", "filmography_searches": f"{base_url}/filmosearch/" } @@ -71,6 +72,9 @@ class IMDb: elif imdb_url.startswith(urls["searches"]): xpath_total = "//div[@class='desc']/span/text()" per_page = 250 + elif imdb_url.startswith(urls["title_text_searches"]): + xpath_total = "//div[@class='desc']/span/text()" + per_page = 50 else: xpath_total = "//div[@class='desc']/text()" per_page = 50 @@ -113,6 +117,8 @@ class IMDb: if search_url: params["count"] = remainder if i == num_of_pages else item_count # noqa params["start"] = start_num # noqa + elif imdb_base.startswith(urls["title_text_searches"]): + params["start"] = start_num # noqa else: params["page"] = i # noqa response = self.config.get_html(imdb_base, headers=headers, params=params)