From f71545a4b0ed01838ae4fa890af38ad894626bea Mon Sep 17 00:00:00 2001 From: Wee Date: Sun, 17 Apr 2022 21:31:33 +0700 Subject: [PATCH 1/4] Allow the Playwright installation to fail Excluded Playwright from requirements.txt to prevent arm/v6 and arm/v7 builds from failing. --- Dockerfile | 5 +++++ requirements.txt | 3 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2e8131f7..23a3f2c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,11 @@ COPY requirements.txt /requirements.txt RUN pip install --target=/dependencies -r /requirements.txt +# Playwright is an alternative to Selenium +# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing +RUN pip install --target=/dependencies playwright~=1.20 \ + || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." + # Final image stage FROM python:3.8-slim diff --git a/requirements.txt b/requirements.txt index 2a8be8e3..d50f3f80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,3 @@ lxml # 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0 selenium ~= 4.1.0 - -# An alternative to Selenium -playwright ~= 1.20 From 25a7fd050f9030250338a9410be35d2374129b3b Mon Sep 17 00:00:00 2001 From: Wee Date: Sun, 17 Apr 2022 22:07:43 +0700 Subject: [PATCH 2/4] Hide the Playwright option for unsupported devices --- changedetectionio/content_fetcher.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 518b8b09..00454e16 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -23,6 +23,7 @@ class Fetcher(): fetcher_description ="No description" fetcher_list_order = 0 + fetcher_enabled = True @abstractmethod def get_error(self): @@ -65,10 +66,12 @@ def available_fetchers(): # @todo html_ is maybe better as fetcher_ or something # In this case, make sure to edit the default one in store.py and fetch_site_status.py if "html_" in name: - t=tuple([name,obj.fetcher_description,obj.fetcher_list_order]) + t=tuple([name,obj.fetcher_description,obj.fetcher_list_order,obj.fetcher_enabled]) p.append(t) # sort by obj.fetcher_list_order p.sort(key=lambda x: x[2]) + # filter out fetchers that aren't enabled + p = filter(lambda x: x[3], p) # strip obj.fetcher_list_order from each member in the tuple p = list(map(lambda x: x[:2], p)) @@ -81,6 +84,10 @@ class html_playwright(Fetcher): if os.getenv("PLAYWRIGHT_DRIVER_URL"): fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL")) fetcher_list_order = 3 + try: + from playwright.sync_api import sync_playwright + except ModuleNotFoundError: + fetcher_enabled = False browser_type = '' command_executor = '' From c990db2bd5d6800a8debbf49433f9ac6175cac70 Mon Sep 17 00:00:00 2001 From: Wee Date: Sun, 17 Apr 2022 23:27:17 +0700 Subject: [PATCH 3/4] Replace Playwright server with a pre-built image --- changedetectionio/content_fetcher.py | 2 +- docker-compose.yml | 24 +++++++----------------- playwright/Dockerfile | 13 ------------- playwright/seccomp_profile.json | 12 ------------ playwright/server.js | 10 ---------- 5 files changed, 8 insertions(+), 53 deletions(-) delete mode 100644 playwright/Dockerfile delete mode 100644 playwright/seccomp_profile.json delete mode 100644 playwright/server.js diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index 00454e16..1a1139f6 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -103,7 +103,7 @@ class html_playwright(Fetcher): self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"') self.command_executor = os.getenv( "PLAYWRIGHT_DRIVER_URL", - 'ws://playwright-server:4444/playwright' + 'ws://playwright-chrome:3000/playwright' ).strip('"') # If any proxy settings are enabled, then we should setup the proxy object diff --git a/docker-compose.yml b/docker-compose.yml index 0914ed6f..0e6605fd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,11 +24,7 @@ services: # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy # # Alternative Playwright URL, do not use "'s or 's! - # - PLAYWRIGHT_DRIVER_URL=ws://playwright-server:4444/playwright - # - # Alternative Playwright Browser Type, must match with PLAYWRIGHT_BROWSER_TYPE in the playwright-server service - # See https://playwright.dev/docs/browsers - # - PLAYWRIGHT_BROWSER_TYPE=chromium + # - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/playwright # # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password # @@ -71,18 +67,12 @@ services: # - /dev/shm:/dev/shm # restart: unless-stopped -# playwright-server: -# hostname: playwright-server -# build: ./playwright -# environment: -# - PLAYWRIGHT_PORT=4444 -# # Must match with PLAYWRIGHT_BROWSER_TYPE in the changedetection service -# - PLAYWRIGHT_BROWSER_TYPE=chromium -# ipc: host -# user: pwuser -# security_opt: -# - seccomp:./playwright/seccomp_profile.json -# restart: unless-stopped + # Used for fetching pages via Playwright+Chrome where you need Javascript support. + + playwright-chrome: + hostname: playwright-chrome + image: browserless/chrome + restart: unless-stopped volumes: changedetection-data: diff --git a/playwright/Dockerfile b/playwright/Dockerfile deleted file mode 100644 index 8dcd659e..00000000 --- a/playwright/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM mcr.microsoft.com/playwright:v1.20.0-focal - -WORKDIR /server -RUN npm install playwright -COPY server.js . - -ENV PLAYWRIGHT_PORT=4444 -ENV PLAYWRIGHT_BROWSER_TYPE=chromium -ENV PLAYWRIGHT_HEADLESS=true - -EXPOSE ${PLAYWRIGHT_PORT} - -CMD [ "node", "server.js" ] diff --git a/playwright/seccomp_profile.json b/playwright/seccomp_profile.json deleted file mode 100644 index bfeea36c..00000000 --- a/playwright/seccomp_profile.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "comment": "Allow create user namespaces", - "names": [ - "clone", - "setns", - "unshare" - ], - "action": "SCMP_ACT_ALLOW", - "args": [], - "includes": {}, - "excludes": {} -} diff --git a/playwright/server.js b/playwright/server.js deleted file mode 100644 index 9a730305..00000000 --- a/playwright/server.js +++ /dev/null @@ -1,10 +0,0 @@ -const playwright = require('playwright'); - -const port = parseInt(process.env.PLAYWRIGHT_PORT) || 4444; -const browserType = process.env.PLAYWRIGHT_BROWSER_TYPE?.toLowerCase() || 'chromium'; -const headless = process.env.PLAYWRIGHT_HEADLESS?.toLowerCase() === 'true' || true; -const wsPath = 'playwright'; -console.log('using port:', port, 'browser:', browserType, 'headless:', headless, 'wspath:', wsPath); - -const serverPromise = playwright[browserType].launchServer({ headless: headless, port: port, wsPath: wsPath }); -serverPromise.then(bs => console.log(bs.wsEndpoint())); From 1be1cee04d429dee7f680bf92a3556fa5a7e8a4a Mon Sep 17 00:00:00 2001 From: Wee Date: Mon, 18 Apr 2022 00:47:09 +0700 Subject: [PATCH 4/4] Comment out playwright-chrome service --- docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0e6605fd..88ee8a76 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,10 +69,10 @@ services: # Used for fetching pages via Playwright+Chrome where you need Javascript support. - playwright-chrome: - hostname: playwright-chrome - image: browserless/chrome - restart: unless-stopped +# playwright-chrome: +# hostname: playwright-chrome +# image: browserless/chrome +# restart: unless-stopped volumes: changedetection-data: