Fetching - Prefer to use SockPuppetBrowser (#2163)

pull/2168/head
dgtlmoon 11 months ago committed by GitHub
parent 5119efe4fb
commit c5a4e0aaa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -28,12 +28,12 @@ jobs:
docker network create changedet-network docker network create changedet-network
# Selenium+browserless # Selenium and sockpuppetbrowser
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
docker run --network changedet-network -d --name browserless --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.60-chrome-stable docker run --network changedet-network -d --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
# For accessing custom browser tests # For accessing custom browser tests
docker run --network changedet-network -d --name browserless-custom-url --hostname browserless-custom-url -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm --shm-size="2g" browserless/chrome:1.60-chrome-stable docker run --network changedet-network -d --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url --rm dgtlmoon/sockpuppetbrowser:latest
- name: Build changedetection.io container for testing - name: Build changedetection.io container for testing
run: | run: |
@ -47,6 +47,12 @@ jobs:
# Debug SMTP server/echo message back server # Debug SMTP server/echo message back server
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'python changedetectionio/tests/smtp/smtp-test-server.py' docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'python changedetectionio/tests/smtp/smtp-test-server.py'
- name: Show docker container state and other debug info
run: |
set -x
echo "Running processes in docker..."
docker ps
- name: Test built container with Pytest (generally as requests/plaintext fetching) - name: Test built container with Pytest (generally as requests/plaintext fetching)
run: | run: |
# Unit tests # Unit tests
@ -63,43 +69,33 @@ jobs:
- name: Specific tests in built container for Selenium - name: Specific tests in built container for Selenium
run: | run: |
# Selenium fetch # Selenium fetch
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
- name: Specific tests in built container for Playwright - name: Specific tests in built container for Playwright and SocketPuppetBrowser
run: | run: |
# Playwright/Browserless fetch # Playwright via Sockpuppetbrowser fetch
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
- name: Specific tests in built container for headers and requests checks with Playwright - name: Specific tests in built container for headers and requests checks with Playwright
run: | run: |
# Settings headers playwright tests - Call back in from Browserless, check headers # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Specific tests in built container for headers and requests checks with Selenium - name: Specific tests in built container for headers and requests checks with Selenium
run: | run: |
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Specific tests in built container with Playwright as Puppeteer experimental fetcher
run: |
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Test built container restock detection via Playwright - name: Test built container restock detection via Playwright
run: | run: |
# restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
- name: Test SMTP notification mime types - name: Test SMTP notification mime types
run: | run: |
# SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
- name: Test with puppeteer fetcher and disk cache
run: |
docker run --rm -e "PUPPETEER_DISK_CACHE=/tmp/data/" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py'
# Browserless would have had -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" added above
- name: Test proxy interaction - name: Test proxy interaction
run: | run: |
cd changedetectionio cd changedetectionio

@ -4,22 +4,13 @@
# Why? # Why?
# `browsersteps_playwright_browser_interface.chromium.connect_over_cdp()` will only run once without async() # `browsersteps_playwright_browser_interface.chromium.connect_over_cdp()` will only run once without async()
# - this flask app is not async() # - this flask app is not async()
# - browserless has a single timeout/keepalive which applies to the session made at .connect_over_cdp() # - A single timeout/keepalive which applies to the session made at .connect_over_cdp()
# #
# So it means that we must unfortunately for now just keep a single timer since .connect_over_cdp() was run # So it means that we must unfortunately for now just keep a single timer since .connect_over_cdp() was run
# and know when that reaches timeout/keepalive :( when that time is up, restart the connection and tell the user # and know when that reaches timeout/keepalive :( when that time is up, restart the connection and tell the user
# that their time is up, insert another coin. (reload) # that their time is up, insert another coin. (reload)
# #
# Bigger picture
# - It's horrible that we have this click+wait deal, some nice socket.io solution using something similar
# to what the browserless debug UI already gives us would be smarter..
# #
# OR
# - Some API call that should be hacked into browserless or playwright that we can "/api/bump-keepalive/{session_id}/60"
# So we can tell it that we need more time (run this on each action)
#
# OR
# - use multiprocessing to bump this over to its own process and add some transport layer (queue/pipes)
from distutils.util import strtobool from distutils.util import strtobool
from flask import Blueprint, request, make_response from flask import Blueprint, request, make_response

@ -169,7 +169,7 @@ class steppable_browser_interface():
self.page.locator(selector, timeout=1000).uncheck(timeout=1000) self.page.locator(selector, timeout=1000).uncheck(timeout=1000)
# Responsible for maintaining a live 'context' with browserless # Responsible for maintaining a live 'context' with the chrome CDP
# @todo - how long do contexts live for anyway? # @todo - how long do contexts live for anyway?
class browsersteps_live_ui(steppable_browser_interface): class browsersteps_live_ui(steppable_browser_interface):
context = None context = None

@ -311,125 +311,6 @@ class base_html_playwright(Fetcher):
with open(destination, 'w') as f: with open(destination, 'w') as f:
f.write(content) f.write(content)
def run_fetch_browserless_puppeteer(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
from pkg_resources import resource_string
extra_wait_ms = (int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) * 1000
self.xpath_element_js = self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
code = resource_string(__name__, "res/puppeteer_fetch.js").decode('utf-8')
# In the future inject this is a proper JS package
code = code.replace('%xpath_scrape_code%', self.xpath_element_js)
code = code.replace('%instock_scrape_code%', self.instock_data_js)
from requests.exceptions import ConnectTimeout, ReadTimeout
wait_browserless_seconds = 240
browserless_function_url = os.getenv('BROWSERLESS_FUNCTION_URL')
from urllib.parse import urlparse
if not browserless_function_url:
# Convert/try to guess from PLAYWRIGHT_DRIVER_URL
o = urlparse(os.getenv('PLAYWRIGHT_DRIVER_URL'))
browserless_function_url = o._replace(scheme="http")._replace(path="function").geturl()
# Append proxy connect string
if self.proxy:
# Remove username/password if it exists in the URL or you will receive "ERR_NO_SUPPORTED_PROXIES" error
# Actual authentication handled by Puppeteer/node
o = urlparse(self.proxy.get('server'))
proxy_url = urllib.parse.quote(o._replace(netloc="{}:{}".format(o.hostname, o.port)).geturl())
browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}"
try:
amp = '&' if '?' in browserless_function_url else '?'
response = requests.request(
method="POST",
json={
"code": code,
"context": {
# Very primitive disk cache - USE WITH EXTREME CAUTION
# Run browserless container with -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]"
'disk_cache_dir': os.getenv("PUPPETEER_DISK_CACHE", False), # or path to disk cache ending in /, ie /tmp/cache/
'execute_js': self.webdriver_js_execute_code,
'extra_wait_ms': extra_wait_ms,
'include_filters': current_include_filters,
'req_headers': request_headers,
'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
'url': url,
'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
'proxy_username': self.proxy.get('username', '') if self.proxy else False,
'proxy_password': self.proxy.get('password', '') if self.proxy and self.proxy.get('username') else False,
'no_cache_list': [
'twitter',
'.pdf'
],
# Could use https://github.com/easylist/easylist here, or install a plugin
'block_url_list': [
'adnxs.com',
'analytics.twitter.com',
'doubleclick.net',
'google-analytics.com',
'googletagmanager',
'trustpilot.com'
]
}
},
# @todo /function needs adding ws:// to http:// rebuild this
url=browserless_function_url+f"{amp}--disable-features=AudioServiceOutOfProcess&dumpio=true&--disable-remote-fonts",
timeout=wait_browserless_seconds)
except ReadTimeout:
raise PageUnloadable(url=url, status_code=None, message=f"No response from browserless in {wait_browserless_seconds}s")
except ConnectTimeout:
raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..")
else:
# 200 Here means that the communication to browserless worked only, not the page state
try:
x = response.json()
except Exception as e:
raise PageUnloadable(url=url, message="Error reading JSON response from browserless")
try:
self.status_code = response.status_code
except Exception as e:
raise PageUnloadable(url=url, message="Error reading status_code code response from browserless")
self.headers = x.get('headers')
if self.status_code != 200 and not ignore_status_codes:
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, page_html=x.get('content',''))
if self.status_code == 200:
import base64
if not x.get('screenshot'):
# https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips
# https://github.com/puppeteer/puppeteer/issues/1834
# https://github.com/puppeteer/puppeteer/issues/1834#issuecomment-381047051
# Check your memory is shared and big enough
raise ScreenshotUnavailable(url=url, status_code=None)
if not x.get('content', '').strip():
raise EmptyReply(url=url, status_code=None)
self.content = x.get('content')
self.instock_data = x.get('instock_data')
self.screenshot = base64.b64decode(x.get('screenshot'))
self.xpath_data = x.get('xpath_data')
else:
# Some other error from browserless
raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8'))
def run(self, def run(self,
url, url,
timeout, timeout,
@ -441,21 +322,6 @@ class base_html_playwright(Fetcher):
is_binary=False): is_binary=False):
# For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!)
# browser_connection_is_custom doesnt work with puppeteer style fetch (use playwright native too in this case)
if not self.browser_connection_is_custom and not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'):
if strtobool(os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH')):
# Temporary backup solution until we rewrite the playwright code
return self.run_fetch_browserless_puppeteer(
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes,
current_include_filters,
is_binary)
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
import playwright._impl._errors import playwright._impl._errors
@ -528,7 +394,7 @@ class base_html_playwright(Fetcher):
self.status_code = response.status self.status_code = response.status
except Exception as e: except Exception as e:
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
logger.critical(f"Response from browserless/playwright did not have a status_code! Response follows.") logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
logger.critical(response) logger.critical(response)
raise PageUnloadable(url=url, status_code=None, message=str(e)) raise PageUnloadable(url=url, status_code=None, message=str(e))

@ -57,7 +57,7 @@ class import_url_list(Importer):
# Flask wtform validators wont work with basic auth, use validators package # Flask wtform validators wont work with basic auth, use validators package
# Up to 5000 per batch so we dont flood the server # Up to 5000 per batch so we dont flood the server
# @todo validators.url failed on local hostnames (such as referring to ourself when using browserless) # @todo validators.url will fail when you add your own IP etc
if len(url) and 'http' in url.lower() and good < 5000: if len(url) and 'http' in url.lower() and good < 5000:
extras = None extras = None
if processor: if processor:

@ -146,7 +146,7 @@ module.exports = async ({page, context}) => {
var xpath_data; var xpath_data;
var instock_data; var instock_data;
try { try {
// Not sure the best way here, in the future this should be a new package added to npm then run in browserless // Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
// (Once the old playwright is removed) // (Once the old playwright is removed)
xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters); xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
instock_data = await page.evaluate(() => {%instock_scrape_code%}); instock_data = await page.evaluate(() => {%instock_scrape_code%});

@ -6,16 +6,16 @@
set -x set -x
# A extra browser is configured, but we never chose to use it, so it should NOT show in the logs # A extra browser is configured, but we never chose to use it, so it should NOT show in the logs
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url'
docker logs browserless-custom-url &>log.txt docker logs sockpuppetbrowser-custom-url &>log.txt
grep 'custom-browser-search-string=1' log.txt grep 'custom-browser-search-string=1' log.txt
if [ $? -ne 1 ] if [ $? -ne 1 ]
then then
echo "Saw a request in 'browserless-custom-url' container with 'custom-browser-search-string=1' when I should not" echo "Saw a request in 'sockpuppetbrowser-custom-url' container with 'custom-browser-search-string=1' when I should not"
exit 1 exit 1
fi fi
docker logs browserless &>log.txt docker logs sockpuppetbrowser &>log.txt
grep 'custom-browser-search-string=1' log.txt grep 'custom-browser-search-string=1' log.txt
if [ $? -ne 1 ] if [ $? -ne 1 ]
then then
@ -24,16 +24,16 @@ then
fi fi
# Special connect string should appear in the custom-url container, but not in the 'default' one # Special connect string should appear in the custom-url container, but not in the 'default' one
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_via_custom_browser_url' docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_via_custom_browser_url'
docker logs browserless-custom-url &>log.txt docker logs sockpuppetbrowser-custom-url &>log.txt
grep 'custom-browser-search-string=1' log.txt grep 'custom-browser-search-string=1' log.txt
if [ $? -ne 0 ] if [ $? -ne 0 ]
then then
echo "Did not see request in 'browserless-custom-url' container with 'custom-browser-search-string=1' when I should" echo "Did not see request in 'sockpuppetbrowser-custom-url' container with 'custom-browser-search-string=1' when I should"
exit 1 exit 1
fi fi
docker logs browserless &>log.txt docker logs sockpuppetbrowser &>log.txt
grep 'custom-browser-search-string=1' log.txt grep 'custom-browser-search-string=1' log.txt
if [ $? -ne 1 ] if [ $? -ne 1 ]
then then

@ -35,7 +35,7 @@ docker run --network changedet-network \
docker run --network changedet-network \ docker run --network changedet-network \
-e "SOCKSTEST=manual-playwright" \ -e "SOCKSTEST=manual-playwright" \
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
-e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" \ -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
--rm \ --rm \
test-changedetectionio \ test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'

@ -10,7 +10,7 @@ $(document).ready(function () {
} }
}) })
var browsersteps_session_id; var browsersteps_session_id;
var browserless_seconds_remaining = 0; var browser_interface_seconds_remaining = 0;
var apply_buttons_disabled = false; var apply_buttons_disabled = false;
var include_text_elements = $("#include_text_elements"); var include_text_elements = $("#include_text_elements");
var xpath_data = false; var xpath_data = false;
@ -49,7 +49,7 @@ $(document).ready(function () {
$('#browsersteps-img').removeAttr('src'); $('#browsersteps-img').removeAttr('src');
$("#browsersteps-click-start").show(); $("#browsersteps-click-start").show();
$("#browsersteps-selector-wrapper .spinner").hide(); $("#browsersteps-selector-wrapper .spinner").hide();
browserless_seconds_remaining = 0; browser_interface_seconds_remaining = 0;
browsersteps_session_id = false; browsersteps_session_id = false;
apply_buttons_disabled = false; apply_buttons_disabled = false;
ctx.clearRect(0, 0, c.width, c.height); ctx.clearRect(0, 0, c.width, c.height);
@ -61,12 +61,12 @@ $(document).ready(function () {
$('#browser_steps >li:first-child').css('opacity', '0.5'); $('#browser_steps >li:first-child').css('opacity', '0.5');
} }
// Show seconds remaining until playwright/browserless needs to restart the session // Show seconds remaining until the browser interface needs to restart the session
// (See comment at the top of changedetectionio/blueprint/browser_steps/__init__.py ) // (See comment at the top of changedetectionio/blueprint/browser_steps/__init__.py )
setInterval(() => { setInterval(() => {
if (browserless_seconds_remaining >= 1) { if (browser_interface_seconds_remaining >= 1) {
document.getElementById('browserless-seconds-remaining').innerText = browserless_seconds_remaining + " seconds remaining in session"; document.getElementById('browser-seconds-remaining').innerText = browser_interface_seconds_remaining + " seconds remaining in session";
browserless_seconds_remaining -= 1; browser_interface_seconds_remaining -= 1;
} }
}, "1000") }, "1000")
@ -261,7 +261,7 @@ $(document).ready(function () {
// This should trigger 'Goto site' // This should trigger 'Goto site'
console.log("Got startup response, requesting Goto-Site (first) step fake click"); console.log("Got startup response, requesting Goto-Site (first) step fake click");
$('#browser_steps >li:first-child .apply').click(); $('#browser_steps >li:first-child .apply').click();
browserless_seconds_remaining = 500; browser_interface_seconds_remaining = 500;
set_first_gotosite_disabled(); set_first_gotosite_disabled();
}).fail(function (data) { }).fail(function (data) {
console.log(data); console.log(data);

@ -228,7 +228,7 @@ User-Agent: wonderbra 1.0") }}
</div> </div>
</div> </div>
<div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" > <div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" >
<span id="browserless-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
{{ render_field(form.browser_steps) }} {{ render_field(form.browser_steps) }}
</div> </div>
</div> </div>

@ -7,10 +7,11 @@ from ..util import live_server_setup, wait_for_all_checks
def do_test(client, live_server, make_test_use_extra_browser=False): def do_test(client, live_server, make_test_use_extra_browser=False):
# Grep for this string in the logs? # Grep for this string in the logs?
test_url = f"https://changedetection.io/ci-test.html" test_url = f"https://changedetection.io/ci-test.html?non-custom-default=true"
# "non-custom-default" should not appear in the custom browser connection
custom_browser_name = 'custom browser URL' custom_browser_name = 'custom browser URL'
# needs to be set and something like 'ws://127.0.0.1:3000?stealth=1&--disable-web-security=true' # needs to be set and something like 'ws://127.0.0.1:3000'
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
##################### #####################
@ -19,9 +20,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
data={"application-empty_pages_are_a_change": "", data={"application-empty_pages_are_a_change": "",
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_webdriver", 'application-fetch_backend': "html_webdriver",
# browserless-custom-url is setup in .github/workflows/test-only.yml 'requests-extra_browsers-0-browser_connection_url': 'ws://sockpuppetbrowser-custom-url:3000',
# the test script run_custom_browser_url_test.sh will look for 'custom-browser-search-string' in the container logs
'requests-extra_browsers-0-browser_connection_url': 'ws://browserless-custom-url:3000?stealth=1&--disable-web-security=true&custom-browser-search-string=1',
'requests-extra_browsers-0-browser_name': custom_browser_name 'requests-extra_browsers-0-browser_name': custom_browser_name
}, },
follow_redirects=True follow_redirects=True
@ -51,7 +50,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
data={ data={
"url": test_url, # 'run_customer_browser_url_tests.sh' will search for this string to know if we hit the right browser container or not
"url": f"https://changedetection.io/ci-test.html?custom-browser-search-string=1",
"tags": "", "tags": "",
"headers": "", "headers": "",
'fetch_backend': f"extra_browser_{custom_browser_name}", 'fetch_backend': f"extra_browser_{custom_browser_name}",

@ -456,7 +456,7 @@ def test_ignore_json_order(client, live_server):
def test_correct_header_detect(client, live_server): def test_correct_header_detect(client, live_server):
# Like in https://github.com/dgtlmoon/changedetection.io/pull/1593 # Like in https://github.com/dgtlmoon/changedetection.io/pull/1593
# Specify extra html that JSON is sometimes wrapped in - when using Browserless/Puppeteer etc # Specify extra html that JSON is sometimes wrapped in - when using SockpuppetBrowser / Puppeteer / Playwrightetc
with open("test-datastore/endpoint-content.txt", "w") as f: with open("test-datastore/endpoint-content.txt", "w") as f:
f.write('<html><body>{"hello" : 123, "world": 123}') f.write('<html><body>{"hello" : 123, "world": 123}')

@ -14,7 +14,7 @@ def test_headers_in_request(client, live_server):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_headers', _external=True) test_url = url_for('test_headers', _external=True)
if os.getenv('PLAYWRIGHT_DRIVER_URL'): if os.getenv('PLAYWRIGHT_DRIVER_URL'):
# Because its no longer calling back to localhost but from browserless, set in test-only.yml # Because its no longer calling back to localhost but from the browser container, set in test-only.yml
test_url = test_url.replace('localhost', 'changedet') test_url = test_url.replace('localhost', 'changedet')
# Add the test URL twice, we will check # Add the test URL twice, we will check
@ -89,7 +89,7 @@ def test_body_in_request(client, live_server):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_body', _external=True) test_url = url_for('test_body', _external=True)
if os.getenv('PLAYWRIGHT_DRIVER_URL'): if os.getenv('PLAYWRIGHT_DRIVER_URL'):
# Because its no longer calling back to localhost but from browserless, set in test-only.yml # Because its no longer calling back to localhost but from the browser container, set in test-only.yml
test_url = test_url.replace('localhost', 'cdio') test_url = test_url.replace('localhost', 'cdio')
res = client.post( res = client.post(
@ -181,7 +181,7 @@ def test_method_in_request(client, live_server):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_method', _external=True) test_url = url_for('test_method', _external=True)
if os.getenv('PLAYWRIGHT_DRIVER_URL'): if os.getenv('PLAYWRIGHT_DRIVER_URL'):
# Because its no longer calling back to localhost but from browserless, set in test-only.yml # Because its no longer calling back to localhost but from the browser container, set in test-only.yml
test_url = test_url.replace('localhost', 'cdio') test_url = test_url.replace('localhost', 'cdio')
# Add the test URL twice, we will check # Add the test URL twice, we will check
@ -258,7 +258,7 @@ def test_headers_textfile_in_request(client, live_server):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_headers', _external=True) test_url = url_for('test_headers', _external=True)
if os.getenv('PLAYWRIGHT_DRIVER_URL'): if os.getenv('PLAYWRIGHT_DRIVER_URL'):
# Because its no longer calling back to localhost but from browserless, set in test-only.yml # Because its no longer calling back to localhost but from the browser container, set in test-only.yml
test_url = test_url.replace('localhost', 'cdio') test_url = test_url.replace('localhost', 'cdio')
print ("TEST URL IS ",test_url) print ("TEST URL IS ",test_url)

@ -30,7 +30,7 @@ services:
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy # https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
# #
# Alternative Playwright URL, do not use "'s or 's! # Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true # - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
# #
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password # Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
# #
@ -71,32 +71,23 @@ services:
# condition: service_started # condition: service_started
# Used for fetching pages via Playwright+Chrome where you need Javascript support. # Used for fetching pages via Playwright+Chrome where you need Javascript support.
# Note: Playwright/browserless not supported on ARM type devices (rPi etc)
# RECOMMENDED FOR FETCHING PAGES WITH CHROME # RECOMMENDED FOR FETCHING PAGES WITH CHROME
# playwright-chrome: # playwright-chrome:
# hostname: playwright-chrome # hostname: playwright-chrome
# image: browserless/chrome:1.60-chrome-stable # image: dgtlmoon/sockpuppetbrowser:latest
# cap_add:
# - SYS_ADMIN
## SYS_ADMIN might be too much, but it can be needed on your platform https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci
# restart: unless-stopped # restart: unless-stopped
# environment: # environment:
# - SCREEN_WIDTH=1920 # - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1024 # - SCREEN_HEIGHT=1024
# - SCREEN_DEPTH=16 # - SCREEN_DEPTH=16
# - ENABLE_DEBUGGER=false # - MAX_CONCURRENT_CHROME_PROCESSES=10
# - PREBOOT_CHROME=true
# - CONNECTION_TIMEOUT=300000
# - MAX_CONCURRENT_SESSIONS=10
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
# Used for fetching pages via Playwright+Chrome where you need Javascript support. # Used for fetching pages via Playwright+Chrome where you need Javascript support.
# Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) # Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector)
# Does not report status codes (200, 404, 403) and other issues # Does not report status codes (200, 404, 403) and other issues
# More information about the advantages of playwright/browserless https://www.browserless.io/blog/2023/12/13/migrating-selenium-to-playwright/
# browser-chrome: # browser-chrome:
# hostname: browser-chrome # hostname: browser-chrome
# image: selenium/standalone-chrome:4 # image: selenium/standalone-chrome:4

Loading…
Cancel
Save