fix(installer): harden setupSuperUser race and add slow-resource e2e
Some checks failed
ci / tests (push) Has been cancelled
ci / detect-release (push) Has been cancelled
ci / publish-image (push) Has been cancelled
ci / tag-stable (push) Has been cancelled

This commit is contained in:
Kevin Veen-Birkenbach
2026-02-14 19:01:55 +01:00
parent 7ecb26cc92
commit fa6adea3c1
4 changed files with 225 additions and 23 deletions

View File

@@ -388,6 +388,34 @@ def _has_continue_to_matomo_action(page, *, timeout_s: float = 0.2) -> bool:
return loc is not None return loc is not None
def _wait_for_superuser_login_field(
    page, *, timeout_s: float, poll_interval_ms: int = 300
) -> bool:
    """Poll until the superuser login field is present or the timeout expires.

    Intended for the moment when the installer URL already reports the
    setupSuperUser step but the login form has not been detected yet.

    Args:
        page: Page-like object exposing ``url`` and ``wait_for_timeout``; it is
            also passed to the module's DOM/locator helpers.
        timeout_s: Maximum time to keep polling, in seconds. A value ``<= 0``
            performs a single immediate check instead of polling.
        poll_interval_ms: Pause between two polls, in milliseconds.

    Returns:
        True as soon as the login field is detected; otherwise the result of
        one last check performed after the deadline has passed.
    """
    if timeout_s <= 0:
        return _has_superuser_login_field(page, timeout_s=0.2)

    # timeout_s is a duration, so measure it on the monotonic clock: a
    # wall-clock step (NTP sync, manual adjustment) must neither cut the wait
    # short nor extend it.
    deadline = time.monotonic() + timeout_s
    # None means "nothing logged yet", which guarantees the first missed poll
    # is reported regardless of the monotonic clock's arbitrary origin.
    last_wait_log_at = None
    while time.monotonic() < deadline:
        _wait_dom_settled(page)
        if _has_superuser_login_field(page, timeout_s=0.2):
            return True

        now = time.monotonic()
        # Throttle the progress log to at most one entry every 5 seconds.
        if last_wait_log_at is None or now - last_wait_log_at >= 5:
            _log(
                "[install] setupSuperUser reached but login form is not visible yet; "
                f"waiting (url={page.url}, step={_get_step_hint(page.url)})"
            )
            _page_warnings(page)
            last_wait_log_at = now

        page.wait_for_timeout(poll_interval_ms)

    # One final look so a form that appeared during the last pause still counts.
    return _has_superuser_login_field(page, timeout_s=0.2)
def _fill_required_input(page, selectors, value: str, *, label: str) -> None: def _fill_required_input(page, selectors, value: str, *, label: str) -> None:
loc, _ = _first_present_css_locator(page, selectors, timeout_s=1.0) loc, _ = _first_present_css_locator(page, selectors, timeout_s=1.0)
if loc is None: if loc is None:
@@ -736,12 +764,21 @@ class WebInstaller(Installer):
progress_deadline = time.time() + INSTALLER_STEP_DEADLINE_S progress_deadline = time.time() + INSTALLER_STEP_DEADLINE_S
while not _has_superuser_login_field(page): while not _has_superuser_login_field(page):
if time.time() >= progress_deadline: now = time.time()
if now >= progress_deadline:
raise RuntimeError( raise RuntimeError(
"Installer did not reach superuser step " "Installer did not reach superuser step "
f"within {INSTALLER_STEP_DEADLINE_S}s " f"within {INSTALLER_STEP_DEADLINE_S}s "
f"(url={page.url}, step={_get_step_hint(page.url)})." f"(url={page.url}, step={_get_step_hint(page.url)})."
) )
current_step = _get_step_hint(page.url)
if "setupSuperUser" in current_step:
remaining_s = max(0.0, progress_deadline - now)
if _wait_for_superuser_login_field(page, timeout_s=remaining_s):
break
continue
if _resolve_tables_creation_conflict( if _resolve_tables_creation_conflict(
page, timeout_s=INSTALLER_TABLES_ERASE_TIMEOUT_S page, timeout_s=INSTALLER_TABLES_ERASE_TIMEOUT_S
): ):

View File

@@ -0,0 +1,32 @@
# Override layered on top of the root compose file by the slow-resource e2e
# test: tight CPU/memory budgets for every service plus longer health-check and
# installer timeouts.
services:
  db:
    # Tight budget for the database; the e2e definition test expects the
    # merged config to report mem_limit as 335544320 bytes (320m).
    cpus: 0.35
    mem_reservation: 192m
    mem_limit: 320m
    healthcheck:
      interval: 10s
      timeout: 5s
      retries: 90
  matomo:
    # Same CPU share as db; the e2e definition test expects mem_limit to be
    # rendered as 402653184 bytes (384m) and start_period as 2m0s.
    cpus: 0.35
    mem_reservation: 192m
    mem_limit: 384m
    healthcheck:
      interval: 15s
      timeout: 8s
      retries: 120
      start_period: 120s
  bootstrap:
    # The bootstrap container gets a larger budget than db/matomo.
    cpus: 0.75
    mem_reservation: 512m
    mem_limit: 1g
    environment:
      # Timeout knobs handed to the bootstrap container; values are strings.
      # NOTE(review): presumably seconds unless the name ends in _MS — confirm
      # against the code that reads them.
      MATOMO_TIMEOUT: "120"
      MATOMO_PLAYWRIGHT_NAV_TIMEOUT_MS: "120000"
      MATOMO_INSTALLER_READY_TIMEOUT_S: "420"
      MATOMO_INSTALLER_STEP_TIMEOUT_S: "120"
      MATOMO_INSTALLER_STEP_DEADLINE_S: "420"
      MATOMO_INSTALLER_TABLES_CREATION_TIMEOUT_S: "360"
      MATOMO_INSTALLER_TABLES_ERASE_TIMEOUT_S: "240"

View File

@@ -7,13 +7,21 @@ import urllib.request
COMPOSE_FILE = os.environ.get("MATOMO_STACK_COMPOSE_FILE", "docker-compose.yml") COMPOSE_FILE = os.environ.get("MATOMO_STACK_COMPOSE_FILE", "docker-compose.yml")
# Compose override file passed after COMPOSE_FILE for the resource-pressure
# stack test; overridable through MATOMO_STACK_SLOW_COMPOSE_FILE.
SLOW_COMPOSE_FILE = os.environ.get(
"MATOMO_STACK_SLOW_COMPOSE_FILE", "tests/e2e/docker-compose.slow.yml"
)
# Pick a non-default port to avoid collisions with other CI stacks that use 8080 # Pick a non-default port to avoid collisions with other CI stacks that use 8080
MATOMO_PORT = os.environ.get("MATOMO_PORT", "18080") MATOMO_PORT = os.environ.get("MATOMO_PORT", "18080")
MATOMO_HOST_URL = os.environ.get("MATOMO_STACK_URL", f"http://127.0.0.1:{MATOMO_PORT}") MATOMO_HOST_URL = os.environ.get("MATOMO_STACK_URL", f"http://127.0.0.1:{MATOMO_PORT}")
# Published port and base URL used by the resource-pressure stack test; the
# default port (18081) differs from the regular stack's default (18080).
MATOMO_SLOW_PORT = os.environ.get("MATOMO_SLOW_PORT", "18081")
MATOMO_SLOW_HOST_URL = os.environ.get(
"MATOMO_SLOW_STACK_URL", f"http://127.0.0.1:{MATOMO_SLOW_PORT}"
)
# How long we wait for Matomo HTTP to respond at all (seconds) # How long we wait for Matomo HTTP to respond at all (seconds)
WAIT_TIMEOUT_SECONDS = int(os.environ.get("MATOMO_STACK_WAIT_TIMEOUT", "180")) WAIT_TIMEOUT_SECONDS = int(os.environ.get("MATOMO_STACK_WAIT_TIMEOUT", "180"))
# HTTP wait budget (seconds) used by the resource-pressure stack test; defaults
# to 420 versus 180 for the regular stack.
SLOW_WAIT_TIMEOUT_SECONDS = int(os.environ.get("MATOMO_SLOW_STACK_WAIT_TIMEOUT", "420"))
def _run( def _run(
@@ -32,8 +40,13 @@ def _run(
) )
def _compose_cmd(*args: str) -> list[str]: def _compose_cmd(*args: str, compose_files: list[str] | None = None) -> list[str]:
return ["docker", "compose", "-f", COMPOSE_FILE, *args] files = compose_files or [COMPOSE_FILE]
cmd = ["docker", "compose"]
for compose_file in files:
cmd.extend(["-f", compose_file])
cmd.extend(args)
return cmd
def _wait_for_http_any_status(url: str, timeout_s: int) -> None: def _wait_for_http_any_status(url: str, timeout_s: int) -> None:
@@ -108,12 +121,19 @@ class TestRootDockerComposeStack(unittest.TestCase):
extra_env={"MATOMO_PORT": MATOMO_PORT}, extra_env={"MATOMO_PORT": MATOMO_PORT},
) )
def test_root_docker_compose_yml_stack_bootstraps_and_token_works(self) -> None: def _assert_stack_bootstraps_and_token_works(
# Build bootstrap image from Dockerfile (as defined in docker-compose.yml) self,
*,
compose_files: list[str],
matomo_port: str,
matomo_host_url: str,
wait_timeout_seconds: int,
bootstrap_retries: int = 2,
) -> None:
build = _run( build = _run(
_compose_cmd("build", "bootstrap"), _compose_cmd("build", "bootstrap", compose_files=compose_files),
check=False, check=False,
extra_env={"MATOMO_PORT": MATOMO_PORT}, extra_env={"MATOMO_PORT": matomo_port},
) )
self.assertEqual( self.assertEqual(
build.returncode, build.returncode,
@@ -121,11 +141,10 @@ class TestRootDockerComposeStack(unittest.TestCase):
f"compose build failed\nstdout:\n{build.stdout}\nstderr:\n{build.stderr}", f"compose build failed\nstdout:\n{build.stdout}\nstderr:\n{build.stderr}",
) )
# Start db + matomo (bootstrap is one-shot and started via "run")
up = _run( up = _run(
_compose_cmd("up", "-d", "db", "matomo"), _compose_cmd("up", "-d", "db", "matomo", compose_files=compose_files),
check=False, check=False,
extra_env={"MATOMO_PORT": MATOMO_PORT}, extra_env={"MATOMO_PORT": matomo_port},
) )
self.assertEqual( self.assertEqual(
up.returncode, up.returncode,
@@ -133,17 +152,14 @@ class TestRootDockerComposeStack(unittest.TestCase):
f"compose up failed\nstdout:\n{up.stdout}\nstderr:\n{up.stderr}", f"compose up failed\nstdout:\n{up.stdout}\nstderr:\n{up.stderr}",
) )
# Wait until Matomo answers on the published port _wait_for_http_any_status(matomo_host_url + "/", wait_timeout_seconds)
_wait_for_http_any_status(MATOMO_HOST_URL + "/", WAIT_TIMEOUT_SECONDS)
# Run bootstrap: it should print ONLY the token to stdout.
# Retry once because first-run installer startup can be flaky on slow CI.
boot_attempts: list[subprocess.CompletedProcess] = [] boot_attempts: list[subprocess.CompletedProcess] = []
for _ in range(2): for _ in range(bootstrap_retries):
boot = _run( boot = _run(
_compose_cmd("run", "--rm", "bootstrap"), _compose_cmd("run", "--rm", "bootstrap", compose_files=compose_files),
check=False, check=False,
extra_env={"MATOMO_PORT": MATOMO_PORT}, extra_env={"MATOMO_PORT": matomo_port},
) )
boot_attempts.append(boot) boot_attempts.append(boot)
if boot.returncode == 0: if boot.returncode == 0:
@@ -152,9 +168,26 @@ class TestRootDockerComposeStack(unittest.TestCase):
if boot.returncode != 0: if boot.returncode != 0:
matomo_logs = _run( matomo_logs = _run(
_compose_cmd("logs", "--no-color", "--tail=200", "matomo"), _compose_cmd(
"logs",
"--no-color",
"--tail=250",
"matomo",
compose_files=compose_files,
),
check=False, check=False,
extra_env={"MATOMO_PORT": MATOMO_PORT}, extra_env={"MATOMO_PORT": matomo_port},
)
db_logs = _run(
_compose_cmd(
"logs",
"--no-color",
"--tail=200",
"db",
compose_files=compose_files,
),
check=False,
extra_env={"MATOMO_PORT": matomo_port},
) )
attempts_dump = "\n\n".join( attempts_dump = "\n\n".join(
[ [
@@ -169,7 +202,8 @@ class TestRootDockerComposeStack(unittest.TestCase):
self.fail( self.fail(
"bootstrap container failed after retry.\n" "bootstrap container failed after retry.\n"
f"{attempts_dump}\n\n" f"{attempts_dump}\n\n"
f"[matomo logs]\n{matomo_logs.stdout}\n{matomo_logs.stderr}" f"[matomo logs]\n{matomo_logs.stdout}\n{matomo_logs.stderr}\n\n"
f"[db logs]\n{db_logs.stdout}\n{db_logs.stderr}"
) )
token = (boot.stdout or "").strip() token = (boot.stdout or "").strip()
@@ -179,9 +213,8 @@ class TestRootDockerComposeStack(unittest.TestCase):
f"Expected token_auth on stdout, got stdout={boot.stdout!r} stderr={boot.stderr!r}", f"Expected token_auth on stdout, got stdout={boot.stdout!r} stderr={boot.stderr!r}",
) )
# Verify token works against Matomo API
api_url = ( api_url = (
f"{MATOMO_HOST_URL}/index.php" f"{matomo_host_url}/index.php"
f"?module=API&method=SitesManager.getSitesWithAtLeastViewAccess" f"?module=API&method=SitesManager.getSitesWithAtLeastViewAccess"
f"&format=json&token_auth={token}" f"&format=json&token_auth={token}"
) )
@@ -190,6 +223,26 @@ class TestRootDockerComposeStack(unittest.TestCase):
self.assertIsInstance(data, list) self.assertIsInstance(data, list)
def test_root_docker_compose_yml_stack_bootstraps_and_token_works(self) -> None:
    """Run the shared bootstrap-and-token check against the default stack."""
    default_stack = {
        "compose_files": [COMPOSE_FILE],
        "matomo_port": MATOMO_PORT,
        "matomo_host_url": MATOMO_HOST_URL,
        "wait_timeout_seconds": WAIT_TIMEOUT_SECONDS,
        "bootstrap_retries": 2,
    }
    self._assert_stack_bootstraps_and_token_works(**default_stack)
def test_root_docker_compose_yml_stack_bootstraps_under_resource_pressure(
    self,
) -> None:
    """Run the shared bootstrap-and-token check with the slow override applied."""
    # The override file comes second so its settings are layered on the base.
    constrained_stack = {
        "compose_files": [COMPOSE_FILE, SLOW_COMPOSE_FILE],
        "matomo_port": MATOMO_SLOW_PORT,
        "matomo_host_url": MATOMO_SLOW_HOST_URL,
        "wait_timeout_seconds": SLOW_WAIT_TIMEOUT_SECONDS,
        "bootstrap_retries": 3,
    }
    self._assert_stack_bootstraps_and_token_works(**constrained_stack)
class TestRootDockerComposeDefinition(unittest.TestCase): class TestRootDockerComposeDefinition(unittest.TestCase):
def test_bootstrap_service_waits_for_healthy_matomo_and_has_readiness_knobs( def test_bootstrap_service_waits_for_healthy_matomo_and_has_readiness_knobs(
@@ -217,6 +270,28 @@ class TestRootDockerComposeDefinition(unittest.TestCase):
self.assertIn("healthcheck:", matomo_block) self.assertIn("healthcheck:", matomo_block)
self.assertIn("curl -fsS http://127.0.0.1/ >/dev/null || exit 1", matomo_block) self.assertIn("curl -fsS http://127.0.0.1/ >/dev/null || exit 1", matomo_block)
def test_slow_override_sets_tight_resources_and_longer_timeouts(self) -> None:
    """Check that the merged compose config carries the slow-override settings."""
    merged = _run(
        _compose_cmd("config", compose_files=[COMPOSE_FILE, SLOW_COMPOSE_FILE]),
        check=True,
        extra_env={"MATOMO_PORT": MATOMO_SLOW_PORT},
    )
    self.assertEqual(merged.returncode, 0, merged.stderr)

    # Expected fragments per service, checked in declaration order.
    expected_fragments = {
        "matomo": (
            "cpus: 0.35",
            'mem_limit: "402653184"',
            "start_period: 2m0s",
        ),
        "db": (
            "cpus: 0.35",
            'mem_limit: "335544320"',
        ),
        "bootstrap": (
            "MATOMO_INSTALLER_STEP_TIMEOUT_S:",
            "MATOMO_INSTALLER_STEP_DEADLINE_S:",
            "MATOMO_INSTALLER_READY_TIMEOUT_S:",
        ),
    }
    for service_name, fragments in expected_fragments.items():
        service_block = _extract_service_block(merged.stdout, service_name)
        for fragment in fragments:
            self.assertIn(fragment, service_block)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -1,6 +1,10 @@
import unittest import unittest
from matomo_bootstrap.installers.web import _click_next_with_wait, _count_locator from matomo_bootstrap.installers.web import (
_click_next_with_wait,
_count_locator,
_wait_for_superuser_login_field,
)
class _FlakyLocator: class _FlakyLocator:
@@ -125,6 +129,37 @@ class _NoNextButNamedLoginAppearsPage:
self.login_visible = True self.login_visible = True
class _DelayedSuperuserLoginPage:
def __init__(self, *, reveal_after_wait_calls: int | None):
self.url = "http://matomo/index.php?action=setupSuperUser&module=Installation"
self.login_visible = False
self._wait_calls = 0
self._reveal_after_wait_calls = reveal_after_wait_calls
def locator(self, selector: str):
return _StaticLocator(self, selector)
def get_by_role(self, role: str, name: str):
return _RoleLocator(0)
def get_by_text(self, *_args, **_kwargs):
return _RoleLocator(0)
def title(self) -> str:
return "setupSuperUser"
def wait_for_load_state(self, *_args, **_kwargs):
return None
def wait_for_timeout(self, *_args, **_kwargs):
self._wait_calls += 1
if (
self._reveal_after_wait_calls is not None
and self._wait_calls >= self._reveal_after_wait_calls
):
self.login_visible = True
class TestWebInstallerLocatorCountIntegration(unittest.TestCase): class TestWebInstallerLocatorCountIntegration(unittest.TestCase):
def test_retries_transient_navigation_error(self) -> None: def test_retries_transient_navigation_error(self) -> None:
locator = _FlakyLocator( locator = _FlakyLocator(
@@ -168,6 +203,29 @@ class TestWebInstallerLocatorCountIntegration(unittest.TestCase):
self.assertEqual(step, "Installation:setupSuperUser") self.assertEqual(step, "Installation:setupSuperUser")
self.assertTrue(page.login_visible) self.assertTrue(page.login_visible)
def test_wait_for_superuser_login_field_allows_delayed_form(self) -> None:
    """The wait reports success when the form appears after a few polls."""
    delayed_page = _DelayedSuperuserLoginPage(reveal_after_wait_calls=4)

    found = _wait_for_superuser_login_field(
        delayed_page, timeout_s=1.0, poll_interval_ms=1
    )

    self.assertTrue(found)
    self.assertTrue(delayed_page.login_visible)
def test_wait_for_superuser_login_field_times_out_when_absent(self) -> None:
    """The wait reports failure when the form never appears before the timeout."""
    never_ready_page = _DelayedSuperuserLoginPage(reveal_after_wait_calls=None)

    found = _wait_for_superuser_login_field(
        never_ready_page, timeout_s=0.01, poll_interval_ms=1
    )

    self.assertFalse(found)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()