Harden installer readiness and fix e2e healthcheck

This commit is contained in:
Kevin Veen-Birkenbach
2026-02-13 15:20:18 +01:00
parent 9e267ec83f
commit 37a17b536d
6 changed files with 360 additions and 121 deletions

View File

@@ -78,8 +78,46 @@ help:
# ----------------------------
venv:
@test -x "$(VENV_PY)" || ($(PYTHON) -m venv $(VENV_DIR))
@$(VENV_PIP) -q install -U pip setuptools wheel >/dev/null
# Ensure $(VENV_DIR) exists and is usable:
#   1. create it when the directory is missing;
#   2. if $(VENV_PY) is missing or does not run, re-link it (and bin/python3)
#      to a versioned interpreter found in the venv, or to the interpreter
#      named by pip's shebang;
#   3. if pip is missing or does not run, try to restore it via ensurepip;
#   4. exit with status 2 when interpreter or pip are still broken;
#   5. upgrade pip, setuptools and wheel.
# A repair link is never pointed at itself (e.g. python3 -> python3): that
# would replace a working interpreter with a symlink loop.
@set -e; \
  if [ ! -d "$(VENV_DIR)" ]; then \
    echo "Creating $(VENV_DIR) ..."; \
    $(PYTHON) -m venv "$(VENV_DIR)"; \
  fi; \
  if ! [ -x "$(VENV_PY)" ] || ! "$(VENV_PY)" -V >/dev/null 2>&1; then \
    echo "Repairing $(VENV_PY) symlink ..."; \
    fix_target=""; \
    for cand in "$(VENV_DIR)/bin/python3.14" "$(VENV_DIR)/bin/python3.13" "$(VENV_DIR)/bin/python3.12" "$(VENV_DIR)/bin/python3.11" "$(VENV_DIR)/bin/python3.10"; do \
      if [ -x "$$cand" ]; then \
        fix_target="$$(basename "$$cand")"; \
        break; \
      fi; \
    done; \
    if [ -z "$$fix_target" ] && [ -x "$(VENV_PIP)" ]; then \
      shebang="$$(head -n1 "$(VENV_PIP)" | sed 's/^#!//')"; \
      if [ -n "$$shebang" ] && [ -x "$$shebang" ]; then \
        fix_target="$$(basename "$$shebang")"; \
      fi; \
    fi; \
    if [ -n "$$fix_target" ] && [ -x "$(VENV_DIR)/bin/$$fix_target" ]; then \
      for link in "$(VENV_PY)" "$(VENV_DIR)/bin/python3"; do \
        if [ "$$(basename "$$link")" != "$$fix_target" ]; then \
          ln -sfn "$$fix_target" "$$link"; \
        fi; \
      done; \
    fi; \
  fi; \
  if ! [ -x "$(VENV_PIP)" ] || ! "$(VENV_PIP)" --version >/dev/null 2>&1; then \
    echo "Repairing pip via ensurepip ..."; \
    "$(VENV_PY)" -m ensurepip --upgrade >/dev/null 2>&1 || true; \
  fi; \
  if ! [ -x "$(VENV_PY)" ] || ! "$(VENV_PY)" -V >/dev/null 2>&1; then \
    echo "ERROR: Could not repair $(VENV_PY) in existing $(VENV_DIR)."; \
    echo "Run 'make clean' once or remove $(VENV_DIR) manually."; \
    exit 2; \
  fi; \
  if ! [ -x "$(VENV_PIP)" ] || ! "$(VENV_PIP)" --version >/dev/null 2>&1; then \
    echo "ERROR: Could not repair $(VENV_PIP) in existing $(VENV_DIR)."; \
    echo "Run 'make clean' once or remove $(VENV_DIR) manually."; \
    exit 2; \
  fi; \
  "$(VENV_PIP)" -q install -U pip setuptools wheel >/dev/null
deps-e2e: venv
@$(VENV_PIP) install -e ".[e2e]"

View File

@@ -172,7 +172,7 @@ services:
volumes:
- matomo_data:/var/www/html
healthcheck:
test: ["CMD-SHELL", "wget -qO- http://127.0.0.1/ >/dev/null || exit 1"]
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1/ >/dev/null || exit 1"]
interval: 10s
timeout: 5s
retries: 60
@@ -213,6 +213,10 @@ services:
MATOMO_PLAYWRIGHT_HEADLESS: "1"
MATOMO_PLAYWRIGHT_NAV_TIMEOUT_MS: "60000"
MATOMO_PLAYWRIGHT_SLOWMO_MS: "0"
MATOMO_INSTALLER_READY_TIMEOUT_S: "180"
MATOMO_INSTALLER_STEP_TIMEOUT_S: "30"
MATOMO_INSTALLER_STEP_DEADLINE_S: "180"
MATOMO_INSTALLER_DEBUG_DIR: "/tmp/matomo-bootstrap"
restart: "no"
@@ -269,6 +273,8 @@ matomo-bootstrap
2. **Installation (if needed)**
* uses a recorded Playwright flow to complete the Matomo web installer
* waits until installer controls are interactive before clicking next steps
* writes screenshot/HTML debug artifacts on installer failure
3. **Authentication**
* logs in using Matomo's `Login.logme` controller (cookie session)

View File

@@ -34,7 +34,7 @@ services:
volumes:
- matomo_data:/var/www/html
healthcheck:
test: ["CMD-SHELL", "wget -qO- http://127.0.0.1/ >/dev/null || exit 1"]
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1/ >/dev/null || exit 1"]
interval: 10s
timeout: 5s
retries: 60
@@ -47,7 +47,7 @@ services:
container_name: matomo-bootstrap
depends_on:
matomo:
condition: service_started
condition: service_healthy
environment:
MATOMO_URL: "http://matomo"
MATOMO_ADMIN_USER: "administrator"
@@ -65,6 +65,10 @@ services:
MATOMO_PLAYWRIGHT_HEADLESS: "1"
MATOMO_PLAYWRIGHT_NAV_TIMEOUT_MS: "60000"
MATOMO_PLAYWRIGHT_SLOWMO_MS: "0"
MATOMO_INSTALLER_READY_TIMEOUT_S: "180"
MATOMO_INSTALLER_STEP_TIMEOUT_S: "30"
MATOMO_INSTALLER_STEP_DEADLINE_S: "180"
MATOMO_INSTALLER_DEBUG_DIR: "/tmp/matomo-bootstrap"
# bootstrap is a one-shot command that prints the token and exits
# if you want to re-run, do: docker compose run --rm bootstrap
restart: "no"

View File

@@ -27,3 +27,9 @@ MATOMO_TIMEZONE=Germany - Berlin
# MATOMO_PLAYWRIGHT_HEADLESS=1
# MATOMO_PLAYWRIGHT_NAV_TIMEOUT_MS=60000
# MATOMO_PLAYWRIGHT_SLOWMO_MS=0
# Installer readiness / step guards
# MATOMO_INSTALLER_READY_TIMEOUT_S=180
# MATOMO_INSTALLER_STEP_TIMEOUT_S=30
# MATOMO_INSTALLER_STEP_DEADLINE_S=180
# MATOMO_INSTALLER_DEBUG_DIR=/tmp/matomo-bootstrap

View File

@@ -4,6 +4,7 @@ import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from .base import Installer
@@ -20,6 +21,16 @@ PLAYWRIGHT_SLOWMO_MS = int(os.environ.get("MATOMO_PLAYWRIGHT_SLOWMO_MS", "0"))
# Default Playwright navigation/action timeout, in milliseconds.
PLAYWRIGHT_NAV_TIMEOUT_MS = int(os.getenv("MATOMO_PLAYWRIGHT_NAV_TIMEOUT_MS", "60000"))

# Seconds to wait for the installer UI to become interactive after loading it.
INSTALLER_READY_TIMEOUT_S = int(os.getenv("MATOMO_INSTALLER_READY_TIMEOUT_S", "180"))

# Seconds allowed for locating and clicking a single Next/Continue control.
INSTALLER_STEP_TIMEOUT_S = int(os.getenv("MATOMO_INSTALLER_STEP_TIMEOUT_S", "30"))

# Overall seconds allowed for stepping forward to the superuser form.
INSTALLER_STEP_DEADLINE_S = int(os.getenv("MATOMO_INSTALLER_STEP_DEADLINE_S", "180"))

# Directory for failure screenshots/HTML/metadata; trailing slashes are dropped.
INSTALLER_DEBUG_DIR = os.getenv(
    "MATOMO_INSTALLER_DEBUG_DIR",
    "/tmp/matomo-bootstrap",
).rstrip("/")
# Values used by the installer flow (recorded)
DEFAULT_SITE_NAME = os.environ.get("MATOMO_SITE_NAME", "localhost")
@@ -27,6 +38,23 @@ DEFAULT_SITE_URL = os.environ.get("MATOMO_SITE_URL", "http://localhost")
DEFAULT_TIMEZONE = os.environ.get("MATOMO_TIMEZONE", "Germany - Berlin")
DEFAULT_ECOMMERCE = os.environ.get("MATOMO_ECOMMERCE", "Ecommerce enabled")
# (ARIA role, accessible name) pairs tried in order when looking for the
# installer's "go to next step" control. Each label is tried as a link first,
# then as a button; labels with the trailing '»' come before the bare ones.
NEXT_BUTTON_CANDIDATES: list[tuple[str, str]] = [
    (role, label)
    for label in (
        "Next »",
        "Next",
        "Continue",
        "Proceed",
        "Start Installation",
        "Weiter",
        "Fortfahren",
    )
    for role in ("link", "button")
]
def _log(msg: str) -> None:
# IMPORTANT: logs must not pollute stdout (tests expect only token on stdout)
@@ -127,6 +155,158 @@ def _page_warnings(page, *, prefix: str = "[install]") -> list[str]:
return out
def _wait_dom_settled(page) -> None:
    """Give the page a short, best-effort chance to finish loading.

    Waits for the ``domcontentloaded`` load state, then for ``networkidle``
    (capped at two seconds), ignoring any error raised by either wait, and
    finally pauses for a fixed 250 ms.
    """
    load_state_waits = (
        ("domcontentloaded", {}),
        # Best effort: helps when the UI needs a bit more rendering time.
        ("networkidle", {"timeout": 2_000}),
    )
    for state, options in load_state_waits:
        try:
            page.wait_for_load_state(state, **options)
        except Exception:
            # Deliberately ignored: these waits are only a settling aid.
            continue
    page.wait_for_timeout(250)
def _get_step_hint(url: str) -> str:
    """Return a short label describing the installer step behind *url*.

    The label is ``"<module>:<action>"``, built from the first ``module`` and
    ``action`` query parameters, when at least one of them is present.
    Otherwise the URL path is returned, or the URL itself when the path is
    empty. If the URL cannot be parsed, it is returned unchanged.
    """
    try:
        parsed = urllib.parse.urlparse(url)
        params = urllib.parse.parse_qs(parsed.query)
        module, action = (
            (params.get(key) or [""])[0] for key in ("module", "action")
        )
    except Exception:
        return url

    if module or action:
        return f"{module}:{action}"
    return parsed.path or url
def _safe_page_snapshot_name() -> str:
    """Return the current local time as ``YYYYMMDD-HHMMSS`` for file names."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
def _dump_failure_artifacts(page, reason: str) -> None:
    """Write best-effort debug artifacts for a failed installer run.

    Three files sharing a timestamped base name are written into
    ``INSTALLER_DEBUG_DIR``: a full-page screenshot (``.png``), the page HTML
    (``.html``) and a small metadata file (``.txt``) holding the failure
    reason, URL, title and step hint. Each artifact is attempted
    independently; a failure is logged and that artifact is reported as
    ``<unavailable>``.

    This runs while another exception is being handled, so it must not raise
    for ordinary I/O problems: if the debug directory cannot be created the
    problem is logged and nothing is written.

    Args:
        page: The browser page to capture.
        reason: Human-readable description of the failure.
    """
    try:
        os.makedirs(INSTALLER_DEBUG_DIR, exist_ok=True)
    except OSError as exc:
        # Without this guard the OSError would replace the installer error
        # that triggered the dump.
        _log(
            f"[install] Could not create debug directory "
            f"{INSTALLER_DEBUG_DIR}: {exc}"
        )
        return

    stamp = _safe_page_snapshot_name()
    base = f"{INSTALLER_DEBUG_DIR}/installer-failure-{stamp}"
    screenshot_path = f"{base}.png"
    html_path = f"{base}.html"
    meta_path = f"{base}.txt"

    try:
        page.screenshot(path=screenshot_path, full_page=True)
    except Exception as exc:
        _log(f"[install] Could not write screenshot: {exc}")
        screenshot_path = "<unavailable>"

    try:
        html = page.content()
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(html)
    except Exception as exc:
        _log(f"[install] Could not write HTML snapshot: {exc}")
        html_path = "<unavailable>"

    # URL and title are read defensively: the page may already be unusable.
    try:
        url = page.url
    except Exception:
        url = "<unknown-url>"
    try:
        title = page.title()
    except Exception:
        title = "<unknown-title>"

    try:
        with open(meta_path, "w", encoding="utf-8") as f:
            f.write(f"reason: {reason}\n")
            f.write(f"url: {url}\n")
            f.write(f"title: {title}\n")
            f.write(f"step_hint: {_get_step_hint(url)}\n")
    except Exception as exc:
        _log(f"[install] Could not write metadata snapshot: {exc}")
        meta_path = "<unavailable>"

    _log("[install] Debug artifacts written:")
    _log(f"[install] screenshot: {screenshot_path}")
    _log(f"[install] html: {html_path}")
    _log(f"[install] meta: {meta_path}")
def _first_next_locator(page):
    """Find the first visible "next step" control on the installer page.

    The role/name pairs in ``NEXT_BUTTON_CANDIDATES`` are tried in order,
    followed by a loose text match on ``"Next"``. Errors raised while probing
    a candidate are ignored and the search moves on.

    Returns:
        A ``(locator, label)`` tuple for the first visible match, where
        ``label`` is ``"<role>:<name>"`` or ``"text:Next*"``; ``(None, "")``
        when nothing matched.
    """

    def _visible_first(locator):
        # Returns the first match when it exists and is visible, else None.
        try:
            if locator.count() > 0 and locator.first.is_visible():
                return locator.first
        except Exception:
            pass
        return None

    for role, name in NEXT_BUTTON_CANDIDATES:
        match = _visible_first(page.get_by_role(role, name=name))
        if match is not None:
            return match, f"{role}:{name}"

    fallback = _visible_first(page.get_by_text("Next", exact=False))
    if fallback is not None:
        return fallback, "text:Next*"
    return None, ""
def _installer_interactive(page) -> bool:
    """Report whether the installer page currently exposes a usable control.

    The page counts as interactive when any of these is present: the
    ``#login-0`` field, the ``#siteName-0`` field, the
    "Continue to Matomo »" button, or a visible Next/Continue control as
    found by ``_first_next_locator``.

    Probes run in that order and stop at the first hit, so the Next-control
    scan (which tries many candidates) only runs when the cheap probes miss.
    """
    if page.locator("#login-0").count() > 0:
        return True
    if page.locator("#siteName-0").count() > 0:
        return True
    if page.get_by_role("button", name="Continue to Matomo »").count() > 0:
        return True
    next_control, _ = _first_next_locator(page)
    return next_control is not None
def _wait_for_installer_interactive(page, *, timeout_s: int) -> None:
    """Block until the installer UI exposes an interactive control.

    Polls the page (letting it settle first, then pausing 300 ms between
    attempts) until ``_installer_interactive`` reports success.

    Args:
        page: The browser page showing the installer.
        timeout_s: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: If the UI did not become interactive within
            ``timeout_s`` seconds.
    """
    _log(f"[install] Waiting for interactive installer UI (timeout={timeout_s}s)...")
    # Monotonic clock: the deadline must not shift if the wall clock is
    # adjusted while we wait.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        _wait_dom_settled(page)
        if _installer_interactive(page):
            _log("[install] Installer UI looks interactive.")
            return
        page.wait_for_timeout(300)

    raise RuntimeError(
        f"Installer UI did not become interactive within {timeout_s}s "
        f"(url={page.url}, step={_get_step_hint(page.url)})."
    )
def _click_next_with_wait(page, *, timeout_s: int) -> str:
    """Click the installer's Next/Continue control, retrying until it works.

    Repeatedly looks for a visible next-step control and clicks it. A missing
    control or a failed click is retried after a short pause until
    ``timeout_s`` seconds have elapsed. After a successful click the page is
    given time to settle and the step transition is logged.

    Args:
        page: The browser page showing the installer.
        timeout_s: Maximum number of seconds to keep trying.

    Returns:
        The step hint of the page reached after the click.

    Raises:
        RuntimeError: If no control could be clicked in time. When at least
            one click attempt failed, the last click error is attached as
            the cause.
    """
    # Monotonic clock: the deadline must not shift if the wall clock is
    # adjusted while we wait.
    deadline = time.monotonic() + timeout_s
    last_click_error = None

    while time.monotonic() < deadline:
        loc, label = _first_next_locator(page)
        if loc is None:
            page.wait_for_timeout(300)
            continue

        before_url = page.url
        before_step = _get_step_hint(before_url)
        try:
            loc.click(timeout=2_000)
        except Exception as exc:
            # Keep the reason so a final timeout can explain why a control
            # that was found never got clicked.
            last_click_error = exc
            page.wait_for_timeout(250)
            continue

        _wait_dom_settled(page)
        after_url = page.url
        after_step = _get_step_hint(after_url)
        _log(
            f"[install] Clicked {label}; step {before_step} -> {after_step} "
            f"(url {before_url} -> {after_url})"
        )
        return after_step

    raise RuntimeError(
        "Could not find a Next/Continue control in the installer UI "
        f"within {timeout_s}s (url={page.url}, step={_get_step_hint(page.url)})."
    ) from last_click_error
def wait_http(url: str, timeout: int = 180) -> None:
"""
Consider Matomo 'reachable' as soon as the HTTP server answers - even with 500.
@@ -213,127 +393,84 @@ class WebInstaller(Installer):
page.set_default_navigation_timeout(PLAYWRIGHT_NAV_TIMEOUT_MS)
page.set_default_timeout(PLAYWRIGHT_NAV_TIMEOUT_MS)
def click_next() -> None:
"""
Matomo installer mixes link/button variants and sometimes includes '»'.
We try common variants in a robust order.
"""
candidates = [
("link", "Next »"),
("button", "Next »"),
("link", "Next"),
("button", "Next"),
("link", "Continue"),
("button", "Continue"),
("link", "Proceed"),
("button", "Proceed"),
("link", "Start Installation"),
("button", "Start Installation"),
("link", "Weiter"),
("button", "Weiter"),
("link", "Fortfahren"),
("button", "Fortfahren"),
]
for role, name in candidates:
loc = page.get_by_role(role, name=name)
if loc.count() > 0:
loc.first.click()
return
loc = page.get_by_text("Next", exact=False)
if loc.count() > 0:
loc.first.click()
return
raise RuntimeError(
"Could not find a Next/Continue control in the installer UI."
)
page.goto(base_url, wait_until="domcontentloaded")
_page_warnings(page)
def superuser_form_visible() -> bool:
return page.locator("#login-0").count() > 0
for _ in range(12):
if superuser_form_visible():
break
click_next()
page.wait_for_load_state("domcontentloaded")
page.wait_for_timeout(200)
_page_warnings(page)
else:
raise RuntimeError(
"Installer did not reach superuser step (login-0 not found)."
)
page.locator("#login-0").click()
page.locator("#login-0").fill(config.admin_user)
page.locator("#password-0").click()
page.locator("#password-0").fill(config.admin_password)
if page.locator("#password_bis-0").count() > 0:
page.locator("#password_bis-0").click()
page.locator("#password_bis-0").fill(config.admin_password)
page.locator("#email-0").click()
page.locator("#email-0").fill(config.admin_email)
_page_warnings(page)
if page.get_by_role("button", name="Next »").count() > 0:
page.get_by_role("button", name="Next »").click()
else:
click_next()
page.wait_for_load_state("domcontentloaded")
page.wait_for_timeout(200)
_page_warnings(page)
if page.locator("#siteName-0").count() > 0:
page.locator("#siteName-0").click()
page.locator("#siteName-0").fill(DEFAULT_SITE_NAME)
if page.locator("#url-0").count() > 0:
page.locator("#url-0").click()
page.locator("#url-0").fill(DEFAULT_SITE_URL)
_page_warnings(page)
try:
page.get_by_role("combobox").first.click()
page.get_by_role("listbox").get_by_text(DEFAULT_TIMEZONE).click()
except Exception:
_log("Timezone selection skipped (not found / changed UI).")
try:
page.get_by_role("combobox").nth(2).click()
page.get_by_role("listbox").get_by_text(DEFAULT_ECOMMERCE).click()
except Exception:
_log("Ecommerce selection skipped (not found / changed UI).")
_page_warnings(page)
click_next()
page.wait_for_load_state("domcontentloaded")
page.wait_for_timeout(200)
_page_warnings(page)
if page.get_by_role("link", name="Next »").count() > 0:
page.get_by_role("link", name="Next »").click()
page.wait_for_load_state("domcontentloaded")
page.wait_for_timeout(200)
page.goto(base_url, wait_until="domcontentloaded")
_wait_for_installer_interactive(
page, timeout_s=INSTALLER_READY_TIMEOUT_S
)
_page_warnings(page)
if page.get_by_role("button", name="Continue to Matomo »").count() > 0:
page.get_by_role("button", name="Continue to Matomo »").click()
page.wait_for_load_state("domcontentloaded")
page.wait_for_timeout(200)
progress_deadline = time.time() + INSTALLER_STEP_DEADLINE_S
while page.locator("#login-0").count() == 0:
if time.time() >= progress_deadline:
raise RuntimeError(
"Installer did not reach superuser step "
f"within {INSTALLER_STEP_DEADLINE_S}s "
f"(url={page.url}, step={_get_step_hint(page.url)})."
)
_click_next_with_wait(page, timeout_s=INSTALLER_STEP_TIMEOUT_S)
_page_warnings(page)
page.locator("#login-0").click()
page.locator("#login-0").fill(config.admin_user)
page.locator("#password-0").click()
page.locator("#password-0").fill(config.admin_password)
if page.locator("#password_bis-0").count() > 0:
page.locator("#password_bis-0").click()
page.locator("#password_bis-0").fill(config.admin_password)
page.locator("#email-0").click()
page.locator("#email-0").fill(config.admin_email)
_page_warnings(page)
context.close()
browser.close()
_click_next_with_wait(page, timeout_s=INSTALLER_STEP_TIMEOUT_S)
_page_warnings(page)
if page.locator("#siteName-0").count() > 0:
page.locator("#siteName-0").click()
page.locator("#siteName-0").fill(DEFAULT_SITE_NAME)
if page.locator("#url-0").count() > 0:
page.locator("#url-0").click()
page.locator("#url-0").fill(DEFAULT_SITE_URL)
_page_warnings(page)
try:
page.get_by_role("combobox").first.click()
page.get_by_role("listbox").get_by_text(DEFAULT_TIMEZONE).click()
except Exception:
_log("Timezone selection skipped (not found / changed UI).")
try:
page.get_by_role("combobox").nth(2).click()
page.get_by_role("listbox").get_by_text(DEFAULT_ECOMMERCE).click()
except Exception:
_log("Ecommerce selection skipped (not found / changed UI).")
_page_warnings(page)
_click_next_with_wait(page, timeout_s=INSTALLER_STEP_TIMEOUT_S)
_page_warnings(page)
if page.get_by_role("link", name="Next »").count() > 0:
page.get_by_role("link", name="Next »").click()
_wait_dom_settled(page)
_page_warnings(page)
if page.get_by_role("button", name="Continue to Matomo »").count() > 0:
page.get_by_role("button", name="Continue to Matomo »").click()
_wait_dom_settled(page)
_page_warnings(page)
except Exception as exc:
_dump_failure_artifacts(page, reason=str(exc))
raise
finally:
context.close()
browser.close()
time.sleep(1)
if not is_installed(base_url):

View File

@@ -56,6 +56,29 @@ def _wait_for_http_any_status(url: str, timeout_s: int) -> None:
raise RuntimeError(f"Matomo did not become reachable at {url} ({last_exc})")
def _extract_service_block(compose_config: str, service_name: str) -> str:
    """Return the text of one service entry from rendered compose YAML.

    This is a plain line scan, not a YAML parse. It assumes service keys are
    indented by exactly two spaces (presumably the layout emitted by
    ``docker compose config`` - confirm if the renderer changes).

    The block starts at the first line equal to ``"  <service_name>:"`` and
    ends just before the next non-blank line indented by two spaces or less,
    i.e. the next sibling entry or the next top-level key.

    Args:
        compose_config: Rendered compose configuration text.
        service_name: Name of the service whose block is wanted.

    Returns:
        The service's lines joined with newlines, header line included.

    Raises:
        AssertionError: If no line matches the service header.
    """
    lines = compose_config.splitlines()
    marker = f"  {service_name}:"
    try:
        start = lines.index(marker)
    except ValueError:
        raise AssertionError(
            f"service block not found in compose config: {service_name}"
        ) from None

    end = len(lines)
    for idx, line in enumerate(lines[start + 1:], start=start + 1):
        if not line.strip():
            # Blank lines never terminate a block.
            continue
        indent = len(line) - len(line.lstrip(" "))
        if indent <= 2:
            end = idx
            break
    return "\n".join(lines[start:end])
class TestRootDockerComposeStack(unittest.TestCase):
"""
E2E test for repository root docker-compose.yml:
@@ -131,5 +154,30 @@ class TestRootDockerComposeStack(unittest.TestCase):
self.assertIsInstance(data, list)
class TestRootDockerComposeDefinition(unittest.TestCase):
    """Static checks on the rendered root docker-compose configuration."""

    def test_bootstrap_service_waits_for_healthy_matomo_and_has_readiness_knobs(
        self,
    ) -> None:
        """Check the bootstrap dependency/readiness settings and the matomo healthcheck."""
        rendered = _run(
            _compose_cmd("config"),
            check=True,
            extra_env={"MATOMO_PORT": MATOMO_PORT},
        )
        self.assertEqual(rendered.returncode, 0, rendered.stderr)

        # Substrings that must appear inside each service's rendered block.
        expected_by_service = (
            (
                "bootstrap",
                (
                    "depends_on:",
                    "matomo:",
                    "condition: service_healthy",
                    "MATOMO_INSTALLER_READY_TIMEOUT_S:",
                    "MATOMO_INSTALLER_STEP_TIMEOUT_S:",
                    "MATOMO_INSTALLER_STEP_DEADLINE_S:",
                ),
            ),
            (
                "matomo",
                (
                    "healthcheck:",
                    "curl -fsS http://127.0.0.1/ >/dev/null || exit 1",
                ),
            ),
        )
        for service, needles in expected_by_service:
            block = _extract_service_block(rendered.stdout, service)
            for needle in needles:
                self.assertIn(needle, block)
if __name__ == "__main__":
unittest.main()