refactor: migrate to src/ package + add DinD-based E2E runner with debug artifacts

- Replace legacy standalone scripts with a proper src-layout Python package
  (baudolo backup/restore/configure entrypoints via pyproject.toml)
- Remove old scripts/files (backup-docker-to-local.py, recover-docker-from-local.sh,
  databases.csv.tpl, Todo.md)
- Add Dockerfile to build the project image for local/E2E usage
- Update Makefile: build image and run E2E via external runner script
- Add scripts/test-e2e.sh (sketched below):
  - start DinD + dedicated network
  - recreate DinD data volume (and shared /tmp volume)
  - pre-pull helper images (alpine-rsync, alpine)
  - load local baudolo:local image into DinD
  - run unittest E2E suite inside DinD and abort on first failure
  - on failure: dump host+DinD diagnostics and archive shared /tmp into artifacts/
- Add artifacts/ debug outputs produced by failing E2E runs (logs, events, tmp archive)
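
For orientation, a condensed sketch of what scripts/test-e2e.sh does, per the bullets
above. Resource names, image tags, and the test path below are illustrative
assumptions, not necessarily what the actual script uses:

    #!/usr/bin/env bash
    # Condensed sketch -- names below are illustrative assumptions.
    set -euo pipefail

    NET=baudolo-e2e
    DIND=baudolo-e2e-dind

    docker network create "$NET" || true
    # Recreate the DinD data volume and the shared /tmp volume.
    docker volume rm -f baudolo-dind-data baudolo-shared-tmp
    docker volume create baudolo-dind-data
    docker volume create baudolo-shared-tmp

    # Start DinD with a dedicated data volume and a shared /tmp.
    docker run -d --privileged --name "$DIND" --network "$NET" \
      -e DOCKER_TLS_CERTDIR= \
      -v baudolo-dind-data:/var/lib/docker \
      -v baudolo-shared-tmp:/tmp \
      docker:dind

    # Pre-pull helper images (alpine plus an rsync-capable alpine image),
    # then load the locally built image into DinD.
    docker exec "$DIND" docker pull alpine
    docker save baudolo:local | docker exec -i "$DIND" docker load

    # Run the unittest E2E suite inside DinD; -f aborts on the first failure.
    if ! docker exec "$DIND" docker run --rm baudolo:local \
        python -m unittest discover -s tests/e2e -v -f; then
      # On failure: dump diagnostics and archive the shared /tmp.
      mkdir -p artifacts
      docker logs "$DIND" > artifacts/dind.log 2>&1 || true
      docker exec "$DIND" tar -C /tmp -czf - . > artifacts/shared-tmp.tar.gz || true
      exit 1
    fi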

https://chatgpt.com/share/694ec23f-0794-800f-9a59-8365bc80f435
Kevin Veen-Birkenbach
2025-12-26 18:13:26 +01:00
parent 41910aece2
commit c30b4865d4
55 changed files with 2950 additions and 804 deletions


@@ -0,0 +1 @@
"""Baudolo backup package."""


@@ -0,0 +1,9 @@
#!/usr/bin/env python3
from __future__ import annotations

from .app import main

if __name__ == "__main__":
    raise SystemExit(main())
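
The console entrypoints mentioned in the commit message would be wired up in
pyproject.toml roughly like this. Only the backup target below maps to code
shown in this commit (app.main); the restore/configure targets are assumed
module paths:

    [project.scripts]
    baudolo-backup = "baudolo.backup.app:main"
    # Assumptions -- the commit message only names "restore/configure" entrypoints:
    baudolo-restore = "baudolo.restore.app:main"
    baudolo-configure = "baudolo.configure.app:main"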

src/baudolo/backup/app.py

@@ -0,0 +1,183 @@
from __future__ import annotations

import os
import pathlib
from datetime import datetime

import pandas
from dirval import create_stamp_file

from .cli import parse_args
from .compose import handle_docker_compose_services
from .db import backup_database
from .docker import (
    change_containers_status,
    containers_using_volume,
    docker_volume_names,
    get_image_info,
    has_image,
)
from .shell import execute_shell_command
from .volume import backup_volume


def get_machine_id() -> str:
    return execute_shell_command("sha256sum /etc/machine-id")[0][0:64]


def stamp_directory(version_dir: str) -> None:
    """
    Use dirval as a Python library to stamp the directory (no CLI dependency).
    """
    create_stamp_file(version_dir)


def create_version_directory(versions_dir: str, backup_time: str) -> str:
    version_dir = os.path.join(versions_dir, backup_time)
    pathlib.Path(version_dir).mkdir(parents=True, exist_ok=True)
    return version_dir


def create_volume_directory(version_dir: str, volume_name: str) -> str:
    path = os.path.join(version_dir, volume_name)
    pathlib.Path(path).mkdir(parents=True, exist_ok=True)
    return path


def is_image_ignored(container: str, images_no_backup_required: list[str]) -> bool:
    if not images_no_backup_required:
        return False
    img = get_image_info(container)
    return any(pat in img for pat in images_no_backup_required)


def volume_is_fully_ignored(containers: list[str], images_no_backup_required: list[str]) -> bool:
    """
    Skip file backup only if all containers linked to the volume are ignored.
    """
    if not containers:
        return False
    return all(is_image_ignored(c, images_no_backup_required) for c in containers)


def requires_stop(containers: list[str], images_no_stop_required: list[str]) -> bool:
    """
    Stop is required if ANY container image is NOT in the whitelist patterns.
    """
    for c in containers:
        img = get_image_info(c)
        if not any(pat in img for pat in images_no_stop_required):
            return True
    return False


def backup_mariadb_or_postgres(
    *,
    container: str,
    volume_dir: str,
    databases_df: "pandas.DataFrame",
    database_containers: list[str],
) -> bool:
    """
    Returns True if the container is a DB container we handled.
    """
    for img in ["mariadb", "postgres"]:
        if has_image(container, img):
            backup_database(
                container=container,
                volume_dir=volume_dir,
                db_type=img,
                databases_df=databases_df,
                database_containers=database_containers,
            )
            return True
    return False


def _backup_dumps_for_volume(
    *,
    containers: list[str],
    vol_dir: str,
    databases_df: "pandas.DataFrame",
    database_containers: list[str],
) -> bool:
    """
    Create DB dumps for any mariadb/postgres containers attached to this volume.
    Returns True if at least one dump was produced.
    """
    dumped_any = False
    for c in containers:
        if backup_mariadb_or_postgres(
            container=c,
            volume_dir=vol_dir,
            databases_df=databases_df,
            database_containers=database_containers,
        ):
            dumped_any = True
    return dumped_any


def main() -> int:
    args = parse_args()
    machine_id = get_machine_id()
    backup_time = datetime.now().strftime("%Y%m%d%H%M%S")
    versions_dir = os.path.join(args.backups_dir, machine_id, args.repo_name)
    version_dir = create_version_directory(versions_dir, backup_time)
    databases_df = pandas.read_csv(args.databases_csv, sep=";")
    print("💾 Start volume backups...", flush=True)
    for volume_name in docker_volume_names():
        print(f"Start backup routine for volume: {volume_name}", flush=True)
        containers = containers_using_volume(volume_name)
        vol_dir = create_volume_directory(version_dir, volume_name)
        # Old behavior: DB dumps are additional to file backups.
        _backup_dumps_for_volume(
            containers=containers,
            vol_dir=vol_dir,
            databases_df=databases_df,
            database_containers=args.database_containers,
        )
        # dump-only: skip ALL file rsync backups
        if args.dump_only:
            continue
        # skip file backup if all linked containers are ignored
        if volume_is_fully_ignored(containers, args.images_no_backup_required):
            print(
                f"Skipping file backup for volume '{volume_name}' (all linked containers are ignored).",
                flush=True,
            )
            continue
        if args.everything:
            # "everything": always do a pre-rsync, then stop + rsync again
            backup_volume(versions_dir, volume_name, vol_dir)
            change_containers_status(containers, "stop")
            backup_volume(versions_dir, volume_name, vol_dir)
            if not args.shutdown:
                change_containers_status(containers, "start")
            continue
        # default: rsync, and if needed stop + rsync
        backup_volume(versions_dir, volume_name, vol_dir)
        if requires_stop(containers, args.images_no_stop_required):
            change_containers_status(containers, "stop")
            backup_volume(versions_dir, volume_name, vol_dir)
            if not args.shutdown:
                change_containers_status(containers, "start")
    # Stamp the backup version directory using dirval (python lib)
    stamp_directory(version_dir)
    print("Finished volume backups.", flush=True)
    print("Handling Docker Compose services...", flush=True)
    handle_docker_compose_services(args.compose_dir, args.docker_compose_hard_restart_required)
    return 0

src/baudolo/backup/cli.py

@@ -0,0 +1,93 @@
from __future__ import annotations

import argparse
import os
from pathlib import Path


def _default_repo_name() -> str:
    """
    Derive the repository name from the folder that contains `src/`.

    Expected layout:
        <repo-root>/src/baudolo/backup/cli.py
        => parents[0]=backup, [1]=baudolo, [2]=src, [3]=repo-root
    """
    try:
        return Path(__file__).resolve().parents[3].name
    except Exception:
        return "backup-docker-to-local"


def parse_args() -> argparse.Namespace:
    dirname = os.path.dirname(__file__)
    default_databases_csv = os.path.join(dirname, "databases.csv")
    p = argparse.ArgumentParser(description="Backup Docker volumes.")
    p.add_argument(
        "--compose-dir",
        type=str,
        required=True,
        help="Path to the parent directory containing docker-compose setups",
    )
    p.add_argument(
        "--docker-compose-hard-restart-required",
        nargs="+",
        default=["mailu"],
        help="Compose dir names that require 'docker-compose down && up -d' (default: mailu)",
    )
    p.add_argument(
        "--repo-name",
        default=_default_repo_name(),
        help="Backup repo folder name under <backups-dir>/<machine-id>/ (default: git repo folder name)",
    )
    p.add_argument(
        "--databases-csv",
        default=default_databases_csv,
        help=f"Path to databases.csv (default: {default_databases_csv})",
    )
    p.add_argument(
        "--backups-dir",
        default="/Backups",
        help="Backup root directory (default: /Backups)",
    )
    p.add_argument(
        "--database-containers",
        nargs="+",
        required=True,
        help="Container names treated as special instances for database backups",
    )
    p.add_argument(
        "--images-no-stop-required",
        nargs="+",
        required=True,
        help="Image name patterns for which containers should not be stopped during file backup",
    )
    p.add_argument(
        "--images-no-backup-required",
        nargs="+",
        default=[],
        help="Image name patterns for which no backup should be performed",
    )
    p.add_argument(
        "--everything",
        action="store_true",
        help="Force file backup for all volumes and also execute database dumps (like the old script)",
    )
    p.add_argument(
        "--shutdown",
        action="store_true",
        help="Do not restart containers after backup",
    )
    p.add_argument(
        "--dump-only",
        action="store_true",
        help="Only create DB dumps (skip ALL file rsync backups)",
    )
    return p.parse_args()
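
Given these flags, a hypothetical invocation of the backup entrypoint could look
like the following (the entrypoint name, container names, and patterns are
illustrative, not taken from this commit):

    baudolo-backup \
      --compose-dir /opt/compose \
      --database-containers central-mariadb central-postgres \
      --images-no-stop-required mariadb postgres redis \
      --images-no-backup-required redis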


@@ -0,0 +1,31 @@
from __future__ import annotations

import os
import subprocess


def hard_restart_docker_services(dir_path: str) -> None:
    print(f"Hard restart docker-compose services in: {dir_path}", flush=True)
    subprocess.run(["docker-compose", "down"], cwd=dir_path, check=True)
    subprocess.run(["docker-compose", "up", "-d"], cwd=dir_path, check=True)


def handle_docker_compose_services(parent_directory: str, hard_restart_required: list[str]) -> None:
    for entry in os.scandir(parent_directory):
        if not entry.is_dir():
            continue
        dir_path = entry.path
        name = os.path.basename(dir_path)
        compose_file = os.path.join(dir_path, "docker-compose.yml")
        print(f"Checking directory: {dir_path}", flush=True)
        if not os.path.isfile(compose_file):
            print("No docker-compose.yml found. Skipping.", flush=True)
            continue
        if name in hard_restart_required:
            print(f"{name}: hard restart required.", flush=True)
            hard_restart_docker_services(dir_path)
        else:
            print(f"{name}: no restart required.", flush=True)

src/baudolo/backup/db.py

@@ -0,0 +1,73 @@
from __future__ import annotations

import os
import pathlib
import re

import pandas

from .shell import BackupException, execute_shell_command


def get_instance(container: str, database_containers: list[str]) -> str:
    if container in database_containers:
        return container
    # e.g. "nextcloud-database" -> "nextcloud", "wiki_postgres" -> "wiki"
    return re.split(r"(_|-)(database|db|postgres)", container)[0]


def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
    cmd = (
        f"PGPASSWORD={password} docker exec -i {container} "
        f"pg_dumpall -U {username} -h localhost > {out_file}"
    )
    execute_shell_command(cmd)


def backup_database(
    *,
    container: str,
    volume_dir: str,
    db_type: str,
    databases_df: "pandas.DataFrame",
    database_containers: list[str],
) -> None:
    instance_name = get_instance(container, database_containers)
    entries = databases_df.loc[databases_df["instance"] == instance_name]
    if entries.empty:
        raise BackupException(f"No entry found for instance '{instance_name}'")
    out_dir = os.path.join(volume_dir, "sql")
    pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
    for _, row in entries.iterrows():
        db_name = row["database"]
        user = row["username"]
        password = row["password"]
        dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
        if db_type == "mariadb":
            cmd = (
                f"docker exec {container} /usr/bin/mariadb-dump "
                f"-u {user} -p{password} {db_name} > {dump_file}"
            )
            execute_shell_command(cmd)
            continue
        if db_type == "postgres":
            cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
            if not db_name:
                # No database name given: dump the whole cluster instead.
                fallback_pg_dumpall(container, user, password, cluster_file)
                return
            try:
                cmd = (
                    f"PGPASSWORD={password} docker exec -i {container} "
                    f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
                )
                execute_shell_command(cmd)
            except BackupException as e:
                print(f"pg_dump failed: {e}", flush=True)
                print(f"Falling back to pg_dumpall for instance '{instance_name}'", flush=True)
                fallback_pg_dumpall(container, user, password, cluster_file)
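
backup_database expects databases.csv (read with sep=";" in app.py) to provide
the four columns the code accesses: instance, database, username, password. A
hypothetical file, where an empty database field is the cue for the pg_dumpall
cluster fallback (values are illustrative):

    instance;database;username;password
    nextcloud;nextcloud;nc_user;s3cret
    central-postgres;;postgres;s3cret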


@@ -0,0 +1,43 @@
from __future__ import annotations

from .shell import execute_shell_command


def get_image_info(container: str) -> str:
    return execute_shell_command(
        f"docker inspect --format '{{{{.Config.Image}}}}' {container}"
    )[0]


def has_image(container: str, pattern: str) -> bool:
    """Return True if the container's image contains the pattern."""
    return pattern in get_image_info(container)


def docker_volume_names() -> list[str]:
    return execute_shell_command("docker volume ls --format '{{.Name}}'")


def containers_using_volume(volume_name: str) -> list[str]:
    return execute_shell_command(
        f"docker ps --filter volume=\"{volume_name}\" --format '{{{{.Names}}}}'"
    )


def change_containers_status(containers: list[str], status: str) -> None:
    """Stop or start a list of containers."""
    if not containers:
        print(f"No containers to {status}.", flush=True)
        return
    names = " ".join(containers)
    print(f"{status.capitalize()} containers: {names}...", flush=True)
    execute_shell_command(f"docker {status} {names}")


def docker_volume_exists(volume: str) -> bool:
    # Avoid throwing exceptions for exists checks.
    try:
        execute_shell_command(f"docker volume inspect {volume} >/dev/null 2>&1 && echo OK")
        return True
    except Exception:
        return False


@@ -0,0 +1,26 @@
from __future__ import annotations

import subprocess


class BackupException(Exception):
    """Generic exception for backup errors."""


def execute_shell_command(command: str) -> list[str]:
    """Execute a shell command and return its decoded output lines."""
    print(command, flush=True)
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    out, err = process.communicate()
    if process.returncode != 0:
        raise BackupException(
            f"Error in command: {command}\n"
            f"Output: {out.decode(errors='replace')}\n"
            f"Error: {err.decode(errors='replace')}\n"
            f"Exit code: {process.returncode}"
        )
    return [line.decode("utf-8") for line in out.splitlines()]
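
A minimal usage sketch of this helper: callers get decoded stdout lines on
success and a BackupException carrying the command, output, and exit code on
failure.

    from baudolo.backup.shell import BackupException, execute_shell_command

    try:
        for name in execute_shell_command("docker ps --format '{{.Names}}'"):
            print(f"running container: {name}")
    except BackupException as exc:
        print(f"docker not reachable: {exc}")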


@@ -0,0 +1,42 @@
from __future__ import annotations

import os
import pathlib

from .shell import BackupException, execute_shell_command


def get_storage_path(volume_name: str) -> str:
    path = execute_shell_command(
        f"docker volume inspect --format '{{{{ .Mountpoint }}}}' {volume_name}"
    )[0]
    return f"{path}/"


def get_last_backup_dir(versions_dir: str, volume_name: str, current_backup_dir: str) -> str | None:
    versions = sorted(os.listdir(versions_dir), reverse=True)
    for version in versions:
        candidate = os.path.join(versions_dir, version, volume_name, "files", "")
        if candidate != current_backup_dir and os.path.isdir(candidate):
            return candidate
    return None


def backup_volume(versions_dir: str, volume_name: str, volume_dir: str) -> None:
    """Perform an incremental file backup of a Docker volume."""
    dest = os.path.join(volume_dir, "files") + "/"
    pathlib.Path(dest).mkdir(parents=True, exist_ok=True)
    last = get_last_backup_dir(versions_dir, volume_name, dest)
    link_dest = f"--link-dest='{last}'" if last else ""
    source = get_storage_path(volume_name)
    cmd = f"rsync -abP --delete --delete-excluded {link_dest} {source} {dest}"
    try:
        execute_shell_command(cmd)
    except BackupException as e:
        if "file has vanished" in str(e):
            print("Warning: Some files vanished before transfer. Continuing.", flush=True)
        else:
            raise
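
Taken together with app.py and db.py, a single run lays out backups roughly as
follows (machine-id hash, timestamp, and volume name are illustrative; the
exact stamp artifact depends on the dirval library):

    /Backups/<sha256(machine-id)>/<repo-name>/20251226181326/
        <volume-name>/
            files/    rsync copy, hard-linked to the previous version via --link-dest
            sql/      mariadb/postgres dumps, when a DB container uses the volume
        ...           one directory per Docker volume, plus the dirval stamp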