fix(seed): handle empty databases.csv and add unit tests

- Gracefully handle an empty databases.csv: catch pandas' EmptyDataError, emit a warning on stderr, and fall back to an empty DataFrame that carries the header columns
- Add _empty_df() helper for consistent DataFrame initialization
- Add unit tests for baudolo-seed including empty-file regression case
- Apply minor formatting fixes across backup and e2e test files

https://chatgpt.com/share/69628f0b-8744-800f-b08d-2633e05167da
This commit is contained in:
Kevin Veen-Birkenbach
2026-01-10 18:40:22 +01:00
parent e4bc075474
commit d976640312
8 changed files with 289 additions and 39 deletions

View File

@@ -72,6 +72,7 @@ def requires_stop(containers: list[str], images_no_stop_required: list[str]) ->
return True
return False
def backup_mariadb_or_postgres(
*,
container: str,

View File

@@ -68,7 +68,7 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Do not restart containers after backup",
)
p.add_argument(
"--dump-only-sql",
action="store_true",

View File

@@ -52,7 +52,9 @@ def _atomic_write_cmd(cmd: str, out_file: str) -> None:
execute_shell_command(f"mv {tmp} {out_file}")
def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
def fallback_pg_dumpall(
container: str, username: str, password: str, out_file: str
) -> None:
"""
Perform a full Postgres cluster dump using pg_dumpall.
"""
@@ -103,9 +105,7 @@ def backup_database(
"'*' is currently only supported for Postgres."
)
cluster_file = os.path.join(
out_dir, f"{instance_name}.cluster.backup.sql"
)
cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
fallback_pg_dumpall(container, user, password, cluster_file)
produced = True
continue

View File

@@ -7,10 +7,11 @@ import re
import sys
import pandas as pd
from typing import Optional
from pandas.errors import EmptyDataError
DB_NAME_RE = re.compile(r"^[a-zA-Z0-9_][a-zA-Z0-9_-]*$")
def _validate_database_value(value: Optional[str], *, instance: str) -> str:
v = (value or "").strip()
if v == "":
@@ -31,6 +32,11 @@ def _validate_database_value(value: Optional[str], *, instance: str) -> str:
)
return v
def _empty_df() -> pd.DataFrame:
return pd.DataFrame(columns=["instance", "database", "username", "password"])
def check_and_add_entry(
file_path: str,
instance: str,
@@ -48,17 +54,21 @@ def check_and_add_entry(
database = _validate_database_value(database, instance=instance)
if os.path.exists(file_path):
df = pd.read_csv(
file_path,
sep=";",
dtype=str,
keep_default_na=False,
)
try:
df = pd.read_csv(
file_path,
sep=";",
dtype=str,
keep_default_na=False,
)
except EmptyDataError:
print(
f"WARNING: databases.csv exists but is empty: {file_path}. Creating header columns.",
file=sys.stderr,
)
df = _empty_df()
else:
df = pd.DataFrame(
columns=["instance", "database", "username", "password"]
)
df = _empty_df()
mask = (df["instance"] == instance) & (df["database"] == database)
if mask.any():