ci(nix): retry flake evaluation on GitHub API rate limits
Some checks failed
CI / test-unit (push) Has been cancelled
CI / test-integration (push) Has been cancelled
CI / test-env-virtual (push) Has been cancelled
CI / test-env-nix (push) Has been cancelled
CI / test-e2e (push) Has been cancelled
CI / test-virgin-user (push) Has been cancelled
CI / test-virgin-root (push) Has been cancelled
CI / codesniffer-shellcheck (push) Has been cancelled
CI / codesniffer-ruff (push) Has been cancelled
Mark stable commit / test-unit (push) Has been cancelled
Mark stable commit / test-integration (push) Has been cancelled
Mark stable commit / test-env-virtual (push) Has been cancelled
Mark stable commit / test-env-nix (push) Has been cancelled
Mark stable commit / test-e2e (push) Has been cancelled
Mark stable commit / test-virgin-user (push) Has been cancelled
Mark stable commit / test-virgin-root (push) Has been cancelled
Mark stable commit / codesniffer-shellcheck (push) Has been cancelled
Mark stable commit / codesniffer-ruff (push) Has been cancelled
Mark stable commit / mark-stable (push) Has been cancelled

Add a reusable retry helper that detects GitHub API 403 rate-limit errors
during Nix flake evaluation and retries with increasing backoff delays
(10, 30, 50, 80, 130, 210 and 420 seconds).

Apply the retry logic to flake-only CI tests so transient GitHub rate
limits no longer cause random CI failures while preserving fast failure
for real errors.

https://chatgpt.com/share/693d7ec5-ac70-800f-a627-ef705c653ba1
This commit is contained in:
Kevin Veen-Birkenbach
2025-12-13 15:57:05 +01:00
parent 55a0ae4337
commit 0bc7a3ecc0
2 changed files with 70 additions and 4 deletions

52
scripts/nix/lib/retry_403.sh Executable file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline. This file is meant to be sourced, so
# these options also take effect in the shell that sources it.
set -o errexit -o nounset -o pipefail

# Include guard: when the marker variable is already set, the helper has been
# loaded before, so stop here. `return` at top level only works when the file
# is sourced, not when it is executed directly.
[[ -z "${PKGMGR_NIX_RETRY_403_SH:-}" ]] || return 0
PKGMGR_NIX_RETRY_403_SH=1
#######################################
# Run a command and retry it only when it fails with the GitHub API
# rate-limit error (HTTP 403) that shows up during nix flake evaluation.
#
# The command's stdout passes through untouched. Its stderr is buffered in a
# temporary file so it can be inspected, and is replayed on stderr after
# every attempt -- including successful ones, so warnings are never lost.
#
# Up to 7 retries are made (8 attempts in total), waiting
# 10, 30, 50, 80, 130, 210 and 420 seconds before the respective retry.
#
# Arguments: the command to run followed by its arguments ("$@")
# Outputs:   command stdout on stdout; command stderr and "[nix-retry]"
#            notices on stderr
# Returns:   0 on success; otherwise the exit status of the last attempt
#            (or the status of mktemp if no temp file could be created)
#######################################
run_with_github_403_retry() {
  local -a delays=(10 30 50 80 130 210 420)
  local -i attempt=0
  local -i max_retries="${#delays[@]}"
  local -i err rate_limited
  local tmp sleep_s

  while true; do
    # Without a capture file stderr cannot be inspected, so give up early.
    tmp="$(mktemp -t nix-err.XXXXXX)" || {
      err=$?
      echo "[nix-retry] ERROR: could not create temporary file" >&2
      return "$err"
    }

    # Run the command; stdout is left alone, stderr goes to the capture file.
    err=0
    "$@" 2>"$tmp" || err=$?

    # Always hand the captured stderr back to the caller, success or not.
    cat -- "$tmp" >&2

    # Only the specific GitHub API rate limit 403 case qualifies for a retry.
    rate_limited=0
    if (( err != 0 )) \
      && grep -qE 'HTTP error 403' "$tmp" \
      && grep -qiE 'API rate limit exceeded|api\.github\.com' "$tmp"; then
      rate_limited=1
    fi
    rm -f -- "$tmp"

    # Done on success, on any unrelated failure (fail fast), or once the
    # retry budget is used up.
    if (( err == 0 || rate_limited == 0 || attempt >= max_retries )); then
      return "$err"
    fi

    sleep_s="${delays[attempt]}"
    attempt=$((attempt + 1))
    echo "[nix-retry] GitHub API rate-limit (403). Retry ${attempt}/${max_retries} in ${sleep_s}s: $*" >&2
    sleep "$sleep_s"
  done
}

View File

@@ -27,7 +27,7 @@ docker run --rm \
echo ">>> preflight: nix must exist in image"
if ! command -v nix >/dev/null 2>&1; then
echo "NO_NIX"
echo "ERROR: nix not found in image '\'''"${IMAGE}"''\'' (PKGMGR_DISTRO='"${PKGMGR_DISTRO}"')"
echo "ERROR: nix not found in image '"${IMAGE}"' (PKGMGR_DISTRO='"${PKGMGR_DISTRO}"')"
echo "HINT: Ensure Nix is installed during image build for this distro."
exit 1
fi
@@ -35,14 +35,28 @@ docker run --rm \
echo ">>> nix version"
nix --version
# ------------------------------------------------------------
# Retry helper for GitHub API rate-limit (HTTP 403)
# ------------------------------------------------------------
if [[ -f /src/scripts/nix/lib/retry_403.sh ]]; then
# shellcheck source=./scripts/nix/lib/retry_403.sh
source /src/scripts/nix/lib/retry_403.sh
elif [[ -f ./scripts/nix/lib/retry_403.sh ]]; then
# shellcheck source=./scripts/nix/lib/retry_403.sh
source ./scripts/nix/lib/retry_403.sh
else
echo "ERROR: retry helper not found: scripts/nix/lib/retry_403.sh"
exit 1
fi
echo ">>> nix flake show"
nix flake show . --no-write-lock-file >/dev/null
run_with_github_403_retry nix flake show . --no-write-lock-file >/dev/null
echo ">>> nix build .#default"
nix build .#default --no-link --no-write-lock-file
run_with_github_403_retry nix build .#default --no-link --no-write-lock-file
echo ">>> nix run .#pkgmgr -- --help"
nix run .#pkgmgr -- --help --no-write-lock-file
run_with_github_403_retry nix run .#pkgmgr -- --help --no-write-lock-file
echo ">>> OK: Nix flake-only test succeeded."
'