From 0bc7a3ecc0cb40f062f82cfcd1c6c00dcebea848 Mon Sep 17 00:00:00 2001
From: Kevin Veen-Birkenbach
Date: Sat, 13 Dec 2025 15:57:05 +0100
Subject: [PATCH] ci(nix): retry flake evaluation on GitHub API rate limits

Add a reusable retry helper that detects GitHub API 403 rate-limit errors
during Nix flake evaluation and retries with exponential backoff.

Apply the retry logic to flake-only CI tests so transient GitHub rate limits
no longer cause random CI failures while preserving fast failure for real
errors.

https://chatgpt.com/share/693d7ec5-ac70-800f-a627-ef705c653ba1
---
 scripts/nix/lib/retry_403.sh | 55 ++++++++++++++++++++++++++++++++++++
 scripts/test/test-env-nix.sh | 22 ++++++++++++---
 2 files changed, 73 insertions(+), 4 deletions(-)
 create mode 100755 scripts/nix/lib/retry_403.sh

diff --git a/scripts/nix/lib/retry_403.sh b/scripts/nix/lib/retry_403.sh
new file mode 100755
index 0000000..83963de
--- /dev/null
+++ b/scripts/nix/lib/retry_403.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [[ -n "${PKGMGR_NIX_RETRY_403_SH:-}" ]]; then
+  return 0
+fi
+PKGMGR_NIX_RETRY_403_SH=1
+
+# Run a command and retry it only when its stderr shows the GitHub API
+# rate-limit 403 error seen during nix flake evaluation.
+#
+# Arguments: the command to run, followed by its arguments.
+# Outputs:   stdout of the command passes through unchanged. stderr is
+#            captured per attempt and replayed on stderr afterwards, for
+#            successful attempts too, so warnings are never swallowed.
+# Returns:   0 on success, otherwise the exit status of the last attempt.
+#
+# Retries 7 times with delays: 10, 30, 50, 80, 130, 210, 420 seconds.
+run_with_github_403_retry() {
+  local -a delays=(10 30 50 80 130 210 420)
+  local max_retries="${#delays[@]}"
+  local attempt=0
+  local err rate_limited sleep_s tmp
+
+  while true; do
+    tmp="$(mktemp -t nix-err.XXXXXX)" || return 1
+
+    # Run the command; capture stderr for inspection while preserving stdout.
+    err=0
+    "$@" 2>"$tmp" || err=$?
+
+    # Only the specific GitHub API rate limit 403 case is retryable. Decide
+    # now, because the capture file is removed right below.
+    rate_limited=0
+    if (( err != 0 )) \
+      && grep -qE 'HTTP error 403' "$tmp" \
+      && grep -qiE 'API rate limit exceeded|api\.github\.com' "$tmp"; then
+      rate_limited=1
+    fi
+
+    # Replay the captured stderr exactly once per attempt, then clean up.
+    cat "$tmp" >&2
+    rm -f "$tmp"
+
+    # Success, a non-retryable failure, or retries exhausted: stop here.
+    if (( err == 0 || rate_limited == 0 || attempt >= max_retries )); then
+      return "$err"
+    fi
+
+    sleep_s="${delays[$attempt]}"
+    attempt=$((attempt + 1))
+    echo "[nix-retry] GitHub API rate-limit (403). Retry ${attempt}/${max_retries} in ${sleep_s}s: $*" >&2
+    sleep "$sleep_s"
+  done
+}
diff --git a/scripts/test/test-env-nix.sh b/scripts/test/test-env-nix.sh
index aacd76d..90de78a 100755
--- a/scripts/test/test-env-nix.sh
+++ b/scripts/test/test-env-nix.sh
@@ -27,7 +27,7 @@ docker run --rm \
     echo ">>> preflight: nix must exist in image"
     if ! command -v nix >/dev/null 2>&1; then
       echo "NO_NIX"
-      echo "ERROR: nix not found in image '\'''"${IMAGE}"''\'' (PKGMGR_DISTRO='"${PKGMGR_DISTRO}"')"
+      echo "ERROR: nix not found in image '"${IMAGE}"' (PKGMGR_DISTRO='"${PKGMGR_DISTRO}"')"
       echo "HINT: Ensure Nix is installed during image build for this distro."
       exit 1
     fi
@@ -35,14 +35,28 @@ docker run --rm \
     echo ">>> nix version"
     nix --version
 
+    # ------------------------------------------------------------
+    # Retry helper for GitHub API rate-limit (HTTP 403)
+    # ------------------------------------------------------------
+    if [[ -f /src/scripts/nix/lib/retry_403.sh ]]; then
+      # shellcheck source=./scripts/nix/lib/retry_403.sh
+      source /src/scripts/nix/lib/retry_403.sh
+    elif [[ -f ./scripts/nix/lib/retry_403.sh ]]; then
+      # shellcheck source=./scripts/nix/lib/retry_403.sh
+      source ./scripts/nix/lib/retry_403.sh
+    else
+      echo "ERROR: retry helper not found: scripts/nix/lib/retry_403.sh"
+      exit 1
+    fi
+
     echo ">>> nix flake show"
-    nix flake show . --no-write-lock-file >/dev/null
+    run_with_github_403_retry nix flake show . --no-write-lock-file >/dev/null
 
     echo ">>> nix build .#default"
-    nix build .#default --no-link --no-write-lock-file
+    run_with_github_403_retry nix build .#default --no-link --no-write-lock-file
 
     echo ">>> nix run .#pkgmgr -- --help"
-    nix run .#pkgmgr -- --help --no-write-lock-file
+    run_with_github_403_retry nix run .#pkgmgr --no-write-lock-file -- --help
 
     echo ">>> OK: Nix flake-only test succeeded."
   '