**fix(mirror): derive remote repository owner and name from URL**
Some checks failed
Mark stable commit / test-unit (push) Has been cancelled
Mark stable commit / test-integration (push) Has been cancelled
Mark stable commit / test-env-virtual (push) Has been cancelled
Mark stable commit / test-env-nix (push) Has been cancelled
Mark stable commit / test-e2e (push) Has been cancelled
Mark stable commit / test-virgin-user (push) Has been cancelled
Mark stable commit / test-virgin-root (push) Has been cancelled
Mark stable commit / codesniffer-shellcheck (push) Has been cancelled
Mark stable commit / codesniffer-ruff (push) Has been cancelled
Mark stable commit / mark-stable (push) Has been cancelled

* Parse host, owner, and repository name directly from Git remote URLs
* Prevent provisioning under incorrect repository names
* Make Git URL the single source of truth for remote provisioning
* Improve diagnostics when URL parsing fails
This commit is contained in:
Kevin Veen-Birkenbach
2025-12-14 14:54:19 +01:00
parent 0d652d995e
commit 27c0c7c01f
2 changed files with 68 additions and 31 deletions

View File

@@ -9,7 +9,7 @@ from pkgmgr.core.remote_provisioning.ensure import EnsureOptions
from .context import build_context from .context import build_context
from .git_remote import determine_primary_remote_url from .git_remote import determine_primary_remote_url
from .types import Repository from .types import Repository
from .url_utils import hostport_from_git_url, normalize_provider_host from .url_utils import normalize_provider_host, parse_repo_from_git_url
def ensure_remote_repository( def ensure_remote_repository(
@@ -18,11 +18,6 @@ def ensure_remote_repository(
all_repos: List[Repository], all_repos: List[Repository],
preview: bool, preview: bool,
) -> None: ) -> None:
"""
Ensure that the remote repository exists using provider APIs.
This is ONLY called when ensure_remote=True.
"""
ctx = build_context(repo, repositories_base_dir, all_repos) ctx = build_context(repo, repositories_base_dir, all_repos)
resolved_mirrors = ctx.resolved_mirrors resolved_mirrors = ctx.resolved_mirrors
@@ -31,15 +26,13 @@ def ensure_remote_repository(
print("[INFO] No remote URL could be derived; skipping remote provisioning.") print("[INFO] No remote URL could be derived; skipping remote provisioning.")
return return
host_raw, _port = hostport_from_git_url(primary_url) host_raw, owner_from_url, name_from_url = parse_repo_from_git_url(primary_url)
host = normalize_provider_host(host_raw) host = normalize_provider_host(host_raw)
owner = repo.get("account") if not host or not owner_from_url or not name_from_url:
name = repo.get("repository") print("[WARN] Could not derive host/owner/repository from URL; cannot ensure remote repo.")
print(f" url={primary_url!r}")
if not host or not owner or not name: print(f" host={host!r}, owner={owner_from_url!r}, repository={name_from_url!r}")
print("[WARN] Missing host/account/repository; cannot ensure remote repo.")
print(f" host={host!r}, account={owner!r}, repository={name!r}")
return return
print("------------------------------------------------------------") print("------------------------------------------------------------")
@@ -49,8 +42,8 @@ def ensure_remote_repository(
spec = RepoSpec( spec = RepoSpec(
host=str(host), host=str(host),
owner=str(owner), owner=str(owner_from_url),
name=str(name), name=str(name_from_url),
private=bool(repo.get("private", True)), private=bool(repo.get("private", True)),
description=str(repo.get("description", "")), description=str(repo.get("description", "")),
) )

View File

@@ -6,14 +6,6 @@ from typing import Optional, Tuple
def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]: def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]:
"""
Extract (host, port) from common Git remote URL formats.
Supports:
- ssh://git@host:2201/owner/repo.git
- https://host/owner/repo.git
- git@host:owner/repo.git (scp-like; no explicit port)
"""
url = (url or "").strip() url = (url or "").strip()
if not url: if not url:
return "", None return "", None
@@ -24,7 +16,6 @@ def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]:
if "@" in netloc: if "@" in netloc:
netloc = netloc.split("@", 1)[1] netloc = netloc.split("@", 1)[1]
# IPv6 bracket form: [::1]:2222
if netloc.startswith("[") and "]" in netloc: if netloc.startswith("[") and "]" in netloc:
host = netloc[1:netloc.index("]")] host = netloc[1:netloc.index("]")]
rest = netloc[netloc.index("]") + 1 :] rest = netloc[netloc.index("]") + 1 :]
@@ -37,7 +28,6 @@ def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]:
return netloc.strip(), None return netloc.strip(), None
# scp-like: git@host:owner/repo.git
if "@" in url and ":" in url: if "@" in url and ":" in url:
after_at = url.split("@", 1)[1] after_at = url.split("@", 1)[1]
host = after_at.split(":", 1)[0].strip() host = after_at.split(":", 1)[0].strip()
@@ -48,12 +38,6 @@ def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]:
def normalize_provider_host(host: str) -> str: def normalize_provider_host(host: str) -> str:
"""
Normalize host for provider matching:
- strip brackets
- strip optional :port
- lowercase
"""
host = (host or "").strip() host = (host or "").strip()
if not host: if not host:
return "" return ""
@@ -65,3 +49,63 @@ def normalize_provider_host(host: str) -> str:
host = host.rsplit(":", 1)[0] host = host.rsplit(":", 1)[0]
return host.strip().lower() return host.strip().lower()
def _strip_dot_git(name: str) -> str:
n = (name or "").strip()
if n.lower().endswith(".git"):
return n[:-4]
return n
def _owner_and_repo_from_path(path: str) -> Tuple[Optional[str], Optional[str]]:
    """Return (owner, repo_name) from a slash-separated path, or (None, None)."""
    parts = [p for p in path.split("/") if p]
    if len(parts) < 2:
        return None, None
    # NOTE(review): only the first two segments are used, so deeper paths
    # (e.g. group/subgroup/repo) yield ("group", "subgroup") - confirm intended.
    return parts[0], _strip_dot_git(parts[1])


def parse_repo_from_git_url(url: str) -> Tuple[str, Optional[str], Optional[str]]:
    """
    Parse (host, owner, repo_name) from common Git remote URLs.

    Supports:
      - ssh://git@host:2201/owner/repo.git
      - https://host/owner/repo.git
      - git@host:owner/repo.git
      - host:owner/repo.git          (scp-like without a user)
      - host/owner/repo(.git)        (best-effort)

    Args:
        url: The Git remote URL; None/empty/whitespace is accepted.

    Returns:
        (host, owner, repo_name). host is "" when it cannot be determined;
        owner and repo_name are both None when the path does not contain at
        least two non-empty segments. A trailing ".git" is removed from the
        repository name. The function never raises on malformed input.
    """
    u = (url or "").strip()
    if not u:
        return "", None, None

    # URL-style (ssh://, https://, http://)
    if "://" in u:
        try:
            parsed = urlparse(u)
        except ValueError:
            # urlparse rejects e.g. unbalanced IPv6 brackets; report
            # "not derivable" instead of crashing the caller.
            return "", None, None
        host = (parsed.hostname or "").strip()
        owner, repo_name = _owner_and_repo_from_path(parsed.path or "")
        return host, owner, repo_name

    # Drop a leading "user@" only when it precedes the host, i.e. there is no
    # "/" before the "@"; an "@" further right belongs to the path.
    userinfo, at, after_at = u.partition("@")
    remainder = after_at if at and "/" not in userinfo else u

    # SCP-like: [user@]host:owner/repo.git - the colon must come before any
    # slash, otherwise it is part of the path rather than the host separator.
    host_part, colon, scp_path = remainder.partition(":")
    if colon and "/" not in host_part:
        owner, repo_name = _owner_and_repo_from_path(scp_path)
        return host_part.strip(), owner, repo_name

    # Fallback: host/owner/repo.git
    host_part, _slash, rest = remainder.partition("/")
    owner, repo_name = _owner_and_repo_from_path(rest)
    return host_part.strip(), owner, repo_name