From 27c0c7c01fc7c8891033b12932911b00063421ea Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Sun, 14 Dec 2025 14:54:19 +0100 Subject: [PATCH] **fix(mirror): derive remote repository owner and name from URL** * Parse host, owner, and repository name directly from Git remote URLs * Prevent provisioning under incorrect repository names * Make Git URL the single source of truth for remote provisioning * Improve diagnostics when URL parsing fails --- src/pkgmgr/actions/mirror/remote_provision.py | 23 ++---- src/pkgmgr/actions/mirror/url_utils.py | 76 +++++++++++++++---- 2 files changed, 68 insertions(+), 31 deletions(-) diff --git a/src/pkgmgr/actions/mirror/remote_provision.py b/src/pkgmgr/actions/mirror/remote_provision.py index bbc82d7..d24cca1 100644 --- a/src/pkgmgr/actions/mirror/remote_provision.py +++ b/src/pkgmgr/actions/mirror/remote_provision.py @@ -9,7 +9,7 @@ from pkgmgr.core.remote_provisioning.ensure import EnsureOptions from .context import build_context from .git_remote import determine_primary_remote_url from .types import Repository -from .url_utils import hostport_from_git_url, normalize_provider_host +from .url_utils import normalize_provider_host, parse_repo_from_git_url def ensure_remote_repository( @@ -18,11 +18,6 @@ def ensure_remote_repository( all_repos: List[Repository], preview: bool, ) -> None: - """ - Ensure that the remote repository exists using provider APIs. - - This is ONLY called when ensure_remote=True. - """ ctx = build_context(repo, repositories_base_dir, all_repos) resolved_mirrors = ctx.resolved_mirrors @@ -31,15 +26,13 @@ def ensure_remote_repository( print("[INFO] No remote URL could be derived; skipping remote provisioning.") return - host_raw, _port = hostport_from_git_url(primary_url) + host_raw, owner_from_url, name_from_url = parse_repo_from_git_url(primary_url) host = normalize_provider_host(host_raw) - owner = repo.get("account") - name = repo.get("repository") - - if not host or not owner or not name: - print("[WARN] Missing host/account/repository; cannot ensure remote repo.") - print(f" host={host!r}, account={owner!r}, repository={name!r}") + if not host or not owner_from_url or not name_from_url: + print("[WARN] Could not derive host/owner/repository from URL; cannot ensure remote repo.") + print(f" url={primary_url!r}") + print(f" host={host!r}, owner={owner_from_url!r}, repository={name_from_url!r}") return print("------------------------------------------------------------") @@ -49,8 +42,8 @@ def ensure_remote_repository( spec = RepoSpec( host=str(host), - owner=str(owner), - name=str(name), + owner=str(owner_from_url), + name=str(name_from_url), private=bool(repo.get("private", True)), description=str(repo.get("description", "")), ) diff --git a/src/pkgmgr/actions/mirror/url_utils.py b/src/pkgmgr/actions/mirror/url_utils.py index fc14cf9..f97eb3c 100644 --- a/src/pkgmgr/actions/mirror/url_utils.py +++ b/src/pkgmgr/actions/mirror/url_utils.py @@ -6,14 +6,6 @@ from typing import Optional, Tuple def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]: - """ - Extract (host, port) from common Git remote URL formats. - - Supports: - - ssh://git@host:2201/owner/repo.git - - https://host/owner/repo.git - - git@host:owner/repo.git (scp-like; no explicit port) - """ url = (url or "").strip() if not url: return "", None @@ -24,7 +16,6 @@ def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]: if "@" in netloc: netloc = netloc.split("@", 1)[1] - # IPv6 bracket form: [::1]:2222 if netloc.startswith("[") and "]" in netloc: host = netloc[1:netloc.index("]")] rest = netloc[netloc.index("]") + 1 :] @@ -37,7 +28,6 @@ def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]: return netloc.strip(), None - # scp-like: git@host:owner/repo.git if "@" in url and ":" in url: after_at = url.split("@", 1)[1] host = after_at.split(":", 1)[0].strip() @@ -48,12 +38,6 @@ def hostport_from_git_url(url: str) -> Tuple[str, Optional[str]]: def normalize_provider_host(host: str) -> str: - """ - Normalize host for provider matching: - - strip brackets - - strip optional :port - - lowercase - """ host = (host or "").strip() if not host: return "" @@ -65,3 +49,63 @@ def normalize_provider_host(host: str) -> str: host = host.rsplit(":", 1)[0] return host.strip().lower() + + +def _strip_dot_git(name: str) -> str: + n = (name or "").strip() + if n.lower().endswith(".git"): + return n[:-4] + return n + + +def parse_repo_from_git_url(url: str) -> Tuple[str, Optional[str], Optional[str]]: + """ + Parse (host, owner, repo_name) from common Git remote URLs. + + Supports: + - ssh://git@host:2201/owner/repo.git + - https://host/owner/repo.git + - git@host:owner/repo.git + - host/owner/repo(.git) (best-effort) + + Returns: + (host, owner, repo_name) with owner/repo possibly None if not derivable. + """ + u = (url or "").strip() + if not u: + return "", None, None + + # URL-style (ssh://, https://, http://) + if "://" in u: + parsed = urlparse(u) + host = (parsed.hostname or "").strip() + path = (parsed.path or "").strip("/") + parts = [p for p in path.split("/") if p] + if len(parts) >= 2: + owner = parts[0] + repo_name = _strip_dot_git(parts[1]) + return host, owner, repo_name + return host, None, None + + # SCP-like: git@host:owner/repo.git + if "@" in u and ":" in u: + after_at = u.split("@", 1)[1] + host = after_at.split(":", 1)[0].strip() + path = after_at.split(":", 1)[1].strip("/") + parts = [p for p in path.split("/") if p] + if len(parts) >= 2: + owner = parts[0] + repo_name = _strip_dot_git(parts[1]) + return host, owner, repo_name + return host, None, None + + # Fallback: host/owner/repo.git + host = u.split("/", 1)[0].strip() + rest = u.split("/", 1)[1] if "/" in u else "" + parts = [p for p in rest.strip("/").split("/") if p] + if len(parts) >= 2: + owner = parts[0] + repo_name = _strip_dot_git(parts[1]) + return host, owner, repo_name + + return host, None, None