|
12 | 12 | from urllib.parse import urlparse, urlunparse |
13 | 13 |
|
14 | 14 | import git |
| 15 | +import httpx |
| 16 | +from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND |
15 | 17 |
|
16 | 18 | from gitingest.utils.compat_func import removesuffix |
17 | 19 | from gitingest.utils.exceptions import InvalidGitHubTokenError |
@@ -96,18 +98,17 @@ async def ensure_git_installed() -> None: |
96 | 98 | """ |
97 | 99 | try: |
98 | 100 | # Use GitPython to check git availability |
99 | | - git_cmd = git.Git() |
100 | | - git_cmd.version() |
| 101 | + git.Git().version() |
101 | 102 | except git.GitCommandError as exc: |
102 | 103 | msg = "Git is not installed or not accessible. Please install Git first." |
103 | 104 | raise RuntimeError(msg) from exc |
104 | 105 | except Exception as exc: |
105 | 106 | msg = "Git is not installed or not accessible. Please install Git first." |
106 | 107 | raise RuntimeError(msg) from exc |
107 | | - |
| 108 | + |
108 | 109 | if sys.platform == "win32": |
109 | 110 | try: |
110 | | - longpaths_value = git_cmd.config("core.longpaths") |
| 111 | + longpaths_value = git.Git().config("core.longpaths") |
111 | 112 | if longpaths_value.lower() != "true": |
112 | 113 | logger.warning( |
113 | 114 | "Git clone may fail on Windows due to long file paths. " |
@@ -214,24 +215,29 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str | |
214 | 215 | raise ValueError(msg) |
215 | 216 |
|
216 | 217 | await ensure_git_installed() |
217 | | - |
| 218 | + |
218 | 219 | # Use GitPython to get remote references |
219 | 220 | try: |
| 221 | + git_cmd = git.Git() |
| 222 | + |
| 223 | + # Prepare environment with authentication if needed |
| 224 | + env = None |
| 225 | + if token and is_github_host(url): |
| 226 | + auth_url = _add_token_to_url(url, token) |
| 227 | + url = auth_url |
| 228 | + |
220 | 229 | fetch_tags = ref_type == "tags" |
221 | 230 | to_fetch = "tags" if fetch_tags else "heads" |
222 | | - |
| 231 | + |
223 | 232 | # Build ls-remote command |
224 | | - cmd_args = [f"--{to_fetch}"] |
| 233 | + cmd_args = ["ls-remote", f"--{to_fetch}"] |
225 | 234 | if fetch_tags: |
226 | 235 | cmd_args.append("--refs") # Filter out peeled tag objects |
227 | 236 | cmd_args.append(url) |
228 | | - |
229 | | - # Run the command with proper authentication |
230 | | - with git_auth_context(url, token) as (git_cmd, auth_url): |
231 | | - # Replace the URL in cmd_args with the authenticated URL |
232 | | - cmd_args[-1] = auth_url # URL is the last argument |
233 | | - output = git_cmd.ls_remote(*cmd_args) |
234 | | - |
| 237 | + |
| 238 | + # Run the command |
| 239 | + output = git_cmd.execute(cmd_args, env=env) |
| 240 | + |
235 | 241 | # Parse output |
236 | 242 | return [ |
237 | 243 | line.split(f"refs/{to_fetch}/", 1)[1] |
@@ -260,28 +266,22 @@ def create_git_repo(local_path: str, url: str, token: str | None = None) -> git. |
260 | 266 | git.Repo |
261 | 267 | A GitPython Repo object configured with authentication. |
262 | 268 |
|
263 | | - Raises |
264 | | - ------ |
265 | | - ValueError |
266 | | - If the local path is not a valid git repository. |
267 | | -
|
268 | 269 | """ |
269 | 270 | try: |
270 | 271 | repo = git.Repo(local_path) |
271 | | - |
| 272 | + |
272 | 273 | # Configure authentication if needed |
273 | 274 | if token and is_github_host(url): |
274 | 275 | auth_header = create_git_auth_header(token, url=url) |
275 | 276 | # Set the auth header in git config for this repo |
276 | | - key, value = auth_header.split("=", 1) |
| 277 | + key, value = auth_header.split('=', 1) |
277 | 278 | repo.git.config(key, value) |
278 | | - |
| 279 | + |
| 280 | + return repo |
279 | 281 | except git.InvalidGitRepositoryError as exc: |
280 | 282 | msg = f"Invalid git repository at {local_path}" |
281 | 283 | raise ValueError(msg) from exc |
282 | 284 |
|
283 | | - return repo |
284 | | - |
285 | 285 |
|
286 | 286 | def create_git_auth_header(token: str, url: str = "https://github.com") -> str: |
287 | 287 | """Create a Basic authentication header for GitHub git operations. |
@@ -416,10 +416,10 @@ async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None |
416 | 416 | if config.blob: |
417 | 417 | # Remove the file name from the subpath when ingesting from a file url (e.g. blob/branch/path/file.txt) |
418 | 418 | subpath = str(Path(subpath).parent.as_posix()) |
419 | | - |
| 419 | + |
420 | 420 | try: |
421 | 421 | repo = create_git_repo(config.local_path, config.url, token) |
422 | | - repo.git.sparse_checkout("set", subpath) |
| 422 | + repo.git.execute(["sparse-checkout", "set", subpath]) |
423 | 423 | except git.GitCommandError as exc: |
424 | 424 | msg = f"Failed to configure sparse-checkout: {exc}" |
425 | 425 | raise RuntimeError(msg) from exc |
@@ -479,22 +479,27 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None) |
479 | 479 |
|
480 | 480 | """ |
481 | 481 | try: |
482 | | - # Execute ls-remote command with proper authentication |
483 | | - with git_auth_context(url, token) as (git_cmd, auth_url): |
484 | | - output = git_cmd.ls_remote(auth_url, pattern) |
| 482 | + git_cmd = git.Git() |
| 483 | + |
| 484 | + # Prepare authentication if needed |
| 485 | + auth_url = url |
| 486 | + if token and is_github_host(url): |
| 487 | + auth_url = _add_token_to_url(url, token) |
| 488 | + |
| 489 | + # Execute ls-remote command |
| 490 | + output = git_cmd.execute(["ls-remote", auth_url, pattern]) |
485 | 491 | lines = output.splitlines() |
486 | | - |
| 492 | + |
487 | 493 | sha = _pick_commit_sha(lines) |
488 | 494 | if not sha: |
489 | 495 | msg = f"{pattern!r} not found in {url}" |
490 | 496 | raise ValueError(msg) |
491 | 497 |
|
| 498 | + return sha |
492 | 499 | except git.GitCommandError as exc: |
493 | | - msg = f"Failed to resolve {pattern} in {url}:\n{exc}" |
| 500 | + msg = f"Failed to resolve {pattern} in {url}: {exc}" |
494 | 501 | raise ValueError(msg) from exc |
495 | 502 |
|
496 | | - return sha |
497 | | - |
498 | 503 |
|
499 | 504 | def _pick_commit_sha(lines: Iterable[str]) -> str | None: |
500 | 505 | """Return a commit SHA from ``git ls-remote`` output. |
@@ -529,3 +534,37 @@ def _pick_commit_sha(lines: Iterable[str]) -> str | None: |
529 | 534 | first_non_peeled = sha |
530 | 535 |
|
531 | 536 | return first_non_peeled # branch or lightweight tag (or None) |
| 537 | + |
| 538 | + |
| 539 | +def _add_token_to_url(url: str, token: str) -> str: |
| 540 | + """Add authentication token to GitHub URL. |
| 541 | +
|
| 542 | + Parameters |
| 543 | + ---------- |
| 544 | + url : str |
| 545 | + The original GitHub URL. |
| 546 | + token : str |
| 547 | + The GitHub token to add. |
| 548 | +
|
| 549 | + Returns |
| 550 | + ------- |
| 551 | + str |
| 552 | + The URL with embedded authentication. |
| 553 | +
|
| 554 | + """ |
| 555 | + from urllib.parse import urlparse, urlunparse |
| 556 | + |
| 557 | + parsed = urlparse(url) |
| 558 | + # Add token as username in URL (GitHub supports this) |
| 559 | + netloc = f"x-oauth-basic:{token}@{parsed.hostname}" |
| 560 | + if parsed.port: |
| 561 | + netloc += f":{parsed.port}" |
| 562 | + |
| 563 | + return urlunparse(( |
| 564 | + parsed.scheme, |
| 565 | + netloc, |
| 566 | + parsed.path, |
| 567 | + parsed.params, |
| 568 | + parsed.query, |
| 569 | + parsed.fragment |
| 570 | + )) |
0 commit comments