Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7e65aac
WIP: Refactor backend to a rest api and make weak changes to the fron…
ix-56h Jul 1, 2025
e569af4
fix: remove references to ingest_id and add download feature
ix-56h Jul 1, 2025
e610f5d
Fix ruff issues
ix-56h Jul 1, 2025
1fb8085
fix: ruff errors
ix-56h Jul 1, 2025
736d17f
Merge branch 'main' into feat_rest_api
filipchristiansen Jul 1, 2025
c3eacaf
fix: ruff errors and add ruff to pyproject dev dependancies to avoid …
ix-56h Jul 1, 2025
74b8957
remove useless doc
ix-56h Jul 1, 2025
8693c55
refactor: centralize PAT validation, streamline repo checks & misc cl…
filipchristiansen Jul 1, 2025
f244494
WIP: Refactor backend to a rest api and make weak changes to the fron…
ix-56h Jul 1, 2025
2516879
fix weird behavior after rebase attempt
ix-56h Jul 1, 2025
fa1f7fa
clean after rebase
ix-56h Jul 1, 2025
552c3f8
Merge branch 'main' into feat_rest_api
ix-56h Jul 1, 2025
5317abf
remove api ingest endpoint test (duplicate)
ix-56h Jul 1, 2025
b897bf2
refactor: Refactor backend to a rest api and make weak changes to the…
ix-56h Jul 1, 2025
e986348
Remove uv.lock from version control
ix-56h Jul 1, 2025
0ddf255
remove useless api_models.md docs
ix-56h Jul 1, 2025
974f309
pre-commit cleaning
ix-56h Jul 1, 2025
990a90b
Update .gitignore
ix-56h Jul 1, 2025
535e726
resolve review comments and remove is_index argument from process_query
ix-56h Jul 1, 2025
5e87100
resolve remaining comments
ix-56h Jul 1, 2025
a234adc
Reset .gitignore to match main branch
ix-56h Jul 1, 2025
aa718a5
fix: remove result bool in Success response object since it's useless…
ix-56h Jul 1, 2025
373fe34
fix: clean query processor return types, remove deprecated fields, be…
ix-56h Jul 2, 2025
fb57c51
fix: unit tests, remove deprecated is_index from jinja templates
ix-56h Jul 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,14 @@ ipython_config.py
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
uv.lock
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#pdm
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
Expand Down Expand Up @@ -174,6 +175,7 @@ node_modules/
history.txt
cleanup.py
Caddyfile
uv.lock

# ignore default output directory
tmp/*
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,8 @@ asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
python_classes = "Test*"
python_functions = "test_*"

[dependency-groups]
dev = [
"ruff>=0.12.1",
]
31 changes: 6 additions & 25 deletions src/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
from pathlib import Path

from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi import FastAPI
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from slowapi.errors import RateLimitExceeded
from starlette.middleware.trustedhost import TrustedHostMiddleware

from server.routers import download, dynamic, index
from server.server_config import templates
from server.routers import dynamic, index
from server.routers.ingest import router as ingest
from server.server_utils import lifespan, limiter, rate_limit_exception_handler

# Load environment variables from .env file
Expand Down Expand Up @@ -58,7 +58,7 @@ async def health_check() -> dict[str, str]:
return {"status": "healthy"}


@app.head("/")
@app.head("/", include_in_schema=False)
async def head_root() -> HTMLResponse:
"""Respond to HTTP HEAD requests for the root URL.

Expand All @@ -73,26 +73,7 @@ async def head_root() -> HTMLResponse:
return HTMLResponse(content=None, headers={"content-type": "text/html; charset=utf-8"})


@app.get("/api/", response_class=HTMLResponse)
@app.get("/api", response_class=HTMLResponse)
async def api_docs(request: Request) -> HTMLResponse:
"""Render the API documentation page.

Parameters
----------
request : Request
The incoming HTTP request.

Returns
-------
HTMLResponse
A rendered HTML page displaying API documentation.

"""
return templates.TemplateResponse("api.jinja", {"request": request})


@app.get("/robots.txt")
@app.get("/robots.txt", include_in_schema=False)
async def robots() -> FileResponse:
"""Serve the ``robots.txt`` file to guide search engine crawlers.

Expand Down Expand Up @@ -120,5 +101,5 @@ async def llm_txt() -> FileResponse:

# Include routers for modular endpoints
app.include_router(index)
app.include_router(download)
app.include_router(ingest)
app.include_router(dynamic)
124 changes: 123 additions & 1 deletion src/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,134 @@

from __future__ import annotations

from pydantic import BaseModel
from enum import Enum
from typing import Literal, Union

from pydantic import BaseModel, Field, field_validator

# needed for type checking (pydantic)
from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import)


class PatternType(str, Enum):
    """Select whether a file-filtering pattern is an include or an exclude pattern.

    Members also subclass ``str``, so each one compares equal to its plain string value.
    """

    # Pattern lists what to include.
    INCLUDE = "include"
    # Pattern lists what to exclude.
    EXCLUDE = "exclude"


class IngestRequest(BaseModel):
    """Request payload accepted by the /api/ingest endpoint.

    Attributes
    ----------
    input_text : str
        Git repository URL or slug to ingest; stored stripped and must not be blank.
    max_file_size : int
        File size slider position used for filtering files, constrained to 0-500.
    pattern_type : PatternType
        Whether ``pattern`` is an include or an exclude pattern (defaults to exclude).
    pattern : str
        Glob/regex pattern string for file filtering (defaults to an empty string).
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories, if any.

    """

    input_text: str = Field(..., description="Git repository URL or slug to ingest")
    max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)")
    pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
    pattern: str = Field(default="", description="Glob/regex pattern for file filtering")
    token: str | None = Field(default=None, description="GitHub PAT for private repositories")

    @field_validator("input_text")
    @classmethod
    def validate_input_text(cls, v: str) -> str:
        """Return ``input_text`` stripped of surrounding whitespace, rejecting blank input."""
        stripped = v.strip()
        if stripped:
            return stripped
        err = "input_text cannot be empty"
        raise ValueError(err)

    @field_validator("pattern")
    @classmethod
    def validate_pattern(cls, v: str) -> str:
        """Return ``pattern`` stripped of surrounding whitespace, or ``""`` when it is falsy."""
        if not v:
            return ""
        return v.strip()


class IngestSuccessResponse(BaseModel):
    """Response payload returned by the /api/ingest endpoint on success.

    Attributes
    ----------
    result : Literal[True]
        Success marker; always ``True`` on this model.
    repo_url : str
        Original repository URL that was processed.
    short_repo_url : str
        Short repository URL in ``user/repo`` form.
    summary : str
        Ingestion summary, including token estimates.
    tree : str
        File tree structure of the repository.
    content : str
        Processed content of the repository files.
    default_file_size : int
        File size slider position that was used.
    pattern_type : str
        Pattern type that was used for filtering.
    pattern : str
        Pattern that was used for filtering.
    token : str | None
        Token that was used, if any.

    """

    result: Literal[True] = True
    repo_url: str = Field(..., description="Original repository URL")
    short_repo_url: str = Field(..., description="Short repository URL (user/repo)")
    summary: str = Field(..., description="Ingestion summary with token estimates")
    tree: str = Field(..., description="File tree structure")
    content: str = Field(..., description="Processed file content")
    default_file_size: int = Field(..., description="File size slider position used")
    pattern_type: str = Field(..., description="Pattern type used")
    pattern: str = Field(..., description="Pattern used")
    token: str | None = Field(default=None, description="Token used (if any)")


class IngestErrorResponse(BaseModel):
    """Response payload returned by the /api/ingest endpoint on failure.

    Attributes
    ----------
    error : str
        Message describing what went wrong.
    repo_url : str
        Repository URL that failed to process.
    default_file_size : int
        File size slider position that was used.
    pattern_type : str
        Pattern type that was used.
    pattern : str
        Pattern that was used.
    token : str | None
        Token that was used, if any.

    """

    error: str = Field(..., description="Error message")
    repo_url: str = Field(..., description="Repository URL that failed")
    default_file_size: int = Field(..., description="File size slider position used")
    pattern_type: str = Field(..., description="Pattern type used")
    pattern: str = Field(..., description="Pattern used")
    token: str | None = Field(default=None, description="Token used (if any)")


# Response type for /api/ingest: either the success model or the error model.
IngestResponse = Union[IngestSuccessResponse, IngestErrorResponse]


class QueryForm(BaseModel):
"""Form data for the query.

Expand Down
27 changes: 7 additions & 20 deletions src/server/query_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

from __future__ import annotations

from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING, cast
from typing import Any, cast

from gitingest.clone import clone_repo
from gitingest.ingestion import ingest_query
Expand All @@ -14,34 +13,26 @@
DEFAULT_FILE_SIZE_KB,
EXAMPLE_REPOS,
MAX_DISPLAY_SIZE,
templates,
)
from server.server_utils import Colors, log_slider_to_size

if TYPE_CHECKING:
from fastapi import Request
from starlette.templating import _TemplateResponse


async def process_query(
request: Request,
*,
input_text: str,
slider_position: int,
pattern_type: str = "exclude",
pattern: str = "",
is_index: bool = False,
token: str | None = None,
) -> _TemplateResponse:
) -> dict[str, Any]:
"""Process a query by parsing input, cloning a repository, and generating a summary.

Handle user input, process Git repository data, and prepare
a response for rendering a template with the processed results or an error message.

Parameters
----------
request : Request
The HTTP request object.
input_text : str
Input text provided by the user, typically a Git repository URL or slug.
slider_position : int
Expand All @@ -57,8 +48,8 @@ async def process_query(

Returns
-------
_TemplateResponse
Rendered template response containing the processed results or an error message.
dict[str, Any]
A dictionary containing the processed results or an error message.

Raises
------
Expand All @@ -79,12 +70,9 @@ async def process_query(
if token:
validate_github_token(token)

template = "index.jinja" if is_index else "git.jinja"
template_response = partial(templates.TemplateResponse, name=template)
max_file_size = log_slider_to_size(slider_position)

context = {
"request": request,
"repo_url": input_text,
"examples": EXAMPLE_REPOS if is_index else [],
"default_file_size": slider_position,
Expand Down Expand Up @@ -126,10 +114,10 @@ async def process_query(
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
print(f"{Colors.RED}{exc}{Colors.END}")

context["error_message"] = f"Error: {exc}"
context["error"] = f"Error: {exc}"
if "405" in str(exc):
context["error_message"] = "Repository not found. Please make sure it is public."
return template_response(context=context)
return context

if len(content) > MAX_DISPLAY_SIZE:
content = (
Expand All @@ -154,11 +142,10 @@ async def process_query(
"summary": summary,
"tree": tree,
"content": content,
"ingest_id": query.id,
},
)

return template_response(context=context)
return context


def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/server/routers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Module containing the routers for the FastAPI application."""

from server.routers.download import router as download
from server.routers.dynamic import router as dynamic
from server.routers.index import router as index
from server.routers.ingest import router as ingest

__all__ = ["download", "dynamic", "index"]
__all__ = ["dynamic", "index", "ingest"]
Loading
Loading