Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.10", "3.11", "3.12", "3.13"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ repos:
hooks:
- id: pyupgrade
description: "Automatically upgrade syntax for newer versions."
args: [--py3-plus, --py36-plus, --py38-plus, --py39-plus, --py310-plus]
args: [--py3-plus, --py36-plus]

- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
Expand Down
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ You can also replace `hub` with `ingest` in any GitHub URL to access the corespo
- **CLI tool**: Run it as a shell command
- **Python package**: Import it in your code

## 📚 Requirements

- Python 3.7+

## 📦 Installation

``` bash
Expand Down Expand Up @@ -61,7 +65,7 @@ gitingest --help

This will write the digest to a text file (default `digest.txt`) in your current working directory.

## 🐛 Python package usage
## 🐍 Python package usage

```python
# Synchronous usage
Expand All @@ -81,7 +85,7 @@ result = asyncio.run(ingest_async("path/to/directory"))

By default, this won't write a file, but file output can be enabled with the `output` argument.

## 🌐 Self-host
## 🐳 Self-host

1. Build the image:

Expand All @@ -104,7 +108,7 @@ If you are hosting it on a domain, you can specify the allowed hostnames via env
ALLOWED_HOSTS="example.com, localhost, 127.0.0.1"
```

## ✔️ Contributing to Gitingest
## 🤝 Contributing

### Non-technical ways to contribute

Expand All @@ -128,6 +132,6 @@ Gitingest aims to be friendly for first time contributors, with a simple python

Check out the NPM alternative 📦 Repomix: <https://github.com/yamadashy/repomix>

## Project Growth
## 🚀 Project Growth

[![Star History Chart](https://api.star-history.com/svg?repos=cyclotruc/gitingest&type=Date)](https://star-history.com/#cyclotruc/gitingest&Date)
11 changes: 5 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,21 @@ name = "gitingest"
version = "0.1.3"
description="CLI tool to analyze and create text dumps of codebases for LLMs"
readme = {file = "README.md", content-type = "text/markdown" }
requires-python = ">= 3.10"
requires-python = ">= 3.7"
dependencies = [
"click>=8.0.0",
"fastapi[standard]",
"python-dotenv",
"slowapi",
"starlette",
"tiktoken",
"uvicorn",
"typing_extensions; python_version < '3.10'",
]

license = {file = "LICENSE"}
authors = [{name = "Romain Courtois", email = "[email protected]"}]
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
install_requires=[
"click>=8.0.0",
"tiktoken",
"typing_extensions; python_version < '3.10'",
],
entry_points={
"console_scripts": [
"gitingest=gitingest.cli:main",
],
},
python_requires=">=3.6",
python_requires=">=3.7",
author="Romain Courtois",
author_email="[email protected]",
description="CLI tool to analyze and create text dumps of codebases for LLMs",
Expand Down
35 changes: 18 additions & 17 deletions src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# pylint: disable=no-value-for-parameter

import asyncio
from typing import Optional, Tuple

import click

Expand All @@ -19,31 +20,31 @@
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
def main(
source: str,
output: str | None,
output: Optional[str],
max_size: int,
exclude_pattern: tuple[str, ...],
include_pattern: tuple[str, ...],
branch: str | None,
exclude_pattern: Tuple[str, ...],
include_pattern: Tuple[str, ...],
branch: Optional[str],
):
"""
Main entry point for the CLI. This function is called when the CLI is run as a script.
Main entry point for the CLI. This function is called when the CLI is run as a script.

It calls the async main function to run the command.

Parameters
----------
source : str
The source directory or repository to analyze.
output : str | None
output : str, optional
The path where the output file will be written. If not specified, the output will be written
to a file named `<repo_name>.txt` in the current directory.
max_size : int
The maximum file size to process, in bytes. Files larger than this size will be ignored.
exclude_pattern : tuple[str, ...]
exclude_pattern : Tuple[str, ...]
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
include_pattern : tuple[str, ...]
include_pattern : Tuple[str, ...]
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str | None
branch : str, optional
The branch to clone (optional).
"""
# Main entry point for the CLI. This function is called when the CLI is run as a script.
Expand All @@ -52,11 +53,11 @@ def main(

async def _async_main(
source: str,
output: str | None,
output: Optional[str],
max_size: int,
exclude_pattern: tuple[str, ...],
include_pattern: tuple[str, ...],
branch: str | None,
exclude_pattern: Tuple[str, ...],
include_pattern: Tuple[str, ...],
branch: Optional[str],
) -> None:
"""
Analyze a directory or repository and create a text dump of its contents.
Expand All @@ -68,16 +69,16 @@ async def _async_main(
----------
source : str
The source directory or repository to analyze.
output : str | None
output : str, optional
The path where the output file will be written. If not specified, the output will be written
to a file named `<repo_name>.txt` in the current directory.
max_size : int
The maximum file size to process, in bytes. Files larger than this size will be ignored.
exclude_pattern : tuple[str, ...]
exclude_pattern : Tuple[str, ...]
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
include_pattern : tuple[str, ...]
include_pattern : Tuple[str, ...]
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str | None
branch : str, optional
The branch to clone (optional).

Raises
Expand Down
4 changes: 3 additions & 1 deletion src/gitingest/ignore_patterns.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
""" Default ignore patterns for Gitingest. """

DEFAULT_IGNORE_PATTERNS: set[str] = {
from typing import Set

DEFAULT_IGNORE_PATTERNS: Set[str] = {
# Python
"*.pyc",
"*.pyo",
Expand Down
41 changes: 21 additions & 20 deletions src/gitingest/notebook_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import warnings
from itertools import chain
from pathlib import Path
from typing import Any
from typing import Any, Dict, List, Optional

from gitingest.exceptions import InvalidNotebookError

Expand Down Expand Up @@ -32,12 +32,13 @@ def process_notebook(file: Path, include_output: bool = True) -> str:
"""
try:
with file.open(encoding="utf-8") as f:
notebook: dict[str, Any] = json.load(f)
notebook: Dict[str, Any] = json.load(f)
except json.JSONDecodeError as e:
raise InvalidNotebookError(f"Invalid JSON in notebook: {file}") from e

# Check if the notebook contains worksheets
if worksheets := notebook.get("worksheets"):
worksheets = notebook.get("worksheets")
if worksheets:
warnings.warn(
"Worksheets are deprecated as of IPEP-17. Consider updating the notebook. "
"(See: https://github.com/jupyter/nbformat and "
Expand All @@ -57,26 +58,27 @@ def process_notebook(file: Path, include_output: bool = True) -> str:
result = ["# Jupyter notebook converted to Python script."]

for cell in cells:
if cell_str := _process_cell(cell, include_output=include_output):
cell_str = _process_cell(cell, include_output=include_output)
if cell_str:
result.append(cell_str)

return "\n\n".join(result) + "\n"


def _process_cell(cell: dict[str, Any], include_output: bool) -> str | None:
def _process_cell(cell: Dict[str, Any], include_output: bool) -> Optional[str]:
"""
Process a Jupyter notebook cell and return the cell content as a string.

Parameters
----------
cell : dict[str, Any]
cell : Dict[str, Any]
The cell dictionary from a Jupyter notebook.
include_output : bool
Whether to include cell outputs in the generated script

Returns
-------
str | None
str, optional
The cell content as a string, or None if the cell is empty.

Raises
Expand All @@ -101,7 +103,8 @@ def _process_cell(cell: dict[str, Any], include_output: bool) -> str | None:
return f'"""\n{cell_str}\n"""'

# Add cell output as comments
if include_output and (outputs := cell.get("outputs")):
outputs = cell.get("outputs")
if include_output and outputs:

# Include cell outputs as comments
output_lines = []
Expand All @@ -118,18 +121,18 @@ def _process_cell(cell: dict[str, Any], include_output: bool) -> str | None:
return cell_str


def _extract_output(output: dict[str, Any]) -> list[str]:
def _extract_output(output: Dict[str, Any]) -> List[str]:
"""
Extract the output from a Jupyter notebook cell.

Parameters
----------
output : dict[str, Any]
output : Dict[str, Any]
The output dictionary from a Jupyter notebook cell.

Returns
-------
list[str]
List[str]
The output as a list of strings.

Raises
Expand All @@ -139,15 +142,13 @@ def _extract_output(output: dict[str, Any]) -> list[str]:
"""
output_type = output["output_type"]

match output_type:
case "stream":
return output["text"]
if output_type == "stream":
return output["text"]

case "execute_result" | "display_data":
return output["data"]["text/plain"]
if output_type in ("execute_result", "display_data"):
return output["data"]["text/plain"]

case "error":
return [f"Error: {output['ename']}: {output['evalue']}"]
if output_type == "error":
return [f"Error: {output['ename']}: {output['evalue']}"]

case _:
raise ValueError(f"Unknown output type: {output_type}")
raise ValueError(f"Unknown output type: {output_type}")
Loading
Loading