Skip to content

Commit f90595d

Browse files
AbhiRam162105, cyclotruc, filipchristiansen
authored
feat(cli):Add support for .gitingest file processing in query ingestion (#191)
Co-authored-by: Romain Courtois <[email protected]> Co-authored-by: Filip Christiansen <[email protected]>
1 parent 811fe69 commit f90595d

File tree

3 files changed

+71
-0
lines changed

3 files changed

+71
-0
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ repos:
102102
slowapi,
103103
starlette,
104104
tiktoken,
105+
tomli,
105106
uvicorn,
106107
]
107108
- id: pylint
@@ -118,6 +119,7 @@ repos:
118119
python-dotenv,
119120
slowapi,
120121
starlette,
122+
tomli,
121123
tiktoken,
122124
uvicorn,
123125
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ python-dotenv
44
slowapi
55
starlette
66
tiktoken
7+
tomli
78
uvicorn

src/gitingest/query_ingestion.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import locale
44
import os
55
import platform
6+
import warnings
67
from fnmatch import fnmatch
78
from pathlib import Path
89
from typing import Any, Dict, List, Optional, Set, Tuple, Union
910

1011
import tiktoken
12+
import tomli
1113

1214
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
1315
from gitingest.exceptions import (
@@ -899,4 +901,70 @@ def run_ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
899901
if query.type and query.type == "blob":
900902
return _ingest_single_file(path, query)
901903

904+
apply_gitingest_file(path, query)
902905
return _ingest_directory(path, query)
906+
907+
908+
def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
    """
    Apply the .gitingest file to the query object.

    This function reads the ``.gitingest`` TOML file in the specified path (if one exists) and merges the
    ``ignore_patterns`` entry of its ``[config]`` table into ``query.ignore_patterns``. Malformed content never
    raises: a ``UserWarning`` is emitted and the offending part is skipped, leaving the query untouched.

    Parameters
    ----------
    path : Path
        The path of the directory to ingest.
    query : ParsedQuery
        The parsed query object containing information about the repository and query parameters.
        It should have an attribute `ignore_patterns` which is either None or a set of strings.

    Warns
    -----
    UserWarning
        If the file is not valid TOML, if ``config`` is not a table, if ``ignore_patterns`` is neither a string
        nor a list/set, or if some of its entries are not strings.
    """
    path_gitingest = path / ".gitingest"

    if not path_gitingest.is_file():
        return

    try:
        with path_gitingest.open("rb") as f:
            data = tomli.load(f)
    except (tomli.TOMLDecodeError, UnicodeDecodeError) as exc:
        # TOML documents must be UTF-8; a file that fails to decode is just another form of invalid TOML.
        warnings.warn(f"Invalid TOML in {path_gitingest}: {exc}", UserWarning)
        return

    config_section = data.get("config", {})

    # `config` may legally be any TOML value (e.g. `config = "x"`); only a table supports the lookup below.
    if not isinstance(config_section, dict):
        warnings.warn(
            f"Expected a table for 'config', got {type(config_section)} in {path_gitingest}. Skipping.",
            UserWarning,
        )
        return

    ignore_patterns = config_section.get("ignore_patterns")

    if not ignore_patterns:
        return

    # If a single string is provided, make it a list of one element
    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    if not isinstance(ignore_patterns, (list, set)):
        warnings.warn(
            f"Expected a list/set for 'ignore_patterns', got {type(ignore_patterns)} in {path_gitingest}. Skipping.",
            UserWarning,
        )
        return

    # Split by type *before* building a set: TOML arrays may hold unhashable values (nested arrays, inline
    # tables), and calling set() on the raw list would raise TypeError instead of skipping them.
    valid_patterns = {pattern for pattern in ignore_patterns if isinstance(pattern, str)}
    invalid_patterns = [pattern for pattern in ignore_patterns if not isinstance(pattern, str)]

    if invalid_patterns:
        warnings.warn(f"Ignore patterns {invalid_patterns} are not strings. Skipping.", UserWarning)

    if not valid_patterns:
        return

    if query.ignore_patterns is None:
        query.ignore_patterns = valid_patterns
    else:
        query.ignore_patterns.update(valid_patterns)

0 commit comments

Comments
 (0)