|
3 | 3 | import locale
|
4 | 4 | import os
|
5 | 5 | import platform
|
| 6 | +import warnings |
6 | 7 | from fnmatch import fnmatch
|
7 | 8 | from pathlib import Path
|
8 | 9 | from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
9 | 10 |
|
10 | 11 | import tiktoken
|
| 12 | +import tomli |
11 | 13 |
|
12 | 14 | from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
|
13 | 15 | from gitingest.exceptions import (
|
@@ -899,4 +901,70 @@ def run_ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
|
899 | 901 | if query.type and query.type == "blob":
|
900 | 902 | return _ingest_single_file(path, query)
|
901 | 903 |
|
| 904 | + apply_gitingest_file(path, query) |
902 | 905 | return _ingest_directory(path, query)
|
| 906 | + |
| 907 | + |
| 908 | +def apply_gitingest_file(path: Path, query: ParsedQuery) -> None: |
| 909 | + """ |
| 910 | + Apply the .gitingest file to the query object. |
| 911 | +
|
| 912 | + This function reads the .gitingest file in the specified path and updates the query object with the ignore |
| 913 | + patterns found in the file. |
| 914 | +
|
| 915 | + Parameters |
| 916 | + ---------- |
| 917 | + path : Path |
| 918 | + The path of the directory to ingest. |
| 919 | + query : ParsedQuery |
| 920 | + The parsed query object containing information about the repository and query parameters. |
| 921 | + It should have an attribute `ignore_patterns` which is either None or a set of strings. |
| 922 | + """ |
| 923 | + path_gitingest = path / ".gitingest" |
| 924 | + |
| 925 | + if not path_gitingest.is_file(): |
| 926 | + return |
| 927 | + |
| 928 | + try: |
| 929 | + with path_gitingest.open("rb") as f: |
| 930 | + data = tomli.load(f) |
| 931 | + except tomli.TOMLDecodeError as exc: |
| 932 | + warnings.warn(f"Invalid TOML in {path_gitingest}: {exc}", UserWarning) |
| 933 | + return |
| 934 | + |
| 935 | + config_section = data.get("config", {}) |
| 936 | + ignore_patterns = config_section.get("ignore_patterns") |
| 937 | + |
| 938 | + if not ignore_patterns: |
| 939 | + return |
| 940 | + |
| 941 | + # If a single string is provided, make it a list of one element |
| 942 | + if isinstance(ignore_patterns, str): |
| 943 | + ignore_patterns = [ignore_patterns] |
| 944 | + |
| 945 | + if not isinstance(ignore_patterns, (list, set)): |
| 946 | + warnings.warn( |
| 947 | + f"Expected a list/set for 'ignore_patterns', got {type(ignore_patterns)} in {path_gitingest}. Skipping.", |
| 948 | + UserWarning, |
| 949 | + ) |
| 950 | + return |
| 951 | + |
| 952 | + # Filter out duplicated patterns |
| 953 | + ignore_patterns = set(ignore_patterns) |
| 954 | + |
| 955 | + # Filter out any non-string entries |
| 956 | + valid_patterns = {pattern for pattern in ignore_patterns if isinstance(pattern, str)} |
| 957 | + invalid_patterns = ignore_patterns - valid_patterns |
| 958 | + |
| 959 | + if invalid_patterns: |
| 960 | + warnings.warn(f"Ignore patterns {invalid_patterns} are not strings. Skipping.", UserWarning) |
| 961 | + |
| 962 | + if not valid_patterns: |
| 963 | + return |
| 964 | + |
| 965 | + if query.ignore_patterns is None: |
| 966 | + query.ignore_patterns = valid_patterns |
| 967 | + else: |
| 968 | + query.ignore_patterns.update(valid_patterns) |
| 969 | + |
| 970 | + return |
0 commit comments