From 411b6e40d04e942b30704b643e7b6fd8d7e10702 Mon Sep 17 00:00:00 2001
From: un-pogaz <46523284+un-pogaz@users.noreply.github.com>
Date: Mon, 24 Mar 2025 08:03:43 +0100
Subject: [PATCH 1/3] New option: per-file-ignores (#3673)

---
 README.rst                        |  26 +++
 codespell_lib/_codespell.py       | 253 ++++++++++++++++++++++--------
 codespell_lib/tests/test_basic.py |  67 +++++++-
 3 files changed, 283 insertions(+), 63 deletions(-)
diff --git a/README.rst b/README.rst
index 8e256dc923..f5cbfe7903 100644
--- a/README.rst
+++ b/README.rst
@@ -156,6 +156,26 @@ Words should be separated by a comma.
        def wrod(wrods) # codespell:ignore
            pass
 
+Per-file ignores
+----------------
+
+To give a finer control, is possible to specified a additional set of words to ignore into a specific file only.
+
+1. ``--per-file-ignores``: A pair of arguments into the command line. The first provide a file, or a glob, and the second a comma-separated list of word to ignore for the given file:
+
+   .. code-block:: sh
+
+       codespell --per-file-ignores "*.ext" word1,word2,word3
+
+2. A comment anywhere in the file, preferably at the top. Words should be separated by a comma:
+
+   .. code-block:: python
+
+       # codespell:file-ignore wrod
+
+       def wrod(wrods)
+           pass
+
 Using a config file
 -------------------
 
@@ -173,6 +193,9 @@ be specified in this file (without the preceding dashes), for example:
     skip = *.po,*.ts,./src/3rdParty,./src/Test
     count =
     quiet-level = 3
+    [codespell.per-file-ignores]
+    *.ext1 = word1,word2,word3
+    *.ext2 = word4
 
 Python's
 `configparser <https://docs.python.org/3/library/configparser.html#supported-ini-file-structure>`_
@@ -191,6 +214,9 @@ previous config file:
     skip = '*.po,*.ts,./src/3rdParty,./src/Test'
     count = true
     quiet-level = 3
+    [tool.codespell.per-file-ignores]
+    "*.ext1" = 'word1,word2,word3'
+    "*.ext2" = 'word4'
 
 The above INI and TOML files are equivalent to running:
 
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index dee6a63ee8..f30e754b52 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -61,6 +61,9 @@
     r"(\b(?:https?|[ts]?ftp|file|git|smb)://[^\s]+(?=$|\s)|\b[\w.%+-]+@[\w.-]+\b)"
 )
 inline_ignore_regex = re.compile(r"[^\w\s]\s*codespell:ignore\b(\s+(?P<words>[\w,]*))?")
+inside_file_ignore_regex = re.compile(
+    r"[^\w\s]\s*codespell:file-ignore\b(\s+(?P<words>[\w,]*))"
+)
 USAGE = """
 \t%prog [OPTIONS] [file1 file2 ... fileN]
 """
@@ -333,7 +336,13 @@ def _split_lines(self, text: str, width: int) -> List[str]:
 def _toml_to_parseconfig(toml_dict: Dict[str, Any]) -> Dict[str, Any]:
     """Convert a dict read from a TOML file to the parseconfig.read_dict() format."""
     return {
-        k: "" if v is True else ",".join(v) if isinstance(v, list) else v
+        k: ""
+        if v is True
+        else ",".join(v)
+        if isinstance(v, list)
+        else _toml_to_parseconfig(v)
+        if isinstance(v, dict)
+        else v
         for k, v in toml_dict.items()
         if v is not False
     }
@@ -476,6 +485,18 @@ def parse_options(
         'the dictionary file. If set to "*", all '
         "misspelling in URIs and emails will be ignored.",
     )
+    parser.add_argument(
+        "--per-file-ignores",
+        action="append",
+        nargs=2,
+        help="Require a pair of arguments. The first argument "
+        "is a file to apply the second argument, a "
+        "comma-separated list of words to be ignored for "
+        "this file only. The first argument accepts globs "
+        "as well. The words in the second argument are case "
+        "sensitive based on how they are written in the "
+        "dictionary file.",
+    )
     parser.add_argument(
         "-r",
         "--regex",
@@ -660,7 +681,11 @@ def parse_options(
                 with open(toml_file, "rb") as f:
                     data = tomllib.load(f).get("tool", {})
                 if "codespell" in data:
-                    data["codespell"] = _toml_to_parseconfig(data["codespell"])
+                    data_toml = _toml_to_parseconfig(data["codespell"])
+                    for k in list(data_toml.keys()):
+                        if isinstance(data_toml[k], dict):
+                            data[f"codespell.{k}"] = data_toml.pop(k)
+                    data["codespell"] = data_toml
                 config.read_dict(data)
 
     # Collect which config files are going to be used
@@ -673,9 +698,9 @@ def parse_options(
 
     # Use config files
     config.read(used_cfg_files)
+    # Build a "fake" argv list using option name and value.
+    cfg_args = []
     if config.has_section("codespell"):
-        # Build a "fake" argv list using option name and value.
-        cfg_args = []
         for key in config["codespell"]:
             # Add option as arg.
             cfg_args.append(f"--{key}")
@@ -684,6 +709,20 @@ def parse_options(
             if val:
                 cfg_args.append(val)
 
+    # Iter dict arguments
+    for key in ["per-file-ignores"]:
+        section = f"codespell.{key}"
+        if config.has_section(section):
+            for name in config[section]:
+                # If value is blank, skip.
+                val = config[section][name]
+                if val:
+                    # Add option as pair args.
+                    cfg_args.append(f"--{key}")
+                    cfg_args.append(name)
+                    cfg_args.append(val)
+
+    if cfg_args:
         # Parse config file options.
         options = parser.parse_args(cfg_args)
 
@@ -722,6 +761,50 @@ def parse_ignore_words_option(
     return (ignore_words, ignore_words_cased)
 
 
+def parse_per_file_ignores_option(
+    per_file_ignores_option: List[Tuple[str, str]],
+) -> Dict[GlobMatch, Set[str]]:
+    per_file_ignores_cased: Dict[GlobMatch, Set[str]] = {}
+    if per_file_ignores_option:
+        for file, comma_separated_words in per_file_ignores_option:
+            per_file_ignores_cased[GlobMatch([file])] = {
+                word.strip() for word in comma_separated_words.split(",")
+            }
+    return per_file_ignores_cased
+
+
+def parse_dictionary_option(
+    parser: argparse.ArgumentParser,
+    dictionary_option: List[str],
+    builtin_option: str,
+) -> Tuple[int, List[str]]:
+    use_dictionaries = []
+    for dictionary in flatten_clean_comma_separated_arguments(dictionary_option):
+        if dictionary == "-":
+            # figure out which builtin dictionaries to use
+            use = sorted(set(builtin_option.split(",")))
+            for u in use:
+                for builtin in _builtin_dictionaries:
+                    if builtin[0] == u:
+                        use_dictionaries.append(
+                            os.path.join(_data_root, f"dictionary{builtin[2]}.txt")
+                        )
+                        break
+                else:
+                    return _usage_error(
+                        parser,
+                        f"ERROR: Unknown builtin dictionary: {u}",
+                    ), []
+        else:
+            if not os.path.isfile(dictionary):
+                return _usage_error(
+                    parser,
+                    f"ERROR: cannot find dictionary file: {dictionary}",
+                ), []
+            use_dictionaries.append(dictionary)
+    return 0, use_dictionaries
+
+
 def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
     with open(filename, encoding="utf-8") as f:
         exclude_lines.update(line.rstrip() for line in f)
@@ -736,6 +819,19 @@ def build_ignore_words(
         )
 
 
+def build_ignore_words_for_file(
+    ignore_words_cased: Set[str],
+    per_file_ignores: Dict[GlobMatch, Set[str]],
+    file_name: str,
+    file_path: str,
+) -> Set[str]:
+    ignore_words_cased_for_file = set(ignore_words_cased)
+    for m, v in per_file_ignores.items():
+        if m.match(file_name) or m.match(file_path):
+            ignore_words_cased_for_file.update(v)
+    return ignore_words_cased_for_file
+
+
 def is_hidden(filename: str, check_hidden: bool) -> bool:
     bfilename = os.path.basename(filename)
 
@@ -894,35 +990,16 @@ def parse_file(
         lines = f.readlines()
     else:
         if options.check_filenames:
-            for word in extract_words(filename, word_regex, ignore_word_regex):
-                if word in ignore_words_cased:
-                    continue
-                lword = word.lower()
-                if lword not in misspellings:
-                    continue
-                fix = misspellings[lword].fix
-                fixword = fix_case(word, misspellings[lword].data)
-
-                if summary and fix:
-                    summary.update(lword)
-
-                cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
-                cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
-                crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"
-
-                reason = misspellings[lword].reason
-                if reason:
-                    if options.quiet_level & QuietLevels.DISABLED_FIXES:
-                        continue
-                    creason = f"  | {colors.FILE}{reason}{colors.DISABLE}"
-                else:
-                    if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
-                        continue
-                    creason = ""
-
-                bad_count += 1
-
-                print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")
+            bad_count += parse_filename(
+                filename,
+                colors,
+                summary,
+                misspellings,
+                ignore_words_cased,
+                word_regex,
+                ignore_word_regex,
+                options,
+            )
 
         # ignore irregular files
         if not os.path.isfile(filename):
@@ -945,6 +1022,14 @@ def parse_file(
         except OSError:
             return bad_count
 
+    inside_file_to_ignore: Set[str] = set()
+    for line in lines:
+        match = inside_file_ignore_regex.search(line)
+        if match:
+            inside_file_to_ignore.update(
+                filter(None, (match.group("words") or "").split(","))
+            )
+
     for i, line in enumerate(lines):
         if line.rstrip() in exclude_lines:
             continue
@@ -982,7 +1067,11 @@ def parse_file(
             if word in ignore_words_cased:
                 continue
             lword = word.lower()
-            if lword in misspellings and lword not in extra_words_to_ignore:
+            if (
+                lword in misspellings
+                and lword not in extra_words_to_ignore
+                and lword not in inside_file_to_ignore
+            ):
                 # Sometimes we find a 'misspelling' which is actually a valid word
                 # preceded by a string escape sequence.  Ignore such cases as
                 # they're usually false alarms; see issue #17 among others.
@@ -1082,6 +1171,44 @@ def parse_file(
     return bad_count
 
 
+def parse_filename(
+    filename: str,
+    colors: TermColors,
+    summary: Optional[Summary],
+    misspellings: Dict[str, Misspelling],
+    ignore_words_cased: Set[str],
+    word_regex: Pattern[str],
+    ignore_word_regex: Optional[Pattern[str]],
+    options: argparse.Namespace,
+) -> int:
+    bad_count = 0
+    for word in extract_words(filename, word_regex, ignore_word_regex):
+        if word in ignore_words_cased:
+            continue
+        lword = word.lower()
+        if lword not in misspellings:
+            continue
+        fix = misspellings[lword].fix
+        fixword = fix_case(word, misspellings[lword].data)
+        if summary and fix:
+            summary.update(lword)
+        cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
+        cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
+        crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"
+        reason = misspellings[lword].reason
+        if reason:
+            if options.quiet_level & QuietLevels.DISABLED_FIXES:
+                continue
+            creason = f"  | {colors.FILE}{reason}{colors.DISABLE}"
+        else:
+            if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
+                continue
+            creason = ""
+        bad_count += 1
+        print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")
+    return bad_count
+
+
 def flatten_clean_comma_separated_arguments(
     arguments: Iterable[str],
 ) -> List[str]:
@@ -1187,6 +1314,17 @@ def main(*args: str) -> int:
                 )
             build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)
 
+    per_file_ignores = parse_per_file_ignores_option(options.per_file_ignores)
+    try:
+        for match in per_file_ignores:
+            match.match("/random/path")  # does not need a real path
+    except re.error:
+        return _usage_error(
+            parser,
+            "ERROR: --per-file-ignores has been fed an invalid glob, "
+            "try escaping special characters",
+        )
+
     uri_regex = options.uri_regex or uri_regex_def
     try:
         uri_regex = re.compile(uri_regex)
@@ -1200,32 +1338,13 @@ def main(*args: str) -> int:
         itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
     )
 
-    dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"])
-
-    use_dictionaries = []
-    for dictionary in dictionaries:
-        if dictionary == "-":
-            # figure out which builtin dictionaries to use
-            use = sorted(set(options.builtin.split(",")))
-            for u in use:
-                for builtin in _builtin_dictionaries:
-                    if builtin[0] == u:
-                        use_dictionaries.append(
-                            os.path.join(_data_root, f"dictionary{builtin[2]}.txt")
-                        )
-                        break
-                else:
-                    return _usage_error(
-                        parser,
-                        f"ERROR: Unknown builtin dictionary: {u}",
-                    )
-        else:
-            if not os.path.isfile(dictionary):
-                return _usage_error(
-                    parser,
-                    f"ERROR: cannot find dictionary file: {dictionary}",
-                )
-            use_dictionaries.append(dictionary)
+    error, use_dictionaries = parse_dictionary_option(
+        parser,
+        options.dictionary or ["-"],
+        options.builtin,
+    )
+    if error != 0:
+        return error
     misspellings: Dict[str, Misspelling] = {}
     for dictionary in use_dictionaries:
         build_dict(dictionary, misspellings, ignore_words)
@@ -1305,7 +1424,12 @@ def main(*args: str) -> int:
                         colors,
                         summary,
                         misspellings,
-                        ignore_words_cased,
+                        build_ignore_words_for_file(
+                            ignore_words_cased,
+                            per_file_ignores,
+                            file_,
+                            fname,
+                        ),
                         exclude_lines,
                         file_opener,
                         word_regex,
@@ -1330,7 +1454,12 @@ def main(*args: str) -> int:
                 colors,
                 summary,
                 misspellings,
-                ignore_words_cased,
+                build_ignore_words_for_file(
+                    ignore_words_cased,
+                    per_file_ignores,
+                    os.path.basename(filename),
+                    filename,
+                ),
                 exclude_lines,
                 file_opener,
                 word_regex,
diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index a6c05fc089..929964c7cc 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -396,6 +396,42 @@ def test_ignore_words_with_cases(
     assert cs.main("-Lmis", "-f", bad_name) == 0
 
 
+def test_per_file_ignores(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Test --per-file-ignores options."""
+    text = "abandonned abondon abilty"
+    bad_file = tmp_path / "bad.txt"
+    bad_file.write_text(text)
+    name = "ignore.txt"
+    fname = tmp_path / name
+    fname.write_text(text)
+    assert cs.main(tmp_path) == 6
+
+    assert cs.main(fname, bad_file, "--per-file-ignores", name, "abondon") == 5
+    assert cs.main(fname, bad_file, "--per-file-ignores", name, "abondon,abilty") == 4
+    # case sensitive
+    assert cs.main(fname, bad_file, "--per-file-ignores", name, "Abilty") == 6
+    assert cs.main(fname, bad_file, "--per-file-ignores", "name.txt", "abilty") == 6
+    # several pair arguments
+    assert (
+        cs.main(
+            fname,
+            bad_file,
+            # pair arguments 1
+            "--per-file-ignores",
+            name,
+            "abondon",
+            # pair arguments 2
+            "--per-file-ignores",
+            name,
+            "abilty",
+        )
+        == 4
+    )
+
+
 def test_ignore_word_list(
     tmp_path: Path,
     capsys: pytest.CaptureFixture[str],
@@ -453,6 +489,23 @@ def test_ignore_word_list(
             "You could also use line based igore (codespell:ignore igare) to igore ",
             2,
         ),
+        # file-ignore
+        ("abandonned abondon abilty \n # codespell:file-ignore abondon", 2),
+        ("abandonned abondon abilty \n // codespell:file-ignore abondon,abilty", 1),
+        (
+            "abandonned abondon abilty \n /* codespell:file-ignore abandonned,abondon,abilty",  # noqa: E501
+            0,
+        ),
+        # ignore unused ignore
+        ("abandonned abondon abilty \n # codespell:file-ignore nomenklatur", 3),
+        # ignore these as they aren't valid
+        ("abandonned abondon abilty \n # codespell:file-ignore", 3),
+        ("abandonned abondon abilty \n # codespell:file-igore word", 3),
+        # several in the same file
+        (
+            "// codespell:file-ignore abondon \n abandonned abondon abilty \n // codespell:file-ignore abilty",  # noqa: E501
+            1,
+        ),
     ],
 )
 def test_inline_ignores(
@@ -1286,15 +1339,17 @@ def test_config_toml(
     (d / "bad.txt").write_text("abandonned donn\n")
     (d / "good.txt").write_text("good")
     (d / "abandonned.txt").write_text("")
+    (d / "per-file.txt").write_text("donn")
 
     # Should fail when checking all files.
     result = cs.main(d, "--check-filenames", count=True, std=True)
     assert isinstance(result, tuple)
     code, stdout, _ = result
     # Code in this case is not exit code, but count of misspellings.
-    assert code == 3
+    assert code == 4
     assert "bad.txt" in stdout
     assert "abandonned.txt" in stdout
+    assert "per-file.txt" in stdout
 
     if kind.startswith("cfg"):
         conffile = tmp_path / "setup.cfg"
@@ -1304,6 +1359,8 @@ def test_config_toml(
 [codespell]
 skip = bad.txt, whatever.txt
 count =
+[codespell.per-file-ignores]
+per-file.txt = donn
 """
         else:
             assert kind == "cfg_multiline"
@@ -1314,6 +1371,8 @@ def test_config_toml(
    ,
 
 count =
+[codespell.per-file-ignores]
+per-file.txt = donn
 """
         conffile.write_text(text)
     else:
@@ -1327,6 +1386,8 @@ def test_config_toml(
 skip = 'bad.txt,whatever.txt'
 check-filenames = false
 count = true
+[tool.codespell.per-file-ignores]
+"per-file.txt" = 'donn'
 """
         else:
             assert kind == "toml_list"
@@ -1335,6 +1396,8 @@ def test_config_toml(
 skip = ['bad.txt', 'whatever.txt']
 check-filenames = false
 count = true
+[tool.codespell.per-file-ignores]
+"per-file.txt" = ['donn']
 """
         tomlfile.write_text(text)
 
@@ -1345,6 +1408,7 @@ def test_config_toml(
     assert code == 0
     assert "bad.txt" not in stdout
     assert "abandonned.txt" not in stdout
+    assert "per-file.txt" not in stdout
 
     # And both should automatically work if they're in cwd
     cwd = Path.cwd()
@@ -1358,6 +1422,7 @@ def test_config_toml(
     assert code == 0
     assert "bad.txt" not in stdout
     assert "abandonned.txt" not in stdout
+    assert "per-file.txt" not in stdout
 
 
 @contextlib.contextmanager

From 406b277dd7b6575144042ee986e1e387d5812171 Mon Sep 17 00:00:00 2001
From: un-pogaz <46523284+un-pogaz@users.noreply.github.com>
Date: Tue, 25 Mar 2025 07:47:20 +0100
Subject: [PATCH 2/3] test_bad_glob_per_file_ignores()

---
 codespell_lib/tests/test_basic.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index 929964c7cc..8e95ff7c58 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -207,6 +207,29 @@ def test_bad_glob(
     assert cs.main("--skip", "[[]b-a[]].txt", g) == 0
 
 
+def test_bad_glob_per_file_ignores(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    # disregard invalid globs, properly handle escaped globs
+    g = tmp_path / "glob"
+    g.mkdir()
+    fname = g / "[b-a].txt"
+    fname.write_text("abandonned\n")
+    assert cs.main(g) == 1
+    # bad glob is invalid
+    result = cs.main(g, "--per-file-ignores", "[b-a].txt", "abandonned", std=True)
+    assert isinstance(result, tuple)
+    code, _, stderr = result
+    if sys.hexversion < 0x030A05F0:  # Python < 3.10.5 raises re.error
+        assert code == EX_USAGE, "invalid glob"
+        assert "invalid glob" in stderr
+    else:  # Python >= 3.10.5 does not match
+        assert code == 1
+    # properly escaped glob is valid, and matches glob-like file name
+    assert cs.main(g, "--per-file-ignores", "[[]b-a[]].txt", "abandonned") == 0
+
+
 @pytest.mark.skipif(sys.platform != "linux", reason="Only supported on Linux")
 def test_permission_error(
     tmp_path: Path,

From a064dcb286853da62f1c5bb5601d1973f898c92e Mon Sep 17 00:00:00 2001
From: un_pogaz <46523284+un-pogaz@users.noreply.github.com>
Date: Sun, 3 Aug 2025 12:49:25 +0200
Subject: [PATCH 3/3] code review: apply TheGiraffe3 suggestions

Co-authored-by: Loymdayddaud <145969603+TheGiraffe3@users.noreply.github.com>
---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index f5cbfe7903..f8e2aa0b98 100644
--- a/README.rst
+++ b/README.rst
@@ -159,9 +159,9 @@ Words should be separated by a comma.
 Per-file ignores
 ----------------
 
-To give a finer control, is possible to specified a additional set of words to ignore into a specific file only.
+For finer control, it is possible to specify an additional set of words to ignore in a specific file.
 
-1. ``--per-file-ignores``: A pair of arguments into the command line. The first provide a file, or a glob, and the second a comma-separated list of word to ignore for the given file:
+1. ``--per-file-ignores``: A pair of arguments on the command line. The first provides a file, or a glob, and the second provides a comma-separated list of words to ignore for the given file:
 
    .. code-block:: sh