From 54cf4bf3c01d2335dfde8baba162e48a5c1ffa70 Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Mon, 8 Sep 2025 12:49:51 +0200 Subject: [PATCH 1/4] Increase test coverage for difflib --- Lib/test/test_difflib.py | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 0eab3f523dc5fe..3efaefe5b2861f 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -29,6 +29,27 @@ def test_one_delete(self): ('delete', 40, 41, 40, 40), ('equal', 41, 81, 40, 80)]) + + def test_opcode_caching(self): + sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100) + self.assertEqual(list(sm.get_opcodes()), + [ ('insert', 0, 0, 0, 1), + ('equal', 0, 100, 1, 101)]) + + sm.a = 'a' * 40 + 'c' + 'b' * 40 + sm.b = 'a' * 40 + 'b' * 40 + self.assertEqual(list(sm.get_opcodes()), + [ ('insert', 0, 0, 0, 1), + ('equal', 0, 100, 1, 101)]) + + # To avoid caching in set_seqs. + sm.set_seqs("".join(list(sm.a)), "".join(list(sm.b))) + self.assertEqual(list(sm.get_opcodes()), + [ ('equal', 0, 40, 0, 40), + ('delete', 40, 41, 40, 40), + ('equal', 41, 81, 40, 80)]) + + def test_bjunk(self): sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ', a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40) @@ -293,6 +314,15 @@ def test_close_matches_aligned(self): '+ kitten\n', '+ puppy\n']) + def test_one_insert(self): + m = difflib.Differ().compare('b' * 2, 'a' + 'b' * 2) + self.assertEqual(list(m), ['+ a', ' b', ' b']) + + def test_one_delete(self): + m = difflib.Differ().compare('a' + 'b' * 2, 'b' * 2) + self.assertEqual(list(m), ['- a', ' b', ' b']) + + class TestOutputFormat(unittest.TestCase): def test_tab_delimiter(self): args = [['one'], ['two'], 'Original', 'Current', @@ -601,6 +631,22 @@ def test_longest_match_with_popular_chars(self): self.assertFalse(self.longer_match_exists(a, b, match.size)) +class TestCloseMatches(unittest.TestCase): + def test_invalid_inputs(self): + self.assertRaises(ValueError, difflib.get_close_matches, "spam", ['egg'], n=0) + self.assertRaises(ValueError, difflib.get_close_matches, "spam", ['egg'], n=-1) + self.assertRaises(ValueError, difflib.get_close_matches, "spam", ['egg'], cutoff=1.1) + self.assertRaises(ValueError, difflib.get_close_matches, "spam", ['egg'], cutoff=-0.1) + + +class TestRestore(unittest.TestCase): + def test_invalid_input(self): + with self.assertRaises(ValueError): + ''.join(difflib.restore([], 0)) + with self.assertRaises(ValueError): + ''.join(difflib.restore([], 3)) + + def setUpModule(): difflib.HtmlDiff._default_prefix = 0 From 8e97d08ee06c8f0a2bf6364ca3213aad1b6986ab Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Wed, 10 Sep 2025 15:19:24 +0200 Subject: [PATCH 2/4] Add comment about happy paths in doctests --- Lib/test/test_difflib.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 3efaefe5b2861f..a869cb419f1f92 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -632,6 +632,8 @@ def test_longest_match_with_popular_chars(self): class TestCloseMatches(unittest.TestCase): + # Happy paths are tested in the doctests of `difflib.get_close_matches`. + def test_invalid_inputs(self): self.assertRaises(ValueError, difflib.get_close_matches, "spam", ['egg'], n=0) self.assertRaises(ValueError, difflib.get_close_matches, "spam", ['egg'], n=-1) @@ -640,6 +642,8 @@ def test_invalid_inputs(self): class TestRestore(unittest.TestCase): + # Happy paths are tested in the doctests of `difflib.restore`. + def test_invalid_input(self): with self.assertRaises(ValueError): ''.join(difflib.restore([], 0)) From 5ec7140524b635841854abe46a9d9af282f65f92 Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Wed, 10 Sep 2025 15:29:48 +0200 Subject: [PATCH 3/4] Improve caching test --- Lib/test/test_difflib.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index a869cb419f1f92..b4fffb2e29bd61 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -29,26 +29,13 @@ def test_one_delete(self): ('delete', 40, 41, 40, 40), ('equal', 41, 81, 40, 80)]) - def test_opcode_caching(self): sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100) - self.assertEqual(list(sm.get_opcodes()), - [ ('insert', 0, 0, 0, 1), - ('equal', 0, 100, 1, 101)]) - - sm.a = 'a' * 40 + 'c' + 'b' * 40 - sm.b = 'a' * 40 + 'b' * 40 - self.assertEqual(list(sm.get_opcodes()), + opcode = sm.get_opcodes() + self.assertEqual(opcode, [ ('insert', 0, 0, 0, 1), ('equal', 0, 100, 1, 101)]) - - # To avoid caching in set_seqs. - sm.set_seqs("".join(list(sm.a)), "".join(list(sm.b))) - self.assertEqual(list(sm.get_opcodes()), - [ ('equal', 0, 40, 0, 40), - ('delete', 40, 41, 40, 40), - ('equal', 41, 81, 40, 80)]) - + self.assertIs(opcode, sm.get_opcodes()) def test_bjunk(self): sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ', From 96637292fd307ca83b2a53549e2ddcdf2f7c2774 Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke <47750513+JanEricNitschke@users.noreply.github.com> Date: Thu, 11 Sep 2025 12:36:50 +0200 Subject: [PATCH 4/4] Add comment that caching is an implementation detail Co-authored-by: Petr Viktorin --- Lib/test/test_difflib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index b4fffb2e29bd61..771fd46e042a41 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -35,6 +35,8 @@ def test_opcode_caching(self): self.assertEqual(opcode, [ ('insert', 0, 0, 0, 1), ('equal', 0, 100, 1, 101)]) + # Implementation detail: opcodes are cached; + # `get_opcodes()` returns the same object self.assertIs(opcode, sm.get_opcodes()) def test_bjunk(self):