From 29503c607f68692f3d1b091d245951411b6e93a7 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Thu, 12 Oct 2017 14:31:07 +0100 Subject: [PATCH 01/12] Fix parsing when diff contains carriage returns Loose carriage returns appear as new lines inside Visual Studio. Account for this when calculating `DiffLine.NewLineNumber`. The `DiffLine.NewLineNumber` hasn't been updated yet. --- src/GitHub.Exports/Models/DiffUtilities.cs | 34 +++++++++++++++++++ .../Models/DiffUtilitiesTests.cs | 30 ++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index f604c294ae..9f361bc52a 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using System.Diagnostics.CodeAnalysis; +using System.Linq; namespace GitHub.Models { @@ -12,6 +13,9 @@ public static class DiffUtilities public static IEnumerable ParseFragment(string diff) { + diff = NormalizeLineEndings(diff); + diff = EscapeCarriageReturns(diff); + using (var reader = new StringReader(diff)) { string line; @@ -22,6 +26,8 @@ public static IEnumerable ParseFragment(string diff) while ((line = reader.ReadLine()) != null) { + line = UnescapeCarriageReturns(line); + var headerMatch = ChunkHeaderRegex.Match(line); if (headerMatch.Success) @@ -61,12 +67,14 @@ public static IEnumerable ParseFragment(string diff) case DiffChangeType.None: ++oldLine; ++newLine; + newLine += CountCarriageReturns(line); break; case DiffChangeType.Delete: ++oldLine; break; case DiffChangeType.Add: ++newLine; + newLine += CountCarriageReturns(line); break; } } @@ -113,6 +121,32 @@ public static DiffLine Match(IEnumerable diff, IList target return null; } + static string NormalizeLineEndings(string text) + { + return text.Replace("\r\n", "\n"); + } + + static string EscapeCarriageReturns(string text) + { + return text.Replace("\r", "\\r"); + } + + static string UnescapeCarriageReturns(string text) + { + return text.Replace("\\r", "\r"); + } + + static int CountCarriageReturns(string text) + { + int count = 0; + foreach (var ch in text) + { + if (ch == '\r') count++; + } + + return count; + } + [SuppressMessage("Microsoft.Globalization", "CA1305:SpecifyIFormatProvider", MessageId = "System.String.Format(System.String,System.Object)")] static DiffChangeType GetLineChange(char c) { diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index 8d379aae73..ce9637c3c8 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -96,6 +96,36 @@ public void NoNewLineNotAtEndOfChunk_CheckDiffLineNumber() Assert.Equal(3, line.DiffLineNumber); } + [Theory] + [InlineData("+foo\n+bar\n", "+foo", "+bar")] + [InlineData("+fo\ro\n+bar\n", "+fo\ro", "+bar")] + [InlineData("+foo\r\r\n+bar\n", "+foo\r", "+bar")] + public void FirstChunk_CheckLineContent(string diffLines, string contentLine0, string contentLine1) + { + var header = "@@ -1 +1 @@"; + var diff = header + "\n" + diffLines; + + var chunk = DiffUtilities.ParseFragment(diff).First(); + + Assert.Equal(contentLine0, chunk.Lines[0].Content); + Assert.Equal(contentLine1, chunk.Lines[1].Content); + } + + [Theory] + [InlineData("+foo\n+bar\n", 1, 2)] + [InlineData("+fo\ro\n+bar\n", 1, 3)] + [InlineData("+foo\r\r\n+bar\n", 1, 3)] + public void FirstChunk_CheckNewLineNumber(string diffLines, int lineNumber0, int lineNumber1) + { + var header = "@@ -1 +1 @@"; + var diff = header + "\n" + diffLines; + + var chunk = DiffUtilities.ParseFragment(diff).First(); + + Assert.Equal(lineNumber0, chunk.Lines[0].NewLineNumber); + Assert.Equal(lineNumber1, chunk.Lines[1].NewLineNumber); + } + [Fact] public void FirstChunk_CheckDiffLineZeroBased() { From 53780dd7a993e17ff1f7cb9799cfbb738adf7ff4 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Thu, 12 Oct 2017 14:53:38 +0100 Subject: [PATCH 02/12] Account for carriage returns when calculating `OldLineNumber` --- src/GitHub.Exports/Models/DiffUtilities.cs | 11 +++++------ .../Models/DiffUtilitiesTests.cs | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index 9f361bc52a..e0f47ef48a 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -62,19 +62,18 @@ public static IEnumerable ParseFragment(string diff) Content = line, }); + var lineCount = 1 + CountCarriageReturns(line); switch (type) { case DiffChangeType.None: - ++oldLine; - ++newLine; - newLine += CountCarriageReturns(line); + oldLine += lineCount; + newLine += lineCount; break; case DiffChangeType.Delete: - ++oldLine; + oldLine += lineCount; break; case DiffChangeType.Add: - ++newLine; - newLine += CountCarriageReturns(line); + newLine += lineCount; break; } } diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index ce9637c3c8..e52d759264 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -126,6 +126,21 @@ public void FirstChunk_CheckNewLineNumber(string diffLines, int lineNumber0, int Assert.Equal(lineNumber1, chunk.Lines[1].NewLineNumber); } + [Theory] + [InlineData("-foo\n-bar\n", 1, 2)] + [InlineData("-fo\ro\n-bar\n", 1, 3)] + [InlineData("-foo\r\r\n-bar\n", 1, 3)] + public void FirstChunk_CheckOldLineNumber(string diffLines, int lineNumber0, int lineNumber1) + { + var header = "@@ -1 +1 @@"; + var diff = header + "\n" + diffLines; + + var chunk = DiffUtilities.ParseFragment(diff).First(); + + Assert.Equal(lineNumber0, chunk.Lines[0].OldLineNumber); + Assert.Equal(lineNumber1, chunk.Lines[1].OldLineNumber); + } + [Fact] public void FirstChunk_CheckDiffLineZeroBased() { From ca54747bee0f40b691dff8394ec4ff7cfd407755 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Thu, 12 Oct 2017 15:41:19 +0100 Subject: [PATCH 03/12] Optimize for common case with no loose carriage returns --- src/GitHub.Exports/Models/DiffUtilities.cs | 25 +++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index e0f47ef48a..6514e84296 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -14,7 +14,13 @@ public static class DiffUtilities public static IEnumerable ParseFragment(string diff) { diff = NormalizeLineEndings(diff); - diff = EscapeCarriageReturns(diff); + + // Optimize for common case where there are no loose carriage returns. + var hasCarriageReturn = HasCarriageReturn(diff); + if (hasCarriageReturn) + { + diff = EscapeCarriageReturns(diff); + } using (var reader = new StringReader(diff)) { @@ -26,7 +32,10 @@ public static IEnumerable ParseFragment(string diff) while ((line = reader.ReadLine()) != null) { - line = UnescapeCarriageReturns(line); + if (hasCarriageReturn) + { + line = UnescapeCarriageReturns(line); + } var headerMatch = ChunkHeaderRegex.Match(line); @@ -62,7 +71,12 @@ public static IEnumerable ParseFragment(string diff) Content = line, }); - var lineCount = 1 + CountCarriageReturns(line); + var lineCount = 1; + if (hasCarriageReturn) + { + lineCount += CountCarriageReturns(line); + } + switch (type) { case DiffChangeType.None: @@ -125,6 +139,11 @@ static string NormalizeLineEndings(string text) return text.Replace("\r\n", "\n"); } + static bool HasCarriageReturn(string text) + { + return text.IndexOf('\r') != -1; + } + static string EscapeCarriageReturns(string text) { return text.Replace("\r", "\\r"); From e4c94ad4c53661c3062ccbb6fbd061fc441575e8 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Thu, 12 Oct 2017 15:50:20 +0100 Subject: [PATCH 04/12] Add some comments in parsing code --- src/GitHub.Exports/Models/DiffUtilities.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index 6514e84296..e6338944cb 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using System.Diagnostics.CodeAnalysis; -using System.Linq; namespace GitHub.Models { @@ -13,6 +12,7 @@ public static class DiffUtilities public static IEnumerable ParseFragment(string diff) { + // Turn Windows line endings into Unix line endings. diff = NormalizeLineEndings(diff); // Optimize for common case where there are no loose carriage returns. @@ -30,6 +30,7 @@ public static IEnumerable ParseFragment(string diff) int oldLine = -1; int newLine = -1; + // Diff lines should only be separated using a '\n' (lines can contain a '\r'). while ((line = reader.ReadLine()) != null) { if (hasCarriageReturn) From 88ff0e647c0e061c5742f4ac3c87648d279075d4 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Fri, 13 Oct 2017 12:09:53 +0100 Subject: [PATCH 05/12] Only touch diff if it contains loose carriage return Loose carriage returns are the exception, so we don't always want to cater for them! --- src/GitHub.Exports/Models/DiffUtilities.cs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index e6338944cb..8d1513b650 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -9,16 +9,17 @@ namespace GitHub.Models public static class DiffUtilities { static readonly Regex ChunkHeaderRegex = new Regex(@"^@@\s+\-(\d+),?\d*\s+\+(\d+),?\d*\s@@"); + static readonly Regex ContainsLooseCarriageReturnRegex = new Regex(@"[\r]([^\n]|\z)", RegexOptions.Compiled); public static IEnumerable ParseFragment(string diff) { - // Turn Windows line endings into Unix line endings. - diff = NormalizeLineEndings(diff); - // Optimize for common case where there are no loose carriage returns. - var hasCarriageReturn = HasCarriageReturn(diff); - if (hasCarriageReturn) + var containsLooseCarriageReturn = ContainsLooseCarriageReturn(diff); + + if (containsLooseCarriageReturn) { + // Turn Windows line endings into Unix line endings. + diff = NormalizeLineEndings(diff); diff = EscapeCarriageReturns(diff); } @@ -33,7 +34,7 @@ public static IEnumerable ParseFragment(string diff) // Diff lines should only be separated using a '\n' (lines can contain a '\r'). while ((line = reader.ReadLine()) != null) { - if (hasCarriageReturn) + if (containsLooseCarriageReturn) { line = UnescapeCarriageReturns(line); } @@ -73,7 +74,7 @@ public static IEnumerable ParseFragment(string diff) }); var lineCount = 1; - if (hasCarriageReturn) + if (containsLooseCarriageReturn) { lineCount += CountCarriageReturns(line); } @@ -140,9 +141,9 @@ static string NormalizeLineEndings(string text) return text.Replace("\r\n", "\n"); } - static bool HasCarriageReturn(string text) + static bool ContainsLooseCarriageReturn(string text) { - return text.IndexOf('\r') != -1; + return ContainsLooseCarriageReturnRegex.IsMatch(text); } static string EscapeCarriageReturns(string text) From 741546e40661d62bda6b2b8628bbf02d40be3a0d Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Fri, 13 Oct 2017 16:02:03 +0100 Subject: [PATCH 06/12] Change to use custom diff line reader This is faster/cleaner than doing string manipulation inside the diff parser. --- src/GitHub.Exports/Models/DiffUtilities.cs | 180 +++++++++--------- .../Models/DiffUtilitiesTests.cs | 29 +++ 2 files changed, 121 insertions(+), 88 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index 8d1513b650..afbef98d3a 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -9,99 +9,77 @@ namespace GitHub.Models public static class DiffUtilities { static readonly Regex ChunkHeaderRegex = new Regex(@"^@@\s+\-(\d+),?\d*\s+\+(\d+),?\d*\s@@"); - static readonly Regex ContainsLooseCarriageReturnRegex = new Regex(@"[\r]([^\n]|\z)", RegexOptions.Compiled); public static IEnumerable ParseFragment(string diff) { - // Optimize for common case where there are no loose carriage returns. - var containsLooseCarriageReturn = ContainsLooseCarriageReturn(diff); - - if (containsLooseCarriageReturn) + var reader = new LineReader(diff); + string line; + DiffChunk chunk = null; + int diffLine = -1; + int oldLine = -1; + int newLine = -1; + + while ((line = reader.ReadLine()) != null) { - // Turn Windows line endings into Unix line endings. - diff = NormalizeLineEndings(diff); - diff = EscapeCarriageReturns(diff); - } + var headerMatch = ChunkHeaderRegex.Match(line); - using (var reader = new StringReader(diff)) - { - string line; - DiffChunk chunk = null; - int diffLine = -1; - int oldLine = -1; - int newLine = -1; - - // Diff lines should only be separated using a '\n' (lines can contain a '\r'). - while ((line = reader.ReadLine()) != null) + if (headerMatch.Success) { - if (containsLooseCarriageReturn) + if (chunk != null) { - line = UnescapeCarriageReturns(line); + yield return chunk; } - var headerMatch = ChunkHeaderRegex.Match(line); + if (diffLine == -1) diffLine = 0; - if (headerMatch.Success) + chunk = new DiffChunk { - if (chunk != null) - { - yield return chunk; - } - - if (diffLine == -1) diffLine = 0; + OldLineNumber = oldLine = int.Parse(headerMatch.Groups[1].Value), + NewLineNumber = newLine = int.Parse(headerMatch.Groups[2].Value), + DiffLine = diffLine, + }; + } + else if (chunk != null) + { + var type = GetLineChange(line[0]); - chunk = new DiffChunk - { - OldLineNumber = oldLine = int.Parse(headerMatch.Groups[1].Value), - NewLineNumber = newLine = int.Parse(headerMatch.Groups[2].Value), - DiffLine = diffLine, - }; - } - else if (chunk != null) + // This might contain info about previous line (e.g. "\ No newline at end of file"). + if (type != DiffChangeType.Control) { - var type = GetLineChange(line[0]); + chunk.Lines.Add(new DiffLine + { + Type = type, + OldLineNumber = type != DiffChangeType.Add ? oldLine : -1, + NewLineNumber = type != DiffChangeType.Delete ? newLine : -1, + DiffLineNumber = diffLine, + Content = line, + }); - // This might contain info about previous line (e.g. "\ No newline at end of file"). - if (type != DiffChangeType.Control) + var lineCount = 1; + lineCount += CountCarriageReturns(line); + + switch (type) { - chunk.Lines.Add(new DiffLine - { - Type = type, - OldLineNumber = type != DiffChangeType.Add ? oldLine : -1, - NewLineNumber = type != DiffChangeType.Delete ? newLine : -1, - DiffLineNumber = diffLine, - Content = line, - }); - - var lineCount = 1; - if (containsLooseCarriageReturn) - { - lineCount += CountCarriageReturns(line); - } - - switch (type) - { - case DiffChangeType.None: - oldLine += lineCount; - newLine += lineCount; - break; - case DiffChangeType.Delete: - oldLine += lineCount; - break; - case DiffChangeType.Add: - newLine += lineCount; - break; - } + case DiffChangeType.None: + oldLine += lineCount; + newLine += lineCount; + break; + case DiffChangeType.Delete: + oldLine += lineCount; + break; + case DiffChangeType.Add: + newLine += lineCount; + break; } } - - if (diffLine != -1) ++diffLine; } - if (chunk != null) - { - yield return chunk; - } + if (diffLine != -1) ++diffLine; + } + + if (chunk != null) + { + yield return chunk; } } @@ -136,24 +114,50 @@ public static DiffLine Match(IEnumerable diff, IList target return null; } - static string NormalizeLineEndings(string text) + public class LineReader { - return text.Replace("\r\n", "\n"); - } + readonly string text; + int index = 0; - static bool ContainsLooseCarriageReturn(string text) - { - return ContainsLooseCarriageReturnRegex.IsMatch(text); - } + public LineReader(string text) + { + this.text = text; + } - static string EscapeCarriageReturns(string text) - { - return text.Replace("\r", "\\r"); - } + public string ReadLine() + { + if (EndOfText) + { + if (StartOfText) + { + index = -1; + return string.Empty; + } - static string UnescapeCarriageReturns(string text) - { - return text.Replace("\\r", "\r"); + return null; + } + + var startIndex = index; + index = text.IndexOf('\n', index); + var endIndex = index != -1 ? index : text.Length; + var length = endIndex - startIndex; + + if (index != -1) + { + if (index > 0 && text[index - 1] == '\r') + { + length--; + } + + index++; + } + + return text.Substring(startIndex, length); + } + + bool StartOfText => index == 0; + + bool EndOfText => index == -1 || index == text.Length; } static int CountCarriageReturns(string text) diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index e52d759264..138ec5fc10 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -100,6 +100,7 @@ public void NoNewLineNotAtEndOfChunk_CheckDiffLineNumber() [InlineData("+foo\n+bar\n", "+foo", "+bar")] [InlineData("+fo\ro\n+bar\n", "+fo\ro", "+bar")] [InlineData("+foo\r\r\n+bar\n", "+foo\r", "+bar")] + [InlineData("+\\r\n+\r\n", "+\\r", "+")] public void FirstChunk_CheckLineContent(string diffLines, string contentLine0, string contentLine1) { var header = "@@ -1 +1 @@"; @@ -314,5 +315,33 @@ public void NoLineMatchesFromNoLines() Assert.Equal(null, line); } } + + public class TheLineReaderClass + { + [Theory] + [InlineData("", "", null, null)] + [InlineData("\n", "", null, null)] + [InlineData("\r\n", "", null, null)] + [InlineData("1", "1", null, null)] + [InlineData("1\n2\n", "1", "2", null)] + [InlineData("1\n2", "1", "2", null)] + [InlineData("1\r\n2\n", "1", "2", null)] + [InlineData("1\r\n2", "1", "2", null)] + [InlineData("\r", "\r", null, null)] + [InlineData("\r\r", "\r\r", null, null)] + [InlineData("\r\r\n", "\r", null, null)] + [InlineData("\r_\n", "\r_", null, null)] + public void ReadLines(string text, string line1, string line2, string line3) + { + var lines = new[] { line1, line2, line3 }; + var lineReader = new DiffUtilities.LineReader(text); + + foreach (var expectLine in lines) + { + var line = lineReader.ReadLine(); + Assert.Equal(expectLine, line); + } + } + } } } From 845a9c048e3a8e7012004a039654efb4038fa6f7 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Fri, 13 Oct 2017 16:13:09 +0100 Subject: [PATCH 07/12] Optimize counting of carriage returns This is the only time we explicitly hunt of the `r` char. --- src/GitHub.Exports/Models/DiffUtilities.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index afbef98d3a..43af7793a0 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -163,9 +163,11 @@ public string ReadLine() static int CountCarriageReturns(string text) { int count = 0; - foreach (var ch in text) + int index = 0; + while ((index = text.IndexOf('\r', index)) != -1) { - if (ch == '\r') count++; + index++; + count++; } return count; From c0d7f0e672c140b58b0c2581a9b57d445e679103 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Mon, 16 Oct 2017 15:02:21 +0100 Subject: [PATCH 08/12] Add null check for LineReader(text) --- src/GitHub.Exports/Models/DiffUtilities.cs | 2 ++ .../Models/DiffUtilitiesTests.cs | 35 +++++++++++-------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index 43af7793a0..07e6341fe1 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text.RegularExpressions; using System.Diagnostics.CodeAnalysis; +using GitHub.Extensions; namespace GitHub.Models { @@ -121,6 +122,7 @@ public class LineReader public LineReader(string text) { + Guard.ArgumentNotNull(text, nameof(text)); this.text = text; } diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index 138ec5fc10..3b709032fb 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -319,29 +319,34 @@ public void NoLineMatchesFromNoLines() public class TheLineReaderClass { [Theory] - [InlineData("", "", null, null)] - [InlineData("\n", "", null, null)] - [InlineData("\r\n", "", null, null)] - [InlineData("1", "1", null, null)] - [InlineData("1\n2\n", "1", "2", null)] - [InlineData("1\n2", "1", "2", null)] - [InlineData("1\r\n2\n", "1", "2", null)] - [InlineData("1\r\n2", "1", "2", null)] - [InlineData("\r", "\r", null, null)] - [InlineData("\r\r", "\r\r", null, null)] - [InlineData("\r\r\n", "\r", null, null)] - [InlineData("\r_\n", "\r_", null, null)] - public void ReadLines(string text, string line1, string line2, string line3) + [InlineData("", new[] { "", null })] + [InlineData("\n", new[] { "", null })] + [InlineData("\r\n", new[] { "", null })] + [InlineData("1", new[] { "1", null })] + [InlineData("1\n2\n", new[] { "1", "2", null })] + [InlineData("1\n2", new[] { "1", "2", null })] + [InlineData("1\r\n2\n", new[] { "1", "2", null })] + [InlineData("1\r\n2", new[] { "1", "2", null })] + [InlineData("\r", new[] { "\r", null })] + [InlineData("\r\r", new[] { "\r\r", null })] + [InlineData("\r\r\n", new[] { "\r", null })] + [InlineData("\r_\n", new[] { "\r_", null })] + public void ReadLines(string text, string[] expectLines) { - var lines = new[] { line1, line2, line3 }; var lineReader = new DiffUtilities.LineReader(text); - foreach (var expectLine in lines) + foreach (var expectLine in expectLines) { var line = lineReader.ReadLine(); Assert.Equal(expectLine, line); } } + + [Fact] + public void Constructor_NullText_ArgumentNullException() + { + Assert.Throws(() => new DiffUtilities.LineReader(null)); + } } } } From 62bcddd266dbf2169ccfa2e2daff8e7a768018ac Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Mon, 16 Oct 2017 15:38:48 +0100 Subject: [PATCH 09/12] Add simple benchmark --- src/GitHub.Exports/Models/DiffUtilities.cs | 2 +- .../Models/DiffUtilitiesTests.cs | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index 07e6341fe1..e501f16c43 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -162,7 +162,7 @@ public string ReadLine() bool EndOfText => index == -1 || index == text.Length; } - static int CountCarriageReturns(string text) + public static int CountCarriageReturns(string text) { int count = 0; int index = 0; diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index 3b709032fb..7dd178ab9b 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -347,6 +347,23 @@ public void Constructor_NullText_ArgumentNullException() { Assert.Throws(() => new DiffUtilities.LineReader(null)); } + + static void Benchmark_ReadLineAndCountCarriageReturns_100000() + { + + var file = new System.Diagnostics.StackFrame(true).GetFileName(); + var text = File.ReadAllText(file); + + for (int count = 0; count < 100000; count++) + { + var lineReader = new DiffUtilities.LineReader(text); + string line; + while ((line = lineReader.ReadLine()) != null) + { + DiffUtilities.CountCarriageReturns(line); + } + } + } } } } From 095d62c61dad46d289445c56ac02b07f2c57b62d Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Tue, 17 Oct 2017 16:50:56 +0100 Subject: [PATCH 10/12] Add tests and null check for CountCarriageReturns --- src/GitHub.Exports/Models/DiffUtilities.cs | 29 ++++++++++--------- .../Models/DiffUtilitiesTests.cs | 21 +++++++++++++- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index e501f16c43..ba4b6c8d98 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -57,7 +57,7 @@ public static IEnumerable ParseFragment(string diff) }); var lineCount = 1; - lineCount += CountCarriageReturns(line); + lineCount += LineReader.CountCarriageReturns(line); switch (type) { @@ -123,6 +123,7 @@ public class LineReader public LineReader(string text) { Guard.ArgumentNotNull(text, nameof(text)); + this.text = text; } @@ -157,22 +158,24 @@ public string ReadLine() return text.Substring(startIndex, length); } - bool StartOfText => index == 0; + public static int CountCarriageReturns(string text) + { + Guard.ArgumentNotNull(text, nameof(text)); - bool EndOfText => index == -1 || index == text.Length; - } + int count = 0; + int index = 0; + while ((index = text.IndexOf('\r', index)) != -1) + { + index++; + count++; + } - public static int CountCarriageReturns(string text) - { - int count = 0; - int index = 0; - while ((index = text.IndexOf('\r', index)) != -1) - { - index++; - count++; + return count; } - return count; + bool StartOfText => index == 0; + + bool EndOfText => index == -1 || index == text.Length; } [SuppressMessage("Microsoft.Globalization", "CA1305:SpecifyIFormatProvider", MessageId = "System.String.Format(System.String,System.Object)")] diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index 7dd178ab9b..b063c27a6b 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -348,6 +348,25 @@ public void Constructor_NullText_ArgumentNullException() Assert.Throws(() => new DiffUtilities.LineReader(null)); } + [Theory] + [InlineData("", 0)] + [InlineData("\r", 1)] + [InlineData("\r\n", 1)] + [InlineData("\r\r", 2)] + [InlineData("\r-\r", 2)] + public void CountCarriageReturns(string text, int expectCount) + { + var count = DiffUtilities.LineReader.CountCarriageReturns(text); + + Assert.Equal(expectCount, count); + } + + [Fact] + public void CountCarriageReturns_NullText_ArgumentNullException() + { + Assert.Throws(() => DiffUtilities.LineReader.CountCarriageReturns(null)); + } + static void Benchmark_ReadLineAndCountCarriageReturns_100000() { @@ -360,7 +379,7 @@ static void Benchmark_ReadLineAndCountCarriageReturns_100000() string line; while ((line = lineReader.ReadLine()) != null) { - DiffUtilities.CountCarriageReturns(line); + DiffUtilities.LineReader.CountCarriageReturns(line); } } } From 25414558d385be26b1ec5b66b9ed3be2b40d3772 Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Thu, 19 Oct 2017 14:38:40 +0100 Subject: [PATCH 11/12] Remove redundant benchmark method --- .../Models/DiffUtilitiesTests.cs | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs index b063c27a6b..6aca462cf3 100644 --- a/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs +++ b/test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs @@ -366,23 +366,6 @@ public void CountCarriageReturns_NullText_ArgumentNullException() { Assert.Throws(() => DiffUtilities.LineReader.CountCarriageReturns(null)); } - - static void Benchmark_ReadLineAndCountCarriageReturns_100000() - { - - var file = new System.Diagnostics.StackFrame(true).GetFileName(); - var text = File.ReadAllText(file); - - for (int count = 0; count < 100000; count++) - { - var lineReader = new DiffUtilities.LineReader(text); - string line; - while ((line = lineReader.ReadLine()) != null) - { - DiffUtilities.LineReader.CountCarriageReturns(line); - } - } - } } } } From 66c84b1c6f79659e8f9d977483d4e9c0e01949cb Mon Sep 17 00:00:00 2001 From: Jamie Cansdale Date: Thu, 19 Oct 2017 14:53:54 +0100 Subject: [PATCH 12/12] Add link to LineReader implementations --- src/GitHub.Exports/Models/DiffUtilities.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/GitHub.Exports/Models/DiffUtilities.cs b/src/GitHub.Exports/Models/DiffUtilities.cs index ba4b6c8d98..8500415460 100644 --- a/src/GitHub.Exports/Models/DiffUtilities.cs +++ b/src/GitHub.Exports/Models/DiffUtilities.cs @@ -115,6 +115,10 @@ public static DiffLine Match(IEnumerable diff, IList target return null; } + /// Here are some alternative implementations we tried: + /// https://gist.github.com/shana/200e4719d4f571caab9dbf5921fa5276 + /// Scanning with `text.IndexOf('\n', index)` appears to the the best compromise for average .diff files. + /// It's likely that `text.IndexOfAny(new [] {'\r', '\n'}, index)` would be faster if lines were much longer. public class LineReader { readonly string text;