Skip to content
This repository was archived by the owner on Jun 21, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 123 additions & 54 deletions src/GitHub.Exports/Models/DiffUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Diagnostics.CodeAnalysis;
using GitHub.Extensions;

namespace GitHub.Models
{
Expand All @@ -12,73 +13,74 @@ public static class DiffUtilities

/// <summary>
/// Parses the text of a unified diff fragment into a sequence of <see cref="DiffChunk"/>s.
/// </summary>
/// <param name="diff">The diff text. Lines are split on '\n' (an immediately preceding '\r' is dropped).</param>
/// <returns>
/// One chunk per line matching <c>ChunkHeaderRegex</c>, yielded lazily as the text is read.
/// Lines that appear before the first chunk header are ignored.
/// </returns>
public static IEnumerable<DiffChunk> ParseFragment(string diff)
{
    var reader = new LineReader(diff);
    string line;
    DiffChunk chunk = null;

    // -1 means "no chunk header seen yet"; the first header becomes diff line 0.
    int diffLine = -1;
    int oldLine = -1;
    int newLine = -1;

    while ((line = reader.ReadLine()) != null)
    {
        var headerMatch = ChunkHeaderRegex.Match(line);

        if (headerMatch.Success)
        {
            // A new header closes the chunk that was being built.
            if (chunk != null)
            {
                yield return chunk;
            }

            if (diffLine == -1)
            {
                diffLine = 0;
            }

            chunk = new DiffChunk
            {
                OldLineNumber = oldLine = int.Parse(headerMatch.Groups[1].Value),
                NewLineNumber = newLine = int.Parse(headerMatch.Groups[2].Value),
                DiffLine = diffLine,
            };
        }
        else if (chunk != null)
        {
            // An empty line has no marker character, so indexing line[0] would throw.
            // Treat it as an unchanged (context) line instead.
            // NOTE(review): assumes a blank line inside a chunk is a context line whose
            // leading space was stripped - confirm this is the desired handling.
            var type = line.Length == 0 ? DiffChangeType.None : GetLineChange(line[0]);

            // This might contain info about previous line (e.g. "\ No newline at end of file").
            if (type != DiffChangeType.Control)
            {
                chunk.Lines.Add(new DiffLine
                {
                    Type = type,
                    OldLineNumber = type != DiffChangeType.Add ? oldLine : -1,
                    NewLineNumber = type != DiffChangeType.Delete ? newLine : -1,
                    DiffLineNumber = diffLine,
                    Content = line,
                });

                // Every carriage return left inside the line content advances the
                // file line numbers by one more line.
                var lineCount = 1;
                lineCount += LineReader.CountCarriageReturns(line);

                switch (type)
                {
                    case DiffChangeType.None:
                        oldLine += lineCount;
                        newLine += lineCount;
                        break;
                    case DiffChangeType.Delete:
                        oldLine += lineCount;
                        break;
                    case DiffChangeType.Add:
                        newLine += lineCount;
                        break;
                }
            }
        }

        // Once the first header has been seen, every line read (including control
        // lines) advances the diff line counter.
        if (diffLine != -1)
        {
            ++diffLine;
        }
    }

    // Emit the chunk that was still open when the text ended.
    if (chunk != null)
    {
        yield return chunk;
    }
}

Expand Down Expand Up @@ -113,6 +115,73 @@ public static DiffLine Match(IEnumerable<DiffChunk> diff, IList<DiffLine> target
return null;
}

/// Splits a string into lines terminated by '\n' (a '\r' directly before the '\n' is dropped).
/// A '\r' that is not directly followed by '\n' stays in the returned line.
///
/// Here are some alternative implementations we tried:
/// https://gist.github.com/shana/200e4719d4f571caab9dbf5921fa5276
/// Scanning with `text.IndexOf('\n', index)` appears to be the best compromise for average .diff files.
/// It's likely that `text.IndexOfAny(new [] {'\r', '\n'}, index)` would be faster if lines were much longer.
public class LineReader
{
    readonly string text;

    // Position of the next unread character, or -1 once the final line has been returned.
    int index = 0;

    public LineReader(string text)
    {
        Guard.ArgumentNotNull(text, nameof(text));

        this.text = text;
    }

    /// Returns the next line without its terminator, or null when no lines remain.
    /// Empty text yields a single empty line before null.
    public string ReadLine()
    {
        var exhausted = index == -1 || index == text.Length;
        if (exhausted)
        {
            if (index != 0)
            {
                return null;
            }

            // Nothing has been read and there is nothing to read: the text is empty.
            index = -1;
            return string.Empty;
        }

        var lineStart = index;
        var newlineAt = text.IndexOf('\n', lineStart);

        if (newlineAt == -1)
        {
            // Last line has no terminator; hand back the remainder and mark the reader finished.
            index = -1;
            return text.Substring(lineStart, text.Length - lineStart);
        }

        index = newlineAt + 1;

        var lineEnd = newlineAt;
        if (newlineAt > 0 && text[newlineAt - 1] == '\r')
        {
            // Drop the '\r' of a "\r\n" terminator.
            lineEnd--;
        }

        return text.Substring(lineStart, lineEnd - lineStart);
    }

    /// Returns the number of '\r' characters in the given text.
    public static int CountCarriageReturns(string text)
    {
        Guard.ArgumentNotNull(text, nameof(text));

        var carriageReturns = 0;
        foreach (var character in text)
        {
            if (character == '\r')
            {
                carriageReturns++;
            }
        }

        return carriageReturns;
    }
}

[SuppressMessage("Microsoft.Globalization", "CA1305:SpecifyIFormatProvider", MessageId = "System.String.Format(System.String,System.Object)")]
static DiffChangeType GetLineChange(char c)
{
Expand Down
98 changes: 98 additions & 0 deletions test/GitHub.InlineReviews.UnitTests/Models/DiffUtilitiesTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,52 @@ public void NoNewLineNotAtEndOfChunk_CheckDiffLineNumber()
Assert.Equal(3, line.DiffLineNumber);
}

// Checks the Content of the first two lines of the first parsed chunk,
// including lines that contain carriage returns.
[Theory]
[InlineData("+foo\n+bar\n", "+foo", "+bar")]
[InlineData("+fo\ro\n+bar\n", "+fo\ro", "+bar")]
[InlineData("+foo\r\r\n+bar\n", "+foo\r", "+bar")]
[InlineData("+\\r\n+\r\n", "+\\r", "+")]
public void FirstChunk_CheckLineContent(string diffLines, string contentLine0, string contentLine1)
{
    // Prefix the lines under test with a chunk header.
    const string chunkHeader = "@@ -1 +1 @@";
    var fragment = chunkHeader + "\n" + diffLines;

    var parsedLines = DiffUtilities.ParseFragment(fragment).First().Lines;

    Assert.Equal(contentLine0, parsedLines[0].Content);
    Assert.Equal(contentLine1, parsedLines[1].Content);
}

// Checks NewLineNumber for the first two added lines of the first parsed chunk;
// a carriage return inside the first line pushes the second line's number up by one.
[Theory]
[InlineData("+foo\n+bar\n", 1, 2)]
[InlineData("+fo\ro\n+bar\n", 1, 3)]
[InlineData("+foo\r\r\n+bar\n", 1, 3)]
public void FirstChunk_CheckNewLineNumber(string diffLines, int lineNumber0, int lineNumber1)
{
    // Prefix the lines under test with a chunk header.
    const string chunkHeader = "@@ -1 +1 @@";
    var fragment = chunkHeader + "\n" + diffLines;

    var parsedLines = DiffUtilities.ParseFragment(fragment).First().Lines;

    Assert.Equal(lineNumber0, parsedLines[0].NewLineNumber);
    Assert.Equal(lineNumber1, parsedLines[1].NewLineNumber);
}

// Checks OldLineNumber for the first two deleted lines of the first parsed chunk;
// a carriage return inside the first line pushes the second line's number up by one.
[Theory]
[InlineData("-foo\n-bar\n", 1, 2)]
[InlineData("-fo\ro\n-bar\n", 1, 3)]
[InlineData("-foo\r\r\n-bar\n", 1, 3)]
public void FirstChunk_CheckOldLineNumber(string diffLines, int lineNumber0, int lineNumber1)
{
    // Prefix the lines under test with a chunk header.
    const string chunkHeader = "@@ -1 +1 @@";
    var fragment = chunkHeader + "\n" + diffLines;

    var parsedLines = DiffUtilities.ParseFragment(fragment).First().Lines;

    Assert.Equal(lineNumber0, parsedLines[0].OldLineNumber);
    Assert.Equal(lineNumber1, parsedLines[1].OldLineNumber);
}

[Fact]
public void FirstChunk_CheckDiffLineZeroBased()
{
Expand Down Expand Up @@ -269,5 +315,57 @@ public void NoLineMatchesFromNoLines()
Assert.Equal(null, line);
}
}

// Tests for DiffUtilities.LineReader: line splitting, carriage-return counting and null-argument checks.
public class TheLineReaderClass
{
    // Each case lists the exact sequence of values successive ReadLine calls must return,
    // ending with the null that signals no more lines.
    [Theory]
    [InlineData("", new[] { "", null })]
    [InlineData("\n", new[] { "", null })]
    [InlineData("\r\n", new[] { "", null })]
    [InlineData("1", new[] { "1", null })]
    [InlineData("1\n2\n", new[] { "1", "2", null })]
    [InlineData("1\n2", new[] { "1", "2", null })]
    [InlineData("1\r\n2\n", new[] { "1", "2", null })]
    [InlineData("1\r\n2", new[] { "1", "2", null })]
    [InlineData("\r", new[] { "\r", null })]
    [InlineData("\r\r", new[] { "\r\r", null })]
    [InlineData("\r\r\n", new[] { "\r", null })]
    [InlineData("\r_\n", new[] { "\r_", null })]
    public void ReadLines(string text, string[] expectLines)
    {
        var reader = new DiffUtilities.LineReader(text);

        for (var i = 0; i < expectLines.Length; i++)
        {
            Assert.Equal(expectLines[i], reader.ReadLine());
        }
    }

    [Fact]
    public void Constructor_NullText_ArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(() => new DiffUtilities.LineReader(null));
    }

    [Theory]
    [InlineData("", 0)]
    [InlineData("\r", 1)]
    [InlineData("\r\n", 1)]
    [InlineData("\r\r", 2)]
    [InlineData("\r-\r", 2)]
    public void CountCarriageReturns(string text, int expectCount)
    {
        var actualCount = DiffUtilities.LineReader.CountCarriageReturns(text);

        Assert.Equal(expectCount, actualCount);
    }

    [Fact]
    public void CountCarriageReturns_NullText_ArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(() => DiffUtilities.LineReader.CountCarriageReturns(null));
    }
}
}
}