diff --git a/diffparser.go b/diffparser.go index 385648c..65ad4d7 100644 --- a/diffparser.go +++ b/diffparser.go @@ -4,23 +4,36 @@ package diffparser import ( + "errors" "regexp" "strconv" "strings" - - "errors" ) // FileMode represents the file status in a diff type FileMode int +const ( + // FileModeDeleted if the file is deleted + FileModeDeleted FileMode = iota + // FileModeModified if the file is modified + FileModeModified + // FileModeNew if the file is created and there is no diff + FileModeNew + // FileModeRenamed if the file is renamed + FileModeRenamed +) + const ( // DELETED if the file is deleted - DELETED FileMode = iota + // Deprecated: use FileModeDeleted instead. + DELETED = FileModeDeleted // MODIFIED if the file is modified - MODIFIED + // Deprecated: use FileModeModified instead. + MODIFIED = FileModeModified // NEW if the file is created and there is no diff - NEW + // Deprecated: use FileModeNew instead. + NEW = FileModeNew ) // DiffRange contains the DiffLine's @@ -39,13 +52,25 @@ type DiffRange struct { // DiffLineMode tells the line if added, removed or unchanged type DiffLineMode rune +const ( + // DiffLineModeAdded if the line is added (shown green in diff) + DiffLineModeAdded DiffLineMode = iota + // DiffLineModeRemoved if the line is deleted (shown red in diff) + DiffLineModeRemoved + // DiffLineModeUnchanged if the line is unchanged (not colored in diff) + DiffLineModeUnchanged +) + const ( // ADDED if the line is added (shown green in diff) - ADDED DiffLineMode = iota + // Deprecated: use DiffLineModeAdded instead. + ADDED = DiffLineModeAdded // REMOVED if the line is deleted (shown red in diff) - REMOVED + // Deprecated: use DiffLineModeRemoved instead. + REMOVED = DiffLineModeRemoved // UNCHANGED if the line is unchanged (not colored in diff) - UNCHANGED + // Deprecated: use DiffLineModeUnchanged instead. + UNCHANGED = DiffLineModeUnchanged ) // DiffLine is the least part of an actual diff @@ -64,6 +89,11 @@ type DiffHunk struct { WholeRange DiffRange } +// Length returns the hunks line length +func (hunk *DiffHunk) Length() int { + return len(hunk.WholeRange.Lines) + 1 +} + // DiffFile is the sum of diffhunks and holds the changes of the file features type DiffFile struct { DiffHeader string @@ -71,6 +101,8 @@ type DiffFile struct { OrigName string NewName string Hunks []*DiffHunk + // SimilarityIndex only valid when the mode is FileModeRenamed, ranging from 0 to 100 + SimilarityIndex int } // Diff is the collection of DiffFiles @@ -81,23 +113,19 @@ type Diff struct { PullID uint `sql:"index"` } -func (d *Diff) addFile(file *DiffFile) { - d.Files = append(d.Files, file) -} - // Changed returns a map of filename to lines changed in that file. Deleted // files are ignored. func (d *Diff) Changed() map[string][]int { dFiles := make(map[string][]int) for _, f := range d.Files { - if f.Mode == DELETED { + if f.Mode == FileModeDeleted { continue } for _, h := range f.Hunks { for _, dl := range h.NewRange.Lines { - if dl.Mode == ADDED { // TODO(waigani) return removed + if dl.Mode == DiffLineModeAdded { // TODO(waigani) return removed dFiles[f.NewName] = append(dFiles[f.NewName], dl.Number) } } @@ -107,43 +135,50 @@ func (d *Diff) Changed() map[string][]int { return dFiles } -func regFind(s string, reg string, group int) string { - re := regexp.MustCompile(reg) - return re.FindStringSubmatch(s)[group] -} - -func lineMode(line string) (*DiffLineMode, error) { - var m DiffLineMode +func lineMode(line string) (DiffLineMode, error) { switch line[:1] { case " ": - m = UNCHANGED + return DiffLineModeUnchanged, nil case "+": - m = ADDED + return DiffLineModeAdded, nil case "-": - m = REMOVED + return DiffLineModeRemoved, nil default: - return nil, errors.New("could not parse line mode for line: \"" + line + "\"") + return DiffLineMode(0), errors.New("could not parse line mode for line: \"" + line + "\"") } - return &m, nil } +const ( + oldFilePrefix = "--- " + newFilePrefix = "+++ " + similarityPrefix = "similarity index " + renameFromPrefix = "rename from " + renameToPrefix = "rename to " + binaryPrefix = "Binary files " +) + +var ( + reinReg = regexp.MustCompile(`^index .+$`) + rempReg = regexp.MustCompile(`^(-|\+){3} .+$`) + hunkHeaderReg = regexp.MustCompile(`@@ \-(\d+),?(\d+)? \+(\d+),?(\d+)? @@ ?(.+)?`) +) + // Parse takes a diff, such as produced by "git diff", and parses it into a // Diff struct. func Parse(diffString string) (*Diff, error) { - var diff Diff - diff.Raw = diffString - lines := strings.Split(diffString, "\n") - - var file *DiffFile - var hunk *DiffHunk - var ADDEDCount int - var REMOVEDCount int - var inHunk bool - oldFilePrefix := "--- a/" - newFilePrefix := "+++ b/" - - var diffPosCount int - var firstHunkInFile bool + var ( + diff = Diff{Raw: diffString} + lines = strings.Split(diffString, "\n") + + file *DiffFile + hunk *DiffHunk + addedCount int + removedCount int + inHunk bool + + diffPosCount int + firstHunkInFile bool + ) // Parse each line of diff. for idx, l := range lines { diffPosCount++ @@ -155,15 +190,13 @@ func Parse(diffString string) (*Diff, error) { file = &DiffFile{} header := l if len(lines) > idx+3 { - rein := regexp.MustCompile(`^index .+$`) - remp := regexp.MustCompile(`^(-|\+){3} .+$`) index := lines[idx+1] - if rein.MatchString(index) { + if reinReg.MatchString(index) { header = header + "\n" + index } mp1 := lines[idx+2] mp2 := lines[idx+3] - if remp.MatchString(mp1) && remp.MatchString(mp2) { + if rempReg.MatchString(mp1) && rempReg.MatchString(mp2) { header = header + "\n" + mp1 + "\n" + mp2 } } @@ -172,15 +205,31 @@ func Parse(diffString string) (*Diff, error) { firstHunkInFile = true // File mode. - file.Mode = MODIFIED + file.Mode = FileModeModified case l == "+++ /dev/null": - file.Mode = DELETED + file.Mode = FileModeDeleted case l == "--- /dev/null": - file.Mode = NEW + file.Mode = FileModeNew + case strings.HasPrefix(l, similarityPrefix): + file.Mode = FileModeRenamed + file.SimilarityIndex, _ = strconv.Atoi(strings.TrimSuffix(strings.TrimPrefix(l, similarityPrefix), "%")) case strings.HasPrefix(l, oldFilePrefix): - file.OrigName = strings.TrimPrefix(l, oldFilePrefix) + file.OrigName = parseFileName(strings.TrimPrefix(l, oldFilePrefix)) case strings.HasPrefix(l, newFilePrefix): - file.NewName = strings.TrimPrefix(l, newFilePrefix) + file.NewName = parseFileName(strings.TrimPrefix(l, newFilePrefix)) + case strings.HasPrefix(l, renameFromPrefix): + file.OrigName = parseFileName(strings.TrimPrefix(l, renameFromPrefix)) + case strings.HasPrefix(l, renameToPrefix): + file.NewName = parseFileName(strings.TrimPrefix(l, renameToPrefix)) + case strings.HasPrefix(l, binaryPrefix): + file.Mode = FileModeModified + binaryDiffer := strings.TrimSuffix(strings.TrimPrefix(l, binaryPrefix), " differ") + fileNames := strings.Split(binaryDiffer, " and ") + if len(fileNames) != 2 { + return nil, errors.New("invalid binary diff") + } + file.OrigName = parseFileName(fileNames[0]) + file.NewName = parseFileName(fileNames[1]) case strings.HasPrefix(l, "@@ "): if firstHunkInFile { diffPosCount = 0 @@ -193,8 +242,7 @@ func Parse(diffString string) (*Diff, error) { file.Hunks = append(file.Hunks, hunk) // Parse hunk heading for ranges - re := regexp.MustCompile(`@@ \-(\d+),?(\d+)? \+(\d+),?(\d+)? @@ ?(.+)?`) - m := re.FindStringSubmatch(l) + m := hunkHeaderReg.FindStringSubmatch(l) if len(m) < 5 { return nil, errors.New("Error parsing line: " + l) } @@ -237,15 +285,15 @@ func Parse(diffString string) (*Diff, error) { } // (re)set line counts - ADDEDCount = hunk.NewRange.Start - REMOVEDCount = hunk.OrigRange.Start + addedCount = hunk.NewRange.Start + removedCount = hunk.OrigRange.Start case inHunk && isSourceLine(l): m, err := lineMode(l) if err != nil { return nil, err } line := DiffLine{ - Mode: *m, + Mode: m, Content: l[1:], Position: diffPosCount, } @@ -253,27 +301,27 @@ func Parse(diffString string) (*Diff, error) { origLine := line // add lines to ranges - switch *m { - case ADDED: - newLine.Number = ADDEDCount + switch m { + case DiffLineModeAdded: + newLine.Number = addedCount hunk.NewRange.Lines = append(hunk.NewRange.Lines, &newLine) hunk.WholeRange.Lines = append(hunk.WholeRange.Lines, &newLine) - ADDEDCount++ + addedCount++ - case REMOVED: - origLine.Number = REMOVEDCount + case DiffLineModeRemoved: + origLine.Number = removedCount hunk.OrigRange.Lines = append(hunk.OrigRange.Lines, &origLine) hunk.WholeRange.Lines = append(hunk.WholeRange.Lines, &origLine) - REMOVEDCount++ + removedCount++ - case UNCHANGED: - newLine.Number = ADDEDCount + case DiffLineModeUnchanged: + newLine.Number = addedCount hunk.NewRange.Lines = append(hunk.NewRange.Lines, &newLine) hunk.WholeRange.Lines = append(hunk.WholeRange.Lines, &newLine) - origLine.Number = REMOVEDCount + origLine.Number = removedCount hunk.OrigRange.Lines = append(hunk.OrigRange.Lines, &origLine) - ADDEDCount++ - REMOVEDCount++ + addedCount++ + removedCount++ } } } @@ -281,6 +329,26 @@ func Parse(diffString string) (*Diff, error) { return &diff, nil } +func parseFileName(filenameWithPrefix string) string { + if strings.HasPrefix(filenameWithPrefix, "a/") { + return strings.TrimPrefix(filenameWithPrefix, "a/") + } + if strings.HasPrefix(filenameWithPrefix, "b/") { + return strings.TrimPrefix(filenameWithPrefix, "b/") + } + if strings.HasPrefix(filenameWithPrefix, `"`) { + filenameWithPrefix = strings.TrimSuffix(strings.TrimPrefix(filenameWithPrefix, `"`), `"`) + if strings.HasPrefix(filenameWithPrefix, "a/") { + return decodeOctalString(strings.TrimPrefix(filenameWithPrefix, "a/")) + } + if strings.HasPrefix(filenameWithPrefix, "b/") { + return decodeOctalString(strings.TrimPrefix(filenameWithPrefix, "b/")) + } + return decodeOctalString(filenameWithPrefix) + } + return filenameWithPrefix +} + func isSourceLine(line string) bool { if line == `\ No newline at end of file` { return false @@ -291,7 +359,10 @@ func isSourceLine(line string) bool { return true } -// Length returns the hunks line length -func (hunk *DiffHunk) Length() int { - return len(hunk.WholeRange.Lines) + 1 +func decodeOctalString(s string) string { + s2, err := strconv.Unquote(`"` + s + `"`) + if err != nil { + return s + } + return s2 } diff --git a/diffparser_test.go b/diffparser_test.go index 75aeca1..a9911af 100644 --- a/diffparser_test.go +++ b/diffparser_test.go @@ -19,48 +19,75 @@ func setup(t *testing.T) *Diff { diff, err := Parse(string(byt)) require.NoError(t, err) - require.Equal(t, len(diff.Files), 6) return diff } + func TestFileModeAndNaming(t *testing.T) { diff := setup(t) - for i, expected := range []struct { + tts := []struct { mode FileMode origName string newName string }{ { - mode: MODIFIED, + mode: FileModeModified, origName: "file1", newName: "file1", }, { - mode: DELETED, + mode: FileModeDeleted, origName: "file2", newName: "", }, { - mode: DELETED, + mode: FileModeDeleted, origName: "file3", newName: "", }, { - mode: NEW, + mode: FileModeNew, origName: "", newName: "file4", }, { - mode: NEW, + mode: FileModeNew, origName: "", newName: "newname", }, { - mode: DELETED, + mode: FileModeDeleted, origName: "symlink", newName: "", }, - } { + { + mode: FileModeModified, + origName: "file5-中文", + newName: "file5-中文", + }, + { + mode: FileModeModified, + origName: "file6", + newName: "file6", + }, + { + mode: FileModeRenamed, + origName: "file7", + newName: "file7-renamed", + }, + { + mode: FileModeRenamed, + origName: "file8", + newName: "file8-中文", + }, + { + mode: FileModeModified, + origName: "file9.png", + newName: "file9.png", + }, + } + require.Equal(t, len(diff.Files), len(tts)) + for i, expected := range tts { file := diff.Files[i] t.Logf("testing file: %v", file) require.Equal(t, expected.mode, file.Mode) @@ -73,22 +100,22 @@ func TestHunk(t *testing.T) { diff := setup(t) expectedOrigLines := []DiffLine{ { - Mode: UNCHANGED, + Mode: DiffLineModeUnchanged, Number: 1, Content: "some", Position: 2, }, { - Mode: UNCHANGED, + Mode: DiffLineModeUnchanged, Number: 2, Content: "lines", Position: 3, }, { - Mode: REMOVED, + Mode: DiffLineModeRemoved, Number: 3, Content: "in", Position: 4, }, { - Mode: UNCHANGED, + Mode: DiffLineModeUnchanged, Number: 4, Content: "file1", Position: 5, @@ -97,22 +124,22 @@ func TestHunk(t *testing.T) { expectedNewLines := []DiffLine{ { - Mode: ADDED, + Mode: DiffLineModeAdded, Number: 1, Content: "add a line", Position: 1, }, { - Mode: UNCHANGED, + Mode: DiffLineModeUnchanged, Number: 2, Content: "some", Position: 2, }, { - Mode: UNCHANGED, + Mode: DiffLineModeUnchanged, Number: 3, Content: "lines", Position: 3, }, { - Mode: UNCHANGED, + Mode: DiffLineModeUnchanged, Number: 4, Content: "file1", Position: 5, @@ -135,3 +162,24 @@ func TestHunk(t *testing.T) { require.Equal(t, line, *newRange.Lines[i]) } } + +func TestDecodeOctalString(t *testing.T) { + tests := []struct { + input string + output string + }{ + { + input: `file-1.md`, + output: "file-1.md", + }, + { + input: `file-\344\270\255\346\226\207.md`, + output: "file-中文.md", + }, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + require.Equal(t, tt.output, decodeOctalString(tt.input)) + }) + } +} diff --git a/example.diff b/example.diff index cab81ec..1012b68 100644 --- a/example.diff +++ b/example.diff @@ -55,3 +55,44 @@ index 03b9162..0000000 @@ -1 +0,0 @@ -symlink-destination \ No newline at end of file +diff --git "a/file5-\344\270\255\346\226\207" "b/file5-\344\270\255\346\226\207" +index 2c95e05..7287b8f 100644 +--- "a/file5-\344\270\255\346\226\207" ++++ "b/file5-\344\270\255\346\226\207" +@@ -1,3 +1,4 @@ + some lines +-in ++ ++ + file5 +\ No newline at end of file +diff --git a/file6 b/file6 +index 508e616..d3bd7a2 100644 +--- a/file6 ++++ b/file6 +@@ -1,4 +1,4 @@ + @@some +-lines ++@@lines + in + file6 +\ No newline at end of file +diff --git a/file7 b/file7-renamed +similarity index 100% +rename from file7 +rename to file7-renamed +diff --git a/file8 "b/file8-\344\270\255\346\226\207" +similarity index 57% +rename from file8 +rename to "file8-\344\270\255\346\226\207" +index 02d41cb..a32f88b 100644 +--- a/file8 ++++ "b/file8-\344\270\255\346\226\207" +@@ -1,3 +1,2 @@ + some lines +-in + file8 +\ No newline at end of file +diff --git a/file9.png b/file9.png +index fb27f848f..1147ed3b4 100644 +Binary files a/file9.png and b/file9.png differ