Skip to content

Support file renamed mode and other repairs #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 140 additions & 69 deletions diffparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,36 @@
package diffparser

import (
"errors"
"regexp"
"strconv"
"strings"

"errors"
)

// FileMode represents the file status in a diff.
//
// The constants below are numbered with iota starting at FileModeDeleted, so
// the zero value of FileMode is FileModeDeleted.
type FileMode int

const (
// FileModeDeleted if the file is deleted
FileModeDeleted FileMode = iota
// FileModeModified if the file is modified
FileModeModified
// FileModeNew if the file is created and there is no diff
FileModeNew
// FileModeRenamed if the file is renamed
FileModeRenamed
)

const (
// DELETED if the file is deleted
DELETED FileMode = iota
// Deprecated: use FileModeDeleted instead.
DELETED = FileModeDeleted
// MODIFIED if the file is modified
MODIFIED
// Deprecated: use FileModeModified instead.
MODIFIED = FileModeModified
// NEW if the file is created and there is no diff
NEW
// Deprecated: use FileModeNew instead.
NEW = FileModeNew
)

// DiffRange contains the DiffLines
Expand All @@ -39,13 +52,25 @@ type DiffRange struct {
// DiffLineMode tells whether a line is added, removed, or unchanged
type DiffLineMode rune

// Line modes. The values are numbered with iota starting at
// DiffLineModeAdded, so the zero value of DiffLineMode is DiffLineModeAdded.
const (
// DiffLineModeAdded if the line is added (shown green in diff)
DiffLineModeAdded DiffLineMode = iota
// DiffLineModeRemoved if the line is deleted (shown red in diff)
DiffLineModeRemoved
// DiffLineModeUnchanged if the line is unchanged (not colored in diff)
DiffLineModeUnchanged
)

const (
// ADDED if the line is added (shown green in diff)
ADDED DiffLineMode = iota
// Deprecated: use DiffLineModeAdded instead.
ADDED = DiffLineModeAdded
// REMOVED if the line is deleted (shown red in diff)
REMOVED
// Deprecated: use DiffLineModeRemoved instead.
REMOVED = DiffLineModeRemoved
// UNCHANGED if the line is unchanged (not colored in diff)
UNCHANGED
// Deprecated: use DiffLineModeUnchanged instead.
UNCHANGED = DiffLineModeUnchanged
)

// DiffLine is the smallest part of an actual diff
Expand All @@ -64,13 +89,20 @@ type DiffHunk struct {
WholeRange DiffRange
}

// Length reports the hunk's length in lines: the number of entries in
// WholeRange.Lines plus one.
// NOTE(review): the extra line presumably accounts for the "@@" hunk header;
// confirm against callers.
func (h *DiffHunk) Length() int {
	bodyLines := len(h.WholeRange.Lines)
	return bodyLines + 1
}

// DiffFile is the sum of diffhunks and holds the changes of the file features
type DiffFile struct {
// DiffHeader is the header text collected for this file.
// NOTE(review): presumably the "diff" line plus any following "index" and
// "---"/"+++" lines gathered by Parse; confirm where it is assigned.
DiffHeader string
// Mode is the file status (deleted, modified, new or renamed).
Mode FileMode
// OrigName is the file name on the old side of the diff.
OrigName string
// NewName is the file name on the new side of the diff.
NewName string
// Hunks are the file's hunks, in the order Parse appended them.
Hunks []*DiffHunk
// SimilarityIndex only valid when the mode is FileModeRenamed, ranging from 0 to 100
SimilarityIndex int
}

// Diff is the collection of DiffFiles
Expand All @@ -81,23 +113,19 @@ type Diff struct {
PullID uint `sql:"index"`
}

// addFile appends file to the end of the diff's Files list.
func (d *Diff) addFile(file *DiffFile) {
	updated := append(d.Files, file)
	d.Files = updated
}

// Changed returns a map of filename to lines changed in that file. Deleted
// files are ignored.
func (d *Diff) Changed() map[string][]int {
dFiles := make(map[string][]int)

for _, f := range d.Files {
if f.Mode == DELETED {
if f.Mode == FileModeDeleted {
continue
}

for _, h := range f.Hunks {
for _, dl := range h.NewRange.Lines {
if dl.Mode == ADDED { // TODO(waigani) return removed
if dl.Mode == DiffLineModeAdded { // TODO(waigani) return removed
dFiles[f.NewName] = append(dFiles[f.NewName], dl.Number)
}
}
Expand All @@ -107,43 +135,50 @@ func (d *Diff) Changed() map[string][]int {
return dFiles
}

// regFind compiles the pattern reg, matches it against s, and returns the
// text captured by submatch number group (group 0 is the whole match).
//
// It returns "" when s does not match reg, or when group is negative or
// larger than the number of capture groups in the pattern; indexing the
// submatch slice without this check would panic in those cases.
//
// An invalid pattern still panics (regexp.MustCompile), because the function
// has no error result to report it through.
func regFind(s string, reg string, group int) string {
	re := regexp.MustCompile(reg)
	match := re.FindStringSubmatch(s)
	// FindStringSubmatch returns nil when there is no match, so len(match)
	// is 0 and any group index is rejected here.
	if group < 0 || group >= len(match) {
		return ""
	}
	return match[group]
}

func lineMode(line string) (*DiffLineMode, error) {
var m DiffLineMode
func lineMode(line string) (DiffLineMode, error) {
switch line[:1] {
case " ":
m = UNCHANGED
return DiffLineModeUnchanged, nil
case "+":
m = ADDED
return DiffLineModeAdded, nil
case "-":
m = REMOVED
return DiffLineModeRemoved, nil
default:
return nil, errors.New("could not parse line mode for line: \"" + line + "\"")
return DiffLineMode(0), errors.New("could not parse line mode for line: \"" + line + "\"")
}
return &m, nil
}

// Line prefixes that Parse recognises with strings.HasPrefix when scanning a
// diff line by line.
const (
// oldFilePrefix starts the line naming the old side of a file ("--- ").
oldFilePrefix = "--- "
// newFilePrefix starts the line naming the new side of a file ("+++ ").
newFilePrefix = "+++ "
// similarityPrefix starts the line carrying the rename similarity; the rest
// of the line, minus a trailing "%", is parsed as an integer.
similarityPrefix = "similarity index "
// renameFromPrefix starts the line carrying the original name of a renamed file.
renameFromPrefix = "rename from "
// renameToPrefix starts the line carrying the new name of a renamed file.
renameToPrefix = "rename to "
// binaryPrefix starts a "Binary files X and Y differ" line.
binaryPrefix = "Binary files "
)

// Regular expressions compiled once at package scope so Parse does not
// recompile them for every line.
var (
// reinReg matches a whole line that begins with "index ".
reinReg = regexp.MustCompile(`^index .+$`)
// rempReg matches a whole line made of three '-' or '+' characters, a
// space, and at least one more character (e.g. "--- a/x" or "+++ b/x").
rempReg = regexp.MustCompile(`^(-|\+){3} .+$`)
// hunkHeaderReg matches a hunk header of the form "@@ -N,M +N,M @@ text".
// Groups 1 and 3 capture the digits after '-' and '+'; groups 2 and 4
// capture the optional digits after each comma; group 5 captures the
// optional text following the closing "@@".
hunkHeaderReg = regexp.MustCompile(`@@ \-(\d+),?(\d+)? \+(\d+),?(\d+)? @@ ?(.+)?`)
)

// Parse takes a diff, such as produced by "git diff", and parses it into a
// Diff struct.
func Parse(diffString string) (*Diff, error) {
var diff Diff
diff.Raw = diffString
lines := strings.Split(diffString, "\n")

var file *DiffFile
var hunk *DiffHunk
var ADDEDCount int
var REMOVEDCount int
var inHunk bool
oldFilePrefix := "--- a/"
newFilePrefix := "+++ b/"

var diffPosCount int
var firstHunkInFile bool
var (
diff = Diff{Raw: diffString}
lines = strings.Split(diffString, "\n")

file *DiffFile
hunk *DiffHunk
addedCount int
removedCount int
inHunk bool

diffPosCount int
firstHunkInFile bool
)
// Parse each line of diff.
for idx, l := range lines {
diffPosCount++
Expand All @@ -155,15 +190,13 @@ func Parse(diffString string) (*Diff, error) {
file = &DiffFile{}
header := l
if len(lines) > idx+3 {
rein := regexp.MustCompile(`^index .+$`)
remp := regexp.MustCompile(`^(-|\+){3} .+$`)
index := lines[idx+1]
if rein.MatchString(index) {
if reinReg.MatchString(index) {
header = header + "\n" + index
}
mp1 := lines[idx+2]
mp2 := lines[idx+3]
if remp.MatchString(mp1) && remp.MatchString(mp2) {
if rempReg.MatchString(mp1) && rempReg.MatchString(mp2) {
header = header + "\n" + mp1 + "\n" + mp2
}
}
Expand All @@ -172,15 +205,31 @@ func Parse(diffString string) (*Diff, error) {
firstHunkInFile = true

// File mode.
file.Mode = MODIFIED
file.Mode = FileModeModified
case l == "+++ /dev/null":
file.Mode = DELETED
file.Mode = FileModeDeleted
case l == "--- /dev/null":
file.Mode = NEW
file.Mode = FileModeNew
case strings.HasPrefix(l, similarityPrefix):
file.Mode = FileModeRenamed
file.SimilarityIndex, _ = strconv.Atoi(strings.TrimSuffix(strings.TrimPrefix(l, similarityPrefix), "%"))
case strings.HasPrefix(l, oldFilePrefix):
file.OrigName = strings.TrimPrefix(l, oldFilePrefix)
file.OrigName = parseFileName(strings.TrimPrefix(l, oldFilePrefix))
case strings.HasPrefix(l, newFilePrefix):
file.NewName = strings.TrimPrefix(l, newFilePrefix)
file.NewName = parseFileName(strings.TrimPrefix(l, newFilePrefix))
case strings.HasPrefix(l, renameFromPrefix):
file.OrigName = parseFileName(strings.TrimPrefix(l, renameFromPrefix))
case strings.HasPrefix(l, renameToPrefix):
file.NewName = parseFileName(strings.TrimPrefix(l, renameToPrefix))
case strings.HasPrefix(l, binaryPrefix):
file.Mode = FileModeModified
binaryDiffer := strings.TrimSuffix(strings.TrimPrefix(l, binaryPrefix), " differ")
fileNames := strings.Split(binaryDiffer, " and ")
if len(fileNames) != 2 {
return nil, errors.New("invalid binary diff")
}
file.OrigName = parseFileName(fileNames[0])
file.NewName = parseFileName(fileNames[1])
case strings.HasPrefix(l, "@@ "):
if firstHunkInFile {
diffPosCount = 0
Expand All @@ -193,8 +242,7 @@ func Parse(diffString string) (*Diff, error) {
file.Hunks = append(file.Hunks, hunk)

// Parse hunk heading for ranges
re := regexp.MustCompile(`@@ \-(\d+),?(\d+)? \+(\d+),?(\d+)? @@ ?(.+)?`)
m := re.FindStringSubmatch(l)
m := hunkHeaderReg.FindStringSubmatch(l)
if len(m) < 5 {
return nil, errors.New("Error parsing line: " + l)
}
Expand Down Expand Up @@ -237,50 +285,70 @@ func Parse(diffString string) (*Diff, error) {
}

// (re)set line counts
ADDEDCount = hunk.NewRange.Start
REMOVEDCount = hunk.OrigRange.Start
addedCount = hunk.NewRange.Start
removedCount = hunk.OrigRange.Start
case inHunk && isSourceLine(l):
m, err := lineMode(l)
if err != nil {
return nil, err
}
line := DiffLine{
Mode: *m,
Mode: m,
Content: l[1:],
Position: diffPosCount,
}
newLine := line
origLine := line

// add lines to ranges
switch *m {
case ADDED:
newLine.Number = ADDEDCount
switch m {
case DiffLineModeAdded:
newLine.Number = addedCount
hunk.NewRange.Lines = append(hunk.NewRange.Lines, &newLine)
hunk.WholeRange.Lines = append(hunk.WholeRange.Lines, &newLine)
ADDEDCount++
addedCount++

case REMOVED:
origLine.Number = REMOVEDCount
case DiffLineModeRemoved:
origLine.Number = removedCount
hunk.OrigRange.Lines = append(hunk.OrigRange.Lines, &origLine)
hunk.WholeRange.Lines = append(hunk.WholeRange.Lines, &origLine)
REMOVEDCount++
removedCount++

case UNCHANGED:
newLine.Number = ADDEDCount
case DiffLineModeUnchanged:
newLine.Number = addedCount
hunk.NewRange.Lines = append(hunk.NewRange.Lines, &newLine)
hunk.WholeRange.Lines = append(hunk.WholeRange.Lines, &newLine)
origLine.Number = REMOVEDCount
origLine.Number = removedCount
hunk.OrigRange.Lines = append(hunk.OrigRange.Lines, &origLine)
ADDEDCount++
REMOVEDCount++
addedCount++
removedCount++
}
}
}

return &diff, nil
}

// parseFileName turns a file name as written in a diff line into a plain
// name.
//
// A name wrapped in double quotes has the leading quote and a trailing quote
// removed and, after prefix stripping, is passed through decodeOctalString.
// In both the quoted and unquoted case a single leading "a/" or "b/" is
// dropped; "a/" is checked first and only one prefix is ever removed. Any
// other input is returned unchanged.
func parseFileName(filenameWithPrefix string) string {
	name := filenameWithPrefix
	quoted := strings.HasPrefix(name, `"`)
	if quoted {
		name = strings.TrimSuffix(name[1:], `"`)
	}

	for _, side := range []string{"a/", "b/"} {
		if strings.HasPrefix(name, side) {
			name = name[len(side):]
			break
		}
	}

	if !quoted {
		return name
	}
	return decodeOctalString(name)
}

func isSourceLine(line string) bool {
if line == `\ No newline at end of file` {
return false
Expand All @@ -291,7 +359,10 @@ func isSourceLine(line string) bool {
return true
}

// Length returns the hunks line length
func (hunk *DiffHunk) Length() int {
return len(hunk.WholeRange.Lines) + 1
// decodeOctalString interprets s as the inside of a double-quoted Go string
// literal and returns the unescaped result, so backslash escapes such as
// octal "\344" become the bytes they denote. If s is not valid in that form
// (strconv.Unquote reports an error), s is returned unchanged.
func decodeOctalString(s string) string {
	if decoded, err := strconv.Unquote(`"` + s + `"`); err == nil {
		return decoded
	}
	return s
}
Loading