Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion models/repo_indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,14 +207,15 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
if err != nil {
return err
} else if !base.IsTextFile(fileContents) {
// FIXME: UTF-16 files will probably fail here
return nil
}
indexerUpdate := indexer.RepoIndexerUpdate{
Filepath: update.Filename,
Op: indexer.RepoIndexerOpUpdate,
Data: &indexer.RepoIndexerData{
RepoID: repo.ID,
Content: string(fileContents),
Content: string(base.ToUTF8DropErrors(fileContents)),
},
}
return indexerUpdate.AddToFlushingBatch(batch)
Expand Down
99 changes: 99 additions & 0 deletions modules/base/encoding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2014 The Gogs Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package base

import (
"fmt"

"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)

// ToUTF8WithErr converts content to UTF8 encoding
func ToUTF8WithErr(content []byte) (string, error) {
charsetLabel, err := DetectEncoding(content)
if err != nil {
return "", err
} else if charsetLabel == "UTF-8" {
return string(RemoveBOMIfPresent(content)), nil
}

encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return string(content), fmt.Errorf("Unknown encoding: %s", charsetLabel)
}

// If there is an error, we concatenate the nicely decoded part and the
// original left over. This way we won't lose data.
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
if err != nil {
result = append(result, content[n:]...)
}

result = RemoveBOMIfPresent(result)

return string(result), err
}

// ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible
func ToUTF8WithFallback(content []byte) []byte {
charsetLabel, err := DetectEncoding(content)
if err != nil || charsetLabel == "UTF-8" {
return RemoveBOMIfPresent(content)
}

encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return content
}

// If there is an error, we concatenate the nicely decoded part and the
// original left over. This way we won't lose data.
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
if err != nil {
return append(result, content[n:]...)
}

return RemoveBOMIfPresent(result)
}

// ToUTF8 converts content to UTF8 encoding and ignore error
func ToUTF8(content string) string {
res, _ := ToUTF8WithErr([]byte(content))
return res
}

// ToUTF8DropErrors makes sure the return string is valid utf-8; attempts conversion if possible
func ToUTF8DropErrors(content []byte) []byte {
charsetLabel, err := DetectEncoding(content)
if err != nil || charsetLabel == "UTF-8" {
return RemoveBOMIfPresent(content)
}

encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return content
}

// We ignore any non-decodable parts from the file.
// Some parts might be lost
var decoded []byte
decoder := encoding.NewDecoder()
idx := 0
for {
result, n, err := transform.Bytes(decoder, content[idx:])
decoded = append(decoded, result...)
if err == nil {
break
}
decoded = append(decoded, ' ')
idx = idx + n + 1
if idx >= len(content) {
break
}
}

return RemoveBOMIfPresent(decoded)
}
2 changes: 1 addition & 1 deletion modules/indexer/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const (
repoIndexerAnalyzer = "repoIndexerAnalyzer"
repoIndexerDocType = "repoIndexerDocType"

repoIndexerLatestVersion = 2
repoIndexerLatestVersion = 3
)

// repoIndexer (thread-safe) index for repository contents
Expand Down
56 changes: 0 additions & 56 deletions modules/templates/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ import (
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"

"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
"gopkg.in/editorconfig/editorconfig-core-go.v1"
)

Expand Down Expand Up @@ -274,60 +272,6 @@ func Sha1(str string) string {
return base.EncodeSha1(str)
}

// ToUTF8WithErr converts content to UTF8 encoding
func ToUTF8WithErr(content []byte) (string, error) {
charsetLabel, err := base.DetectEncoding(content)
if err != nil {
return "", err
} else if charsetLabel == "UTF-8" {
return string(base.RemoveBOMIfPresent(content)), nil
}

encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return string(content), fmt.Errorf("Unknown encoding: %s", charsetLabel)
}

// If there is an error, we concatenate the nicely decoded part and the
// original left over. This way we won't lose data.
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
if err != nil {
result = append(result, content[n:]...)
}

result = base.RemoveBOMIfPresent(result)

return string(result), err
}

// ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible
func ToUTF8WithFallback(content []byte) []byte {
charsetLabel, err := base.DetectEncoding(content)
if err != nil || charsetLabel == "UTF-8" {
return base.RemoveBOMIfPresent(content)
}

encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return content
}

// If there is an error, we concatenate the nicely decoded part and the
// original left over. This way we won't lose data.
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
if err != nil {
return append(result, content[n:]...)
}

return base.RemoveBOMIfPresent(result)
}

// ToUTF8 converts content to UTF8 encoding and ignore error
func ToUTF8(content string) string {
res, _ := ToUTF8WithErr([]byte(content))
return res
}

// ReplaceLeft replaces all prefixes 'oldS' in 's' with 'newS'.
func ReplaceLeft(s, oldS, newS string) string {
oldLen, newLen, i, n := len(oldS), len(newS), 0, 0
Expand Down
3 changes: 1 addition & 2 deletions routers/repo/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
)

const (
Expand Down Expand Up @@ -251,7 +250,7 @@ func Diff(ctx *context.Context) {
note := &git.Note{}
err = git.GetNote(ctx.Repo.GitRepo, commitID, note)
if err == nil {
ctx.Data["Note"] = string(templates.ToUTF8WithFallback(note.Message))
ctx.Data["Note"] = string(base.ToUTF8WithFallback(note.Message))
ctx.Data["NoteCommit"] = note.Commit
ctx.Data["NoteAuthor"] = models.ValidateCommitWithEmail(note.Commit)
}
Expand Down
3 changes: 1 addition & 2 deletions routers/repo/editor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/repofiles"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
"code.gitea.io/gitea/modules/upload"
"code.gitea.io/gitea/modules/util"
)
Expand Down Expand Up @@ -118,7 +117,7 @@ func editFile(ctx *context.Context, isNewFile bool) {

d, _ := ioutil.ReadAll(dataRc)
buf = append(buf, d...)
if content, err := templates.ToUTF8WithErr(buf); err != nil {
if content, err := base.ToUTF8WithErr(buf); err != nil {
log.Error("ToUTF8WithErr: %v", err)
ctx.Data["FileContent"] = string(buf)
} else {
Expand Down
7 changes: 3 additions & 4 deletions routers/repo/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/templates"
)

const (
Expand Down Expand Up @@ -160,7 +159,7 @@ func renderDirectory(ctx *context.Context, treeLink string) {
ctx.Data["FileSize"] = fileSize
} else {
d, _ := ioutil.ReadAll(dataRc)
buf = templates.ToUTF8WithFallback(append(buf, d...))
buf = base.ToUTF8WithFallback(append(buf, d...))

if markup.Type(readmeFile.Name()) != "" {
ctx.Data["IsMarkup"] = true
Expand Down Expand Up @@ -278,7 +277,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
}

d, _ := ioutil.ReadAll(dataRc)
buf = templates.ToUTF8WithFallback(append(buf, d...))
buf = base.ToUTF8WithFallback(append(buf, d...))

readmeExist := markup.IsReadmeFile(blob.Name())
ctx.Data["ReadmeExist"] = readmeExist
Expand All @@ -293,7 +292,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
} else {
// Building code view blocks with line number on server side.
var fileContent string
if content, err := templates.ToUTF8WithErr(buf); err != nil {
if content, err := base.ToUTF8WithErr(buf); err != nil {
log.Error("ToUTF8WithErr: %v", err)
fileContent = string(buf)
} else {
Expand Down