55package  repofiles
66
77import  (
8+ 	"bytes" 
89	"fmt" 
910	"path" 
1011	"strings" 
1112
13+ 	"golang.org/x/net/html/charset" 
14+ 	"golang.org/x/text/transform" 
15+ 
1216	"code.gitea.io/gitea/models" 
17+ 	"code.gitea.io/gitea/modules/base" 
1318	"code.gitea.io/gitea/modules/git" 
1419	"code.gitea.io/gitea/modules/lfs" 
20+ 	"code.gitea.io/gitea/modules/log" 
1521	"code.gitea.io/gitea/modules/setting" 
1622	"code.gitea.io/sdk/gitea" 
1723)
@@ -37,6 +43,70 @@ type UpdateRepoFileOptions struct {
3743	Committer     * IdentityOptions 
3844}
3945
46+ func  detectEncodingAndBOM (entry  * git.TreeEntry , repo  * models.Repository ) (string , bool ) {
47+ 	reader , err  :=  entry .Blob ().DataAsync ()
48+ 	if  err  !=  nil  {
49+ 		// return default 
50+ 		return  "UTF-8" , false 
51+ 	}
52+ 	defer  reader .Close ()
53+ 	buf  :=  make ([]byte , 1024 )
54+ 	n , err  :=  reader .Read (buf )
55+ 	if  err  !=  nil  {
56+ 		// return default 
57+ 		return  "UTF-8" , false 
58+ 	}
59+ 	buf  =  buf [:n ]
60+ 
61+ 	if  setting .LFS .StartServer  {
62+ 		meta  :=  lfs .IsPointerFile (& buf )
63+ 		if  meta  !=  nil  {
64+ 			meta , err  =  repo .GetLFSMetaObjectByOid (meta .Oid )
65+ 			if  err  !=  nil  &&  err  !=  models .ErrLFSObjectNotExist  {
66+ 				// return default 
67+ 				return  "UTF-8" , false 
68+ 			}
69+ 		}
70+ 		if  meta  !=  nil  {
71+ 			dataRc , err  :=  lfs .ReadMetaObject (meta )
72+ 			if  err  !=  nil  {
73+ 				// return default 
74+ 				return  "UTF-8" , false 
75+ 			}
76+ 			defer  dataRc .Close ()
77+ 			buf  =  make ([]byte , 1024 )
78+ 			n , err  =  dataRc .Read (buf )
79+ 			if  err  !=  nil  {
80+ 				// return default 
81+ 				return  "UTF-8" , false 
82+ 			}
83+ 			buf  =  buf [:n ]
84+ 		}
85+ 
86+ 	}
87+ 
88+ 	encoding , err  :=  base .DetectEncoding (buf )
89+ 	if  err  !=  nil  {
90+ 		// just default to utf-8 and no bom 
91+ 		return  "UTF-8" , false 
92+ 	}
93+ 	if  encoding  ==  "UTF-8"  {
94+ 		return  encoding , bytes .Equal (buf [0 :3 ], base .UTF8BOM )
95+ 	}
96+ 	charsetEncoding , _  :=  charset .Lookup (encoding )
97+ 	if  charsetEncoding  ==  nil  {
98+ 		return  "UTF-8" , false 
99+ 	}
100+ 
101+ 	result , n , err  :=  transform .String (charsetEncoding .NewDecoder (), string (buf ))
102+ 
103+ 	if  n  >  2  {
104+ 		return  encoding , bytes .Equal ([]byte (result )[0 :3 ], base .UTF8BOM )
105+ 	}
106+ 
107+ 	return  encoding , false 
108+ }
109+ 
40110// CreateOrUpdateRepoFile adds or updates a file in the given repository 
41111func  CreateOrUpdateRepoFile (repo  * models.Repository , doer  * models.User , opts  * UpdateRepoFileOptions ) (* gitea.FileResponse , error ) {
42112	// If no branch name is set, assume master 
@@ -118,6 +188,9 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
118188		opts .LastCommitID  =  commit .ID .String ()
119189	}
120190
191+ 	encoding  :=  "UTF-8" 
192+ 	bom  :=  false 
193+ 
121194	if  ! opts .IsNewFile  {
122195		fromEntry , err  :=  commit .GetTreeEntryByPath (fromTreePath )
123196		if  err  !=  nil  {
@@ -151,6 +224,7 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
151224			// haven't been made. We throw an error if one wasn't provided. 
152225			return  nil , models.ErrSHAOrCommitIDNotProvided {}
153226		}
227+ 		encoding , bom  =  detectEncodingAndBOM (fromEntry , repo )
154228	}
155229
156230	// For the path where this file will be created/updated, we need to make 
@@ -235,9 +309,28 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
235309	}
236310
237311	content  :=  opts .Content 
312+ 	if  bom  {
313+ 		content  =  string (base .UTF8BOM ) +  content 
314+ 	}
315+ 	if  encoding  !=  "UTF-8"  {
316+ 		charsetEncoding , _  :=  charset .Lookup (encoding )
317+ 		if  charsetEncoding  !=  nil  {
318+ 			result , _ , err  :=  transform .String (charsetEncoding .NewEncoder (), string (content ))
319+ 			if  err  !=  nil  {
320+ 				// Look if we can't encode back in to the original we should just stick with utf-8 
321+ 				log .Error ("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v" , opts .TreePath , opts .FromTreePath , encoding , err )
322+ 				result  =  content 
323+ 			}
324+ 			content  =  result 
325+ 		} else  {
326+ 			log .Error ("Unknown encoding: %s" , encoding )
327+ 		}
328+ 	}
329+ 	// Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content 
330+ 	opts .Content  =  content 
238331	var  lfsMetaObject  * models.LFSMetaObject 
239332
240- 	if  filename2attribute2info [treePath ] !=  nil  &&  filename2attribute2info [treePath ]["filter" ] ==  "lfs"  {
333+ 	if  setting . LFS . StartServer   &&   filename2attribute2info [treePath ] !=  nil  &&  filename2attribute2info [treePath ]["filter" ] ==  "lfs"  {
241334		// OK so we are supposed to LFS this data! 
242335		oid , err  :=  models .GenerateLFSOid (strings .NewReader (opts .Content ))
243336		if  err  !=  nil  {
0 commit comments