21
21
from gitdb .utils .encoding import (
22
22
string_types , # @UnusedImport
23
23
text_type , # @UnusedImport
24
- force_bytes , # @UnusedImport
25
- force_text # @UnusedImport
24
+ force_text , # @UnusedImport
26
25
)
27
26
28
27
@@ -77,7 +76,7 @@ def safe_decode(s):
77
76
def safe_encode (s ):
78
77
"""Safely decodes a binary string to unicode"""
79
78
if isinstance (s , unicode ):
80
- return s .encode (defenc )
79
+ return s .encode (defenc , 'surrogateescape' )
81
80
elif isinstance (s , bytes ):
82
81
return s
83
82
elif s is not None :
@@ -123,8 +122,8 @@ def __str__(self):
123
122
else : # Python 2
124
123
def __str__ (self ):
125
124
return self .__unicode__ ().encode (defenc )
126
-
127
-
125
+
126
+
128
127
"""
129
128
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
130
129
handler of Python 3.
@@ -139,12 +138,14 @@ def __str__(self):
139
138
# # -- Python 2/3 compatibility -------------------------------------
140
139
# FS_ERRORS = 'my_surrogateescape'
141
140
141
+
142
142
def u (text ):
143
143
if PY3 :
144
144
return text
145
145
else :
146
146
return text .decode ('unicode_escape' )
147
147
148
+
148
149
def b (data ):
149
150
if PY3 :
150
151
return data .encode ('latin1' )
@@ -155,9 +156,10 @@ def b(data):
155
156
_unichr = chr
156
157
bytes_chr = lambda code : bytes ((code ,))
157
158
else :
158
- _unichr = unichr
159
+ _unichr = unichr # @UndefinedVariable
159
160
bytes_chr = chr
160
161
162
+
161
163
def surrogateescape_handler (exc ):
162
164
"""
163
165
Pure Python implementation of PEP 383: the "surrogateescape" error
@@ -204,7 +206,7 @@ def replace_surrogate_encode(mystring):
204
206
# The following magic comes from Py3.3's Python/codecs.c file:
205
207
if not 0xD800 <= code <= 0xDCFF :
206
208
# Not a surrogate. Fail with the original exception.
207
- raise exc
209
+ raise
208
210
# mybytes = [0xe0 | (code >> 12),
209
211
# 0x80 | ((code >> 6) & 0x3f),
210
212
# 0x80 | (code & 0x3f)]
@@ -256,9 +258,8 @@ def encodefilename(fn):
256
258
elif 0xDC80 <= code <= 0xDCFF :
257
259
ch = bytes_chr (code - 0xDC00 )
258
260
else :
259
- raise UnicodeEncodeError (FS_ENCODING ,
260
- fn , index , index + 1 ,
261
- 'ordinal not in range(128)' )
261
+ raise UnicodeEncodeError (FS_ENCODING , fn , index , index + 1 ,
262
+ 'ordinal not in range(128)' )
262
263
encoded .append (ch )
263
264
return bytes ().join (encoded )
264
265
elif FS_ENCODING == 'utf-8' :
@@ -272,20 +273,22 @@ def encodefilename(fn):
272
273
ch = bytes_chr (code - 0xDC00 )
273
274
encoded .append (ch )
274
275
else :
275
- raise UnicodeEncodeError (
276
- FS_ENCODING ,
277
- fn , index , index + 1 , 'surrogates not allowed' )
276
+ raise UnicodeEncodeError (FS_ENCODING , fn , index , index + 1 ,
277
+ 'surrogates not allowed' )
278
278
else :
279
279
ch_utf8 = ch .encode ('utf-8' )
280
280
encoded .append (ch_utf8 )
281
281
return bytes ().join (encoded )
282
282
else :
283
283
return fn .encode (FS_ENCODING , FS_ERRORS )
284
284
285
+
285
286
def decodefilename (fn ):
286
287
return fn .decode (FS_ENCODING , FS_ERRORS )
287
288
288
- FS_ENCODING = 'ascii' ; fn = b ('[abc\xff ]' ); encoded = u ('[abc\udcff ]' )
289
+ FS_ENCODING = 'ascii'
290
+ fn = b ('[abc\xff ]' )
291
+ encoded = u ('[abc\udcff ]' )
289
292
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
290
293
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
291
294
0 commit comments