Skip to content

Commit 64d487b

Browse files
gh-123803: Support arbitrary code page encodings on Windows
If the cpXXX encoding is not directly implemented in Python, fall back to the Windows-specific APIs codecs.code_page_encode() and codecs.code_page_decode().
1 parent 033510e commit 64d487b

File tree

1 file changed

+41
-15
lines changed

1 file changed

+41
-15
lines changed

Lib/encodings/__init__.py

Lines changed: 41 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -156,19 +156,45 @@ def search_function(encoding):
156156
codecs.register(search_function)
157157

158158
if sys.platform == 'win32':
    def _code_page_search_function(encoding):
        """Codec search function for numeric Windows code pages.

        Handles encoding names of the form ``cpNNN`` by delegating to
        codecs.code_page_encode() and codecs.code_page_decode() with the
        code page number NNN.

        Parameters:
            encoding: the encoding name passed in by the codec registry.

        Returns:
            A codecs.CodecInfo bound to the code page, or None when the
            name is not ``cp`` followed by ASCII decimal digits or when the
            probe encode below fails, so the registry can keep searching
            and eventually raise LookupError.
        """
        encoding = encoding.lower()
        if not encoding.startswith('cp'):
            return None

        # Accept ASCII decimal digits only.  A bare int() would also accept
        # signs, surrounding whitespace, underscores and non-ASCII digits
        # (e.g. "cp-1" or "cp1_252"), none of which name a code page.
        digits = encoding[2:]
        if not (digits.isascii() and digits.isdigit()):
            return None
        cp = int(digits)

        # Probe the code page once so that an unusable number is rejected
        # at lookup time instead of failing on the first real encode/decode.
        # NOTE(review): OSError is expected for a code page Windows does not
        # support and OverflowError for a number too large for the C API --
        # confirm against the codecs.code_page_encode() implementation.
        try:
            codecs.code_page_encode(cp, 'x')
        except (OverflowError, OSError):
            return None

        def encode(input, errors='strict'):
            return codecs.code_page_encode(cp, input, errors)

        def decode(input, errors='strict'):
            # Stateless decoding: the input is complete, so final=True.
            return codecs.code_page_decode(cp, input, errors, True)

        class IncrementalEncoder(codecs.IncrementalEncoder):
            def encode(self, input, final=False):
                # code_page_encode() returns (bytes, length consumed);
                # incremental encoders return only the bytes.
                return codecs.code_page_encode(cp, input, self.errors)[0]

        class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
            def _buffer_decode(self, input, errors, final):
                return codecs.code_page_decode(cp, input, errors, final)

        # Only the codecs stream classes are used as bases: no "Codec" base
        # class exists in this scope.
        class StreamWriter(codecs.StreamWriter):
            def encode(self, input, errors='strict'):
                return codecs.code_page_encode(cp, input, errors)

        class StreamReader(codecs.StreamReader):
            def decode(self, input, errors='strict', final=False):
                # codecs.StreamReader.read() calls self.decode(data, errors)
                # with two arguments, so ``final`` must be optional.  With
                # final=False a trailing incomplete sequence is reported as
                # unconsumed and retried on the next read.
                return codecs.code_page_decode(cp, input, errors, final)

        return codecs.CodecInfo(
            name=f'cp{cp}',
            encode=encode,
            decode=decode,
            incrementalencoder=IncrementalEncoder,
            incrementaldecoder=IncrementalDecoder,
            streamreader=StreamReader,
            streamwriter=StreamWriter,
        )

    codecs.register(_code_page_search_function)

0 commit comments

Comments
 (0)