2222else :
2323 C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
2424
25- # XXX (ncoghlan): The above is probably still wrong for :
25+ # Note that the above is probably still wrong in some cases, such as :
2626# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
2727# * AIX and any other platforms that use latin-1 in the C locale
28+ #
29+ # Options for dealing with this:
30+ # * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
31+ # * Fix the test expectations to match the actual platform behaviour
2832
2933# In order to get the warning messages to match up as expected, the candidate
3034# order here must match the target locale order in Python/pylifecycle.c
31- _C_UTF8_LOCALES = ("C.UTF-8" , "C.utf8" , "UTF-8" )
35+ _C_UTF8_LOCALES = ("C.UTF-8" , "C.utf8" ) #, "UTF-8")
36+
37+ # XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
38+ # problems encountered on *BSD systems with those test cases
39+ # For additional details see:
40+ # nl_langinfo CODESET error: https://bugs.python.org/issue30647
41+ # locale handling differences: https://bugs.python.org/issue30672
3242
3343# There's no reliable cross-platform way of checking locale alias
3444# lists, so the only way of knowing which of these locales will work
@@ -40,28 +50,39 @@ def _set_locale_in_subprocess(locale_name):
4050 result , py_cmd = run_python_until_end ("-c" , cmd , __isolated = True )
4151 return result .rc == 0
4252
43- _EncodingDetails = namedtuple ( "EncodingDetails" ,
44- "fsencoding stdin_info stdout_info stderr_info" )
53+ _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
54+ _EncodingDetails = namedtuple ( "EncodingDetails" , _fields )
4555
4656class EncodingDetails (_EncodingDetails ):
57+ # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
4758 CHILD_PROCESS_SCRIPT = ";" .join ([
48- "import sys" ,
59+ "import sys, os " ,
4960 "print(sys.getfilesystemencoding())" ,
5061 "print(sys.stdin.encoding + ':' + sys.stdin.errors)" ,
5162 "print(sys.stdout.encoding + ':' + sys.stdout.errors)" ,
5263 "print(sys.stderr.encoding + ':' + sys.stderr.errors)" ,
64+ "print(os.environ.get('LANG', 'not set'))" ,
65+ "print(os.environ.get('LC_CTYPE', 'not set'))" ,
66+ "print(os.environ.get('LC_ALL', 'not set'))" ,
5367 ])
5468
5569 @classmethod
56- def get_expected_details (cls , fs_encoding , stream_encoding ):
70+ def get_expected_details (cls , coercion_expected , fs_encoding , stream_encoding , env_vars ):
5771 """Returns expected child process details for a given encoding"""
5872 _stream = stream_encoding + ":{}"
5973 # stdin and stdout should use surrogateescape either because the
6074 # coercion triggered, or because the C locale was detected
6175 stream_info = 2 * [_stream .format ("surrogateescape" )]
6276 # stderr should always use backslashreplace
6377 stream_info .append (_stream .format ("backslashreplace" ))
64- return dict (cls (fs_encoding , * stream_info )._asdict ())
78+ expected_lang = env_vars .get ("LANG" , "not set" ).lower ()
79+ if coercion_expected :
80+ expected_lc_ctype = CLI_COERCION_TARGET .lower ()
81+ else :
82+ expected_lc_ctype = env_vars .get ("LC_CTYPE" , "not set" ).lower ()
83+ expected_lc_all = env_vars .get ("LC_ALL" , "not set" ).lower ()
84+ env_info = expected_lang , expected_lc_ctype , expected_lc_all
85+ return dict (cls (fs_encoding , * stream_info , * env_info )._asdict ())
6586
6687 @staticmethod
6788 def _handle_output_variations (data ):
@@ -97,64 +118,20 @@ def get_child_details(cls, env_vars):
97118 result .fail (py_cmd )
98119 # All subprocess outputs in this test case should be pure ASCII
99120 adjusted_output = cls ._handle_output_variations (result .out )
100- stdout_lines = adjusted_output .decode ("ascii" ).rstrip (). splitlines ()
121+ stdout_lines = adjusted_output .decode ("ascii" ).splitlines ()
101122 child_encoding_details = dict (cls (* stdout_lines )._asdict ())
102123 stderr_lines = result .err .decode ("ascii" ).rstrip ().splitlines ()
103124 return child_encoding_details , stderr_lines
104125
105126
106- class _ChildProcessEncodingTestCase (unittest .TestCase ):
107- # Base class to check for expected encoding details in a child process
108-
109- def _check_child_encoding_details (self ,
110- env_vars ,
111- expected_fs_encoding ,
112- expected_stream_encoding ,
113- expected_warning ):
114- """Check the C locale handling for the given process environment
115-
116- Parameters:
117- expected_fs_encoding: expected sys.getfilesystemencoding() result
118- expected_stream_encoding: expected encoding for standard streams
119- expected_warning: stderr output to expect (if any)
120- """
121- result = EncodingDetails .get_child_details (env_vars )
122- encoding_details , stderr_lines = result
123- self .assertEqual (encoding_details ,
124- EncodingDetails .get_expected_details (
125- expected_fs_encoding ,
126- expected_stream_encoding ))
127- self .assertEqual (stderr_lines , expected_warning )
128-
129127# Details of the shared library warning emitted at runtime
130- LIBRARY_C_LOCALE_WARNING = (
128+ LEGACY_LOCALE_WARNING = (
131129 "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
132130 "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
133131 "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
134132 "locales is recommended."
135133)
136134
137- @unittest .skipUnless (sysconfig .get_config_var ("PY_WARN_ON_C_LOCALE" ),
138- "C locale runtime warning disabled at build time" )
139- class LocaleWarningTests (_ChildProcessEncodingTestCase ):
140- # Test warning emitted when running in the C locale
141-
142- def test_library_c_locale_warning (self ):
143- self .maxDiff = None
144- for locale_to_set in ("C" , "POSIX" , "invalid.ascii" ):
145- # XXX (ncoghlan): Mac OS X doesn't behave as expected in the
146- # POSIX locale, so we skip that for now
147- if sys .platform == "darwin" and locale_to_set == "POSIX" :
148- continue
149- var_dict = {
150- "LC_ALL" : locale_to_set
151- }
152- with self .subTest (forced_locale = locale_to_set ):
153- self ._check_child_encoding_details (var_dict ,
154- C_LOCALE_FS_ENCODING ,
155- C_LOCALE_STREAM_ENCODING ,
156- [LIBRARY_C_LOCALE_WARNING ])
157-
158135# Details of the CLI locale coercion warning emitted at runtime
159136CLI_COERCION_WARNING_FMT = (
160137 "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
@@ -163,9 +140,13 @@ def test_library_c_locale_warning(self):
163140
164141
165142AVAILABLE_TARGETS = None
143+ CLI_COERCION_TARGET = None
144+ CLI_COERCION_WARNING = None
166145
167146def setUpModule ():
168147 global AVAILABLE_TARGETS
148+ global CLI_COERCION_TARGET
149+ global CLI_COERCION_WARNING
169150
170151 if AVAILABLE_TARGETS is not None :
171152 # initialization already done
@@ -177,26 +158,57 @@ def setUpModule():
177158 if _set_locale_in_subprocess (target_locale ):
178159 AVAILABLE_TARGETS .append (target_locale )
179160
161+ if AVAILABLE_TARGETS :
162+ # Coercion is expected to use the first available target locale
163+ CLI_COERCION_TARGET = AVAILABLE_TARGETS [0 ]
164+ CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT .format (CLI_COERCION_TARGET )
180165
181166
182- class _LocaleCoercionTargetsTestCase ( _ChildProcessEncodingTestCase ):
183- # Base class for test cases that rely on coercion targets being defined
167+ class _LocaleHandlingTestCase ( unittest . TestCase ):
168+ # Base class to check expected locale handling behaviour
184169
185- @classmethod
186- def setUpClass (cls ):
187- if not AVAILABLE_TARGETS :
188- raise unittest .SkipTest ("No C-with-UTF-8 locale available" )
170+ def _check_child_encoding_details (self ,
171+ env_vars ,
172+ expected_fs_encoding ,
173+ expected_stream_encoding ,
174+ expected_warnings ,
175+ coercion_expected ):
176+ """Check the C locale handling for the given process environment
189177
178+ Parameters:
179+ expected_fs_encoding: expected sys.getfilesystemencoding() result
180+ expected_stream_encoding: expected encoding for standard streams
181+ expected_warnings: stderr lines to expect (None means no output)
182+ """
183+ result = EncodingDetails .get_child_details (env_vars )
184+ encoding_details , stderr_lines = result
185+ expected_details = EncodingDetails .get_expected_details (
186+ coercion_expected ,
187+ expected_fs_encoding ,
188+ expected_stream_encoding ,
189+ env_vars
190+ )
191+ self .assertEqual (encoding_details , expected_details )
192+ if expected_warnings is None :
193+ expected_warnings = []
194+ self .assertEqual (stderr_lines , expected_warnings )
190195
191- class LocaleConfigurationTests (_LocaleCoercionTargetsTestCase ):
196+
197+ class LocaleConfigurationTests (_LocaleHandlingTestCase ):
192198 # Test explicit external configuration via the process environment
193199
200+ def setUpClass ():
201+ # This relies on setUpModule() having been run, so it can't be
202+ # handled via the @unittest.skipUnless decorator
203+ if not AVAILABLE_TARGETS :
204+ raise unittest .SkipTest ("No C-with-UTF-8 locale available" )
205+
194206 def test_external_target_locale_configuration (self ):
207+
195208 # Explicitly setting a target locale should give the same behaviour as
196209 # is seen when implicitly coercing to that target locale
197210 self .maxDiff = None
198211
199- expected_warning = []
200212 expected_fs_encoding = "utf-8"
201213 expected_stream_encoding = "utf-8"
202214
@@ -209,6 +221,7 @@ def test_external_target_locale_configuration(self):
209221 for locale_to_set in AVAILABLE_TARGETS :
210222 # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
211223 # expected, so skip that combination for now
224+ # See https://bugs.python.org/issue30672 for discussion
212225 if env_var == "LANG" and locale_to_set == "UTF-8" :
213226 continue
214227
@@ -219,17 +232,23 @@ def test_external_target_locale_configuration(self):
219232 self ._check_child_encoding_details (var_dict ,
220233 expected_fs_encoding ,
221234 expected_stream_encoding ,
222- expected_warning )
235+ expected_warnings = None ,
236+ coercion_expected = False )
223237
224238
225239
226240@test .support .cpython_only
227241@unittest .skipUnless (sysconfig .get_config_var ("PY_COERCE_C_LOCALE" ),
228242 "C locale coercion disabled at build time" )
229- class LocaleCoercionTests (_LocaleCoercionTargetsTestCase ):
243+ class LocaleCoercionTests (_LocaleHandlingTestCase ):
230244 # Test implicit reconfiguration of the environment during CLI startup
231245
232- def _check_c_locale_coercion (self , fs_encoding , stream_encoding , coerce_c_locale ):
246+ def _check_c_locale_coercion (self ,
247+ fs_encoding , stream_encoding ,
248+ coerce_c_locale ,
249+ expected_warnings = None ,
250+ coercion_expected = True ,
251+ ** extra_vars ):
233252 """Check the C locale handling for various configurations
234253
235254 Parameters:
@@ -238,27 +257,31 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
238257 coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
239258 None: don't set the variable at all
240259 str: the value set in the child's environment
260+ expected_warnings: expected warning lines on stderr
261+ extra_vars: additional environment variables to set in subprocess
241262 """
242-
243- # Check for expected warning on stderr if C locale is coerced
244263 self .maxDiff = None
245264
246- expected_warning = []
247- if coerce_c_locale != "0" :
248- # Expect coercion to use the first available locale
249- warning_msg = CLI_COERCION_WARNING_FMT .format (AVAILABLE_TARGETS [0 ])
250- expected_warning .append (warning_msg )
265+ if not AVAILABLE_TARGETS :
266+ # Locale coercion is disabled when there aren't any target locales
267+ fs_encoding = C_LOCALE_FS_ENCODING
268+ stream_encoding = C_LOCALE_STREAM_ENCODING
269+ coercion_expected = False
270+ if expected_warnings :
271+ expected_warnings = [LEGACY_LOCALE_WARNING ]
251272
252273 base_var_dict = {
253274 "LANG" : "" ,
254275 "LC_CTYPE" : "" ,
255276 "LC_ALL" : "" ,
256277 }
278+ base_var_dict .update (extra_vars )
257279 for env_var in ("LANG" , "LC_CTYPE" ):
258280 for locale_to_set in ("" , "C" , "POSIX" , "invalid.ascii" ):
259- # XXX (ncoghlan): Mac OS X doesn't behave as expected in the
281+ # XXX (ncoghlan): *BSD platforms don't behave as expected in the
260282 # POSIX locale, so we skip that for now
261- if sys .platform == "darwin" and locale_to_set == "POSIX" :
283+ # See https://bugs.python.org/issue30672 for discussion
284+ if locale_to_set == "POSIX" :
262285 continue
263286 with self .subTest (env_var = env_var ,
264287 nominal_locale = locale_to_set ,
@@ -267,33 +290,62 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale
267290 var_dict [env_var ] = locale_to_set
268291 if coerce_c_locale is not None :
269292 var_dict ["PYTHONCOERCECLOCALE" ] = coerce_c_locale
293+ # Check behaviour on successful coercion
270294 self ._check_child_encoding_details (var_dict ,
271295 fs_encoding ,
272296 stream_encoding ,
273- expected_warning )
297+ expected_warnings ,
298+ coercion_expected )
274299
275300 def test_test_PYTHONCOERCECLOCALE_not_set (self ):
276301 # This should coerce to the first available target locale by default
277302 self ._check_c_locale_coercion ("utf-8" , "utf-8" , coerce_c_locale = None )
278303
279304 def test_PYTHONCOERCECLOCALE_not_zero (self ):
280- # *Any* string other that "0" is considered "set" for our purposes
305+ # *Any* string other than "0" is considered "set" for our purposes
281306 # and hence should result in the locale coercion being enabled
282307 for setting in ("" , "1" , "true" , "false" ):
283308 self ._check_c_locale_coercion ("utf-8" , "utf-8" , coerce_c_locale = setting )
284309
310+ def test_PYTHONCOERCECLOCALE_set_to_warn (self ):
311+ # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
312+ self ._check_c_locale_coercion ("utf-8" , "utf-8" ,
313+ coerce_c_locale = "warn" ,
314+ expected_warnings = [CLI_COERCION_WARNING ])
315+
316+
285317 def test_PYTHONCOERCECLOCALE_set_to_zero (self ):
286318 # The setting "0" should result in the locale coercion being disabled
287319 self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
288320 C_LOCALE_STREAM_ENCODING ,
289- coerce_c_locale = "0" )
321+ coerce_c_locale = "0" ,
322+ coercion_expected = False )
323+ # Setting LC_ALL=C shouldn't make any difference to the behaviour
324+ self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
325+ C_LOCALE_STREAM_ENCODING ,
326+ coerce_c_locale = "0" ,
327+ LC_ALL = "C" ,
328+ coercion_expected = False )
290329
330+ def test_LC_ALL_set_to_C (self ):
331+ # Setting LC_ALL should render the locale coercion ineffective
332+ self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
333+ C_LOCALE_STREAM_ENCODING ,
334+ coerce_c_locale = None ,
335+ LC_ALL = "C" ,
336+ coercion_expected = False )
337+ # And result in a warning about a lack of locale compatibility
338+ self ._check_c_locale_coercion (C_LOCALE_FS_ENCODING ,
339+ C_LOCALE_STREAM_ENCODING ,
340+ coerce_c_locale = "warn" ,
341+ LC_ALL = "C" ,
342+ expected_warnings = [LEGACY_LOCALE_WARNING ],
343+ coercion_expected = False )
291344
292345def test_main ():
293346 test .support .run_unittest (
294347 LocaleConfigurationTests ,
295- LocaleCoercionTests ,
296- LocaleWarningTests
348+ LocaleCoercionTests
297349 )
298350 test .support .reap_children ()
299351
0 commit comments