From f051cc56a7cc9b040b4eb1a11582cf017521f51f Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 24 Jun 2017 18:11:09 +1000 Subject: [PATCH 1/6] bpo-30647: Check nl_langinfo(CODESET) in locale coercion - On some versions of FreeBSD, setting the "UTF-8" locale succeeds, but a subsequent "nl_langinfo(CODESET)" fails - adding a check for this in the coercion logic means that coercion will happen on systems where this check succeeds, and will be skipped otherwise - that way CPython should automatically adapt to changes in platform behaviour, rather than needing a new release to enable coercion at build time - this also allows UTF-8 to be re-enabled as a coercion target, restoring the locale coercion behaviour on Mac OS X --- Lib/test/test_c_locale_coercion.py | 15 ++++++++------- Python/pylifecycle.c | 20 ++++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index a4b4626756adce..e829499f6377ec 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -1,6 +1,7 @@ # Tests the attempted automatic coercion of the C locale to a UTF-8 locale import unittest +import locale import os import sys import sysconfig @@ -32,20 +33,20 @@ # In order to get the warning messages to match up as expected, the candidate # order here must much the target locale order in Python/pylifecycle.c -_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") - -# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to -# problems encountered on *BSD systems with those test cases -# For additional details see: -# nl_langinfo CODESET error: https://bugs.python.org/issue30647 -# locale handling differences: https://bugs.python.org/issue30672 +_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") # There's no reliable cross-platform way of checking locale alias # lists, so the only way of knowing which of these locales will work # is to try them with locale.setlocale(). We do that in a subprocess # to avoid altering the locale of the test runner. +# +# If the relevant locale module attributes exist, we also check that +# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter +# will skip locale coercion for that particular target locale def _set_locale_in_subprocess(locale_name): cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))" + if hasattr(locale, "nl_langinfo") and hasattr(locale, "CODESET"): + cmd_fmt += "; print(locale.nl_langinfo(locale.CODESET))" cmd = cmd_fmt.format(locale_name) result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) return result.rc == 0 diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 953bc90a456bdb..d34173b7bd233e 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -399,17 +399,10 @@ typedef struct _CandidateLocale { static _LocaleCoercionTarget _TARGET_LOCALES[] = { {"C.UTF-8"}, {"C.utf8"}, - /* {"UTF-8"}, */ + {"UTF-8"}, {NULL} }; -/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to - * problems encountered on *BSD systems with those test cases - * For additional details see: - * nl_langinfo CODESET error: https://bugs.python.org/issue30647 - * locale handling differences: https://bugs.python.org/issue30672 - */ - static char * get_default_standard_stream_error_handler(void) { @@ -485,11 +478,22 @@ _Py_CoerceLegacyLocale(void) const char *locale_override = getenv("LC_ALL"); if (locale_override == NULL || *locale_override == '\0') { /* LC_ALL is also not set (or is set to an empty string) */ + const char *initial_locale = setlocale(LC_CTYPE, NULL); const _LocaleCoercionTarget *target = NULL; for (target = _TARGET_LOCALES; target->locale_name; target++) { const char *new_locale = setlocale(LC_CTYPE, target->locale_name); if (new_locale != NULL) { +#if defined(HAVE_LANGINFO_H) && defined(CODESET) + /* Also ensure that nl_langinfo works in this locale */ + char *codeset = nl_langinfo(CODESET); + if (!codeset || *codeset == '\0') { + /* CODESET is not set or empty, so skip coercion */ + new_locale = NULL; + setlocale(LC_CTYPE, initial_locale); + continue; + } +#endif /* Successfully configured locale, so make it the default */ _coerce_default_locale_settings(target); return; From 64a708cc5801eb41e0c07e17dd6d778a4f481e2a Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 24 Jun 2017 22:43:24 +1000 Subject: [PATCH 2/6] Attempt to fix skip condition in tests --- Lib/test/test_c_locale_coercion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index e829499f6377ec..235d66a5578c10 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -46,7 +46,8 @@ def _set_locale_in_subprocess(locale_name): cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))" if hasattr(locale, "nl_langinfo") and hasattr(locale, "CODESET"): - cmd_fmt += "; print(locale.nl_langinfo(locale.CODESET))" + # If there's no valid CODESET, we expect coercion to be skipped + cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))" cmd = cmd_fmt.format(locale_name) result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) return result.rc == 0 From 5f262ef550ea9a5860dc92d232fd4a307f059f54 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 25 Jun 2017 00:04:34 +1000 Subject: [PATCH 3/6] Conditionally check nl_langinfo in locale coercion tests The problem with dynamically adaptive tests is that they can sometimes pass without actually testing anything useful. On Linux and Mac OS X, if setlocale works, we also expect nl_langinfo(CODESET) to *always* work for the coercion target locales. The tests now reflect this by always assuming the target locale will work if setlocale succeeds when running on Linux or Mac OS X. --- Lib/test/test_c_locale_coercion.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 235d66a5578c10..f5a9fe34847a5e 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -40,18 +40,27 @@ # is to try them with locale.setlocale(). We do that in a subprocess # to avoid altering the locale of the test runner. # -# If the relevant locale module attributes exist, we also check that +# If the relevant locale module attributes exist, and we're not on a platform +# where we expect it to always succeed, we also check that # `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter # will skip locale coercion for that particular target locale +_check_nl_langinfo_CODESET = bool( + sys.platform not in ("darwin", "linux") and + hasattr(locale, "nl_langinfo") and + hasattr(locale, "CODESET") +) + def _set_locale_in_subprocess(locale_name): cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))" - if hasattr(locale, "nl_langinfo") and hasattr(locale, "CODESET"): + if _check_nl_langinfo_CODESET: # If there's no valid CODESET, we expect coercion to be skipped cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))" cmd = cmd_fmt.format(locale_name) result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) return result.rc == 0 + + _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" _EncodingDetails = namedtuple("EncodingDetails", _fields) From f5fe87425940adbc6a64fed39247b1a57bdb40f1 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 25 Jun 2017 15:14:13 +1000 Subject: [PATCH 4/6] Skip the nl_langinfo check on OS X --- Python/pylifecycle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index d34173b7bd233e..80c5ddab6bc082 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -484,7 +484,7 @@ _Py_CoerceLegacyLocale(void) const char *new_locale = setlocale(LC_CTYPE, target->locale_name); if (new_locale != NULL) { -#if defined(HAVE_LANGINFO_H) && defined(CODESET) +#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET) /* Also ensure that nl_langinfo works in this locale */ char *codeset = nl_langinfo(CODESET); if (!codeset || *codeset == '\0') { From 598c21e17b55bfc08abeb9159829541e47797e34 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sun, 25 Jun 2017 15:40:15 +1000 Subject: [PATCH 5/6] Avoid unused variable warning --- Python/pylifecycle.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 80c5ddab6bc082..152056c69eef65 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -478,7 +478,11 @@ _Py_CoerceLegacyLocale(void) const char *locale_override = getenv("LC_ALL"); if (locale_override == NULL || *locale_override == '\0') { /* LC_ALL is also not set (or is set to an empty string) */ - const char *initial_locale = setlocale(LC_CTYPE, NULL); +#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET) + /* Save the initial locale on platforms where we may need it later */ + const char *initial_locale = +#endif + setlocale(LC_CTYPE, NULL); const _LocaleCoercionTarget *target = NULL; for (target = _TARGET_LOCALES; target->locale_name; target++) { const char *new_locale = setlocale(LC_CTYPE, From 631fd5a9bce79f9d07b23ffbf25ab7b97d9ba912 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Thu, 29 Jun 2017 22:52:41 +1000 Subject: [PATCH 6/6] Set locale from environment if nl_langinfo fails Saving and restoring the environment when nl_langinfo fails doesn't work as intended on at least FreeBSD, and potentially other systems as well. Configuring the locale from the environment in this case does the right thing (since we make the exact same call later on in Py_Initialize anyway) --- Python/pylifecycle.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 152056c69eef65..3953fec0f52446 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -478,11 +478,6 @@ _Py_CoerceLegacyLocale(void) const char *locale_override = getenv("LC_ALL"); if (locale_override == NULL || *locale_override == '\0') { /* LC_ALL is also not set (or is set to an empty string) */ -#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET) - /* Save the initial locale on platforms where we may need it later */ - const char *initial_locale = -#endif - setlocale(LC_CTYPE, NULL); const _LocaleCoercionTarget *target = NULL; for (target = _TARGET_LOCALES; target->locale_name; target++) { const char *new_locale = setlocale(LC_CTYPE, @@ -494,7 +489,7 @@ _Py_CoerceLegacyLocale(void) if (!codeset || *codeset == '\0') { /* CODESET is not set or empty, so skip coercion */ new_locale = NULL; - setlocale(LC_CTYPE, initial_locale); + setlocale(LC_CTYPE, ""); continue; } #endif