@@ -144,6 +144,17 @@ locale_is_ascii(const char *str)
144144 return (strlen (str ) == 1 && ((unsigned char )str [0 ]) <= 127 );
145145}
146146
/* Report whether a NUL-terminated byte string is pure 7-bit ASCII.
   Returns 1 when no byte has a value above 127 (the empty string
   qualifies), and 0 as soon as such a byte is found. */
static int
is_all_ascii(const char *str)
{
    /* Inspect the bytes as unsigned so values >= 128 are not seen as
       negative on platforms where plain char is signed. */
    const unsigned char *cursor = (const unsigned char *)str;

    while (*cursor != '\0') {
        if (*cursor >= 128) {
            return 0;
        }
        cursor++;
    }
    return 1;
}
157+
147158static int
148159locale_decode_monetary (PyObject * dict , struct lconv * lc )
149160{
@@ -478,113 +489,153 @@ _locale__getdefaultlocale_impl(PyObject *module)
478489#endif
479490
480491#ifdef HAVE_LANGINFO_H
/* Build one table row from a langinfo item and its locale category:
   the item's spelling (stringified), its numeric value, and the LC_*
   category it is associated with. */
#define LANGINFO(ITEM, CATEGORY) \
    { .name = #ITEM, .value = (ITEM), .category = (CATEGORY) }
static struct langinfo_constant {
    const char *name;   /* spelling of the constant; NULL ends the table */
    int value;          /* item value compared against nl_langinfo() requests */
    int category;       /* LC_* category associated with the item */
} langinfo_constants[] =
{
    /* These constants should exist on any langinfo implementation */

    /* Full weekday names */
    LANGINFO(DAY_1, LC_TIME),
    LANGINFO(DAY_2, LC_TIME),
    LANGINFO(DAY_3, LC_TIME),
    LANGINFO(DAY_4, LC_TIME),
    LANGINFO(DAY_5, LC_TIME),
    LANGINFO(DAY_6, LC_TIME),
    LANGINFO(DAY_7, LC_TIME),

    /* Abbreviated weekday names */
    LANGINFO(ABDAY_1, LC_TIME),
    LANGINFO(ABDAY_2, LC_TIME),
    LANGINFO(ABDAY_3, LC_TIME),
    LANGINFO(ABDAY_4, LC_TIME),
    LANGINFO(ABDAY_5, LC_TIME),
    LANGINFO(ABDAY_6, LC_TIME),
    LANGINFO(ABDAY_7, LC_TIME),

    /* Full month names */
    LANGINFO(MON_1, LC_TIME),
    LANGINFO(MON_2, LC_TIME),
    LANGINFO(MON_3, LC_TIME),
    LANGINFO(MON_4, LC_TIME),
    LANGINFO(MON_5, LC_TIME),
    LANGINFO(MON_6, LC_TIME),
    LANGINFO(MON_7, LC_TIME),
    LANGINFO(MON_8, LC_TIME),
    LANGINFO(MON_9, LC_TIME),
    LANGINFO(MON_10, LC_TIME),
    LANGINFO(MON_11, LC_TIME),
    LANGINFO(MON_12, LC_TIME),

    /* Abbreviated month names */
    LANGINFO(ABMON_1, LC_TIME),
    LANGINFO(ABMON_2, LC_TIME),
    LANGINFO(ABMON_3, LC_TIME),
    LANGINFO(ABMON_4, LC_TIME),
    LANGINFO(ABMON_5, LC_TIME),
    LANGINFO(ABMON_6, LC_TIME),
    LANGINFO(ABMON_7, LC_TIME),
    LANGINFO(ABMON_8, LC_TIME),
    LANGINFO(ABMON_9, LC_TIME),
    LANGINFO(ABMON_10, LC_TIME),
    LANGINFO(ABMON_11, LC_TIME),
    LANGINFO(ABMON_12, LC_TIME),

#ifdef RADIXCHAR
    /* The following are not available with glibc 2.0 */
    LANGINFO(RADIXCHAR, LC_NUMERIC),
    LANGINFO(THOUSEP, LC_NUMERIC),
    /* YESSTR and NOSTR (LC_MESSAGES) are intentionally left out: glibc
       deprecates them because they are a special case of message
       translation, which is better done with gettext, so they are not
       exposed to Python in the first place. */
    LANGINFO(CRNCYSTR, LC_MONETARY),
#endif

    /* Date/time formats and AM/PM strings */
    LANGINFO(D_T_FMT, LC_TIME),
    LANGINFO(D_FMT, LC_TIME),
    LANGINFO(T_FMT, LC_TIME),
    LANGINFO(AM_STR, LC_TIME),
    LANGINFO(PM_STR, LC_TIME),

    /* The remaining constants are available only with XPG4, but
       OpenBSD doesn't have CODESET while it does have T_FMT_AMPM, and
       it lacks a few of the others.
       Solution: ifdef-test every one of them. */
#ifdef CODESET
    LANGINFO(CODESET, LC_CTYPE),
#endif
#ifdef T_FMT_AMPM
    LANGINFO(T_FMT_AMPM, LC_TIME),
#endif
#ifdef ERA
    LANGINFO(ERA, LC_TIME),
#endif
#ifdef ERA_D_FMT
    LANGINFO(ERA_D_FMT, LC_TIME),
#endif
#ifdef ERA_D_T_FMT
    LANGINFO(ERA_D_T_FMT, LC_TIME),
#endif
#ifdef ERA_T_FMT
    LANGINFO(ERA_T_FMT, LC_TIME),
#endif
#ifdef ALT_DIGITS
    LANGINFO(ALT_DIGITS, LC_TIME),
#endif
#ifdef YESEXPR
    LANGINFO(YESEXPR, LC_MESSAGES),
#endif
#ifdef NOEXPR
    LANGINFO(NOEXPR, LC_MESSAGES),
#endif
#ifdef _DATE_FMT
    /* This is not available in all glibc versions that have CODESET. */
    LANGINFO(_DATE_FMT, LC_TIME),
#endif

    /* Sentinel: a null name terminates the table. */
    {0, 0, 0}
};
587599
600+ /* Temporary make the LC_CTYPE locale to be the same as
601+ * the locale of the specified category. */
602+ static int
603+ change_locale (int category , char * * oldloc )
604+ {
605+ /* Keep a copy of the LC_CTYPE locale */
606+ * oldloc = setlocale (LC_CTYPE , NULL );
607+ if (!* oldloc ) {
608+ PyErr_SetString (PyExc_RuntimeError , "faild to get LC_CTYPE locale" );
609+ return -1 ;
610+ }
611+ * oldloc = _PyMem_Strdup (* oldloc );
612+ if (!* oldloc ) {
613+ PyErr_NoMemory ();
614+ return -1 ;
615+ }
616+
617+ /* Set a new locale if it is different. */
618+ char * loc = setlocale (category , NULL );
619+ if (loc == NULL || strcmp (loc , * oldloc ) == 0 ) {
620+ PyMem_Free (* oldloc );
621+ * oldloc = NULL ;
622+ return 0 ;
623+ }
624+
625+ setlocale (LC_CTYPE , loc );
626+ return 1 ;
627+ }
628+
/* Put back a previously saved LC_CTYPE locale and release the saved name.
   A NULL oldloc means there is nothing to restore, and the call does
   nothing. */
static void
restore_locale(char *oldloc)
{
    if (oldloc == NULL) {
        return;
    }
    setlocale(LC_CTYPE, oldloc);
    PyMem_Free(oldloc);
}
638+
588639/*[clinic input]
589640_locale.nl_langinfo
590641
@@ -602,14 +653,24 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
602653 /* Check whether this is a supported constant. GNU libc sometimes
603654 returns numeric values in the char* return value, which would
604655 crash PyUnicode_FromString. */
605- for (i = 0 ; langinfo_constants [i ].name ; i ++ )
656+ for (i = 0 ; langinfo_constants [i ].name ; i ++ ) {
606657 if (langinfo_constants [i ].value == item ) {
607658 /* Check NULL as a workaround for GNU libc's returning NULL
608659 instead of an empty string for nl_langinfo(ERA). */
609660 const char * result = nl_langinfo (item );
610661 result = result != NULL ? result : "" ;
611- return PyUnicode_DecodeLocale (result , NULL );
662+ char * oldloc = NULL ;
663+ if (langinfo_constants [i ].category != LC_CTYPE
664+ && !is_all_ascii (result )
665+ && change_locale (langinfo_constants [i ].category , & oldloc ) < 0 )
666+ {
667+ return NULL ;
668+ }
669+ PyObject * unicode = PyUnicode_DecodeLocale (result , NULL );
670+ restore_locale (oldloc );
671+ return unicode ;
612672 }
673+ }
613674 PyErr_SetString (PyExc_ValueError , "unsupported langinfo constant" );
614675 return NULL ;
615676}
0 commit comments