diff --git a/source/atomics.tex b/source/atomics.tex index 626482db15..b248b2ef8b 100644 --- a/source/atomics.tex +++ b/source/atomics.tex @@ -43,6 +43,7 @@ // \ref{atomics.lockfree}, lock-free property #define ATOMIC_BOOL_LOCK_FREE @\unspec@ #define ATOMIC_CHAR_LOCK_FREE @\unspec@ + #define ATOMIC_CHAR8_T_LOCK_FREE @\unspec@ #define ATOMIC_CHAR16_T_LOCK_FREE @\unspec@ #define ATOMIC_CHAR32_T_LOCK_FREE @\unspec@ #define ATOMIC_WCHAR_T_LOCK_FREE @\unspec@ @@ -203,6 +204,7 @@ using atomic_ulong = atomic<unsigned long>; using atomic_llong = atomic<long long>; using atomic_ullong = atomic<unsigned long long>; + using atomic_char8_t = atomic<char8_t>; using atomic_char16_t = atomic<char16_t>; using atomic_char32_t = atomic<char32_t>; using atomic_wchar_t = atomic<wchar_t>; @@ -272,6 +274,7 @@ \indexlibrary{\idxcode{atomic_ulong}}% \indexlibrary{\idxcode{atomic_llong}}% \indexlibrary{\idxcode{atomic_ullong}}% +\indexlibrary{\idxcode{atomic_char8_t}}% \indexlibrary{\idxcode{atomic_char16_t}}% \indexlibrary{\idxcode{atomic_char32_t}}% \indexlibrary{\idxcode{atomic_wchar_t}}% @@ -535,6 +538,7 @@ \indexlibrary{\idxcode{ATOMIC_BOOL_LOCK_FREE}}% \indexlibrary{\idxcode{ATOMIC_CHAR_LOCK_FREE}}% +\indexlibrary{\idxcode{ATOMIC_CHAR8_T_LOCK_FREE}}% \indexlibrary{\idxcode{ATOMIC_CHAR16_T_LOCK_FREE}}% \indexlibrary{\idxcode{ATOMIC_CHAR32_T_LOCK_FREE}}% \indexlibrary{\idxcode{ATOMIC_WCHAR_T_LOCK_FREE}}% @@ -547,6 +551,7 @@ \begin{codeblock} #define ATOMIC_BOOL_LOCK_FREE @\unspec@ #define ATOMIC_CHAR_LOCK_FREE @\unspec@ +#define ATOMIC_CHAR8_T_LOCK_FREE @\unspec@ #define ATOMIC_CHAR16_T_LOCK_FREE @\unspec@ #define ATOMIC_CHAR32_T_LOCK_FREE @\unspec@ #define ATOMIC_WCHAR_T_LOCK_FREE @\unspec@ @@ -927,6 +932,7 @@ \tcode{unsigned long}, \tcode{long long}, \tcode{unsigned long long}, +\tcode{char8_t}, \tcode{char16_t}, \tcode{char32_t}, \tcode{wchar_t}, @@ -1745,6 +1751,7 @@ \tcode{unsigned long}, \tcode{long long}, \tcode{unsigned long long}, +\tcode{char8_t}, \tcode{char16_t}, \tcode{char32_t}, \tcode{wchar_t}, diff --git a/source/basic.tex b/source/basic.tex index 
a20d22e4e0..ce17b59965 100644 --- a/source/basic.tex +++ b/source/basic.tex @@ -3575,7 +3575,7 @@ \tcode{alignof} expression\iref{expr.alignof}. Furthermore, the narrow character types\iref{basic.fundamental} shall have the weakest alignment requirement. -\begin{note} This enables the narrow character types to be used as the +\begin{note} This enables the ordinary character types to be used as the underlying type for an aligned memory area\iref{dcl.align}.\end{note} \pnum @@ -4289,6 +4289,7 @@ \defnx{extended integer types}{extended integer type}. \pnum +\indextext{underlying type|see{type, underlying}}% A fundamental type specified to have a signed or unsigned integer type as its \defn{underlying type} has the same object representation, @@ -4300,6 +4301,7 @@ \pnum \indextext{type!\idxcode{char}}% \indextext{type!character}% +\indextext{type!ordinary character}% \indextext{type!narrow character}% \indextext{\idxcode{char}!implementation-defined sign of}% \indextext{type!\idxcode{signed char}}% @@ -4311,6 +4313,9 @@ The values of type \tcode{char} can represent distinct codes for all members of the implementation's basic character set. The three types \tcode{char}, \tcode{signed char}, and \tcode{unsigned char} +are collectively called +\defnx{ordinary character types}{type!ordinary character}. +The ordinary character types and \tcode{char8_t} are collectively called \defnx{narrow character types}{narrow character type}. 
For narrow character types, each possible bit pattern of the object representation represents @@ -4326,7 +4331,6 @@ \pnum \indextext{\idxcode{wchar_t}|see{type, \tcode{wchar_t}}}% \indextext{type!\idxcode{wchar_t}}% -\indextext{underlying type|see{type, underlying}}% \indextext{type!underlying!\idxcode{wchar_t}}% Type \tcode{wchar_t} is a distinct type that has an \impldef{underlying type of \tcode{wchar_t}} @@ -4334,6 +4338,13 @@ The values of type \tcode{wchar_t} can represent distinct codes for all members of the largest extended character set specified among the supported locales\iref{locale}. + +\pnum +\indextext{\idxcode{char8_t}|see{type, \tcode{char8_t}}}% +\indextext{type!\idxcode{char8_t}}% +\indextext{type!underlying!\idxcode{char8_t}}% +Type \tcode{char8_t} denotes a distinct type +whose underlying type is \tcode{unsigned char}. \indextext{\idxcode{char16_t}|see{type, \tcode{char16_t}}}% \indextext{\idxcode{char32_t}|see{type, \tcode{char32_t}}}% \indextext{type!\idxcode{char16_t}}% @@ -4364,8 +4375,11 @@ \pnum \indextext{type!integral}% -Types \tcode{bool}, \tcode{char}, \tcode{char16_t}, \tcode{char32_t}, -\tcode{wchar_t}, and the signed and unsigned integer types are +Types +\tcode{bool}, +\tcode{char}, \tcode{wchar_t}, +\tcode{char8_t}, \tcode{char16_t}, \tcode{char32_t}, +and the signed and unsigned integer types are collectively called \defnx{integral types}{integral type}. A synonym for integral type is \defn{integer type}. @@ -4737,7 +4751,7 @@ \indextext{type!\idxcode{wchar_t}}% \indextext{type!\idxcode{char16_t}}% \indextext{type!\idxcode{char32_t}}% -\item The ranks of \tcode{char16_t}, \tcode{char32_t}, and +\item The ranks of \tcode{char8_t}, \tcode{char16_t}, \tcode{char32_t}, and \tcode{wchar_t} shall equal the ranks of their underlying types\iref{basic.fundamental}. 
diff --git a/source/compatibility.tex b/source/compatibility.tex index b0402039f7..89e29541ac 100644 --- a/source/compatibility.tex +++ b/source/compatibility.tex @@ -64,6 +64,9 @@ The type of a string literal is changed from ``array of \tcode{char}'' to ``array of \tcode{const char}''. +The type of a UTF-8 string literal is changed +from ``array of \tcode{char}'' +to ``array of \tcode{const char8_t}''. The type of a \tcode{char16_t} string literal is changed from ``array of \textit{some-integer-type}'' to ``array of \tcode{const char16_t}''. @@ -1796,9 +1799,11 @@ to introduce constraints through a \grammarterm{requires-clause} or a \grammarterm{requires-expression}. The \tcode{concept} keyword is added to enable the definition of concepts\iref{temp.concept}. +The \tcode{char8_t} keyword is added to differentiate +the types of ordinary and UTF-8 literals\iref{lex.string}. \effect -Valid ISO \CppXVII{} code using \tcode{concept} or \tcode{requires} -as an identifier is not valid in this International Standard. +Valid ISO \CppXVII{} code using \tcode{concept}, \tcode{requires}, +or \tcode{char8_t} as an identifier is not valid in this International Standard. \diffref{lex.operators} \change New operator \tcode{<=>}. @@ -1815,6 +1820,34 @@ } \end{codeblock} +\diffref{lex.literal} +\change Type of UTF-8 string and character literals. +\rationale Required for new features. +The changed types enable function overloading, template specialization, and +type deduction to distinguish ordinary and UTF-8 string and character literals. +\effect Valid ISO \CppXVII{} code that depends on +UTF-8 string literals having type ``array of \tcode{const char}'' and +UTF-8 character literals having type ``char'' +is not valid in this International Standard. 
+\begin{codeblock} +const auto *u8s = u8"text"; // \tcode{u8s} previously deduced as \tcode{const char*}; now deduced as \tcode{const char8_t*} +const char *ps = u8s; // ill-formed; previously well-formed + +auto u8c = u8'c'; // \tcode{u8c} previously deduced as \tcode{char}; now deduced as \tcode{char8_t} +char *pc = &u8c; // ill-formed; previously well-formed + +std::string s = u8"text"; // ill-formed; previously well-formed + +void f(const char *s); +f(u8"text"); // ill-formed; previously well-formed + +template<class> struct ct; +template<> struct ct<char> { + using type = char; +}; +ct<decltype(u8'c')>::type x; // ill-formed; previously well-formed. +\end{codeblock} + \rSec2[diff.cpp17.basic]{\ref{basic}: basics} \diffref{intro.races} @@ -2031,6 +2064,39 @@ Translation units compiled against this version of \Cpp{} may be incompatible with translation units compiled against \CppXVII{}, either failing to link or having undefined behavior. +\rSec2[diff.cpp17.input.output]{\ref{input.output}: input/output library} + +\diffref{ostream.inserters.character} +\change +Overload resolution for ostream inserters used with UTF-8 literals. +\rationale +Required for new features. +\effect +Valid ISO \CppXVII{} code that passes UTF-8 literals +to \tcode{basic_ostream::\brk{}operator<<} +no longer calls character-related overloads. +\begin{codeblock} +std::cout << u8"text"; // previously called \tcode{operator<<(const char*)} and printed a string; + // now calls \tcode{operator<<(const void*)} and prints a pointer value +std::cout << u8'X'; // previously called \tcode{operator<<(char)} and printed a character; + // now calls \tcode{operator<<(int)} and prints an integer value +\end{codeblock} + +\diffref{fs.class.path} +\change +Return type of filesystem path format observer member functions. +\rationale +Required for new features. 
+\effect +Valid ISO \CppXVII{} code that depends on the \tcode{u8string()} and +\tcode{generic_u8string()} member functions of \tcode{std::filesystem::path} +returning \tcode{std::string} is not valid in this International Standard. +\begin{codeblock} +std::filesystem::path p; +std::string s1 = p.u8string(); // ill-formed; previously well-formed +std::string s2 = p.generic_u8string(); // ill-formed; previously well-formed +\end{codeblock} + \rSec2[diff.cpp17.depr]{\ref{depr}: compatibility features} \nodiffref diff --git a/source/declarations.tex b/source/declarations.tex index 5b7ce22b8a..db307461b6 100644 --- a/source/declarations.tex +++ b/source/declarations.tex @@ -1247,6 +1247,7 @@ nested-name-specifier \terminal{template} simple-template-id\br \opt{nested-name-specifier} template-name\br \terminal{char}\br + \terminal{char8_t}\br \terminal{char16_t}\br \terminal{char32_t}\br \terminal{wchar_t}\br @@ -1278,6 +1279,7 @@ \pnum \indextext{type specifier!\idxcode{char}}% +\indextext{type specifier!\idxcode{char8_t}}% \indextext{type specifier!\idxcode{char16_t}}% \indextext{type specifier!\idxcode{char32_t}}% \indextext{type specifier!\idxcode{wchar_t}}% @@ -1326,6 +1328,7 @@ \tcode{char} & ``\tcode{char}'' \\ \tcode{unsigned char} & ``\tcode{unsigned char}'' \\ \tcode{signed char} & ``\tcode{signed char}'' \\ +\tcode{char8_t} & ``\tcode{char8_t}'' \\ \tcode{char16_t} & ``\tcode{char16_t}'' \\ \tcode{char32_t} & ``\tcode{char32_t}'' \\ \tcode{bool} & ``\tcode{bool}'' \\ @@ -4170,7 +4173,7 @@ \begin{itemize} \item If an indeterminate value of -unsigned narrow character type\iref{basic.fundamental} +unsigned ordinary character type\iref{basic.fundamental} or \tcode{std::byte} type\iref{cstddef.syn} is produced by the evaluation of: \begin{itemize} @@ -4178,7 +4181,7 @@ \item the right operand of a comma expression\iref{expr.comma}, \item the operand of a cast or conversion~(\ref{conv.integral}, \ref{expr.type.conv}, \ref{expr.static.cast}, \ref{expr.cast}) to an 
-unsigned narrow character type +unsigned ordinary character type or \tcode{std::byte} type\iref{cstddef.syn}, or \item a discarded-value expression\iref{expr.prop}, \end{itemize} @@ -4186,25 +4189,25 @@ \item If an indeterminate value of -unsigned narrow character type +unsigned ordinary character type or \tcode{std::byte} type is produced by the evaluation of the right operand of a simple assignment operator\iref{expr.ass} whose first operand is an lvalue of -unsigned narrow character type +unsigned ordinary character type or \tcode{std::byte} type, an indeterminate value replaces the value of the object referred to by the left operand. \item -If an indeterminate value of unsigned narrow character type is produced by the +If an indeterminate value of unsigned ordinary character type is produced by the evaluation of the initialization expression when initializing an object of -unsigned narrow character type, that object is initialized to an indeterminate +unsigned ordinary character type, that object is initialized to an indeterminate value. \item If an indeterminate value of -unsigned narrow character type +unsigned ordinary character type or \tcode{std::byte} type is produced by the evaluation of the initialization expression when initializing an object of @@ -4292,6 +4295,7 @@ If the destination type is a reference type, see~\ref{dcl.init.ref}. 
\item If the destination type is an array of characters, +an array of \tcode{char8_t}, an array of \tcode{char16_t}, an array of \tcode{char32_t}, or an array of @@ -4986,13 +4990,17 @@ \indextext{initialization!character array} \pnum -An array of narrow character type\iref{basic.fundamental}, +An array of ordinary character type\iref{basic.fundamental}, +\tcode{char8_t} array, \tcode{char16_t} array, \tcode{char32_t} array, or \tcode{wchar_t} array -can be initialized by a -narrow string literal, \tcode{char16_t} string literal, \tcode{char32_t} string -literal, or wide string literal, +can be initialized by +an ordinary string literal, +\tcode{char8_t} string literal, +\tcode{char16_t} string literal, +\tcode{char32_t} string literal, or +wide string literal, respectively, or by an appropriately-typed string literal enclosed in braces\iref{lex.string}. \indextext{initialization!character array}% diff --git a/source/expressions.tex b/source/expressions.tex index 4c1e192daf..7e9a29bbb9 100644 --- a/source/expressions.tex +++ b/source/expressions.tex @@ -1086,7 +1086,7 @@ converted to \tcode{float}. \item Otherwise, the integral promotions\iref{conv.prom} shall be -performed on both operands.\footnote{As a consequence, operands of type \tcode{bool}, \tcode{char16_t}, +performed on both operands.\footnote{As a consequence, operands of type \tcode{bool}, \tcode{char8_t}, \tcode{char16_t}, \tcode{char32_t}, \tcode{wchar_t}, or an enumerated type are converted to some integral type.} Then the following rules shall be applied to the promoted operands: @@ -4267,8 +4267,9 @@ has function or incomplete type, to the parenthesized name of such types, or to a glvalue that designates a bit-field. -\tcode{sizeof(char)}, \tcode{sizeof(signed char)} and -\tcode{sizeof(unsigned char)} are \tcode{1}. The result of +The result of \tcode{sizeof} +applied to any of the narrow character types is \tcode{1}. 
+The result of \tcode{sizeof} applied to any other fundamental type\iref{basic.fundamental} is \impldef{\tcode{sizeof} applied to fundamental types diff --git a/source/future.tex b/source/future.tex index 48744a49e9..4c8057352b 100644 --- a/source/future.tex +++ b/source/future.tex @@ -2309,3 +2309,97 @@ \pnum \effects The destructor shall delete \tcode{cvtptr}. \end{itemdescr} + +\rSec1[depr.locale.category]{Deprecated locale category facets} + +\pnum +The \tcode{ctype} locale category includes the following facets +as if they were specified +in \tref{localization.category.facets} of \ref{locale.category}. + +\begin{tcode} +codecvt<char16_t, char, mbstate_t> +codecvt<char32_t, char, mbstate_t> +\end{tcode} + +\pnum +The \tcode{ctype} locale category includes the following facets +as if they were specified +in \tref{localization.required.specializations} of \ref{locale.category}. + +\begin{tcode} +codecvt_byname<char16_t, char, mbstate_t> +codecvt_byname<char32_t, char, mbstate_t> +\end{tcode} + +\pnum +The following class template specializations are required +in addition to those specified in \ref{locale.codecvt}. +The specialization \tcode{codecvt<char16_t, char, mbstate_t>} +converts between the UTF-16 and UTF-8 encoding forms, and +the specialization \tcode{codecvt<char32_t, char, mbstate_t>} +converts between the UTF-32 and UTF-8 encoding forms. + +\rSec1[depr.fs.path.factory]{Deprecated filesystem path factory functions} + +\indexlibrary{\idxcode{u8path}}% +\begin{itemdecl} +template<class Source> + path u8path(const Source& source); +template<class InputIterator> + path u8path(InputIterator first, InputIterator last); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\requires The \tcode{source} and \range{first}{last} + sequences are UTF-8 encoded. The value type of \tcode{Source} + and \tcode{InputIterator} is \tcode{char}. + \tcode{Source} meets the requirements specified in \ref{fs.path.req}. 
+ +\pnum +\returns +\begin{itemize} +\item If \tcode{value_type} is \tcode{char} and the current native + narrow encoding\iref{fs.path.type.cvt} is UTF-8, + return \tcode{path(source)} or \tcode{path(first, last)}; + otherwise, +\item if \tcode{value_type} is \tcode{wchar_t} and the + native wide encoding is UTF-16, or + if \tcode{value_type} is \tcode{char16_t} or \tcode{char32_t}, + convert \tcode{source} or \range{first}{last} + to a temporary, \tcode{tmp}, of type \tcode{string_type} and + return \tcode{path(tmp)}; + otherwise, +\item convert \tcode{source} or \range{first}{last} + to a temporary, \tcode{tmp}, of type \tcode{u32string} and + return \tcode{path(tmp)}. +\end{itemize} + +\pnum +\remarks Argument format conversion\iref{fs.path.fmt.cvt} applies to the + arguments for these functions. How Unicode encoding conversions are performed is + unspecified. + +\pnum +\begin{example} +A string is to be read from a database that is encoded in UTF-8, and used + to create a directory using the native encoding for filenames: +\begin{codeblock} +namespace fs = std::filesystem; +std::string utf8_string = read_utf8_data(); +fs::create_directory(fs::u8path(utf8_string)); +\end{codeblock} + +For POSIX-based operating systems with the native narrow encoding set + to UTF-8, no encoding or type conversion occurs. + +For POSIX-based operating systems with the native narrow encoding not + set to UTF-8, a conversion to UTF-32 occurs, followed by a conversion to the + current native narrow encoding. Some Unicode characters may have no native character + set representation. + +For Windows-based operating systems a conversion from UTF-8 to + UTF-16 occurs. 
+\end{example} +\end{itemdescr} diff --git a/source/iostreams.tex b/source/iostreams.tex index c2ac94cbb1..f7de0bff78 100644 --- a/source/iostreams.tex +++ b/source/iostreams.tex @@ -166,6 +166,7 @@ namespace std { template<class charT> class char_traits; template<> class char_traits<char>; + template<> class char_traits<char8_t>; template<> class char_traits<char16_t>; template<> class char_traits<char32_t>; template<> class char_traits<wchar_t>; @@ -259,6 +260,7 @@ template<class state> class fpos; using streampos = fpos<char_traits<char>::state_type>; using wstreampos = fpos<char_traits<wchar_t>::state_type>; + using u8streampos = fpos<char_traits<char8_t>::state_type>; } \end{codeblock} @@ -10762,7 +10764,7 @@ \rSec2[fs.req]{Requirements} \pnum -Throughout this subclause, \tcode{char}, \tcode{wchar_t}, +Throughout this subclause, \tcode{char}, \tcode{wchar_t}, \tcode{char8_t}, \tcode{char16_t}, and \tcode{char32_t} are collectively called \defnx{encoded character types}{encoded character type}. @@ -10814,12 +10816,6 @@ path operator/ (const path& lhs, const path& rhs); - // \ref{fs.path.factory}, \tcode{path} factory functions - template<class Source> - path u8path(const Source& source); - template<class InputIterator> - path u8path(InputIterator first, InputIterator last); - // \ref{fs.class.filesystem_error}, filesystem errors class filesystem_error; @@ -11205,7 +11201,7 @@ string(const Allocator& a = Allocator()) const; std::string string() const; std::wstring wstring() const; - std::string u8string() const; + std::u8string u8string() const; std::u16string u16string() const; std::u32string u32string() const; @@ -11216,7 +11212,7 @@ generic_string(const Allocator& a = Allocator()) const; std::string generic_string() const; std::wstring generic_wstring() const; - std::string generic_u8string() const; + std::u8string generic_u8string() const; std::u16string generic_u16string() const; std::u32string generic_u32string() const; @@ -11507,7 +11503,7 @@ \rSec4[fs.path.type.cvt]{Type and encoding conversions} \pnum -The \defn{native encoding} of a narrow character string is +The \defn{native encoding} of an ordinary character 
string is the operating system dependent current encoding for pathnames\iref{fs.class.path}. The \defn{native encoding} for wide character strings is @@ -11523,18 +11519,18 @@ to be converted to is determined by its value type: \begin{itemize} -\item \tcode{char}: The encoding is the native narrow encoding. +\item \tcode{char}: The encoding is the native ordinary encoding. The method of conversion, if any, is operating system dependent. \begin{note} For POSIX-based operating systems \tcode{path::value_type} is \tcode{char} so no conversion from \tcode{char} value type arguments or to \tcode{char} value type return values is performed. For Windows-based operating systems, the -native narrow encoding is determined by calling a Windows API function. +native ordinary encoding is determined by calling a Windows API function. \end{note} \begin{note} This results in behavior identical to other C and \Cpp{} -standard library functions that perform file operations using narrow character +standard library functions that perform file operations using ordinary character strings to identify paths. Changing this behavior would be surprising and error prone. \end{note} @@ -11545,6 +11541,8 @@ so no conversion from \tcode{wchar_t} value type arguments or to \tcode{wchar_t} value type return values is performed. \end{note} +\item \tcode{char8_t}: The encoding is UTF-8. The method of conversion +is unspecified. \item \tcode{char16_t}: The encoding is UTF-16. The method of conversion is unspecified. \item \tcode{char32_t}: The encoding is UTF-32. The method of conversion @@ -11698,7 +11696,7 @@ \item Otherwise a conversion is performed using the \tcode{codecvt} facet of \tcode{loc}, and then a second -conversion to the current narrow encoding. +conversion to the current ordinary encoding. 
\end{itemize} \pnum @@ -11720,12 +11718,12 @@ \tcode{latin1_facet} to convert ISO/IEC 8859-1 encoded \tcode{latin1_string} to a wide character string in the native wide encoding\iref{fs.path.type.cvt}. The resulting wide string is then -converted to a narrow character -pathname string in the current native narrow encoding. If the -native wide encoding is UTF-16 or UTF-32, and the current native narrow +converted to an ordinary character +pathname string in the current native ordinary encoding. If the +native wide encoding is UTF-16 or UTF-32, and the current native ordinary encoding is UTF-8, all of the characters in the ISO/IEC 8859-1 character set will be converted to their Unicode representation, but for other native -narrow encodings some characters may have no representation. +ordinary encodings some characters may have no representation. For Windows-based operating systems, the path is constructed by using \tcode{latin1_facet} to convert ISO/IEC 8859-1 encoded @@ -12138,7 +12136,7 @@ \begin{itemdecl} std::string string() const; std::wstring wstring() const; -std::string u8string() const; +std::u8string u8string() const; std::u16string u16string() const; std::u32string u32string() const; \end{itemdecl} @@ -12150,7 +12148,6 @@ \pnum \remarks Conversion, if any, is performed as specified by \ref{fs.path.cvt}. -The encoding of the string returned by \tcode{u8string()} is always UTF-8. \end{itemdescr} @@ -12196,7 +12193,7 @@ \begin{itemdecl} std::string generic_string() const; std::wstring generic_wstring() const; -std::string generic_u8string() const; +std::u8string generic_u8string() const; std::u16string generic_u16string() const; std::u32string generic_u32string() const; \end{itemdecl} @@ -12207,8 +12204,6 @@ \pnum \remarks Conversion, if any, is specified by~\ref{fs.path.cvt}. -The encoding of the string returned by \tcode{generic_u8string()} is always -UTF-8. 
\end{itemdescr} \rSec4[fs.path.compare]{Compare} @@ -12842,70 +12837,6 @@ \effects Equivalent to: \tcode{return path(lhs) /= rhs;} \end{itemdescr} -\rSec4[fs.path.factory]{Factory functions} - -\indexlibrary{\idxcode{u8path}}% -\begin{itemdecl} -template<class Source> - path u8path(const Source& source); -template<class InputIterator> - path u8path(InputIterator first, InputIterator last); -\end{itemdecl} - -\begin{itemdescr} -\pnum -\requires The \tcode{source} and \range{first}{last} - sequences are UTF-8 encoded. The value type of \tcode{Source} - and \tcode{InputIterator} is \tcode{char}. - -\pnum -\returns -\begin{itemize} -\item If \tcode{value_type} is \tcode{char} and the current native - narrow encoding\iref{fs.path.type.cvt} is UTF-8, - return \tcode{path(source)} or \tcode{path(first, last)}; - otherwise, -\item if \tcode{value_type} is \tcode{wchar_t} and the - native wide encoding is UTF-16, or - if \tcode{value_type} is \tcode{char16_t} or \tcode{char32_t}, - convert \tcode{source} or \range{first}{last} - to a temporary, \tcode{tmp}, of type \tcode{string_type} and - return \tcode{path(tmp)}; - otherwise, -\item convert \tcode{source} or \range{first}{last} - to a temporary, \tcode{tmp}, of type \tcode{u32string} and - return \tcode{path(tmp)}. -\end{itemize} - -\pnum -\remarks Argument format conversion\iref{fs.path.fmt.cvt} applies to the - arguments for these functions. How Unicode encoding conversions are performed is - unspecified. - -\pnum -\begin{example} -A string is to be read from a database that is encoded in UTF-8, and used - to create a directory using the native encoding for filenames: -\begin{codeblock} -namespace fs = std::filesystem; -std::string utf8_string = read_utf8_data(); -fs::create_directory(fs::u8path(utf8_string)); -\end{codeblock} - -For POSIX-based operating systems with the native narrow encoding set - to UTF-8, no encoding or type conversion occurs. 
- -For POSIX-based operating systems with the native narrow encoding not - set to UTF-8, a conversion to UTF-32 occurs, followed by a conversion to the - current native narrow encoding. Some Unicode characters may have no native character - set representation. - -For Windows-based operating systems a conversion from UTF-8 to - UTF-16 occurs. -\end{example} -\end{itemdescr} - - \rSec2[fs.class.filesystem_error]{Class \tcode{filesystem_error}} \indexlibrary{\idxcode{filesystem_error}}% diff --git a/source/lex.tex b/source/lex.tex index a6e35a6633..16478475c7 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -670,101 +670,89 @@ as keywords (that is, they are unconditionally treated as keywords in phase 7) except in an \grammarterm{attribute-token}\iref{dcl.attr.grammar}: -\begin{floattable}{Keywords}{tab:keywords} +\begin{multicolfloattable}{Keywords}{tab:keywords} {lllll} -\topline - -\tcode{alignas} & -\tcode{const_cast} & -\tcode{for} & -\tcode{public} & -\tcode{thread_local} \\ - -\tcode{alignof} & -\tcode{continue} & -\tcode{friend} & -\tcode{register} & -\tcode{throw} \\ - -\tcode{asm} & -\tcode{decltype} & -\tcode{goto} & -\tcode{reinterpret_cast} & -\tcode{true} \\ - -\tcode{auto} & -\tcode{default} & -\tcode{if} & -\tcode{requires} & -\tcode{try} \\ - -\tcode{bool} & -\tcode{delete} & -\tcode{inline} & -\tcode{return} & -\tcode{typedef} \\ - -\tcode{break} & -\tcode{do} & -\tcode{int} & -\tcode{short} & -\tcode{typeid} \\ - -\tcode{case} & -\tcode{double} & -\tcode{long} & -\tcode{signed} & -\tcode{typename} \\ - -\tcode{catch} & -\tcode{dynamic_cast} & -\tcode{mutable} & -\tcode{sizeof} & -\tcode{union} \\ - -\tcode{char} & -\tcode{else} & -\tcode{namespace} & -\tcode{static} & -\tcode{unsigned} \\ - -\tcode{char16_t} & -\tcode{enum} & -\tcode{new} & -\tcode{static_assert} & -\tcode{using} \\ - -\tcode{char32_t} & -\tcode{explicit} & -\tcode{noexcept} & -\tcode{static_cast} & -\tcode{virtual} \\ - -\tcode{class} & -\tcode{export} & -\tcode{nullptr} & 
-\tcode{struct} & -\tcode{void} \\ - -\tcode{concept} & -\tcode{extern} & -\tcode{operator} & -\tcode{switch} & -\tcode{volatile} \\ - -\tcode{const} & -\tcode{false} & -\tcode{private} & -\tcode{template} & -\tcode{wchar_t} \\ - -\tcode{constexpr} & -\tcode{float} & -\tcode{protected} & -\tcode{this} & -\tcode{while} \\ - -\end{floattable} +\tcode{alignas} \\ +\tcode{alignof} \\ +\tcode{asm} \\ +\tcode{auto} \\ +\tcode{bool} \\ +\tcode{break} \\ +\tcode{case} \\ +\tcode{catch} \\ +\tcode{char} \\ +\tcode{char8_t} \\ +\tcode{char16_t} \\ +\tcode{char32_t} \\ +\tcode{class} \\ +\tcode{concept} \\ +\tcode{const} \\ +\tcode{constexpr} \\ +\columnbreak +\tcode{const_cast} \\ +\tcode{continue} \\ +\tcode{decltype} \\ +\tcode{default} \\ +\tcode{delete} \\ +\tcode{double} \\ +\tcode{do} \\ +\tcode{dynamic_cast} \\ +\tcode{else} \\ +\tcode{enum} \\ +\tcode{explicit} \\ +\tcode{export} \\ +\tcode{extern} \\ +\tcode{false} \\ +\tcode{float} \\ +\tcode{for} \\ +\columnbreak +\tcode{friend} \\ +\tcode{goto} \\ +\tcode{if} \\ +\tcode{inline} \\ +\tcode{int} \\ +\tcode{long} \\ +\tcode{mutable} \\ +\tcode{namespace} \\ +\tcode{new} \\ +\tcode{noexcept} \\ +\tcode{nullptr} \\ +\tcode{operator} \\ +\tcode{private} \\ +\tcode{protected} \\ +\tcode{public} \\ +\tcode{register} \\ +\columnbreak +\tcode{reinterpret_cast} \\ +\tcode{requires} \\ +\tcode{return} \\ +\tcode{short} \\ +\tcode{signed} \\ +\tcode{sizeof} \\ +\tcode{static} \\ +\tcode{static_assert} \\ +\tcode{static_cast} \\ +\tcode{struct} \\ +\tcode{switch} \\ +\tcode{template} \\ +\tcode{this} \\ +\tcode{thread_local} \\ +\tcode{throw} \\ +\tcode{true} \\ +\columnbreak +\tcode{try} \\ +\tcode{typedef} \\ +\tcode{typeid} \\ +\tcode{typename} \\ +\tcode{union} \\ +\tcode{unsigned} \\ +\tcode{using} \\ +\tcode{virtual} \\ +\tcode{void} \\ +\tcode{volatile} \\ +\tcode{wchar_t} \\ +\tcode{while} \\ +\end{multicolfloattable} \begin{note} The \tcode{export} and \tcode{register} keywords are unused but are reserved for future 
use.\end{note} @@ -1138,7 +1126,7 @@ A character literal that begins with \tcode{u8}, such as \tcode{u8'w'}, \indextext{prefix!\idxcode{u8}}% -is a character literal of type \tcode{char}, +is a character literal of type \tcode{char8_t}, known as a \defn{UTF-8 character literal}. The value of a UTF-8 character literal is equal to its ISO/IEC 10646 code point value, @@ -1528,28 +1516,30 @@ \pnum \indextext{string!type of}% \indextext{literal!string!narrow}% -After translation phase 6, a \grammarterm{string-literal} that does not begin with an \grammarterm{encoding-prefix} is an -\defn{ordinary string literal}, and is initialized with the given characters. +After translation phase 6, a \grammarterm{string-literal} +that does not begin with an \grammarterm{encoding-prefix} is an +\defn{ordinary string literal}. +An ordinary string literal +has type ``array of \placeholder{n} \tcode{const char}'' +where \placeholder{n} is the size of the string as defined below, +has static storage duration\iref{basic.stc}, and +is initialized with the given characters. \pnum \indextext{literal!string!UTF-8}% A \grammarterm{string-literal} that begins with \tcode{u8}, \indextext{prefix!\idxcode{u8}}% -such as \tcode{u8"asdf"}, is a \defn{UTF-8 string literal}. +such as \tcode{u8"asdf"}, is a \defn{UTF-8 string literal}, +also referred to as a \tcode{char8_t} string literal. +A \tcode{char8_t} string literal +has type ``array of \placeholder{n} \tcode{const char8_t}'', +where \placeholder{n} is the size of the string as defined below; +each successive element of the object representation\iref{basic.types} has +the value of the corresponding code unit of the UTF-8 encoding of the string. \pnum Ordinary string literals and UTF-8 string literals are -also referred to as narrow -string literals. 
A narrow string literal has type -\indextext{literal!string!type of}% -``array of \placeholder{n} \tcode{const char}'', where \placeholder{n} is the size of -the string as defined below, and has static storage -duration\iref{basic.stc}. - -\pnum -For a UTF-8 string literal, each successive element of the object -representation\iref{basic.types} has the value of the corresponding -code unit of the UTF-8 encoding of the string. +also referred to as narrow string literals. \pnum \indextext{literal!string!\idxcode{char16_t}}% @@ -1655,7 +1645,7 @@ \tcode{char16_t} string literal may yield a surrogate pair. \indextext{string!\idxcode{sizeof}}% In a narrow string literal, a \grammarterm{universal-character-name} may map to more -than one \tcode{char} element due to \defnadj{multibyte}{encoding}. The +than one \tcode{char} or \tcode{char8_t} element due to \defnadj{multibyte}{encoding}. The size of a \tcode{char32_t} or wide string literal is the total number of escape sequences, \grammarterm{universal-character-name}{s}, and other characters, plus one for the terminating \tcode{U'\textbackslash 0'} or diff --git a/source/lib-intro.tex b/source/lib-intro.tex index 33b4170713..e335122a07 100644 --- a/source/lib-intro.tex +++ b/source/lib-intro.tex @@ -64,14 +64,11 @@ \pnum The strings library\iref{strings} provides support for manipulating text represented -as sequences of type -\tcode{char}, -sequences of type -\tcode{char16_t}, -sequences of type -\tcode{char32_t}, -sequences of type -\tcode{wchar_t}, +as sequences of type \tcode{char}, +sequences of type \tcode{char8_t}, +sequences of type \tcode{char16_t}, +sequences of type \tcode{char32_t}, +sequences of type \tcode{wchar_t}, and sequences of any other character-like type. 
\pnum @@ -157,6 +154,7 @@ \begin{defnote} The term does not mean only \tcode{char}, +\tcode{char8_t}, \tcode{char16_t}, \tcode{char32_t}, and diff --git a/source/locales.tex b/source/locales.tex index 3acd4d086f..da9cff27cd 100644 --- a/source/locales.tex +++ b/source/locales.tex @@ -349,8 +349,8 @@ collate & \tcode{collate<char>}, \tcode{collate<wchar_t>} \\ \rowsep ctype & \tcode{ctype<char>}, \tcode{ctype<wchar_t>} \\ & \tcode{codecvt<char, char, mbstate_t>} \\ - & \tcode{codecvt<char16_t, char, mbstate_t>} \\ - & \tcode{codecvt<char32_t, char, mbstate_t>} \\ + & \tcode{codecvt<char16_t, char8_t, mbstate_t>} \\ + & \tcode{codecvt<char32_t, char8_t, mbstate_t>} \\ & \tcode{codecvt<wchar_t, char, mbstate_t>} \\ \rowsep monetary & \tcode{moneypunct<char>}, \tcode{moneypunct<wchar_t>} \\ & \tcode{moneypunct<char, true>}, \tcode{moneypunct<wchar_t, true>} \\ @@ -390,8 +390,8 @@ collate & \tcode{collate_byname<char>}, \tcode{collate_byname<wchar_t>} \\ \rowsep ctype & \tcode{ctype_byname<char>}, \tcode{ctype_byname<wchar_t>} \\ & \tcode{codecvt_byname<char, char, mbstate_t>} \\ - & \tcode{codecvt_byname<char16_t, char, mbstate_t>} \\ - & \tcode{codecvt_byname<char32_t, char, mbstate_t>} \\ + & \tcode{codecvt_byname<char16_t, char8_t, mbstate_t>} \\ + & \tcode{codecvt_byname<char32_t, char8_t, mbstate_t>} \\ & \tcode{codecvt_byname<wchar_t, char, mbstate_t>} \\ \rowsep monetary & \tcode{moneypunct_byname<char, International>} \\ & \tcode{moneypunct_byname<wchar_t, International>} \\ @@ -1900,12 +1900,12 @@ \tcode{codecvt<char, char, mbstate_t>} implements a degenerate conversion; it does not convert at all. -The specialization \tcode{codecvt<char16_t, char, mbstate_t>} +The specialization \tcode{codecvt<char16_t, char8_t, mbstate_t>} converts between the UTF-16 and UTF-8 encoding forms, and -the specialization \tcode{codecvt} \tcode{<char32_t, char, mbstate_t>} +the specialization \tcode{codecvt} \tcode{<char32_t, char8_t, mbstate_t>} converts between the UTF-32 and UTF-8 encoding forms. \tcode{codecvt<wchar_t, char, mbstate_t>} -converts between the native character sets for narrow and wide characters. +converts between the native character sets for ordinary and wide characters. Specializations on \tcode{mbstate_t} perform conversion between encodings known to the library implementer.
diff --git a/source/overloading.tex b/source/overloading.tex index c7c685190d..0d86f265f0 100644 --- a/source/overloading.tex +++ b/source/overloading.tex @@ -3536,10 +3536,12 @@ long double char wchar_t +char8_t char16_t char32_t const char*, std::size_t const wchar_t*, std::size_t +const char8_t*, std::size_t const char16_t*, std::size_t const char32_t*, std::size_t \end{codeblock} diff --git a/source/preprocessor.tex b/source/preprocessor.tex index eba9c93b8a..6f0af8b19c 100644 --- a/source/preprocessor.tex +++ b/source/preprocessor.tex @@ -1476,6 +1476,7 @@ \defnxname{cpp_attributes} & \tcode{200809L} \\ \rowsep \defnxname{cpp_binary_literals} & \tcode{201304L} \\ \rowsep \defnxname{cpp_capture_star_this} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_char8_t} & \tcode{201811L} \\ \rowsep \defnxname{cpp_constexpr} & \tcode{201603L} \\ \rowsep \defnxname{cpp_decltype} & \tcode{200707L} \\ \rowsep \defnxname{cpp_decltype_auto} & \tcode{201304L} \\ \rowsep diff --git a/source/strings.tex b/source/strings.tex index af28028e55..6691200d82 100644 --- a/source/strings.tex +++ b/source/strings.tex @@ -36,9 +36,10 @@ \term{character traits}, and defines a class template \tcode{char_traits<charT>}, -along with four specializations, +along with five specializations, \tcode{char_traits<char>}, -\tcode{char_traits<char16_t>},\\ +\tcode{char_traits<char8_t>}, +\tcode{char_traits<char16_t>}, \tcode{char_traits<char32_t>}, and \tcode{char_traits<wchar_t>}, @@ -70,8 +71,9 @@ \pnum This subclause specifies a class template, \tcode{char_traits<charT>}, -and four explicit specializations of it, +and five explicit specializations of it, \tcode{char_traits<\brk{}char>}, +\tcode{char_traits<char8_t>}, \tcode{char_traits<char16_t>}, \tcode{char_traits<char32_t>}, and @@ -287,6 +289,7 @@ \begin{codeblock} namespace std { template<> struct char_traits<char>; + template<> struct char_traits<char8_t>; template<> struct char_traits<char16_t>; template<> struct char_traits<char32_t>; template<> struct char_traits<wchar_t>; @@ -296,10 +299,11 @@ \pnum The header \tcode{<string>} -shall define four +shall define five specializations of the
class template \tcode{char_traits}: \tcode{char_traits<\brk{}char>}, +\tcode{char_traits<char8_t>}, \tcode{char_traits<char16_t>}, \tcode{char_traits<char32_t>}, and @@ -392,6 +396,47 @@ shall return \tcode{EOF}. +\rSec3[char.traits.specializations.char8_t]{\tcode{struct char_traits<char8_t>}} + +\indexlibrary{\idxcode{char_traits}}% +\begin{codeblock} +namespace std { + template<> struct char_traits<char8_t> { + using char_type = char8_t; + using int_type = unsigned int; + using off_type = streamoff; + using pos_type = u8streampos; + using state_type = mbstate_t; + + static constexpr void assign(char_type& c1, const char_type& c2) noexcept; + static constexpr bool eq(char_type c1, char_type c2) noexcept; + static constexpr bool lt(char_type c1, char_type c2) noexcept; + + static constexpr int compare(const char_type* s1, const char_type* s2, size_t n); + static constexpr size_t length(const char_type* s); + static constexpr const char_type* find(const char_type* s, size_t n, + const char_type& a); + static char_type* move(char_type* s1, const char_type* s2, size_t n); + static char_type* copy(char_type* s1, const char_type* s2, size_t n); + static char_type* assign(char_type* s, size_t n, char_type a); + static constexpr int_type not_eof(int_type c) noexcept; + static constexpr char_type to_char_type(int_type c) noexcept; + static constexpr int_type to_int_type(char_type c) noexcept; + static constexpr bool eq_int_type(int_type c1, int_type c2) noexcept; + static constexpr int_type eof() noexcept; + }; +} +\end{codeblock} + +\pnum +The two-argument members \tcode{assign}, \tcode{eq}, and \tcode{lt} +are defined identically to +the built-in operators \tcode{=}, \tcode{==}, and \tcode{<} respectively. + +\pnum +The member \tcode{eof()} returns an implementation-defined constant +that cannot appear as a valid UTF-8 code unit.
+ \rSec3[char.traits.specializations.char16_t]{\tcode{struct char_traits<char16_t>}} \indexlibrary{\idxcode{char_traits}}% @@ -577,13 +622,15 @@ \pnum The header \tcode{<string>} defines the \tcode{basic_string} class template for manipulating -varying-length sequences of char-like objects and four +varying-length sequences of char-like objects and five \grammarterm{typedef-name}{s}, \tcode{string}, +\tcode{u8string}, \tcode{u16string}, \tcode{u32string}, and \tcode{wstring}, that name the specializations \tcode{basic_string<char>}, +\tcode{basic_string<char8_t>}, \tcode{basic_string<char16_t>}, \tcode{basic_string<char32_t>}, and @@ -599,6 +646,7 @@ // \ref{char.traits}, character traits template<class charT> struct char_traits; template<> struct char_traits<char>; + template<> struct char_traits<char8_t>; template<> struct char_traits<char16_t>; template<> struct char_traits<char32_t>; template<> struct char_traits<wchar_t>; @@ -749,6 +797,7 @@ // \tcode{basic_string} typedef names using string = basic_string<char>; + using u8string = basic_string<char8_t>; using u16string = basic_string<char16_t>; using u32string = basic_string<char32_t>; using wstring = basic_string<wchar_t>; @@ -795,6 +844,7 @@ using basic_string = std::basic_string<charT, traits, polymorphic_allocator<charT>>; using string = basic_string<char>; + using u8string = basic_string<char8_t>; using u16string = basic_string<char16_t>; using u32string = basic_string<char32_t>; using wstring = basic_string<wchar_t>; @@ -803,10 +853,12 @@ // \ref{basic.string.hash}, hash support template<class T> struct hash; template<> struct hash<string>; + template<> struct hash<u8string>; template<> struct hash<u16string>; template<> struct hash<u32string>; template<> struct hash<wstring>; template<> struct hash<pmr::string>; + template<> struct hash<pmr::u8string>; template<> struct hash<pmr::u16string>; template<> struct hash<pmr::u32string>; template<> struct hash<pmr::wstring>; @@ -815,6 +867,7 @@ inline namespace string_literals { // \ref{basic.string.literals}, suffix for \tcode{basic_string} literals string operator""s(const char* str, size_t len); + u8string operator""s(const char8_t* str, size_t len); u16string operator""s(const char16_t* str, size_t len); u32string operator""s(const char32_t* str, size_t len); wstring operator""s(const wchar_t* str, size_t len); @@
-4718,10 +4771,12 @@ \indexlibrary{\idxcode{hash}!\idxcode{pmr::wstring}}% \begin{itemdecl} template<> struct hash<string>; +template<> struct hash<u8string>; template<> struct hash<u16string>; template<> struct hash<u32string>; template<> struct hash<wstring>; template<> struct hash<pmr::string>; +template<> struct hash<pmr::u8string>; template<> struct hash<pmr::u16string>; template<> struct hash<pmr::u32string>; template<> struct hash<pmr::wstring>; @@ -4748,6 +4803,16 @@ \tcode{string\{str, len\}}. \end{itemdescr} +\indexlibrarymember{operator""""s}{u8string}% +\begin{itemdecl} +u8string operator""s(const char8_t* str, size_t len); +\end{itemdecl} +\begin{itemdescr} +\pnum +\returns +\tcode{u8string\{str, len\}}. +\end{itemdescr} + \indexlibrarymember{operator""""s}{u16string}% \begin{itemdecl} u16string operator""s(const char16_t* str, size_t len); @@ -4832,6 +4897,7 @@ // \tcode{basic_string_view} typedef names using string_view = basic_string_view<char>; + using u8string_view = basic_string_view<char8_t>; using u16string_view = basic_string_view<char16_t>; using u32string_view = basic_string_view<char32_t>; using wstring_view = basic_string_view<wchar_t>; @@ -4839,6 +4905,7 @@ // \ref{string.view.hash}, hash support template<class T> struct hash; template<> struct hash<string_view>; + template<> struct hash<u8string_view>; template<> struct hash<u16string_view>; template<> struct hash<u32string_view>; template<> struct hash<wstring_view>; @@ -4847,6 +4914,7 @@ inline namespace string_view_literals { // \ref{string.view.literals}, suffix for \tcode{basic_string_view} literals constexpr string_view operator""sv(const char* str, size_t len) noexcept; + constexpr u8string_view operator""sv(const char8_t* str, size_t len) noexcept; constexpr u16string_view operator""sv(const char16_t* str, size_t len) noexcept; constexpr u32string_view operator""sv(const char32_t* str, size_t len) noexcept; constexpr wstring_view operator""sv(const wchar_t* str, size_t len) noexcept; @@ -5870,11 +5938,13 @@ \rSec2[string.view.hash]{Hash support} \indexlibrary{\idxcode{hash}!\idxcode{string_view}}% +\indexlibrary{\idxcode{hash}!\idxcode{u8string_view}}% \indexlibrary{\idxcode{hash}!\idxcode{u16string_view}}%
\indexlibrary{\idxcode{hash}!\idxcode{u32string_view}}% \indexlibrary{\idxcode{hash}!\idxcode{wstring_view}}% \begin{itemdecl} template<> struct hash<string_view>; +template<> struct hash<u8string_view>; template<> struct hash<u16string_view>; template<> struct hash<u32string_view>; template<> struct hash<wstring_view>; @@ -5902,6 +5972,16 @@ \tcode{string_view\{str, len\}}. \end{itemdescr} +\indexlibrarymember{operator""""sv}{u8string_view}% +\begin{itemdecl} +constexpr u8string_view operator""sv(const char8_t* str, size_t len) noexcept; +\end{itemdecl} +\begin{itemdescr} +\pnum +\returns +\tcode{u8string_view\{str, len\}}. +\end{itemdescr} + \indexlibrarymember{operator""""sv}{u16string_view}% \begin{itemdecl} constexpr u16string_view operator""sv(const char16_t* str, size_t len) noexcept; @@ -6298,6 +6378,8 @@ \indexhdr{cuchar}% \indexlibrary{\idxcode{mbstate_t}}% \indexlibrary{\idxcode{size_t}}% +\indexlibrary{\idxcode{mbrtoc8}}% +\indexlibrary{\idxcode{c8rtomb}}% \indexlibrary{\idxcode{mbrtoc16}}% \indexlibrary{\idxcode{c16rtomb}}% \indexlibrary{\idxcode{mbrtoc32}}% @@ -6307,6 +6389,8 @@ using mbstate_t = @\seebelow@; using size_t = @\textit{see \ref{support.types.layout}}@; + size_t mbrtoc8(char8_t* pc8, const char* s, size_t n, mbstate_t* ps); + size_t c8rtomb(char* s, char8_t c8, mbstate_t* ps); size_t mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps); size_t c16rtomb(char* s, char16_t c16, mbstate_t* ps); size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps); @@ -6318,8 +6402,9 @@ \indexhdr{uchar.h}% The contents and meaning of the header \tcode{<cuchar>} are the same as the C standard library header -\tcode{<uchar.h>}, except that it does not declare types \tcode{char16_t} nor -\tcode{char32_t}. +\tcode{<uchar.h>}, except that it +declares the additional \tcode{mbrtoc8} and \tcode{c8rtomb} functions +and does not declare types \tcode{char16_t} nor \tcode{char32_t}.
\xrefc{7.28} @@ -6329,7 +6414,8 @@ \indexhdr{cstdlib}% \indexhdr{cwchar}% \begin{note} -The headers \tcode{<cstdlib>}\iref{cstdlib.syn} +The headers \tcode{<cstdlib>}\iref{cstdlib.syn}, +\tcode{<cuchar>}\iref{cuchar.syn}, and \tcode{<cwchar>}\iref{cwchar.syn} declare the functions described in this subclause. \end{note} @@ -6402,3 +6488,92 @@ \end{itemdescr} \xrefc{7.29.6.3} + +\indexlibrary{\idxcode{mbrtoc8}}% +\begin{itemdecl} +size_t mbrtoc8(char8_t* pc8, const char* s, size_t n, mbstate_t* ps); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +If \tcode{s} is a null pointer, +equivalent to \tcode{mbrtoc8(nullptr, "", 1, ps)}. +Otherwise, the function inspects at most \tcode{n} bytes +beginning with the byte pointed to by \tcode{s} +to determine the number of bytes needed to complete +the next multibyte character (including any shift sequences). +If the function determines +that the next multibyte character is complete and valid, +it determines the values of the corresponding UTF-8 code units and then, +if \tcode{pc8} is not a null pointer, +stores the value of the first (or only) such code unit +in the object pointed to by \tcode{pc8}. +Subsequent calls will store successive UTF-8 code units +without consuming any additional input +until all the code units have been stored. +If the corresponding Unicode character is U+0000, +the resulting state described is the initial conversion state. + +\pnum +\returns +The first of the following that applies (given the current conversion state): +\begin{itemize} +\item \tcode{0}, if the next \tcode{n} or fewer bytes complete +the multibyte character +that corresponds to the U+0000 Unicode character +(which is the value stored). +\item between \tcode{1} and \tcode{n} inclusive, +if the next \tcode{n} or fewer bytes complete a valid multibyte character +(which is the value stored); +the value returned is the number of bytes that complete the multibyte character.
+\item \tcode{(size_t)(-3)}, if the next character +resulting from a previous call has been stored +(no bytes from the input have been consumed by this call). +\item \tcode{(size_t)(-2)}, if the next \tcode{n} bytes +contribute to an incomplete (but potentially valid) multibyte character, and +all \tcode{n} bytes have been processed (no value is stored). +\item \tcode{(size_t)(-1)}, if an encoding error occurs, +in which case the next \tcode{n} or fewer bytes do not contribute to +a complete and valid multibyte character (no value is stored); +the value of the macro \tcode{EILSEQ} is stored in \tcode{errno}, and +the conversion state is unspecified. +\end{itemize} +\end{itemdescr} + +\indexlibrary{\idxcode{c8rtomb}}% +\begin{itemdecl} +size_t c8rtomb(char* s, char8_t c8, mbstate_t* ps); +\end{itemdecl} + +\begin{itemdescr} +\pnum +\effects +If \tcode{s} is a null pointer, equivalent to +\tcode{c8rtomb(buf, u8'$\backslash$0', ps)} +where \tcode{buf} is an internal buffer. +Otherwise, if \tcode{c8} completes a sequence of valid UTF-8 code units, +determines the number of bytes needed +to represent the multibyte character (including any shift sequences), +and stores the multibyte character representation in the array +whose first element is pointed to by \tcode{s}. +At most \tcode{MB_CUR_MAX} bytes are stored. +If the multibyte character is a null character, a null byte is stored, +preceded by any shift sequence needed to restore the initial shift state; +the resulting state described is the initial conversion state. + +\pnum +\returns +The number of bytes stored in the array object (including any shift sequences). +If \tcode{c8} does not contribute to a sequence of \tcode{char8_t} +corresponding to a valid multibyte character, +the value of the macro \tcode{EILSEQ} is stored in \tcode{errno}, +\tcode{(size_t)(-1)} is returned, and the conversion state is unspecified.
+ +\pnum +\remarks +Calls to \tcode{c8rtomb} with a null pointer argument for \tcode{s} +may introduce a data race\iref{res.on.data.races} +with other calls to \tcode{c8rtomb} +with a null pointer argument for \tcode{s}. +\end{itemdescr} diff --git a/source/support.tex b/source/support.tex index 10c93c9777..926a18fb3a 100644 --- a/source/support.tex +++ b/source/support.tex @@ -561,6 +561,10 @@ \tcode{} \\ \rowsep \defnlibxname{cpp_lib_byte} & \tcode{201603L} & \tcode{<cstddef>} \\ \rowsep +\defnlibxname{cpp_lib_char8_t} & \tcode{201811L} & + \tcode{<atomic>} \tcode{<filesystem>} \tcode{<istream>} \tcode{<limits>} + \tcode{<locale>} \tcode{<ostream>} \tcode{<string>} \tcode{<string_view>} + \\ \rowsep \defnlibxname{cpp_lib_chrono} & \tcode{201611L} & \tcode{<chrono>} \\ \rowsep \defnlibxname{cpp_lib_clamp} & \tcode{201603L} & @@ -707,6 +711,7 @@ template<> class numeric_limits<char>; template<> class numeric_limits<signed char>; template<> class numeric_limits<unsigned char>; + template<> class numeric_limits<char8_t>; template<> class numeric_limits<char16_t>; template<> class numeric_limits<char32_t>; template<> class numeric_limits<wchar_t>;