Merge 2019-02 CWG Motion 6

zygoloid · web-flow · commit cad039d7a5a3 · 2019-03-08T17:51:21.000-08:00
P1041R4 Make char16_t/char32_t string literals be UTF-16/32. Fixes #2686.
diff --git a/source/compatibility.tex b/source/compatibility.tex
@@ -67,10 +67,10 @@
 The type of a UTF-8 string literal is changed
 from ``array of \tcode{char}''
 to ``array of \tcode{const char8_t}''.
-The type of a \tcode{char16_t} string literal is changed
+The type of a UTF-16 string literal is changed
 from ``array of \textit{some-integer-type}''
 to ``array of \tcode{const char16_t}''.
-The type of a \tcode{char32_t} string literal is changed
+The type of a UTF-32 string literal is changed
 from ``array of \textit{some-integer-type}''
 to ``array of \tcode{const char32_t}''.
 The type of a wide string literal is changed
diff --git a/source/declarations.tex b/source/declarations.tex
@@ -5081,9 +5081,9 @@
 or \tcode{wchar_t} array
 can be initialized by
 an ordinary string literal,
-\tcode{char8_t} string literal,
-\tcode{char16_t} string literal,
-\tcode{char32_t} string literal, or
+UTF-8 string literal,
+UTF-16 string literal,
+UTF-32 string literal, or
 wide string literal,
 respectively, or by an appropriately-typed string literal enclosed in
 braces\iref{lex.string}.
diff --git a/source/lex.tex b/source/lex.tex
@@ -1139,30 +1139,34 @@
 A UTF-8 character literal containing multiple \grammarterm{c-char}{s} is ill-formed.
 
 \pnum
-\indextext{literal!character!\tcode{char16_t}}%
-\indextext{char16_t character@\tcode{char16_t} character}%
+\indextext{literal!character!UTF-16}%
 \indextext{type!\idxcode{char16_t}}%
 A character literal that
 begins with the letter \tcode{u}, such as \tcode{u'x'},
 \indextext{prefix!\idxcode{u}}%
-is a character literal of type \tcode{char16_t}. The value
-of a \tcode{char16_t} character literal containing a single \grammarterm{c-char} is
+is a character literal of type \tcode{char16_t},
+known as a \defn{UTF-16 character literal}.
+The value
+of a UTF-16 character literal containing a single \grammarterm{c-char} is
 equal to its ISO/IEC 10646 code point value, provided that the code point value is
 representable with a single 16-bit code unit (that is, provided it is in the
 basic multi-lingual plane). If the value is not representable
-with a single 16-bit code unit, the program is ill-formed. A \tcode{char16_t} character literal
+with a single 16-bit code unit, the program is ill-formed.
+A UTF-16 character literal
 containing multiple \grammarterm{c-char}{s} is ill-formed.
 
 \pnum
-\indextext{literal!character!\tcode{char32_t}}%
-\indextext{char32_t character@\tcode{char32_t} character}%
+\indextext{literal!character!UTF-32}%
 \indextext{type!\idxcode{char32_t}}%
 A character literal that
 begins with the letter \tcode{U}, such as \tcode{U'y'},
 \indextext{prefix!\idxcode{U}}%
-is a character literal of type \tcode{char32_t}. The value of a
-\tcode{char32_t} character literal containing a single \grammarterm{c-char} is equal
-to its ISO/IEC 10646 code point value. A \tcode{char32_t} character literal containing
+is a character literal of type \tcode{char32_t},
+known as a \defn{UTF-32 character literal}.
+The value of a
+UTF-32 character literal containing a single \grammarterm{c-char} is equal
+to its ISO/IEC 10646 code point value.
+A UTF-32 character literal containing
 multiple \grammarterm{c-char}{s} is ill-formed.
 
 \pnum
@@ -1530,9 +1534,8 @@
 \indextext{literal!string!UTF-8}%
 A \grammarterm{string-literal} that begins with \tcode{u8},
 \indextext{prefix!\idxcode{u8}}%
-such as \tcode{u8"asdf"}, is a \defn{UTF-8 string literal},
-also referred to as a \tcode{char8_t} string literal.
-A \tcode{char8_t} string literal
+such as \tcode{u8"asdf"}, is a \defn{UTF-8 string literal}.
+A UTF-8 string literal
 has type ``array of \placeholder{n} \tcode{const char8_t}'',
 where \placeholder{n} is the size of the string as defined below;
 each successive element of the object representation\iref{basic.types} has
@@ -1543,28 +1546,37 @@
 also referred to as narrow string literals.
 
 \pnum
-\indextext{literal!string!\idxcode{char16_t}}%
+\indextext{literal!string!UTF-16}%
 \indextext{type!\idxcode{char16_t}}%
 A \grammarterm{string-literal} that begins with \tcode{u},
 \indextext{prefix!\idxcode{u}}%
 such as \tcode{u"asdf"}, is
-a \tcode{char16_t} string literal. A \tcode{char16_t} string literal has
+a \defn{UTF-16 string literal}.
+A UTF-16 string literal has
 type ``array of \placeholder{n} \tcode{const char16_t}'', where \placeholder{n} is the
-size of the string as defined below; it
-is initialized with the given characters. A single \grammarterm{c-char} may
+size of the string as defined below;
+each successive element of the array
+has the value of the corresponding code unit of
+the UTF-16 encoding of the string.
+\begin{note}
+A single \grammarterm{c-char} may
 produce more than one \tcode{char16_t} character in the form of
 surrogate pairs.
+\end{note}
 
 \pnum
-\indextext{literal!string!\idxcode{char32_t}}%
+\indextext{literal!string!UTF-32}%
 \indextext{type!\idxcode{char32_t}}%
 A \grammarterm{string-literal} that begins with \tcode{U},
 \indextext{prefix!\idxcode{U}}%
 such as \tcode{U"asdf"}, is
-a \tcode{char32_t} string literal. A \tcode{char32_t} string literal has
+a \defn{UTF-32 string literal}.
+A UTF-32 string literal has
 type ``array of \placeholder{n} \tcode{const char32_t}'', where \placeholder{n} is the
-size of the string as defined below; it
-is initialized with the given characters.
+size of the string as defined below;
+each successive element of the array
+has the value of the corresponding code unit of
+the UTF-32 encoding of the string.
 
 \pnum
 \indextext{literal!string!wide}%
@@ -1643,14 +1655,14 @@
 \tcode{\textbackslash'}, and the double quote \tcode{"} shall be preceded by a
 \tcode{\textbackslash},
 and except that a \grammarterm{universal-character-name} in a
-\tcode{char16_t} string literal may yield a surrogate pair.
+UTF-16 string literal may yield a surrogate pair.
 \indextext{string!\idxcode{sizeof}}%
 In a narrow string literal, a \grammarterm{universal-character-name} may map to more
 than one \tcode{char} or \tcode{char8_t} element due to \defnadj{multibyte}{encoding}. The
-size of a \tcode{char32_t} or wide string literal is the total number of
+size of a UTF-32 or wide string literal is the total number of
 escape sequences, \grammarterm{universal-character-name}{s}, and other characters, plus
 one for the terminating \tcode{U'\textbackslash 0'} or
-\tcode{L'\textbackslash 0'}. The size of a \tcode{char16_t} string
+\tcode{L'\textbackslash 0'}. The size of a UTF-16 string
 literal is the total number of escape sequences,
 \grammarterm{universal-character-name}{s}, and other characters, plus one for each
 character requiring a surrogate pair, plus one for the terminating