diff --git a/source/lex.tex b/source/lex.tex
index 02e1aa07ba..807ed53d79 100644
--- a/source/lex.tex
+++ b/source/lex.tex
@@ -133,19 +133,22 @@
 source file to be processed from phase 1 through phase 4, recursively.
 All preprocessing directives are then deleted.
 
-\item Each basic source character set member in a \grammarterm{character-literal} or a
-\grammarterm{string-literal}, as well as each escape sequence and \grammarterm{universal-character-name} in a
-\grammarterm{character-literal} or a non-raw string literal, is converted to the corresponding
-member of the execution character set~(\ref{lex.ccon}, \ref{lex.string}); if
-there is no corresponding member, it is converted to an \impldef{converting
-characters from source character set to execution character set} member other
-than the null (wide) character.
-\begin{footnote}
-An implementation need not convert all
-non-corresponding source characters to the same execution character.
-\end{footnote}
+\item
+Each
+\grammarterm{basic-c-char},
+\grammarterm{basic-s-char}, and
+\grammarterm{r-char}
+in a \grammarterm{character-literal} or a \grammarterm{string-literal},
+as well as each
+\grammarterm{escape-sequence} and \grammarterm{universal-character-name}
+in a \grammarterm{character-literal} or a non-raw string literal,
+is encoded in the literal's associated character encoding as specified in
+\ref{lex.ccon} and \ref{lex.string}.
 
-\item Adjacent string literal tokens are concatenated.
+\item
+Adjacent \grammarterm{string-literal}s are concatenated
+and a null character is appended to the result
+as specified in \ref{lex.string}.
 
 \item White-space characters separating tokens are no longer
 significant. Each preprocessing token is converted into a
@@ -1241,22 +1244,37 @@
 
 \begin{bnf}
 \nontermdef{c-char}\br
-    \textnormal{any member of the basic source character set except the single-quote \terminal{'}, backslash \terminal{\textbackslash}, or new-line character}\br
+    basic-c-char\br
     escape-sequence\br
     universal-character-name
 \end{bnf}
 
+\begin{bnf}
+\nontermdef{basic-c-char}\br
+    \textnormal{any member of the basic source character set except the single-quote \terminal{'}, backslash \terminal{\textbackslash}, or new-line character}
+\end{bnf}
+
 \begin{bnf}
 \nontermdef{escape-sequence}\br
     simple-escape-sequence\br
-    octal-escape-sequence\br
-    hexadecimal-escape-sequence
+    numeric-escape-sequence\br
+    conditional-escape-sequence
 \end{bnf}
 
 \begin{bnf}
-\nontermdef{simple-escape-sequence} \textnormal{one of}\br
-    \terminal{\textbackslash'}\quad\terminal{\textbackslash"}\quad\terminal{\textbackslash ?}\quad\terminal{\textbackslash\textbackslash}\br
-    \terminal{\textbackslash a}\quad\terminal{\textbackslash b}\quad\terminal{\textbackslash f}\quad\terminal{\textbackslash n}\quad\terminal{\textbackslash r}\quad\terminal{\textbackslash t}\quad\terminal{\textbackslash v}
+\nontermdef{simple-escape-sequence}\br
+    \terminal{\textbackslash} simple-escape-sequence-char
+\end{bnf}
+
+\begin{bnf}
+\nontermdef{simple-escape-sequence-char} \textnormal{one of}\br
+    \terminal{'  "  ?  \textbackslash{} a  b  f  n  r  t  v}
+\end{bnf}
+
+\begin{bnf}
+\nontermdef{numeric-escape-sequence}\br
+    octal-escape-sequence\br
+    hexadecimal-escape-sequence
 \end{bnf}
 
 \begin{bnf}
@@ -1272,135 +1290,182 @@
     hexadecimal-escape-sequence hexadecimal-digit
 \end{bnf}
 
+\begin{bnf}
+\nontermdef{conditional-escape-sequence}\br
+    \terminal{\textbackslash} conditional-escape-sequence-char
+\end{bnf}
+
+\begin{bnf}
+\nontermdef{conditional-escape-sequence-char}\br
+    \textnormal{any member of the basic source character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{u}, \terminal{U}, or \terminal{x}}
+\end{bnf}
+
 \pnum
 \indextext{literal!character}%
+\indextext{literal!\idxcode{char8_t}}%
 \indextext{literal!\idxcode{char16_t}}%
 \indextext{literal!\idxcode{char32_t}}%
 \indextext{literal!type of character}%
-\indextext{literal!character!ordinary}%
-A \grammarterm{character-literal} that does not begin with
-\tcode{u8}, \tcode{u}, \tcode{U}, or \tcode{L}
-is an \defn{ordinary character literal}.
-An ordinary character literal that contains a
-single \grammarterm{c-char} representable in the execution character
-set has type \tcode{char}, with value equal to the
-numerical value of the encoding of the \grammarterm{c-char} in the
-execution character set. An ordinary character literal that contains
-more than one \grammarterm{c-char} is a
-\indextext{multicharacter literal|see{literal, multicharacter}}%
-\defnadj{multicharacter}{literal}.
-A multicharacter literal, or an ordinary character literal containing
-a single \grammarterm{c-char} not representable in the execution
-character set, is conditionally-supported, has type \tcode{int},
-\indextext{literal!multicharacter!implementation-defined value of}%
-and has an \impldef{value of multicharacter literal} value.
-
-\pnum
-\indextext{literal!character!UTF-8}%
-A \grammarterm{character-literal} that
-begins with \tcode{u8}, such as \tcode{u8'w'},
-\indextext{prefix!\idxcode{u8}}%
-is a \grammarterm{character-literal} of type \tcode{char8_t},
-known as a \defn{UTF-8 character literal}.
-The value of a UTF-8 character literal
-is equal to its ISO/IEC 10646 code point value,
-provided that the code point value
-can be encoded as a single UTF-8 code unit.
-\begin{note}
-That is, provided the code point value is in the range $[0, \mathrm{7F}]$ (hexadecimal).
-\end{note}
-If the value is not representable with a single UTF-8 code unit,
-the program is ill-formed.
-A UTF-8 character literal containing multiple \grammarterm{c-char}{s} is ill-formed.
-
-\pnum
-\indextext{literal!character!UTF-16}%
+\indextext{type!\idxcode{char8_t}}%
 \indextext{type!\idxcode{char16_t}}%
-A \grammarterm{character-literal} that
-begins with the letter \tcode{u}, such as \tcode{u'x'},
-\indextext{prefix!\idxcode{u}}%
-is a \grammarterm{character-literal} of type \tcode{char16_t},
-known as a \defn{UTF-16 character literal}.
-The value of a UTF-16 character literal
-is equal to its ISO/IEC 10646 code point value,
-provided that the code point value is
-representable with a single 16-bit code unit.
+\indextext{type!\idxcode{char32_t}}%
+\indextext{wide-character}%
+\indextext{type!\idxcode{wchar_t}}%
+A \defnx{non-encodable character literal}{literal!character!non-encodable}
+is a \grammarterm{character-literal}
+whose \grammarterm{c-char-sequence} consists of a single \grammarterm{c-char}
+that is not a \grammarterm{numeric-escape-sequence} and
+that specifies a character
+that either lacks representation in the literal's associated character encoding
+or cannot be encoded as a single code unit.
+A \defnadj{multicharacter}{literal} is a \grammarterm{character-literal}
+whose \grammarterm{c-char-sequence} consists of
+more than one \grammarterm{c-char}.
+The \grammarterm{encoding-prefix} of
+a non-encodable character literal or a multicharacter literal
+shall be absent or \tcode{L}.
+Such \grammarterm{character-literal}s are conditionally-supported.
+
+\pnum
+The kind of a \grammarterm{character-literal},
+its type, and its associated character encoding
+are determined by
+its \grammarterm{encoding-prefix} and its \grammarterm{c-char-sequence}
+as defined by \tref{lex.ccon.literal}.
+The special cases for
+non-encodable character literals and multicharacter literals
+take precedence over their respective base kinds.
 \begin{note}
-That is, provided the code point value is in the range $[0, \mathrm{FFFF}]$ (hexadecimal).
+The associated character encoding for ordinary and wide character literals
+determines encodability,
+but does not determine the value of
+non-encodable ordinary or wide character literals or
+ordinary or wide multicharacter literals.
+The examples in \tref{lex.ccon.literal}
+for non-encodable ordinary and wide character literals assume that
+the specified character lacks representation in
+the execution character set or execution wide-character set, respectively, or
+that encoding it would require more than one code unit.
 \end{note}
-If the value is not representable
-with a single 16-bit code unit, the program is ill-formed.
-A UTF-16 character literal
-containing multiple \grammarterm{c-char}{s} is ill-formed.
 
-\pnum
-\indextext{literal!character!UTF-32}%
-\indextext{type!\idxcode{char32_t}}%
-A \grammarterm{character-literal} that
-begins with the letter \tcode{U}, such as \tcode{U'y'},
-\indextext{prefix!\idxcode{U}}%
-is a \grammarterm{character-literal} of type \tcode{char32_t},
-known as a \defn{UTF-32 character literal}.
-The value of a
-UTF-32 character literal containing a single \grammarterm{c-char} is equal
-to its ISO/IEC 10646 code point value.
-A UTF-32 character literal containing
-multiple \grammarterm{c-char}{s} is ill-formed.
+\begin{floattable}{Character literals}{lex.ccon.literal}
+{lllll}
+\topline
+Encoding & Kind & Type & Associated char- & Example \\
+prefix & & & acter encoding & \\
+\capsep
+none &
+\defnx{ordinary character literal}{literal!character!ordinary} &
+\keyword{char} &
+encoding of &
+\tcode{'v'} \\
+ &
+non-encodable ordinary character literal &
+\keyword{int} &
+the execution &
+\tcode{'\textbackslash U0001F525'} \\
+ &
+ordinary multicharacter literal &
+\keyword{int} &
+character set &
+\tcode{'abcd'} \\ \hline
+\tcode{L} &
+\defnx{wide character literal}{literal!character!wide} &
+\keyword{wchar_t} &
+encoding of &
+\tcode{L'w'} \\
+ &
+non-encodable wide character literal &
+\keyword{wchar_t} &
+the execution &
+\tcode{L'\textbackslash U0001F32A'} \\
+ &
+wide multicharacter literal &
+\keyword{wchar_t} &
+wide-character set &
+\tcode{L'abcd'} \\ \hline
+\tcode{u8} &
+\defnx{UTF-8 character literal}{literal!character!UTF-8} &
+\keyword{char8_t} &
+UTF-8 &
+\tcode{u8'x'} \\ \hline
+\tcode{u} &
+\defnx{UTF-16 character literal}{literal!character!UTF-16} &
+\keyword{char16_t} &
+UTF-16 &
+\tcode{u'y'} \\ \hline
+\tcode{U} &
+\defnx{UTF-32 character literal}{literal!character!UTF-32} &
+\keyword{char32_t} &
+UTF-32 &
+\tcode{U'z'} \\
+\end{floattable}
 
 \pnum
-\indextext{literal!character!wide}%
-\indextext{wide-character}%
-\indexhdr{stddef.h}%
-\indextext{type!\idxcode{wchar_t}}%
-A \grammarterm{character-literal} that
-begins with the letter \tcode{L}, such as \tcode{L'z'},
-\indextext{prefix!\idxcode{L}}%
-is a \defn{wide-character literal}. A wide-character literal has type
-\tcode{wchar_t}.
-\begin{footnote}
-They are intended for character sets where a character does
-not fit into a single byte.
-\end{footnote}
-The value of a wide-character literal containing a single
-\grammarterm{c-char} has value equal to the numerical value of the encoding
-of the \grammarterm{c-char} in the execution wide-character set, unless the
-\grammarterm{c-char} has no representation in the execution wide-character set, in which
-case the value is \impldef{value of wide-character literal with single c-char that is
-not in execution wide-character set}.
+In translation phase 4,
+the value of a \grammarterm{character-literal} is determined
+using the range of representable values
+of the \grammarterm{character-literal}'s type in translation phase 7.
+A non-encodable character literal or a multicharacter literal
+has an
+\impldef{value of non-encodable character literal or multicharacter literal}
+value.
+The value of any other kind of \grammarterm{character-literal}
+is determined as follows:
+\begin{itemize}
+\item
+A \grammarterm{character-literal} with
+a \grammarterm{c-char-sequence} consisting of a single
+\grammarterm{basic-c-char},
+\grammarterm{simple-escape-sequence}, or
+\grammarterm{universal-character-name}
+has the code unit value of the specified character
+as encoded in the literal's associated character encoding.
 \begin{note}
-The type \tcode{wchar_t} is able to
-represent all members of the execution wide-character set (see~\ref{basic.fundamental}).
+If the specified character lacks
+representation in the literal's associated character encoding or
+if it cannot be encoded as a single code unit,
+then the literal is a non-encodable character literal.
 \end{note}
-The value
-of a wide-character literal containing multiple \grammarterm{c-char}{s} is
-\impldef{value of wide-character literal containing multiple characters}.
+\item
+A \grammarterm{character-literal} with
+a \grammarterm{c-char-sequence} consisting of
+a single \grammarterm{numeric-escape-sequence}
+that specifies an integer value $v$ has a value as follows:
+\begin{itemize}
+\item
+If $v$ does not exceed
+the range of representable values of the \grammarterm{character-literal}'s type,
+then the value is $v$.
+\item
+Otherwise,
+if the \grammarterm{character-literal}'s \grammarterm{encoding-prefix}
+is absent or \tcode{L}, and
+$v$ does not exceed the range of representable values of the corresponding unsigned type for the underlying type of the \grammarterm{character-literal}'s type,
+then the value is the unique value of the \grammarterm{character-literal}'s type \tcode{T} that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}.
+\item
+Otherwise, the \grammarterm{character-literal} is ill-formed.
+\end{itemize}
+\item
+A \grammarterm{character-literal} with
+a \grammarterm{c-char-sequence} consisting of
+a single \grammarterm{conditional-escape-sequence}
+is conditionally-supported and
+has an \impldef{value of \grammarterm{conditional-escape-sequence}} value.
+\end{itemize}
 
 \pnum
-Certain non-graphic characters, the single quote \tcode{'}, the double quote \tcode{"},
-the question mark \tcode{?},
-\begin{footnote}
-Using an escape sequence for a question mark
-is supported for compatibility with ISO \CppXIV{} and ISO C.
-\end{footnote}
-and the backslash
 \indextext{backslash character}%
 \indextext{\idxcode{\textbackslash}|see{backslash character}}%
 \indextext{escape character|see{backslash character}}%
-\tcode{\textbackslash}, can be represented according to
-\tref{lex.ccon.esc}.
-\indextext{escape sequence!undefined}%
-The double quote \tcode{"}  and the question mark \tcode{?}, can be
-represented as themselves or by the escape sequences
-\tcode{\textbackslash "} and \tcode{\textbackslash ?} respectively, but
-the single quote \tcode{'} and the backslash \tcode{\textbackslash}
-shall be represented by the escape sequences \tcode{\textbackslash'} and
-\tcode{\textbackslash\textbackslash} respectively. Escape sequences in
-which the character following the backslash is not listed in
-\tref{lex.ccon.esc} are conditionally-supported, with \impldef{semantics of
-non-standard escape sequences} semantics. An escape sequence specifies a single
-character.
-
-\begin{floattable}{Escape sequences}{lex.ccon.esc}
+The character specified by a \grammarterm{simple-escape-sequence}
+is specified in \tref{lex.ccon.esc}.
+\begin{note}
+Using an escape sequence for a question mark
+is supported for compatibility with ISO \CppXIV{} and ISO C.
+\end{note}
+
+\begin{floattable}{Simple escape sequences}{lex.ccon.esc}
 {lll}
 \topline
 new-line        &   NL(LF)          &   \tcode{\textbackslash n}                \\
@@ -1414,50 +1479,8 @@
 question mark   &   ?               &   \tcode{\textbackslash ?}                \\
 single quote    &   \tcode{'}       &   \tcode{\textbackslash '}                \\
 double quote    &   \tcode{"}       &   \tcode{\textbackslash "}                \\
-octal number    &   \numconst{ooo}  &   \tcode{\textbackslash\numconst{ooo}}    \\
-hex number      &   \numconst{hhh}  &   \tcode{\textbackslash x\numconst{hhh}}  \\
 \end{floattable}
 
-\pnum
-The escape
-\indextext{number!octal}%
-\tcode{\textbackslash\numconst{ooo}} consists of the backslash followed by one,
-two, or three octal digits that are taken to specify the value of the
-desired character. The escape
-\indextext{number!hex}%
-\tcode{\textbackslash x\numconst{hhh}}
-consists of the backslash followed by \tcode{x} followed by one or more
-hexadecimal digits that are taken to specify the value of the desired
-character. There is no limit to the number of digits in a hexadecimal
-sequence. A sequence of octal or hexadecimal digits is terminated by the
-first character that is not an octal digit or a hexadecimal digit,
-respectively.
-\indextext{literal!implementation-defined value of char@implementation-defined value of \tcode{char}}%
-The value of a \grammarterm{character-literal} is \impldef{value of \grammarterm{character-literal} outside range of
-corresponding type} if it falls outside of the \impldef{range defined for \grammarterm{character-literal}s}
-range defined for \tcode{char} (for \grammarterm{character-literal}s with no prefix) or
-\tcode{wchar_t} (for \grammarterm{character-literal}s prefixed by \tcode{L}).
-\begin{note}
-If the value of a \grammarterm{character-literal} prefixed by
-\tcode{u}, \tcode{u8}, or \tcode{U}
-is outside the range defined for its type,
-the program is ill-formed.
-\end{note}
-
-\pnum
-A \grammarterm{universal-character-name} is translated to the encoding, in the appropriate
-execution character set, of the character named. If there is no such
-encoding, the \grammarterm{universal-character-name} is translated to an
-\impldef{encoding of universal character name not in execution character set} encoding.
-\begin{note}
-In translation phase 1, a \grammarterm{universal-character-name} is introduced whenever an
-actual extended
-character is encountered in the source text. Therefore, all extended
-characters are described in terms of \grammarterm{universal-character-name}{s}. However,
-the actual compiler implementation can use its own native character set,
-so long as the same results are obtained.
-\end{note}
-
 \rSec2[lex.fcon]{Floating-point literals}
 
 \indextext{literal!floating-point}%
@@ -1600,11 +1623,16 @@
 
 \begin{bnf}
 \nontermdef{s-char}\br
-    \textnormal{any member of the basic source character set except the double-quote \terminal{"}, backslash \terminal{\textbackslash}, or new-line character}\br
+    basic-s-char\br
     escape-sequence\br
     universal-character-name
 \end{bnf}
 
+\begin{bnf}
+\nontermdef{basic-s-char}\br
+    \textnormal{any member of the basic source character set except the double-quote \terminal{"}, backslash \terminal{\textbackslash}, or new-line character}
+\end{bnf}
+
 \begin{bnf}
 \nontermdef{raw-string}\br
     \terminal{"} \opt{d-char-sequence} \terminal{(} \opt{r-char-sequence} \terminal{)} \opt{d-char-sequence} \terminal{"}
@@ -1635,6 +1663,62 @@
     \bnfindent\textnormal{representing horizontal tab, vertical tab, form feed, and newline.}
 \end{bnf}
 
+\pnum
+\indextext{literal!string}%
+\indextext{character string}%
+\indextext{string!type of}%
+\indextext{type!\idxcode{wchar_t}}%
+\indextext{prefix!\idxcode{L}}%
+\indextext{literal!string!\idxcode{char16_t}}%
+\indextext{type!\idxcode{char16_t}}%
+\indextext{literal!string!\idxcode{char32_t}}%
+\indextext{type!\idxcode{char32_t}}%
+The kind of a \grammarterm{string-literal},
+its type, and
+its associated character encoding
+are determined by its encoding prefix and sequence of
+\grammarterm{s-char}s or \grammarterm{r-char}s
+as defined by \tref{lex.string.literal}
+where $n$ is the number of encoded code units as described below.
+
+\begin{floattable}{String literals}{lex.string.literal}
+{llp{2.6cm}p{2.3cm}p{4.7cm}}
+\topline
+Encoding & Kind & Type & Associated & Examples \\
+prefix & & & character encoding & \\
+\capsep
+none &
+\defnx{ordinary string literal}{literal!string!ordinary} &
+array of $n$\newline \tcode{\keyword{const} \keyword{char}} &
+encoding of the execution character set &
+\tcode{"ordinary string"}\newline
+\tcode{R"(ordinary raw string)"} \\
+\tcode{L} &
+\defnx{wide string literal}{literal!string!wide} &
+array of $n$\newline \tcode{\keyword{const} \keyword{wchar_t}} &
+encoding of the execution wide-character set &
+\tcode{L"wide string"}\newline
+\tcode{LR"w(wide raw string)w"} \\
+\tcode{u8} &
+\defnx{UTF-8 string literal}{literal!string!UTF-8} &
+array of $n$\newline \tcode{\keyword{const} \keyword{char8_t}} &
+UTF-8 &
+\tcode{u8"UTF-8 string"}\newline
+\tcode{u8R"x(UTF-8 raw string)x"} \\
+\tcode{u} &
+\defnx{UTF-16 string literal}{literal!string!UTF-16} &
+array of $n$\newline \tcode{\keyword{const} \keyword{char16_t}} &
+UTF-16 &
+\tcode{u"UTF-16 string"}\newline
+\tcode{uR"y(UTF-16 raw string)y"} \\
+\tcode{U} &
+\defnx{UTF-32 string literal}{literal!string!UTF-32} &
+array of $n$\newline \tcode{\keyword{const} \keyword{char32_t}} &
+UTF-32 &
+\tcode{U"UTF-32 string"}\newline
+\tcode{UR"z(UTF-32 raw string)z"} \\
+\end{floattable}
+
 \pnum
 \indextext{literal!string!raw}%
 A \grammarterm{string-literal} that has an \tcode{R}
@@ -1682,85 +1766,11 @@
 \end{example}
 
 \pnum
-\indextext{literal!string}%
-\indextext{character string}%
-\indextext{string!type of}%
 \indextext{literal!string!narrow}%
-After translation phase 6, a \grammarterm{string-literal}
-that does not begin with an \grammarterm{encoding-prefix} is an
-\defn{ordinary string literal}.
-An ordinary string literal
-has type ``array of \placeholder{n} \tcode{const char}''
-where \placeholder{n} is the size of the string as defined below,
-has static storage duration\iref{basic.stc}, and
-is initialized with the given characters.
-
-\pnum
-\indextext{literal!string!UTF-8}%
-A \grammarterm{string-literal} that begins with \tcode{u8},
-\indextext{prefix!\idxcode{u8}}%
-such as \tcode{u8"asdf"}, is a \defn{UTF-8 string literal}.
-A UTF-8 string literal
-has type ``array of \placeholder{n} \tcode{const char8_t}'',
-where \placeholder{n} is the size of the string as defined below;
-each successive element of the object representation\iref{basic.types} has
-the value of the corresponding code unit of the UTF-8 encoding of the string.
-
-\pnum
 \indextext{literal!narrow-character}%
 Ordinary string literals and UTF-8 string literals are
 also referred to as narrow string literals.
 
-\pnum
-\indextext{literal!string!UTF-16}%
-\indextext{literal!string!\idxcode{char16_t}}%
-\indextext{type!\idxcode{char16_t}}%
-A \grammarterm{string-literal} that begins with \tcode{u},
-\indextext{prefix!\idxcode{u}}%
-such as \tcode{u"asdf"}, is
-a \defn{UTF-16 string literal}.
-A UTF-16 string literal has
-type ``array of \placeholder{n} \tcode{const char16_t}'', where \placeholder{n} is the
-size of the string as defined below;
-each successive element of the array
-has the value of the corresponding code unit of
-the UTF-16 encoding of the string.
-\begin{note}
-A single \grammarterm{c-char} may
-produce more than one \tcode{char16_t} character in the form of
-surrogate pairs.
-A surrogate pair is a representation for a single code point
-as a sequence of two 16-bit code units.
-\end{note}
-
-\pnum
-\indextext{literal!string!UTF-32}%
-\indextext{literal!string!\idxcode{char32_t}}%
-\indextext{type!\idxcode{char32_t}}%
-A \grammarterm{string-literal} that begins with \tcode{U},
-\indextext{prefix!\idxcode{U}}%
-such as \tcode{U"asdf"}, is
-a \defn{UTF-32 string literal}.
-A UTF-32 string literal has
-type ``array of \placeholder{n} \tcode{const char32_t}'', where \placeholder{n} is the
-size of the string as defined below;
-each successive element of the array
-has the value of the corresponding code unit of
-the UTF-32 encoding of the string.
-
-\pnum
-\indextext{literal!string!wide}%
-A \grammarterm{string-literal} that begins with \tcode{L},
-\indextext{prefix!\idxcode{L}}%
-such as \tcode{L"asdf"}, is a \defn{wide string literal}.
-\indexhdr{stddef.h}%
-\indextext{type!\idxcode{wchar_t}}%
-\indextext{literal!string!wide}%
-\indextext{prefix!\idxcode{L}}%
-A wide string literal has type ``array of \placeholder{n} \tcode{const
-wchar_t}'', where \placeholder{n} is the size of the string as defined below; it
-is initialized with the given characters.
-
 \pnum
 \indextext{concatenation!string}%
 In translation phase 6\iref{lex.phases}, adjacent \grammarterm{string-literal}{s} are concatenated. If
@@ -1771,12 +1781,12 @@
 conditionally-supported with \impldef{concatenation of some types of \grammarterm{string-literal}s}
 behavior.
 \begin{note}
-This
-concatenation is an interpretation, not a conversion.
-Because the interpretation happens in translation phase 6 (after each character from a
-\grammarterm{string-literal} has been translated into a value from the appropriate character set), a
-\grammarterm{string-literal}'s initial rawness has no effect on the interpretation or
-well-formedness of the concatenation.
+This concatenation is an interpretation, not a conversion.
+Because the interpretation happens in translation phase 6
+(after the string literal contents have been encoded in
+the \grammarterm{string-literal}'s associated character encoding),
+a \grammarterm{string-literal}'s initial rawness
+has no effect on the interpretation or well-formedness of the concatenation.
 \end{note}
 \tref{lex.string.concat} has some examples of valid concatenations.
 
@@ -1815,48 +1825,13 @@
 \indextext{\idxcode{0}|seealso{zero, null}}%
 \indextext{\idxcode{0}!string terminator}%
 \indextext{\idxcode{0}!null character|see {character, null}}%
-After any necessary concatenation, in translation phase
-7\iref{lex.phases}, \tcode{'\textbackslash 0'} is appended to every
-\grammarterm{string-literal} so that programs that scan a string can find its end.
-
-\pnum
-Escape sequences and \grammarterm{universal-character-name}{s} in non-raw string literals
-have the same meaning as in \grammarterm{character-literal}s\iref{lex.ccon}, except that
-the single quote \tcode{'} is representable either by itself or by the escape sequence
-\tcode{\textbackslash'}, and the double quote \tcode{"} shall be preceded by a
-\tcode{\textbackslash},
-and except that a \grammarterm{universal-character-name} in a
-UTF-16 string literal may yield a surrogate pair.
-\indextext{string!\idxcode{sizeof}}%
-In a narrow string literal, a \grammarterm{universal-character-name} may map to more
-than one \tcode{char} or \tcode{char8_t} element due to \defnadj{multibyte}{encoding}. The
-size of a \tcode{char32_t} or wide string literal is the total number of
-escape sequences, \grammarterm{universal-character-name}{s}, and other characters, plus
-one for the terminating \tcode{U'\textbackslash 0'} or
-\tcode{L'\textbackslash 0'}. The size of a UTF-16 string
-literal is the total number of escape sequences,
-\grammarterm{universal-character-name}{s}, and other characters, plus one for each
-character requiring a surrogate pair, plus one for the terminating
-\tcode{u'\textbackslash 0'}.
-\begin{note}
-The size of a \tcode{char16_t}
-string literal is the number of code units, not the number of
-characters.
-\end{note}
-\begin{note}
-Any \grammarterm{universal-character-name}{s} are required to
-correspond to a code point in the range
-$[0,$ $\mathrm{D800})$ or $[\mathrm{E000},$ $\mathrm{10FFFF}]$ (hexadecimal)\iref{lex.charset}.
-\end{note}
-The size of a narrow string literal is
-the total number of escape sequences and other characters, plus at least
-one for the multibyte encoding of each \grammarterm{universal-character-name}, plus
-one for the terminating \tcode{'\textbackslash 0'}.
+In translation phase 6\iref{lex.phases},
+after adjacent \grammarterm{string-literal}s are concatenated,
+a null character is appended to the result.
 
 \pnum
 Evaluating a \grammarterm{string-literal} results in a string literal object
-with static storage duration, initialized from the given characters as
-specified above.
+with static storage duration\iref{basic.stc}.
 \indextext{string!distinct}%
 Whether all \grammarterm{string-literal}s are distinct (that is, are stored in
 nonoverlapping objects) and whether successive evaluations of a
@@ -1867,6 +1842,79 @@
 The effect of attempting to modify a \grammarterm{string-literal} is undefined.
 \end{note}
 
+\pnum
+String literal objects are initialized with
+the sequence of code unit values
+corresponding to the \grammarterm{string-literal}'s sequence of
+\grammarterm{s-char}s (for a non-raw string literal) and
+\grammarterm{r-char}s (for a raw string literal)
+in order as follows:
+\begin{itemize}
+\item
+The sequence of characters denoted by each contiguous sequence of
+\grammarterm{basic-s-char}s,
+\grammarterm{r-char}s,
+\grammarterm{simple-escape-sequence}s\iref{lex.ccon}, and
+\grammarterm{universal-character-name}s\iref{lex.charset}
+is encoded to a code unit sequence
+using the \grammarterm{string-literal}'s associated character encoding.
+If a character lacks representation in the associated character encoding, then:
+\begin{itemize}
+\item
+If the \grammarterm{string-literal}'s \grammarterm{encoding-prefix}
+is absent or \tcode{L},
+then the \grammarterm{string-literal} is conditionally-supported and
+an
+\impldef{code unit sequence for non-representable \grammarterm{string-literal}}
+code unit sequence is encoded.
+\item
+Otherwise, the \grammarterm{string-literal} is ill-formed.
+\end{itemize}
+When encoding a stateful character encoding,
+implementations should encode the first such sequence
+beginning with the initial encoding state and
+encode subsequent sequences
+beginning with the final encoding state of the prior sequence.
+\begin{note}
+The encoded code unit sequence can differ from
+the sequence of code units that would be obtained by
+encoding each character independently.
+\end{note}
+\item
+Each \grammarterm{numeric-escape-sequence}\iref{lex.ccon}
+that specifies an integer value $v$
+contributes a single code unit with a value as follows:
+\begin{itemize}
+\item
+If $v$ does not exceed the range of representable values of
+the \grammarterm{string-literal}'s array element type,
+then the value is $v$.
+\item
+Otherwise,
+if the \grammarterm{string-literal}'s \grammarterm{encoding-prefix}
+is absent or \tcode{L}, and
+$v$ does not exceed the range of representable values of
+the corresponding unsigned type for the underlying type of
+the \grammarterm{string-literal}'s array element type,
+then the value is the unique value of
+the \grammarterm{string-literal}'s array element type \tcode{T}
+that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}.
+\item
+Otherwise, the \grammarterm{string-literal} is ill-formed.
+\end{itemize}
+When encoding a stateful character encoding,
+these sequences should have no effect on encoding state.
+\item
+Each \grammarterm{conditional-escape-sequence}\iref{lex.ccon}
+contributes an
+\impldef{code unit sequence for \grammarterm{conditional-escape-sequence}}
+code unit sequence.
+When encoding a stateful character encoding,
+it is
+\impldef{effect of \grammarterm{conditional-escape-sequence} on encoding state}
+what effect these sequences have on encoding state.
+\end{itemize}
+
 \rSec2[lex.bool]{Boolean literals}
 
 \indextext{literal!boolean}%