cplusplus · zygoloid · Nov 26, 2019 · Mar 11, 2019
diff --git a/source/lex.tex b/source/lex.tex
@@ -241,18 +241,17 @@
     \terminal{\textbackslash U} hex-quad hex-quad
 \end{bnf}
 
-The character designated by the \grammarterm{universal-character-name} \tcode{\textbackslash
-U00NNNNNN} is that character
-that has \tcode{U+NNNNNN} as a code point short identifier;
-the character designated by the \grammarterm{universal-character-name}
-\tcode{\textbackslash uNNNN} is that character
-that has \tcode{U+NNNN} as a code point short identifier.
-If a \grammarterm{universal-character-name} does not correspond to
-a code point in ISO/IEC 10646 or
-if a \grammarterm{universal-character-name} corresponds to
-a surrogate code point,
-the program is ill-formed. Additionally, if
-a \grammarterm{universal-character-name} outside
+A \grammarterm{universal-character-name}
+designates the character in ISO/IEC 10646 (if any)
+whose code point is the hexadecimal number represented by
+the sequence of \grammarterm{hexadecimal-digit}{s}
+in the \grammarterm{universal-character-name}.
+The program is ill-formed if that number is not a code point
+or if it is a surrogate code point.
+Noncharacter code points and reserved code points
+are considered to designate separate characters distinct from
+any ISO/IEC 10646 character.
+If a \grammarterm{universal-character-name} outside
 the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or
 \grammarterm{r-char-sequence} of
 a character or
@@ -262,10 +261,10 @@
 \grammarterm{r-char-sequence}\iref{lex.string} does not form a
 \grammarterm{universal-character-name}.}
 \begin{note}
-ISO/IEC 10646 code points are within the range 0x0-0x10FFFF (inclusive).
-A surrogate code point is a value in the range 0xD800-0xDFFF (inclusive).
+ISO/IEC 10646 code points are integers in the range $[0, \mathrm{10FFFF}]$ (hexadecimal).
+A surrogate code point is a value in the range $[\mathrm{D800}, \mathrm{DFFF}]$ (hexadecimal).
 A control character is a character whose code point is
-in either of the ranges 0x0-0x1F or 0x7F-0x9F (both inclusive).
+in either of the ranges $[0, \mathrm{1F}]$ or $[\mathrm{7F}, \mathrm{9F}]$ (hexadecimal).
 \end{note}
 
 \pnum
@@ -1219,7 +1218,7 @@
 provided that the code point value
 can be encoded as a single UTF-8 code unit.
 \begin{note}
-That is, provided the code point value is in the range 0x0-0x7F (inclusive).
+That is, provided the code point value is in the range $[0, \mathrm{7F}]$ (hexadecimal).
 \end{note}
 If the value is not representable with a single UTF-8 code unit,
 the program is ill-formed.
@@ -1238,7 +1237,7 @@
 provided that the code point value is
 representable with a single 16-bit code unit.
 \begin{note}
-That is, provided the code point value is in the range 0x0-0xFFFF (inclusive).
+That is, provided the code point value is in the range $[0, \mathrm{FFFF}]$ (hexadecimal).
 \end{note}
 If the value is not representable
 with a single 16-bit code unit, the program is ill-formed.
@@ -1771,9 +1770,12 @@
 string literal is the number of code units, not the number of
 characters.
 \end{note}
-Within \tcode{char32_t} and \tcode{char16_t}
-string literals, any \grammarterm{universal-character-name}{s} shall be within the range
-\tcode{0x0} to \tcode{0x10FFFF}. The size of a narrow string literal is
+\begin{note}
+Each \grammarterm{universal-character-name} is required to
+correspond to a code point in the range
+$[0, \mathrm{D800})$ or $[\mathrm{E000}, \mathrm{10FFFF}]$ (hexadecimal)\iref{lex.charset}.
+\end{note}
+The size of a narrow string literal is
 the total number of escape sequences and other characters, plus at least
 one for the multibyte encoding of each \grammarterm{universal-character-name}, plus
 one for the terminating \tcode{'\textbackslash 0'}.