From 651d289405b9d48005a3e6a14f0f76a792d60c5f Mon Sep 17 00:00:00 2001 From: Smitty Date: Sat, 6 Feb 2021 19:17:14 -0500 Subject: [PATCH 1/4] Add note about encoding when null bytes found --- compiler/rustc_parse/src/lexer/mod.rs | 3 +++ src/test/ui/parser/issue-66473.stderr | Bin 2660 -> 5180 bytes src/test/ui/parser/issue-68629.stderr | Bin 1441 -> 1819 bytes src/test/ui/parser/issue-68730.stderr | Bin 966 -> 1218 bytes 4 files changed, 3 insertions(+) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 4a638ec3f8020..bb63dbd5303a3 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -268,6 +268,9 @@ impl<'a> StringReader<'a> { // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, // as there will be less overall work to do this way. let token = unicode_chars::check_for_substitution(self, start, c, &mut err); + if c == '\x00' { + err.help("source files must be encoded in UTF-8, unexpected null bytes might occur when the wrong text encoding is used"); + } err.emit(); token? } diff --git a/src/test/ui/parser/issue-66473.stderr b/src/test/ui/parser/issue-66473.stderr index b370b125cfefda4f82cbb4866fec528b1465a2ef..25f462d3dca98b14c5cc32ca326f3292438b1cef 100644 GIT binary patch literal 5180 zcmd^@%}T>S6h^y_&rtY8myLGXB&`8mi6C?(x>8Eova7aoaLl{2cV$PxTKx&8)w3rYN@nBxsUiZxbT=xRUm)AF^nNo(_ zAn-cv6RAQkY~`zXb$UI#;^^bO_TwtnfxhF=&Th@^z1o#~6?@8W=SXQf3!#LOF=3QV zN#W5bjUj<}ED}nPaopWrx!p!S2wKc2*7-|rrc(@`_44~{oaSSRVD(GLQ>C~q`oyt18b6YtV!CnCh3_qNm{vj>%!J*>p+fhA@SEw%m;EwN;qfAGJ!j)Qtouhj~Cs^&I!6)&G;<>J+< mJfMDAcNNBOEv^PbA@$N|C5Cbth?a+$q^C?6g~H+~$N2;tYx|1; delta 143 zcmdm^@kC_8&55~AlV>t&fv9FCFqO*;rovgklp`yc(q;owqU_p0m6JF?KZ4Q xcaRHAt>Fe!(|N#D6EB#`xbBOU_S8 zO;O0qQwR-l)3wl1D9uZ)C`e5%0Se}o=Hw_ORhEF%XQpSADC8$6mli3MXQbvSlw_nT zlo#dar7M)AR+K=@$prE;ixo&TEIRY=WC&QD2A zQOL|w2n})5wa`&0%}cE)NKGyQ3g(sOSW7Acfxq~+W6rb696fvU!DK} delta 23 dcmX@ad5nER>f{ql%9Gg`U4gXAW?rVPOaN)z2nPTF From a9bb3fbd86c89ac85d4cc21ae4f17e1001825aba Mon Sep 17 00:00:00 2001 From: Smitty Date: Sun, 7 Feb 2021 10:05:43 -0500 Subject: [PATCH 2/4] Add tests for new UTF-16 behavior --- src/test/ui/parser/utf16-be-without-bom.rs | Bin 0 -> 125 bytes src/test/ui/parser/utf16-be-without-bom.stderr | Bin 0 -> 3485 bytes src/test/ui/parser/utf16-le-without-bom.rs | Bin 0 -> 126 bytes src/test/ui/parser/utf16-le-without-bom.stderr | Bin 0 -> 3448 bytes 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/test/ui/parser/utf16-be-without-bom.rs create mode 100644 src/test/ui/parser/utf16-be-without-bom.stderr create mode 100644 src/test/ui/parser/utf16-le-without-bom.rs create mode 100644 src/test/ui/parser/utf16-le-without-bom.stderr diff --git a/src/test/ui/parser/utf16-be-without-bom.rs b/src/test/ui/parser/utf16-be-without-bom.rs new file mode 100644 index 0000000000000000000000000000000000000000..22aa19717873a58a19379fb56c99177e018ecf06 GIT binary patch literal 125 zcmZX~!3x4a3`Nnb=PO=!6_nY}*JQ96km5^;DGK7hTi5RHg;Ogm!sD{_Oba&bTpTI!WlysILyEjtToZ06Qk`+Ys)U!0&^=V0)XPsz?;TTe`q`~Z|SAbkr4;41|h;qh2oriN8)wwQkm z?`)Cqj`T&1%OBKz)o1DiH@%z;0uM;7a!2>gCWr}hCnL~{8qH^obUW{$FEQ~Cs4^wk literal 0 HcmV?d00001 diff --git a/src/test/ui/parser/utf16-le-without-bom.stderr b/src/test/ui/parser/utf16-le-without-bom.stderr new file mode 100644 index 0000000000000000000000000000000000000000..857cc085b2e818ffbc2288e36950741cef4df03a GIT binary patch literal 3448 zcmds)&r8EF7>0B1U-1ea6joj593!A75rmyYZzA2=uWMMBjQp^ZZU4L3CSxaEz=P!0 zl$Z84H#+h79J; zSWE~c3sq7HA@Tb1m<~IBOj<6eu>PfVO@qm&-R8x#_Cpti(`ajwur;C8N={*EY4_g| zekBZ?p)yJ6#SQ^$lo2*8esdlwB7cN(vdGx$og1l`Vh()?3#y3k}vNH9!>xN literal 0 HcmV?d00001 From c6cb014ad650f7b46c357d40f469c116cc752575 Mon Sep 17 00:00:00 2001 From: Smitty Date: Sun, 7 Feb 2021 11:02:53 -0500 Subject: [PATCH 3/4] Clarify error message wording --- compiler/rustc_parse/src/lexer/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index bb63dbd5303a3..4bf870eb7ce7e 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -269,7 +269,7 @@ impl<'a> StringReader<'a> { // as there will be less overall work to do this way. let token = unicode_chars::check_for_substitution(self, start, c, &mut err); if c == '\x00' { - err.help("source files must be encoded in UTF-8, unexpected null bytes might occur when the wrong text encoding is used"); + err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used"); } err.emit(); token? From ed8c68644c9a352f61c3b4591b6fc18653e2ffc2 Mon Sep 17 00:00:00 2001 From: Smitty Date: Sun, 7 Feb 2021 11:11:38 -0500 Subject: [PATCH 4/4] Bless tests with new error wording --- src/test/ui/parser/issue-66473.stderr | Bin 5180 -> 5260 bytes src/test/ui/parser/issue-68629.stderr | Bin 1819 -> 1831 bytes src/test/ui/parser/issue-68730.stderr | Bin 1218 -> 1226 bytes .../ui/parser/utf16-be-without-bom.stderr | Bin 3485 -> 3537 bytes .../ui/parser/utf16-le-without-bom.stderr | Bin 3448 -> 3500 bytes 5 files changed, 0 insertions(+), 0 deletions(-) diff --git a/src/test/ui/parser/issue-66473.stderr b/src/test/ui/parser/issue-66473.stderr index 25f462d3dca98b14c5cc32ca326f3292438b1cef..8a16d7f955129811997464d47c4e10238db77340 100644 GIT binary patch literal 5260 zcmd^@%}&BV6h>LsXCV2EE~zrm7Hpy`V`9R}=t@Hh?JYFm44EInsC!?|ce0d(R1MID z3uU_KqLbd+@21H;IV7c!Aq<~zG3QWvAT>k^EhfZ6JeZfZ*L`yU*S)~;<@L>Jrj#K! z2)s`FM5@pWTlpzoon8;GIQn?6{kV#Apzk=evs-g}uXg2L#W&@*bBt*^3!#LOF(J*S zr0{5zMl84$E~H|1F%`25s+e6=)!H+vV#zj-;eT-^2lb#{s}-14&5`UXUfy!$;?=5e zH86fy-YMUT|9>L#!}1RKv+>q$N^n}ry67;1kcjT122x>of+&dcA8M1c^TVp#=VEiO pmGLVNsbBVHg+W|Pu)$DBy|iPApS6h^y_&rtY8myLGXB&`8mi6C?(x>8Eova7aoaLl{2cV$PxTKx&8)w3rYN@nBxsUiZxbT=xRUm)AF^nNo(_ zAn-cv6RAQkY~`zXb$UI#;^^bO_TwtnfxhF=&Th@^z1o#~6?@8W=SXQf3!#LOF=3QV zN#W5bjUj<}ED}nPaopWrx!p!S2wKc2*7-|rrc(@`_44~{oaSSRVD(GLQ>C~q`oyt18b6YtV!CnCh3_qNm{vj>%!J*>p+fhA@SEw%m;EwN;qfAGJ!j)Qtouhj~Cs^&I!6)&G;<>J+< mJfMDAcNNBOEv^PbA@$N|C5Cbth?a+$q^C?6g~H+~$N2;tYx|1; diff --git a/src/test/ui/parser/issue-68629.stderr b/src/test/ui/parser/issue-68629.stderr index 034c714f3d66f157aabf818ede719e864e5a43f0..19c9ef30f9049537fca85864287462c8f875eee4 100644 GIT binary patch delta 154 zcmbQux14W-6QfjeeqKppW}ZT5h?}m3LTX-eeoAVJLP=^x$z*wEO|C?Rl+3iW)S}e9 Z$puWIlOHg;lcZw~Nvfqc7cj481^~NuGxGod delta 146 zcmZ3^H=A#R6QgiaszPdBa(+r`ib7_dLTHGauEk_MW=-Cbj8uj4qWrvcg_6{Y$%)L; YlUFe|lBC*d@>wQ#;&g0IWIo0W0F`qu#sB~S diff --git a/src/test/ui/parser/issue-68730.stderr b/src/test/ui/parser/issue-68730.stderr index ff683bf8be773b10c05684f0befce1d2ba6beb1d..8602abacabd325988d4f2b8a9116d85688365341 100644 GIT binary patch delta 101 zcmX@ad5UvFyHs+1UP)qRozbZFZAZlrE(0l(T82 zW@p;u!#wk2SSO*wC*7&|_o24PlQB{yqpkHu@|;cHVM`jz2_sQ3ensz9Pv4aa2HQ6nS0@&??6b3NHNJ=YT>q l47ehV@ypKpu2NP?+f9jS9QlTZY>2t0PpO>2w$7L&`2q8pTI!WlysILyEjtToZ06Qk`+Ys)U!0&^=V0)XPsz?;TTe`q`~Z|SAQ|7}~+WvQ^t(2~`11@BC zNq9--+{NKt(pqT_C#O;^B#bSzg{ooIlqAQSTXi=Hkf!%Ie|UPncDBw2X-nxs*hV>< zj@9f+8{X$VKZbQO8exr^Nd`7!1rz@2f?fK%72B}g2=Kli_49XVsGV~d<#b7?MN0-V zXDmu3Z6PFHUmnvTNS3Oi3YM0(<4;S=IhEEwkgjbp{EyAY4+79*V;_dF#|EEugXp8f53rrrTy)CILYx<6072%9 zGJ!Dv#TY>^P=Zlr5Ng6GLkQMkd@0=W>}VzUUiJUerhn)RO47FCxzgI!ZGN%93X-Zn-6 literal 3448 zcmds)&r8EF7>0B1U-1ea6joj593!A75rmyYZzA2=uWMMBjQp^ZZU4L3CSxaEz=P!0 zl$Z84H#+h79J; zSWE~c3sq7HA@Tb1m<~IBOj<6eu>PfVO@qm&-R8x#_Cpti(`ajwur;C8N={*EY4_g| zekBZ?p)yJ6#SQ^$lo2*8esdlwB7cN(vdGx$og1l`Vh()?3#y3k}vNH9!>xN