@@ -172,6 +172,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
172172 os .unlink (TESTFN )
173173
174174
# Padding length (2 ** 13 = 8192) used by the test_long_* cases to build very long source lines.
# NOTE(review): presumably chosen to match an input buffer size in the tokenizer -- confirm.
175+ BUFSIZ = 2 ** 13
176+
175177class AbstractSourceEncodingTest :
176178
177179 def test_default_coding (self ):
@@ -184,14 +186,20 @@ def test_first_coding_line(self):
184186 self .check_script_output (src , br"'\xc3\u20ac'" )
185187
186188 def test_second_coding_line (self ):
# Shebang on line 1, iso8859-15 cookie on line 2; the expected output shows
# the \xa4 byte rendered as \u20ac.
187- src = (b'#\n '
189+ src = (b'#!/usr/bin/python\n '
190+ b'#coding:iso8859-15\n '
191+ b'print(ascii("\xc3 \xa4 "))\n ' )
192+ self .check_script_output (src , br"'\xc3\u20ac'" )
193+
194+ def test_second_coding_line_empty_first_line (self ):
# Line 1 is empty and the iso8859-15 cookie sits on line 2; the expected
# output shows the \xa4 byte rendered as \u20ac.
195+ src = (b'\n '
188196 b'#coding:iso8859-15\n '
189197 b'print(ascii("\xc3 \xa4 "))\n ' )
190198 self .check_script_output (src , br"'\xc3\u20ac'" )
191199
192200 def test_third_coding_line (self ):
193201 # Only first two lines are tested for a magic comment.
194- src = (b'#\n '
202+ src = (b'#!/usr/bin/python \n '
195203 b'#\n '
196204 b'#coding:iso8859-15\n '
197205 b'print(ascii("\xc3 \xa4 "))\n ' )
@@ -209,13 +217,52 @@ def test_double_coding_same_line(self):
209217 b'print(ascii("\xc3 \xa4 "))\n ' )
210218 self .check_script_output (src , br"'\xc3\u20ac'" )
211219
220+ def test_double_coding_utf8 (self ):
# Two cookies: utf-8 on line 1, latin1 on line 2.  The expected '\xe4' is
# what the bytes \xc3\xa4 give when read as UTF-8, i.e. the first cookie is
# the one expected to apply.
221+ src = (b'#coding:utf-8\n '
222+ b'#coding:latin1\n '
223+ b'print(ascii("\xc3 \xa4 "))\n ' )
224+ self .check_script_output (src , br"'\xe4'" )
225+
226+ def test_long_first_coding_line (self ):
# The cookie on line 1 is preceded by BUFSIZ spaces of padding; it is still
# expected to take effect (\xa4 rendered as \u20ac).
227+ src = (b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n '
228+ b'print(ascii("\xc3 \xa4 "))\n ' )
229+ self .check_script_output (src , br"'\xc3\u20ac'" )
230+
231+ def test_long_second_coding_line (self ):
# Shebang on line 1; the cookie on line 2 is preceded by BUFSIZ spaces of
# padding and is still expected to take effect (\xa4 rendered as \u20ac).
232+ src = (b'#!/usr/bin/python\n '
233+ b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n '
234+ b'print(ascii("\xc3 \xa4 "))\n ' )
235+ self .check_script_output (src , br"'\xc3\u20ac'" )
236+
237+ def test_long_coding_line (self ):
# The cookie is at the start of line 1 and is followed by BUFSIZ spaces
# before the newline; it is expected to take effect (\xa4 rendered as \u20ac).
238+ src = (b'#coding:iso-8859-15' + b' ' * BUFSIZ + b'\n '
239+ b'print(ascii("\xc3 \xa4 "))\n ' )
240+ self .check_script_output (src , br"'\xc3\u20ac'" )
241+
242+ def test_long_coding_name (self ):
# The encoding name is 'iso-8859-1-' followed by BUFSIZ 'x' characters.
# The expected output has the two bytes as separate code points \xc3 and \xa4.
# NOTE(review): presumably the over-long name is treated as iso-8859-1 -- confirm.
243+ src = (b'#coding:iso-8859-1-' + b'x' * BUFSIZ + b'\n '
244+ b'print(ascii("\xc3 \xa4 "))\n ' )
245+ self .check_script_output (src , br"'\xc3\xa4'" )
246+
247+ def test_long_first_utf8_line (self ):
# A comment-only script whose first line holds BUFSIZ // 2 copies of the
# two-byte sequence \xc3\xa4.  It is run twice, the second time with an extra
# space after '#', which shifts the sequences by one byte.  Both runs expect
# empty output.
248+ src = b'#' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
249+ self .check_script_output (src , b'' )
250+ src = b'# ' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
251+ self .check_script_output (src , b'' )
252+
253+ def test_long_second_utf8_line (self ):
# An empty first line followed by a comment line holding BUFSIZ // 2 copies
# of the two-byte sequence \xc3\xa4.  It is run twice, the second time with an
# extra space after '#'.  Both runs expect empty output.
254+ src = b'\n #' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
255+ self .check_script_output (src , b'' )
256+ src = b'\n # ' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
257+ self .check_script_output (src , b'' )
258+
212259 def test_first_non_utf8_coding_line (self ):
# The cookie line itself carries a raw \xa4 byte after the encoding name; the
# iso-8859-15 cookie is still expected to apply (\xa4 rendered as \u20ac).
213260 src = (b'#coding:iso-8859-15 \xa4 \n '
214261 b'print(ascii("\xc3 \xa4 "))\n ' )
215262 self .check_script_output (src , br"'\xc3\u20ac'" )
216263
217264 def test_second_non_utf8_coding_line (self ):
# Shebang on line 1; the cookie line on line 2 carries a raw \xa4 byte after
# the encoding name and is still expected to apply (\xa4 rendered as \u20ac).
218- src = (b'\n '
265+ src = (b'#!/usr/bin/python \n '
219266 b'#coding:iso-8859-15 \xa4 \n '
220267 b'print(ascii("\xc3 \xa4 "))\n ' )
221268 self .check_script_output (src , br"'\xc3\u20ac'" )
@@ -224,27 +271,56 @@ def test_utf8_bom(self):
224271 src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
225272 self .check_script_output (src , br"'\xe4'" )
226273
274+ def test_utf8_bom_utf8_comments (self ):
# The source starts with the bytes \xef\xbb\xbf (the BOM of the test name) and
# has \xc3\xa4 in comments on the first two lines; expects '\xe4' to be printed.
275+ src = (b'\xef \xbb \xbf #\xc3 \xa4 \n '
276+ b'#\xc3 \xa4 \n '
277+ b'print(ascii("\xc3 \xa4 "))\n ' )
278+ self .check_script_output (src , br"'\xe4'" )
279+
227280 def test_utf8_bom_and_utf8_coding_line (self ):
# The source starts with the bytes \xef\xbb\xbf followed by a utf-8 cookie on
# the same line; expects '\xe4' to be printed.
228281 src = (b'\xef \xbb \xbf #coding:utf-8\n '
229282 b'print(ascii("\xc3 \xa4 "))\n ' )
230283 self .check_script_output (src , br"'\xe4'" )
231284
285+ def test_utf8_non_utf8_comment_line_error (self ):
# utf8 cookie on line 1 and a lone \xa4 byte in a comment on line 3.  The
# pattern passed to check_script_error accepts either of two error messages.
# The final `raise RuntimeError` line would produce a different exception if
# the script body were actually executed.
286+ src = (b'#coding: utf8\n '
287+ b'#\n '
288+ b'#\xa4 \n '
289+ b'raise RuntimeError\n ' )
290+ self .check_script_error (src ,
291+ br"'utf-8' codec can't decode byte|"
292+ br"encoding problem: utf8" )
293+
232294 def test_crlf (self ):
# A CR LF pair inside a triple-quoted string is expected to print as a single '\n'.
233295 src = (b'print(ascii("""\r \n """))\n ' )
234- out = self .check_script_output (src , br"'\n'" )
296+ self .check_script_output (src , br"'\n'" )
235297
236298 def test_crcrlf (self ):
# CR CR LF inside a triple-quoted string is expected to print as two '\n'.
237299 src = (b'print(ascii("""\r \r \n """))\n ' )
238- out = self .check_script_output (src , br"'\n\n'" )
300+ self .check_script_output (src , br"'\n\n'" )
239301
240302 def test_crcrcrlf (self ):
# CR CR CR LF inside a triple-quoted string is expected to print as three '\n'.
241303 src = (b'print(ascii("""\r \r \r \n """))\n ' )
242- out = self .check_script_output (src , br"'\n\n\n'" )
304+ self .check_script_output (src , br"'\n\n\n'" )
243305
244306 def test_crcrcrlf2 (self ):
# CR CR CR LF inside a triple-quoted string, this time after an iso-8859-1
# cookie on line 1; still expected to print as three '\n'.
245307 src = (b'#coding:iso-8859-1\n '
246308 b'print(ascii("""\r \r \r \n """))\n ' )
247- out = self .check_script_output (src , br"'\n\n\n'" )
309+ self .check_script_output (src , br"'\n\n\n'" )
310+
311+ def test_nul_in_first_coding_line (self ):
# A \x00 byte ends the cookie on line 1.  check_script_error is given a
# pattern for the "cannot contain null bytes" message; the final
# `raise RuntimeError` line would produce a different exception if the script
# body were actually executed.
312+ src = (b'#coding:iso8859-15\x00 \n '
313+ b'\n '
314+ b'\n '
315+ b'raise RuntimeError\n ' )
316+ self .check_script_error (src , br"source code (string )?cannot contain null bytes" )
317+
318+ def test_nul_in_second_coding_line (self ):
# Shebang on line 1; a \x00 byte ends the cookie on line 2.
# check_script_error is given a pattern for the "cannot contain null bytes"
# message; the final `raise RuntimeError` line would produce a different
# exception if the script body were actually executed.
319+ src = (b'#!/usr/bin/python\n '
320+ b'#coding:iso8859-15\x00 \n '
321+ b'\n '
322+ b'raise RuntimeError\n ' )
323+ self .check_script_error (src , br"source code (string )?cannot contain null bytes" )
248324
249325
250326class UTF8ValidatorTest (unittest .TestCase ):
@@ -324,6 +400,10 @@ def check_script_output(self, src, expected):
324400 out = stdout .getvalue ().encode ('latin1' )
325401 self .assertEqual (out .rstrip (), expected )
326402
def check_script_error(self, src, expected):
    """Assert that executing *src* raises SyntaxError matching *expected*.

    Args:
        src: Source code as bytes, passed directly to exec().
        expected: Regular expression as bytes; it is decoded to str
            before being matched against the SyntaxError message.
    """
    # The context manager is not bound to a name: nothing inspects the
    # caught exception afterwards.
    with self.assertRaisesRegex(SyntaxError, expected.decode()):
        exec(src)
406+
327407
328408class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
329409
@@ -335,6 +415,14 @@ def check_script_output(self, src, expected):
335415 res = script_helper .assert_python_ok (fn )
336416 self .assertEqual (res .out .rstrip (), expected )
337417
def check_script_error(self, src, expected):
    """Run *src* as a script file and check the SyntaxError it reports.

    The bytes in *src* are written to ``test.py`` inside a fresh temporary
    directory and run through script_helper.assert_python_failure().  The
    last line of the captured stderr must match ``SyntaxError`` followed,
    later on the same line, by the bytes regex *expected*.
    """
    with tempfile.TemporaryDirectory() as workdir:
        script_path = os.path.join(workdir, 'test.py')
        with open(script_path, 'wb') as script:
            script.write(src)
        result = script_helper.assert_python_failure(script_path)
    # Only the final stderr line is checked; earlier lines are not inspected.
    last_line = result.err.rstrip().splitlines()[-1]
    pattern = b'SyntaxError.*?' + expected
    self.assertRegex(last_line, pattern)
425+
338426
# Run the tests in this module when the file is executed as a script.
339427if __name__ == "__main__" :
340428 unittest .main ()
0 commit comments