@@ -173,6 +173,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
173173 os .unlink (TESTFN )
174174
175175
176+ BUFSIZ = 2 ** 13
177+
176178class AbstractSourceEncodingTest :
177179
178180 def test_default_coding (self ):
@@ -185,14 +187,20 @@ def test_first_coding_line(self):
185187 self .check_script_output (src , br"'\xc3\u20ac'" )
186188
187189 def test_second_coding_line (self ):
188- src = (b'#\n '
190+ src = (b'#!/usr/bin/python\n '
191+ b'#coding:iso8859-15\n '
192+ b'print(ascii("\xc3 \xa4 "))\n ' )
193+ self .check_script_output (src , br"'\xc3\u20ac'" )
194+
195+ def test_second_coding_line_empty_first_line (self ):
196+ src = (b'\n '
189197 b'#coding:iso8859-15\n '
190198 b'print(ascii("\xc3 \xa4 "))\n ' )
191199 self .check_script_output (src , br"'\xc3\u20ac'" )
192200
193201 def test_third_coding_line (self ):
194202 # Only first two lines are tested for a magic comment.
195- src = (b'#\n '
203+ src = (b'#!/usr/bin/python \n '
196204 b'#\n '
197205 b'#coding:iso8859-15\n '
198206 b'print(ascii("\xc3 \xa4 "))\n ' )
@@ -210,13 +218,52 @@ def test_double_coding_same_line(self):
210218 b'print(ascii("\xc3 \xa4 "))\n ' )
211219 self .check_script_output (src , br"'\xc3\u20ac'" )
212220
221+ def test_double_coding_utf8 (self ):
222+ src = (b'#coding:utf-8\n '
223+ b'#coding:latin1\n '
224+ b'print(ascii("\xc3 \xa4 "))\n ' )
225+ self .check_script_output (src , br"'\xe4'" )
226+
227+ def test_long_first_coding_line (self ):
228+ src = (b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n '
229+ b'print(ascii("\xc3 \xa4 "))\n ' )
230+ self .check_script_output (src , br"'\xc3\u20ac'" )
231+
232+ def test_long_second_coding_line (self ):
233+ src = (b'#!/usr/bin/python\n '
234+ b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n '
235+ b'print(ascii("\xc3 \xa4 "))\n ' )
236+ self .check_script_output (src , br"'\xc3\u20ac'" )
237+
238+ def test_long_coding_line (self ):
239+ src = (b'#coding:iso-8859-15' + b' ' * BUFSIZ + b'\n '
240+ b'print(ascii("\xc3 \xa4 "))\n ' )
241+ self .check_script_output (src , br"'\xc3\u20ac'" )
242+
243+ def test_long_coding_name (self ):
244+ src = (b'#coding:iso-8859-1-' + b'x' * BUFSIZ + b'\n '
245+ b'print(ascii("\xc3 \xa4 "))\n ' )
246+ self .check_script_output (src , br"'\xc3\xa4'" )
247+
248+ def test_long_first_utf8_line (self ):
249+ src = b'#' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
250+ self .check_script_output (src , b'' )
251+ src = b'# ' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
252+ self .check_script_output (src , b'' )
253+
254+ def test_long_second_utf8_line (self ):
255+ src = b'\n #' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
256+ self .check_script_output (src , b'' )
257+ src = b'\n # ' + b'\xc3 \xa4 ' * (BUFSIZ // 2 ) + b'\n '
258+ self .check_script_output (src , b'' )
259+
213260 def test_first_non_utf8_coding_line (self ):
214261 src = (b'#coding:iso-8859-15 \xa4 \n '
215262 b'print(ascii("\xc3 \xa4 "))\n ' )
216263 self .check_script_output (src , br"'\xc3\u20ac'" )
217264
218265 def test_second_non_utf8_coding_line (self ):
219- src = (b'\n '
266+ src = (b'#!/usr/bin/python \n '
220267 b'#coding:iso-8859-15 \xa4 \n '
221268 b'print(ascii("\xc3 \xa4 "))\n ' )
222269 self .check_script_output (src , br"'\xc3\u20ac'" )
@@ -225,27 +272,56 @@ def test_utf8_bom(self):
225272 src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
226273 self .check_script_output (src , br"'\xe4'" )
227274
275+ def test_utf8_bom_utf8_comments (self ):
276+ src = (b'\xef \xbb \xbf #\xc3 \xa4 \n '
277+ b'#\xc3 \xa4 \n '
278+ b'print(ascii("\xc3 \xa4 "))\n ' )
279+ self .check_script_output (src , br"'\xe4'" )
280+
228281 def test_utf8_bom_and_utf8_coding_line (self ):
229282 src = (b'\xef \xbb \xbf #coding:utf-8\n '
230283 b'print(ascii("\xc3 \xa4 "))\n ' )
231284 self .check_script_output (src , br"'\xe4'" )
232285
286+ def test_utf8_non_utf8_comment_line_error (self ):
287+ src = (b'#coding: utf8\n '
288+ b'#\n '
289+ b'#\xa4 \n '
290+ b'raise RuntimeError\n ' )
291+ self .check_script_error (src ,
292+ br"'utf-8' codec can't decode byte|"
293+ br"encoding problem: utf8" )
294+
233295 def test_crlf (self ):
234296 src = (b'print(ascii("""\r \n """))\n ' )
235- out = self .check_script_output (src , br"'\n'" )
297+ self .check_script_output (src , br"'\n'" )
236298
237299 def test_crcrlf (self ):
238300 src = (b'print(ascii("""\r \r \n """))\n ' )
239- out = self .check_script_output (src , br"'\n\n'" )
301+ self .check_script_output (src , br"'\n\n'" )
240302
241303 def test_crcrcrlf (self ):
242304 src = (b'print(ascii("""\r \r \r \n """))\n ' )
243- out = self .check_script_output (src , br"'\n\n\n'" )
305+ self .check_script_output (src , br"'\n\n\n'" )
244306
245307 def test_crcrcrlf2 (self ):
246308 src = (b'#coding:iso-8859-1\n '
247309 b'print(ascii("""\r \r \r \n """))\n ' )
248- out = self .check_script_output (src , br"'\n\n\n'" )
310+ self .check_script_output (src , br"'\n\n\n'" )
311+
312+ def test_nul_in_first_coding_line (self ):
313+ src = (b'#coding:iso8859-15\x00 \n '
314+ b'\n '
315+ b'\n '
316+ b'raise RuntimeError\n ' )
317+ self .check_script_error (src , br"source code (string )?cannot contain null bytes" )
318+
319+ def test_nul_in_second_coding_line (self ):
320+ src = (b'#!/usr/bin/python\n '
321+ b'#coding:iso8859-15\x00 \n '
322+ b'\n '
323+ b'raise RuntimeError\n ' )
324+ self .check_script_error (src , br"source code (string )?cannot contain null bytes" )
249325
250326
251327class UTF8ValidatorTest (unittest .TestCase ):
@@ -325,6 +401,10 @@ def check_script_output(self, src, expected):
325401 out = stdout .getvalue ().encode ('latin1' )
326402 self .assertEqual (out .rstrip (), expected )
327403
404+ def check_script_error (self , src , expected ):
405+ with self .assertRaisesRegex (SyntaxError , expected .decode ()) as cm :
406+ exec (src )
407+
328408
329409class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
330410
@@ -336,6 +416,14 @@ def check_script_output(self, src, expected):
336416 res = script_helper .assert_python_ok (fn )
337417 self .assertEqual (res .out .rstrip (), expected )
338418
419+ def check_script_error (self , src , expected ):
420+ with tempfile .TemporaryDirectory () as tmpd :
421+ fn = os .path .join (tmpd , 'test.py' )
422+ with open (fn , 'wb' ) as fp :
423+ fp .write (src )
424+ res = script_helper .assert_python_failure (fn )
425+ self .assertRegex (res .err .rstrip ().splitlines ()[- 1 ], b'SyntaxError.*?' + expected )
426+
339427
340428if __name__ == "__main__" :
341429 unittest .main ()
0 commit comments