Skip to content

Commit 2a99fd9

Browse files
authored
bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-14376)
1 parent c755ca8 commit 2a99fd9

File tree

4 files changed

+213
-119
lines changed

4 files changed

+213
-119
lines changed

Doc/library/mimetypes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ behavior of the module.
8888
Specifying an empty list for *files* will prevent the system defaults from
8989
being applied: only the well-known values from a built-in list will be present.
9090

91+
If *files* is ``None``, the internal data structure is completely rebuilt to its
92+
initial default value. This is a stable operation and will produce the same results
93+
when called multiple times.
94+
9195
.. versionchanged:: 3.2
9296
Previously, Windows registry settings were ignored.
9397

Lib/mimetypes.py

Lines changed: 132 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ class MimeTypes:
6666
def __init__(self, filenames=(), strict=True):
6767
if not inited:
6868
init()
69-
self.encodings_map = encodings_map.copy()
70-
self.suffix_map = suffix_map.copy()
69+
self.encodings_map = _encodings_map_default.copy()
70+
self.suffix_map = _suffix_map_default.copy()
7171
self.types_map = ({}, {}) # dict for (non-strict, strict)
7272
self.types_map_inv = ({}, {})
73-
for (ext, type) in types_map.items():
73+
for (ext, type) in _types_map_default.items():
7474
self.add_type(type, ext, True)
75-
for (ext, type) in common_types.items():
75+
for (ext, type) in _common_types_default.items():
7676
self.add_type(type, ext, False)
7777
for name in filenames:
7878
self.read(name, strict)
@@ -113,6 +113,7 @@ def guess_type(self, url, strict=True):
113113
Optional `strict' argument when False adds a bunch of commonly found,
114114
but non-standard types.
115115
"""
116+
url = os.fspath(url)
116117
scheme, url = urllib.parse.splittype(url)
117118
if scheme == 'data':
118119
# syntax of data URLs:
@@ -345,11 +346,19 @@ def init(files=None):
345346
global suffix_map, types_map, encodings_map, common_types
346347
global inited, _db
347348
inited = True # so that MimeTypes.__init__() doesn't call us again
348-
db = MimeTypes()
349-
if files is None:
349+
350+
if files is None or _db is None:
351+
db = MimeTypes()
350352
if _winreg:
351353
db.read_windows_registry()
352-
files = knownfiles
354+
355+
if files is None:
356+
files = knownfiles
357+
else:
358+
files = knownfiles + list(files)
359+
else:
360+
db = _db
361+
353362
for file in files:
354363
if os.path.isfile(file):
355364
db.read(file)
@@ -373,12 +382,12 @@ def read_mime_types(file):
373382

374383

375384
def _default_mime_types():
376-
global suffix_map
377-
global encodings_map
378-
global types_map
379-
global common_types
385+
global suffix_map, _suffix_map_default
386+
global encodings_map, _encodings_map_default
387+
global types_map, _types_map_default
388+
global common_types, _common_types_default
380389

381-
suffix_map = {
390+
suffix_map = _suffix_map_default = {
382391
'.svgz': '.svg.gz',
383392
'.tgz': '.tar.gz',
384393
'.taz': '.tar.gz',
@@ -387,7 +396,7 @@ def _default_mime_types():
387396
'.txz': '.tar.xz',
388397
}
389398

390-
encodings_map = {
399+
encodings_map = _encodings_map_default = {
391400
'.gz': 'gzip',
392401
'.Z': 'compress',
393402
'.bz2': 'bzip2',
@@ -398,151 +407,155 @@ def _default_mime_types():
398407
# at http://www.iana.org/assignments/media-types
399408
# or extensions, i.e. using the x- prefix
400409

401-
# If you add to these, please keep them sorted!
402-
types_map = {
410+
# If you add to these, please keep them sorted by mime type.
411+
# Make sure the entry with the preferred file extension for a particular mime type
412+
# appears before any others of the same mime type.
413+
types_map = _types_map_default = {
414+
'.js' : 'application/javascript',
415+
'.mjs' : 'application/javascript',
416+
'.json' : 'application/json',
417+
'.doc' : 'application/msword',
418+
'.dot' : 'application/msword',
419+
'.wiz' : 'application/msword',
420+
'.bin' : 'application/octet-stream',
403421
'.a' : 'application/octet-stream',
422+
'.dll' : 'application/octet-stream',
423+
'.exe' : 'application/octet-stream',
424+
'.o' : 'application/octet-stream',
425+
'.obj' : 'application/octet-stream',
426+
'.so' : 'application/octet-stream',
427+
'.oda' : 'application/oda',
428+
'.pdf' : 'application/pdf',
429+
'.p7c' : 'application/pkcs7-mime',
430+
'.ps' : 'application/postscript',
404431
'.ai' : 'application/postscript',
405-
'.aif' : 'audio/x-aiff',
406-
'.aifc' : 'audio/x-aiff',
407-
'.aiff' : 'audio/x-aiff',
408-
'.au' : 'audio/basic',
409-
'.avi' : 'video/x-msvideo',
410-
'.bat' : 'text/plain',
432+
'.eps' : 'application/postscript',
433+
'.m3u' : 'application/vnd.apple.mpegurl',
434+
'.m3u8' : 'application/vnd.apple.mpegurl',
435+
'.xls' : 'application/vnd.ms-excel',
436+
'.xlb' : 'application/vnd.ms-excel',
437+
'.ppt' : 'application/vnd.ms-powerpoint',
438+
'.pot' : 'application/vnd.ms-powerpoint',
439+
'.ppa' : 'application/vnd.ms-powerpoint',
440+
'.pps' : 'application/vnd.ms-powerpoint',
441+
'.pwz' : 'application/vnd.ms-powerpoint',
442+
'.wasm' : 'application/wasm',
411443
'.bcpio' : 'application/x-bcpio',
412-
'.bin' : 'application/octet-stream',
413-
'.bmp' : 'image/bmp',
414-
'.c' : 'text/plain',
415-
'.cdf' : 'application/x-netcdf',
416444
'.cpio' : 'application/x-cpio',
417445
'.csh' : 'application/x-csh',
418-
'.css' : 'text/css',
419-
'.csv' : 'text/csv',
420-
'.dll' : 'application/octet-stream',
421-
'.doc' : 'application/msword',
422-
'.dot' : 'application/msword',
423446
'.dvi' : 'application/x-dvi',
424-
'.eml' : 'message/rfc822',
425-
'.eps' : 'application/postscript',
426-
'.etx' : 'text/x-setext',
427-
'.exe' : 'application/octet-stream',
428-
'.gif' : 'image/gif',
429447
'.gtar' : 'application/x-gtar',
430-
'.h' : 'text/plain',
431448
'.hdf' : 'application/x-hdf',
432-
'.htm' : 'text/html',
433-
'.html' : 'text/html',
434-
'.ico' : 'image/vnd.microsoft.icon',
435-
'.ief' : 'image/ief',
436-
'.jpe' : 'image/jpeg',
437-
'.jpeg' : 'image/jpeg',
438-
'.jpg' : 'image/jpeg',
439-
'.js' : 'application/javascript',
440-
'.json' : 'application/json',
441-
'.ksh' : 'text/plain',
442449
'.latex' : 'application/x-latex',
443-
'.m1v' : 'video/mpeg',
444-
'.m3u' : 'application/vnd.apple.mpegurl',
445-
'.m3u8' : 'application/vnd.apple.mpegurl',
446-
'.man' : 'application/x-troff-man',
447-
'.me' : 'application/x-troff-me',
448-
'.mht' : 'message/rfc822',
449-
'.mhtml' : 'message/rfc822',
450450
'.mif' : 'application/x-mif',
451-
'.mjs' : 'application/javascript',
452-
'.mov' : 'video/quicktime',
453-
'.movie' : 'video/x-sgi-movie',
454-
'.mp2' : 'audio/mpeg',
455-
'.mp3' : 'audio/mpeg',
456-
'.mp4' : 'video/mp4',
457-
'.mpa' : 'video/mpeg',
458-
'.mpe' : 'video/mpeg',
459-
'.mpeg' : 'video/mpeg',
460-
'.mpg' : 'video/mpeg',
461-
'.ms' : 'application/x-troff-ms',
451+
'.cdf' : 'application/x-netcdf',
462452
'.nc' : 'application/x-netcdf',
463-
'.nws' : 'message/rfc822',
464-
'.o' : 'application/octet-stream',
465-
'.obj' : 'application/octet-stream',
466-
'.oda' : 'application/oda',
467453
'.p12' : 'application/x-pkcs12',
468-
'.p7c' : 'application/pkcs7-mime',
469-
'.pbm' : 'image/x-portable-bitmap',
470-
'.pdf' : 'application/pdf',
471454
'.pfx' : 'application/x-pkcs12',
472-
'.pgm' : 'image/x-portable-graymap',
473-
'.pl' : 'text/plain',
474-
'.png' : 'image/png',
475-
'.pnm' : 'image/x-portable-anymap',
476-
'.pot' : 'application/vnd.ms-powerpoint',
477-
'.ppa' : 'application/vnd.ms-powerpoint',
478-
'.ppm' : 'image/x-portable-pixmap',
479-
'.pps' : 'application/vnd.ms-powerpoint',
480-
'.ppt' : 'application/vnd.ms-powerpoint',
481-
'.ps' : 'application/postscript',
482-
'.pwz' : 'application/vnd.ms-powerpoint',
483-
'.py' : 'text/x-python',
455+
'.ram' : 'application/x-pn-realaudio',
484456
'.pyc' : 'application/x-python-code',
485457
'.pyo' : 'application/x-python-code',
486-
'.qt' : 'video/quicktime',
487-
'.ra' : 'audio/x-pn-realaudio',
488-
'.ram' : 'application/x-pn-realaudio',
489-
'.ras' : 'image/x-cmu-raster',
490-
'.rdf' : 'application/xml',
491-
'.rgb' : 'image/x-rgb',
492-
'.roff' : 'application/x-troff',
493-
'.rtx' : 'text/richtext',
494-
'.sgm' : 'text/x-sgml',
495-
'.sgml' : 'text/x-sgml',
496458
'.sh' : 'application/x-sh',
497459
'.shar' : 'application/x-shar',
498-
'.snd' : 'audio/basic',
499-
'.so' : 'application/octet-stream',
500-
'.src' : 'application/x-wais-source',
460+
'.swf' : 'application/x-shockwave-flash',
501461
'.sv4cpio': 'application/x-sv4cpio',
502462
'.sv4crc' : 'application/x-sv4crc',
503-
'.svg' : 'image/svg+xml',
504-
'.swf' : 'application/x-shockwave-flash',
505-
'.t' : 'application/x-troff',
506463
'.tar' : 'application/x-tar',
507464
'.tcl' : 'application/x-tcl',
508465
'.tex' : 'application/x-tex',
509466
'.texi' : 'application/x-texinfo',
510467
'.texinfo': 'application/x-texinfo',
511-
'.tif' : 'image/tiff',
512-
'.tiff' : 'image/tiff',
468+
'.roff' : 'application/x-troff',
469+
'.t' : 'application/x-troff',
513470
'.tr' : 'application/x-troff',
514-
'.tsv' : 'text/tab-separated-values',
515-
'.txt' : 'text/plain',
471+
'.man' : 'application/x-troff-man',
472+
'.me' : 'application/x-troff-me',
473+
'.ms' : 'application/x-troff-ms',
516474
'.ustar' : 'application/x-ustar',
517-
'.vcf' : 'text/x-vcard',
518-
'.wav' : 'audio/x-wav',
519-
'.webm' : 'video/webm',
520-
'.wiz' : 'application/msword',
475+
'.src' : 'application/x-wais-source',
476+
'.xsl' : 'application/xml',
477+
'.rdf' : 'application/xml',
521478
'.wsdl' : 'application/xml',
522-
'.xbm' : 'image/x-xbitmap',
523-
'.xlb' : 'application/vnd.ms-excel',
524-
'.xls' : 'application/vnd.ms-excel',
525-
'.xml' : 'text/xml',
526479
'.xpdl' : 'application/xml',
480+
'.zip' : 'application/zip',
481+
'.au' : 'audio/basic',
482+
'.snd' : 'audio/basic',
483+
'.mp3' : 'audio/mpeg',
484+
'.mp2' : 'audio/mpeg',
485+
'.aif' : 'audio/x-aiff',
486+
'.aifc' : 'audio/x-aiff',
487+
'.aiff' : 'audio/x-aiff',
488+
'.ra' : 'audio/x-pn-realaudio',
489+
'.wav' : 'audio/x-wav',
490+
'.bmp' : 'image/bmp',
491+
'.gif' : 'image/gif',
492+
'.ief' : 'image/ief',
493+
'.jpg' : 'image/jpeg',
494+
'.jpe' : 'image/jpeg',
495+
'.jpeg' : 'image/jpeg',
496+
'.png' : 'image/png',
497+
'.svg' : 'image/svg+xml',
498+
'.tiff' : 'image/tiff',
499+
'.tif' : 'image/tiff',
500+
'.ico' : 'image/vnd.microsoft.icon',
501+
'.ras' : 'image/x-cmu-raster',
502+
'.bmp' : 'image/x-ms-bmp',
503+
'.pnm' : 'image/x-portable-anymap',
504+
'.pbm' : 'image/x-portable-bitmap',
505+
'.pgm' : 'image/x-portable-graymap',
506+
'.ppm' : 'image/x-portable-pixmap',
507+
'.rgb' : 'image/x-rgb',
508+
'.xbm' : 'image/x-xbitmap',
527509
'.xpm' : 'image/x-xpixmap',
528-
'.xsl' : 'application/xml',
529510
'.xwd' : 'image/x-xwindowdump',
530-
'.zip' : 'application/zip',
511+
'.eml' : 'message/rfc822',
512+
'.mht' : 'message/rfc822',
513+
'.mhtml' : 'message/rfc822',
514+
'.nws' : 'message/rfc822',
515+
'.css' : 'text/css',
516+
'.csv' : 'text/csv',
517+
'.html' : 'text/html',
518+
'.htm' : 'text/html',
519+
'.txt' : 'text/plain',
520+
'.bat' : 'text/plain',
521+
'.c' : 'text/plain',
522+
'.h' : 'text/plain',
523+
'.ksh' : 'text/plain',
524+
'.pl' : 'text/plain',
525+
'.rtx' : 'text/richtext',
526+
'.tsv' : 'text/tab-separated-values',
527+
'.py' : 'text/x-python',
528+
'.etx' : 'text/x-setext',
529+
'.sgm' : 'text/x-sgml',
530+
'.sgml' : 'text/x-sgml',
531+
'.vcf' : 'text/x-vcard',
532+
'.xml' : 'text/xml',
533+
'.mp4' : 'video/mp4',
534+
'.mpeg' : 'video/mpeg',
535+
'.m1v' : 'video/mpeg',
536+
'.mpa' : 'video/mpeg',
537+
'.mpe' : 'video/mpeg',
538+
'.mpg' : 'video/mpeg',
539+
'.mov' : 'video/quicktime',
540+
'.qt' : 'video/quicktime',
541+
'.webm' : 'video/webm',
542+
'.avi' : 'video/x-msvideo',
543+
'.movie' : 'video/x-sgi-movie',
531544
}
532545

533546
# These are non-standard types, commonly found in the wild. They will
534547
# only match if strict=0 flag is given to the API methods.
535548

536549
# Please sort these too
537-
common_types = {
538-
'.jpg' : 'image/jpg',
539-
'.mid' : 'audio/midi',
550+
common_types = _common_types_default = {
551+
'.rtf' : 'application/rtf',
540552
'.midi': 'audio/midi',
553+
'.mid' : 'audio/midi',
554+
'.jpg' : 'image/jpg',
555+
'.pict': 'image/pict',
541556
'.pct' : 'image/pict',
542557
'.pic' : 'image/pict',
543-
'.pict': 'image/pict',
544-
'.rtf' : 'application/rtf',
545-
'.xul' : 'text/xul'
558+
'.xul' : 'text/xul',
546559
}
547560

548561

0 commit comments

Comments
 (0)