From 91d0c3f03ec63479ed1d66301d4eddf806dc89a8 Mon Sep 17 00:00:00 2001 From: Federico Cerisola Date: Tue, 2 Nov 2021 14:35:16 +0000 Subject: [PATCH 1/2] Enable the UTF-8 bit flag by default Following the ZIP Format Specification, when using UTF-8 encoded strings for the filenames, it is recommended to set bit 11 of the general purpose bit flag (see section 4.4.4 and Appendix D of the ZIP specification v6.3.9). This change is required to produce zip files containing Unicode filenames that are compatible with other zip tools and libraries (such as Python's zipfile module). --- src/ZipFile.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ZipFile.jl b/src/ZipFile.jl index 77efc57..7fb8b03 100644 --- a/src/ZipFile.jl +++ b/src/ZipFile.jl @@ -58,6 +58,9 @@ const Deflate = UInt16(8) const _Method2Str = Dict{UInt16,String}(Store => "Store", Deflate => "Deflate") +"Unicode filename flag" +const _UnicodeFlag = 0x800 + mutable struct ReadableFile <: IO _io :: IO name :: String # filename @@ -368,7 +371,7 @@ function flush(w::Writer) _writele(w._io, UInt32(_CentralDirSig)) _writele(w._io, UInt16(_ZipVersion)) _writele(w._io, UInt16(_ZipVersion)) - _writele(w._io, UInt16(0)) + _writele(w._io, UInt16(_UnicodeFlag)) _writele(w._io, UInt16(f.method)) _writele(w._io, UInt16(f.dostime)) _writele(w._io, UInt16(f.dosdate)) @@ -563,7 +566,7 @@ function addfile(w::Writer, name::AbstractString; method::Integer=Store, mtime:: # Write local file header. Missing entries will be filled in later. 
_writele(w._io, UInt32(_LocalFileHdrSig)) _writele(w._io, UInt16(_ZipVersion)) - _writele(w._io, UInt16(0)) + _writele(w._io, UInt16(_UnicodeFlag)) _writele(w._io, UInt16(f.method)) _writele(w._io, UInt16(f.dostime)) _writele(w._io, UInt16(f.dosdate)) From 0ad1399d96a1401634f2790dcc0121154673bff9 Mon Sep 17 00:00:00 2001 From: Federico Cerisola Date: Tue, 2 Nov 2021 15:02:05 +0000 Subject: [PATCH 2/2] Add OS creation information and default file attributes Set the "version made by" field depending on the creating OS (currently either Windows or generic Unix) and add default file attributes for the files when being created on *nix. These changes are needed for proper UTF-8 support since some zip tools assume that if the creation OS is MS-DOS (the value being set before this commit) then the non-ASCII characters are encoded using Code Page 437 and not UTF-8 (even though we already set the UTF-8 encoding flag, these tools seem to ignore it for MS-DOS). This is for example the case for the "unzip" tool by Info-ZIP found on most Linux distros. --- src/ZipFile.jl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ZipFile.jl b/src/ZipFile.jl index 7fb8b03..d972344 100644 --- a/src/ZipFile.jl +++ b/src/ZipFile.jl @@ -35,7 +35,7 @@ Julia """ module ZipFile -import Base: read, read!, eof, write, flush, close, mtime, position, show, unsafe_write +import Base: read, read!, eof, write, flush, close, mtime, position, show, unsafe_write, Sys using Printf export read, read!, eof, write, close, mtime, position, show @@ -61,6 +61,12 @@ const _Method2Str = Dict{UInt16,String}(Store => "Store", Deflate => "Deflate") "Unicode filename flag" const _UnicodeFlag = 0x800 +"Version made by" +const _VersionMadeBy = (Sys.iswindows() ? 0x0a00 : 0x0300) | _ZipVersion + +"Default external file attributes: -rw-r-----" +const _DefaultExtFileAttr = Sys.iswindows() ? 
0 : (UInt32(0o640) << 16) + mutable struct ReadableFile <: IO _io :: IO name :: String # filename @@ -369,7 +375,7 @@ function flush(w::Writer) # write central directory record for f in w.files _writele(w._io, UInt32(_CentralDirSig)) - _writele(w._io, UInt16(_ZipVersion)) + _writele(w._io, UInt16(_VersionMadeBy)) _writele(w._io, UInt16(_ZipVersion)) _writele(w._io, UInt16(_UnicodeFlag)) _writele(w._io, UInt16(f.method)) @@ -384,7 +390,7 @@ function flush(w::Writer) _writele(w._io, UInt16(0)) _writele(w._io, UInt16(0)) _writele(w._io, UInt16(0)) - _writele(w._io, UInt32(0)) + _writele(w._io, UInt32(_DefaultExtFileAttr)) _writele(w._io, UInt32(f._offset)) _writele(w._io, b) cdsize += 46+length(b)