From 5381af96469c7c82d2bccb4bfa81a5db66e446c3 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Thu, 23 May 2019 10:45:16 +0100 Subject: [PATCH 1/5] Avoid copying memory for zip file contents twice during extraction --- jbmc/src/java_bytecode/mz_zip_archive.cpp | 31 +++++++++++++---------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/jbmc/src/java_bytecode/mz_zip_archive.cpp b/jbmc/src/java_bytecode/mz_zip_archive.cpp index a193f47e1fb..13acbe0b7c5 100644 --- a/jbmc/src/java_bytecode/mz_zip_archive.cpp +++ b/jbmc/src/java_bytecode/mz_zip_archive.cpp @@ -73,13 +73,17 @@ size_t mz_zip_archivet::get_num_files() std::string mz_zip_archivet::get_filename(const size_t index) { - const auto id=static_cast(index); - std::vector buffer; - buffer.resize(mz_zip_reader_get_filename(m_state.get(), id, nullptr, 0)); - mz_zip_reader_get_filename(m_state.get(), id, buffer.data(), buffer.size()); - // Buffer may contain junk returned after \0 - const auto null_char_it=std::find(buffer.cbegin(), buffer.cend(), '\0'); - return { buffer.cbegin(), null_char_it }; + const auto id = static_cast(index); + mz_uint name_size = mz_zip_reader_get_filename(m_state.get(), id, nullptr, 0); + if(name_size == 0) + return {}; // Failure + // It is valid to directly write to a string's buffer (see C++11 standard, + // basic_string general requirements [string.require], 21.4.1.5) + std::string buffer(name_size, '\0'); + mz_zip_reader_get_filename(m_state.get(), id, &buffer[0], buffer.size()); + // Buffer contains trailing \0 + buffer.resize(name_size - 1); + return buffer; } std::string mz_zip_archivet::extract(const size_t index) @@ -89,12 +93,13 @@ std::string mz_zip_archivet::extract(const size_t index) const mz_bool stat_ok=mz_zip_reader_file_stat(m_state.get(), id, &file_stat); if(stat_ok==MZ_TRUE) { - std::vector buffer(file_stat.m_uncomp_size); - const mz_bool read_ok=mz_zip_reader_extract_to_mem( - m_state.get(), id, buffer.data(), buffer.size(), 0); - if(read_ok==MZ_TRUE) - return { buffer.cbegin(), buffer.cend() }; + // It is valid to directly write to a string's buffer (see C++11 standard, + // basic_string general requirements [string.require], 21.4.1.5) + std::string buffer(file_stat.m_uncomp_size, '\0'); + const mz_bool read_ok = mz_zip_reader_extract_to_mem( + m_state.get(), id, &buffer[0], buffer.size(), 0); + if(read_ok == MZ_TRUE) + return buffer; } throw std::runtime_error("Could not extract the file"); } - From 1cb05e36bb66313197171ad8d6de0f00a6b5388e Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Thu, 23 May 2019 11:26:45 +0100 Subject: [PATCH 2/5] Add ability to extract contents of zip archive to a file This can be used to extract nested jar files to a temporary folder. --- jbmc/src/java_bytecode/mz_zip_archive.cpp | 13 +++++++++++++ jbmc/src/java_bytecode/mz_zip_archive.h | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/jbmc/src/java_bytecode/mz_zip_archive.cpp b/jbmc/src/java_bytecode/mz_zip_archive.cpp index 13acbe0b7c5..09e18264cd7 100644 --- a/jbmc/src/java_bytecode/mz_zip_archive.cpp +++ b/jbmc/src/java_bytecode/mz_zip_archive.cpp @@ -103,3 +103,16 @@ std::string mz_zip_archivet::extract(const size_t index) } throw std::runtime_error("Could not extract the file"); } + +void mz_zip_archivet::extract_to_file( + const size_t index, + const std::string &path) +{ + const auto id = static_cast(index); + if( + mz_zip_reader_extract_to_file(m_state.get(), id, path.c_str(), 0) != + MZ_TRUE) + { + throw std::runtime_error("Could not extract the file"); + } +} diff --git a/jbmc/src/java_bytecode/mz_zip_archive.h b/jbmc/src/java_bytecode/mz_zip_archive.h index 88e3456a91e..e1bc002e595 100644 --- a/jbmc/src/java_bytecode/mz_zip_archive.h +++ b/jbmc/src/java_bytecode/mz_zip_archive.h @@ -51,6 +51,12 @@ class mz_zip_archivet final /// \throw Throws std::runtime_error if file cannot be extracted /// \return Contents of the file in the archive std::string extract(size_t index); + /// Write contents of nth file in the archive to a file + /// \param index: id of the file in the archive + /// \param path: path to which to write the contents of the file + /// \throw Throws std::runtime_error if file cannot be written + void extract_to_file(size_t index, const std::string &path); + private: std::unique_ptr m_state; }; From d3ec3d1b5bfcff5d3902373c2a477e3272c48fe5 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Thu, 23 May 2019 11:34:05 +0100 Subject: [PATCH 3/5] Move string into map rather than copy it --- jbmc/src/java_bytecode/jar_file.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/jbmc/src/java_bytecode/jar_file.cpp b/jbmc/src/java_bytecode/jar_file.cpp index 4df0880309c..4a4e29e8b1b 100644 --- a/jbmc/src/java_bytecode/jar_file.cpp +++ b/jbmc/src/java_bytecode/jar_file.cpp @@ -20,10 +20,7 @@ void jar_filet::initialize_file_index() { const size_t file_count=m_zip_archive.get_num_files(); for(size_t index=0; index Date: Thu, 23 May 2019 12:20:38 +0100 Subject: [PATCH 4/5] Remove hack only needed for old version of Visual Studio --- jbmc/src/java_bytecode/jar_pool.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/jbmc/src/java_bytecode/jar_pool.cpp b/jbmc/src/java_bytecode/jar_pool.cpp index 5e72ffb0454..f0236112e66 100644 --- a/jbmc/src/java_bytecode/jar_pool.cpp +++ b/jbmc/src/java_bytecode/jar_pool.cpp @@ -13,11 +13,7 @@ jar_filet &jar_poolt::operator()(const std::string &file_name) { const auto it = m_archives.find(file_name); if(it == m_archives.end()) - { - // VS: Can't construct in place - auto file = jar_filet(file_name); - return m_archives.emplace(file_name, std::move(file)).first->second; - } + return m_archives.emplace(file_name, jar_filet(file_name)).first->second; else return it->second; } From 72936d15fd7aabd02bb537bf08a69bd785a5824f Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Thu, 23 May 2019 14:25:46 +0100 Subject: [PATCH 5/5] Corrected copy and pasted comment --- jbmc/src/java_bytecode/jar_file.h | 2 +- jbmc/src/java_bytecode/mz_zip_archive.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jbmc/src/java_bytecode/jar_file.h b/jbmc/src/java_bytecode/jar_file.h index 811e9b0da7c..f1dc5f406bd 100644 --- a/jbmc/src/java_bytecode/jar_file.h +++ b/jbmc/src/java_bytecode/jar_file.h @@ -31,7 +31,7 @@ class jar_filet final /// Open a JAR file of size \p size loaded in memory at address \p data. /// \param data: memory buffer with the contents of the jar file /// \param size: size of the memory buffer - /// \throw Throws std::runtime_error if file cannot be opened + /// \throw Throws std::runtime_error if data is not in correct format jar_filet(const void *data, size_t size); jar_filet(const jar_filet &)=delete; diff --git a/jbmc/src/java_bytecode/mz_zip_archive.h b/jbmc/src/java_bytecode/mz_zip_archive.h index e1bc002e595..b17c2f11958 100644 --- a/jbmc/src/java_bytecode/mz_zip_archive.h +++ b/jbmc/src/java_bytecode/mz_zip_archive.h @@ -28,7 +28,7 @@ class mz_zip_archivet final /// Loads a zip buffer /// \param data: pointer to the memory buffer /// \param size: size of the buffer - /// \throw Throws std::runtime_error if file cannot be opened + /// \throw Throws std::runtime_error if data is not in correct format mz_zip_archivet(const void *data, size_t size); mz_zip_archivet(const mz_zip_archivet &)=delete;