From cab22f2dc3c5f41b5163f74cbb233e390edff6ff Mon Sep 17 00:00:00 2001 From: "B. Stack" Date: Tue, 11 Oct 2022 11:16:39 -0400 Subject: add upstream 11.26 --- zen/zlib_wrap.cpp | 139 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 106 insertions(+), 33 deletions(-) (limited to 'zen/zlib_wrap.cpp') diff --git a/zen/zlib_wrap.cpp b/zen/zlib_wrap.cpp index e87a284f..28b85c5c 100644 --- a/zen/zlib_wrap.cpp +++ b/zen/zlib_wrap.cpp @@ -8,8 +8,9 @@ //Windows: use the SAME zlib version that wxWidgets is linking against! //C:\Data\Projects\wxWidgets\Source\src\zlib\zlib.h //Linux/macOS: use zlib system header for both wxWidgets and libcurl (zlib is required for HTTP, SFTP) // => don't compile wxWidgets with: --with-zlib=builtin -#include //https://www.zlib.net/manual.html -#include +#include +#include "scope_guard.h" +#include "serialize.h" using namespace zen; @@ -20,9 +21,9 @@ std::wstring getZlibErrorLiteral(int sc) { switch (sc) { - ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK); - ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END); ZEN_CHECK_CASE_FOR_CONSTANT(Z_NEED_DICT); + ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END); + ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK); ZEN_CHECK_CASE_FOR_CONSTANT(Z_ERRNO); ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_ERROR); ZEN_CHECK_CASE_FOR_CONSTANT(Z_DATA_ERROR); @@ -34,16 +35,15 @@ std::wstring getZlibErrorLiteral(int sc) return replaceCpy(L"zlib error %x", L"%x", numberTo(sc)); } } -} -size_t zen::impl::zlib_compressBound(size_t len) +size_t zlib_compressBound(size_t len) { return ::compressBound(static_cast(len)); //upper limit for buffer size, larger than input size!!! } -size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError +size_t zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError { uLongf bufSize = static_cast(trgLen); const int rv = ::compress2(static_cast(trg), //Bytef* dest @@ -61,7 +61,7 @@ size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_ } -size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError +size_t zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError { uLongf bufSize = static_cast(trgLen); const int rv = ::uncompress(static_cast(trg), //Bytef* dest @@ -77,13 +77,80 @@ size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, siz return bufSize; } +} + + +#undef compress //mitigate zlib macro shit... + +std::string zen::compress(const std::string_view& stream, int level) //throw SysError +{ + std::string output; + if (!stream.empty()) //don't dereference iterator into empty container! + { + //save uncompressed stream size for decompression + const uint64_t uncompressedSize = stream.size(); //use portable number type! + output.resize(sizeof(uncompressedSize)); + std::memcpy(output.data(), &uncompressedSize, sizeof(uncompressedSize)); + + const size_t bufferEstimate = zlib_compressBound(stream.size()); //upper limit for buffer size, larger than input size!!! + + output.resize(output.size() + bufferEstimate); + + const size_t bytesWritten = zlib_compress(stream.data(), + stream.size(), + output.data() + output.size() - bufferEstimate, + bufferEstimate, + level); //throw SysError + if (bytesWritten < bufferEstimate) + output.resize(output.size() - bufferEstimate + bytesWritten); //caveat: unsigned arithmetics + //caveat: physical memory consumption still *unchanged*! + } + return output; +} + + +std::string zen::decompress(const std::string_view& stream) //throw SysError +{ + std::string output; + if (!stream.empty()) //don't dereference iterator into empty container! + { + //retrieve size of uncompressed data + uint64_t uncompressedSize = 0; //use portable number type! + if (stream.size() < sizeof(uncompressedSize)) + throw SysError(L"zlib error: stream size < 8"); + + std::memcpy(&uncompressedSize, stream.data(), sizeof(uncompressedSize)); + + //attention: output MUST NOT be empty! Else it will pass a nullptr to zlib_decompress() => Z_STREAM_ERROR although "uncompressedSize == 0"!!! + if (uncompressedSize == 0) //cannot be 0: compress() directly maps empty -> empty container skipping zlib! + throw SysError(L"zlib error: uncompressed size == 0"); + + try + { + output.resize(static_cast(uncompressedSize)); //throw std::bad_alloc + } + //most likely this is due to data corruption: + catch (const std::length_error& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo(e.what())); } + catch (const std::bad_alloc& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo(e.what())); } + + const size_t bytesWritten = zlib_decompress(stream.data() + sizeof(uncompressedSize), + stream.size() - sizeof(uncompressedSize), + output.data(), + static_cast(uncompressedSize)); //throw SysError + if (bytesWritten != static_cast(uncompressedSize)) + throw SysError(formatSystemError("zlib_decompress", L"", L"bytes written != uncompressed size.")); + } + return output; +} class InputStreamAsGzip::Impl { public: - Impl(const std::function& readBlock /*throw X*/) : //throw SysError; returning 0 signals EOF: Posix read() semantics - readBlock_(readBlock) + Impl(const std::function& tryReadBlock /*throw X; may return short, only 0 means EOF!*/, + size_t blockSize) : //throw SysError + tryReadBlock_(tryReadBlock), + blockSize_(blockSize) { const int windowBits = MAX_WBITS + 16; //"add 16 to windowBits to write a simple gzip header" @@ -105,6 +172,7 @@ public: { [[maybe_unused]] const int rv = ::deflateEnd(&gzipStream_); assert(rv == Z_OK); + warn_static("log on error") } size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream! @@ -117,20 +185,18 @@ public: for (;;) { + //refill input buffer once avail_in == 0: https://www.zlib.net/manual.html if (gzipStream_.avail_in == 0 && !eof_) { - if (bufIn_.size() < bytesToRead) - bufIn_.resize(bytesToRead); - - const size_t bytesRead = readBlock_(&bufIn_[0], bufIn_.size()); //throw X; returning 0 signals EOF: Posix read() semantics - gzipStream_.next_in = reinterpret_cast(&bufIn_[0]); + const size_t bytesRead = tryReadBlock_(bufIn_.data(), blockSize_); //throw X; may return short, only 0 means EOF! + gzipStream_.next_in = reinterpret_cast(bufIn_.data()); gzipStream_.avail_in = static_cast(bytesRead); if (bytesRead == 0) eof_ = true; } const int rv = ::deflate(&gzipStream_, eof_ ? Z_FINISH : Z_NO_FLUSH); - if (rv == Z_STREAM_END) + if (eof_ && rv == Z_STREAM_END) return bytesToRead - gzipStream_.avail_out; if (rv != Z_OK) throw SysError(formatSystemError("zlib deflate", getZlibErrorLiteral(rv), L"")); @@ -140,34 +206,41 @@ public: } } + size_t getBlockSize() const { return blockSize_; } //returning input blockSize_ makes sense for low compression ratio + private: - const std::function readBlock_; //throw X + const std::function tryReadBlock_; //throw X + const size_t blockSize_; bool eof_ = false; - std::vector bufIn_; + std::vector bufIn_{blockSize_}; z_stream gzipStream_ = {}; }; -zen::InputStreamAsGzip::InputStreamAsGzip(const std::function& readBlock /*throw X*/) : pimpl_(std::make_unique(readBlock)) {} //throw SysError -zen::InputStreamAsGzip::~InputStreamAsGzip() {} -size_t zen::InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X +InputStreamAsGzip::InputStreamAsGzip(const std::function& tryReadBlock /*throw X*/, size_t blockSize) : + pimpl_(std::make_unique(tryReadBlock, blockSize)) {} //throw SysError + +InputStreamAsGzip::~InputStreamAsGzip() {} + +size_t InputStreamAsGzip::getBlockSize() const { return pimpl_->getBlockSize(); } +size_t InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X -std::string zen::compressAsGzip(const void* buffer, size_t bufSize) //throw SysError + +std::string zen::compressAsGzip(const std::string_view& stream) //throw SysError { - struct MemoryStreamAsGzip : InputStreamAsGzip + MemoryStreamIn memStream(stream); + + auto tryReadBlock = [&](void* buffer, size_t bytesToRead) //may return short, only 0 means EOF! { - explicit MemoryStreamAsGzip(const std::function& readBlock /*throw X*/) : InputStreamAsGzip(readBlock) {} //throw SysError - static size_t getBlockSize() { return 128 * 1024; } //InputStreamAsGzip has no idea what it's wrapping => has no getBlockSize() member! + return memStream.read(buffer, bytesToRead); //return "bytesToRead" bytes unless end of stream! }; - MemoryStreamAsGzip gzipStream([&](void* bufIn, size_t bytesToRead) //throw SysError + InputStreamAsGzip gzipStream(tryReadBlock, 1024 * 1024 /*blockSize*/); //throw SysError + + return unbufferedLoad([&](void* buffer, size_t bytesToRead) { - const size_t bytesRead = std::min(bufSize, bytesToRead); - std::memcpy(bufIn, buffer, bytesRead); - buffer = static_cast(buffer) + bytesRead; - bufSize -= bytesRead; - return bytesRead; //returning 0 signals EOF: Posix read() semantics - }); - return bufferedLoad(gzipStream); //throw SysError + return gzipStream.read(buffer, bytesToRead); //throw SysError; return "bytesToRead" bytes unless end of stream! + }, + gzipStream.getBlockSize()); //throw SysError } -- cgit