diff options
Diffstat (limited to 'zen/zlib_wrap.cpp')
-rw-r--r-- | zen/zlib_wrap.cpp | 139 |
1 files changed, 106 insertions, 33 deletions
diff --git a/zen/zlib_wrap.cpp b/zen/zlib_wrap.cpp index e87a284f..28b85c5c 100644 --- a/zen/zlib_wrap.cpp +++ b/zen/zlib_wrap.cpp @@ -8,8 +8,9 @@ //Windows: use the SAME zlib version that wxWidgets is linking against! //C:\Data\Projects\wxWidgets\Source\src\zlib\zlib.h //Linux/macOS: use zlib system header for both wxWidgets and libcurl (zlib is required for HTTP, SFTP) // => don't compile wxWidgets with: --with-zlib=builtin -#include <zlib.h> //https://www.zlib.net/manual.html -#include <zen/scope_guard.h> +#include <zlib.h> +#include "scope_guard.h" +#include "serialize.h" using namespace zen; @@ -20,9 +21,9 @@ std::wstring getZlibErrorLiteral(int sc) { switch (sc) { - ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK); - ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END); ZEN_CHECK_CASE_FOR_CONSTANT(Z_NEED_DICT); + ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END); + ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK); ZEN_CHECK_CASE_FOR_CONSTANT(Z_ERRNO); ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_ERROR); ZEN_CHECK_CASE_FOR_CONSTANT(Z_DATA_ERROR); @@ -34,16 +35,15 @@ std::wstring getZlibErrorLiteral(int sc) return replaceCpy<std::wstring>(L"zlib error %x", L"%x", numberTo<std::wstring>(sc)); } } -} -size_t zen::impl::zlib_compressBound(size_t len) +size_t zlib_compressBound(size_t len) { return ::compressBound(static_cast<uLong>(len)); //upper limit for buffer size, larger than input size!!! } -size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError +size_t zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError { uLongf bufSize = static_cast<uLong>(trgLen); const int rv = ::compress2(static_cast<Bytef*>(trg), //Bytef* dest @@ -61,7 +61,7 @@ size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_ } -size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError +size_t zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError { uLongf bufSize = static_cast<uLong>(trgLen); const int rv = ::uncompress(static_cast<Bytef*>(trg), //Bytef* dest @@ -77,13 +77,80 @@ size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, siz return bufSize; } +} + + +#undef compress //mitigate zlib macro shit... + +std::string zen::compress(const std::string_view& stream, int level) //throw SysError +{ + std::string output; + if (!stream.empty()) //don't dereference iterator into empty container! + { + //save uncompressed stream size for decompression + const uint64_t uncompressedSize = stream.size(); //use portable number type! + output.resize(sizeof(uncompressedSize)); + std::memcpy(output.data(), &uncompressedSize, sizeof(uncompressedSize)); + + const size_t bufferEstimate = zlib_compressBound(stream.size()); //upper limit for buffer size, larger than input size!!! + + output.resize(output.size() + bufferEstimate); + + const size_t bytesWritten = zlib_compress(stream.data(), + stream.size(), + output.data() + output.size() - bufferEstimate, + bufferEstimate, + level); //throw SysError + if (bytesWritten < bufferEstimate) + output.resize(output.size() - bufferEstimate + bytesWritten); //caveat: unsigned arithmetics + //caveat: physical memory consumption still *unchanged*! + } + return output; +} + + +std::string zen::decompress(const std::string_view& stream) //throw SysError +{ + std::string output; + if (!stream.empty()) //don't dereference iterator into empty container! + { + //retrieve size of uncompressed data + uint64_t uncompressedSize = 0; //use portable number type! + if (stream.size() < sizeof(uncompressedSize)) + throw SysError(L"zlib error: stream size < 8"); + + std::memcpy(&uncompressedSize, stream.data(), sizeof(uncompressedSize)); + + //attention: output MUST NOT be empty! Else it will pass a nullptr to zlib_decompress() => Z_STREAM_ERROR although "uncompressedSize == 0"!!! + if (uncompressedSize == 0) //cannot be 0: compress() directly maps empty -> empty container skipping zlib! + throw SysError(L"zlib error: uncompressed size == 0"); + + try + { + output.resize(static_cast<size_t>(uncompressedSize)); //throw std::bad_alloc + } + //most likely this is due to data corruption: + catch (const std::length_error& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); } + catch (const std::bad_alloc& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); } + + const size_t bytesWritten = zlib_decompress(stream.data() + sizeof(uncompressedSize), + stream.size() - sizeof(uncompressedSize), + output.data(), + static_cast<size_t>(uncompressedSize)); //throw SysError + if (bytesWritten != static_cast<size_t>(uncompressedSize)) + throw SysError(formatSystemError("zlib_decompress", L"", L"bytes written != uncompressed size.")); + } + return output; +} class InputStreamAsGzip::Impl { public: - Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : //throw SysError; returning 0 signals EOF: Posix read() semantics - readBlock_(readBlock) + Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X; may return short, only 0 means EOF!*/, + size_t blockSize) : //throw SysError + tryReadBlock_(tryReadBlock), + blockSize_(blockSize) { const int windowBits = MAX_WBITS + 16; //"add 16 to windowBits to write a simple gzip header" @@ -105,6 +172,7 @@ public: { [[maybe_unused]] const int rv = ::deflateEnd(&gzipStream_); assert(rv == Z_OK); + warn_static("log on error") } size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream! @@ -117,20 +185,18 @@ public: for (;;) { + //refill input buffer once avail_in == 0: https://www.zlib.net/manual.html if (gzipStream_.avail_in == 0 && !eof_) { - if (bufIn_.size() < bytesToRead) - bufIn_.resize(bytesToRead); - - const size_t bytesRead = readBlock_(&bufIn_[0], bufIn_.size()); //throw X; returning 0 signals EOF: Posix read() semantics - gzipStream_.next_in = reinterpret_cast<z_const Bytef*>(&bufIn_[0]); + const size_t bytesRead = tryReadBlock_(bufIn_.data(), blockSize_); //throw X; may return short, only 0 means EOF! + gzipStream_.next_in = reinterpret_cast<z_const Bytef*>(bufIn_.data()); gzipStream_.avail_in = static_cast<uInt>(bytesRead); if (bytesRead == 0) eof_ = true; } const int rv = ::deflate(&gzipStream_, eof_ ? Z_FINISH : Z_NO_FLUSH); - if (rv == Z_STREAM_END) + if (eof_ && rv == Z_STREAM_END) return bytesToRead - gzipStream_.avail_out; if (rv != Z_OK) throw SysError(formatSystemError("zlib deflate", getZlibErrorLiteral(rv), L"")); @@ -140,34 +206,41 @@ public: } } + size_t getBlockSize() const { return blockSize_; } //returning input blockSize_ makes sense for low compression ratio + private: - const std::function<size_t(void* buffer, size_t bytesToRead)> readBlock_; //throw X + const std::function<size_t(void* buffer, size_t bytesToRead)> tryReadBlock_; //throw X + const size_t blockSize_; bool eof_ = false; - std::vector<std::byte> bufIn_; + std::vector<std::byte> bufIn_{blockSize_}; z_stream gzipStream_ = {}; }; -zen::InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : pimpl_(std::make_unique<Impl>(readBlock)) {} //throw SysError -zen::InputStreamAsGzip::~InputStreamAsGzip() {} -size_t zen::InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X +InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X*/, size_t blockSize) : + pimpl_(std::make_unique<Impl>(tryReadBlock, blockSize)) {} //throw SysError + +InputStreamAsGzip::~InputStreamAsGzip() {} + +size_t InputStreamAsGzip::getBlockSize() const { return pimpl_->getBlockSize(); } +size_t InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X -std::string zen::compressAsGzip(const void* buffer, size_t bufSize) //throw SysError + +std::string zen::compressAsGzip(const std::string_view& stream) //throw SysError { - struct MemoryStreamAsGzip : InputStreamAsGzip + MemoryStreamIn memStream(stream); + + auto tryReadBlock = [&](void* buffer, size_t bytesToRead) //may return short, only 0 means EOF! { - explicit MemoryStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : InputStreamAsGzip(readBlock) {} //throw SysError - static size_t getBlockSize() { return 128 * 1024; } //InputStreamAsGzip has no idea what it's wrapping => has no getBlockSize() member! + return memStream.read(buffer, bytesToRead); //return "bytesToRead" bytes unless end of stream! }; - MemoryStreamAsGzip gzipStream([&](void* bufIn, size_t bytesToRead) //throw SysError + InputStreamAsGzip gzipStream(tryReadBlock, 1024 * 1024 /*blockSize*/); //throw SysError + + return unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead) { - const size_t bytesRead = std::min(bufSize, bytesToRead); - std::memcpy(bufIn, buffer, bytesRead); - buffer = static_cast<const char*>(buffer) + bytesRead; - bufSize -= bytesRead; - return bytesRead; //returning 0 signals EOF: Posix read() semantics - }); - return bufferedLoad<std::string>(gzipStream); //throw SysError + return gzipStream.read(buffer, bytesToRead); //throw SysError; return "bytesToRead" bytes unless end of stream! + }, + gzipStream.getBlockSize()); //throw SysError } |