summaryrefslogtreecommitdiff
path: root/zen/zlib_wrap.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'zen/zlib_wrap.cpp')
-rw-r--r--zen/zlib_wrap.cpp139
1 files changed, 106 insertions, 33 deletions
diff --git a/zen/zlib_wrap.cpp b/zen/zlib_wrap.cpp
index e87a284f..28b85c5c 100644
--- a/zen/zlib_wrap.cpp
+++ b/zen/zlib_wrap.cpp
@@ -8,8 +8,9 @@
//Windows: use the SAME zlib version that wxWidgets is linking against! //C:\Data\Projects\wxWidgets\Source\src\zlib\zlib.h
//Linux/macOS: use zlib system header for both wxWidgets and libcurl (zlib is required for HTTP, SFTP)
// => don't compile wxWidgets with: --with-zlib=builtin
-#include <zlib.h> //https://www.zlib.net/manual.html
-#include <zen/scope_guard.h>
+#include <zlib.h>
+#include "scope_guard.h"
+#include "serialize.h"
using namespace zen;
@@ -20,9 +21,9 @@ std::wstring getZlibErrorLiteral(int sc)
{
switch (sc)
{
- ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK);
- ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END);
ZEN_CHECK_CASE_FOR_CONSTANT(Z_NEED_DICT);
+ ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END);
+ ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK);
ZEN_CHECK_CASE_FOR_CONSTANT(Z_ERRNO);
ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_ERROR);
ZEN_CHECK_CASE_FOR_CONSTANT(Z_DATA_ERROR);
@@ -34,16 +35,15 @@ std::wstring getZlibErrorLiteral(int sc)
return replaceCpy<std::wstring>(L"zlib error %x", L"%x", numberTo<std::wstring>(sc));
}
}
-}
-size_t zen::impl::zlib_compressBound(size_t len)
+size_t zlib_compressBound(size_t len)
{
return ::compressBound(static_cast<uLong>(len)); //upper limit for buffer size, larger than input size!!!
}
-size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError
+size_t zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError
{
uLongf bufSize = static_cast<uLong>(trgLen);
const int rv = ::compress2(static_cast<Bytef*>(trg), //Bytef* dest
@@ -61,7 +61,7 @@ size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_
}
-size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError
+size_t zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError
{
uLongf bufSize = static_cast<uLong>(trgLen);
const int rv = ::uncompress(static_cast<Bytef*>(trg), //Bytef* dest
@@ -77,13 +77,80 @@ size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, siz
return bufSize;
}
+}
+
+
+#undef compress //mitigate zlib macro shit...
+
+std::string zen::compress(const std::string_view& stream, int level) //throw SysError
+{
+ std::string output;
+ if (!stream.empty()) //don't dereference iterator into empty container!
+ {
+ //save uncompressed stream size for decompression
+ const uint64_t uncompressedSize = stream.size(); //use portable number type!
+ output.resize(sizeof(uncompressedSize));
+ std::memcpy(output.data(), &uncompressedSize, sizeof(uncompressedSize));
+
+ const size_t bufferEstimate = zlib_compressBound(stream.size()); //upper limit for buffer size, larger than input size!!!
+
+ output.resize(output.size() + bufferEstimate);
+
+ const size_t bytesWritten = zlib_compress(stream.data(),
+ stream.size(),
+ output.data() + output.size() - bufferEstimate,
+ bufferEstimate,
+ level); //throw SysError
+ if (bytesWritten < bufferEstimate)
+ output.resize(output.size() - bufferEstimate + bytesWritten); //caveat: unsigned arithmetics
+ //caveat: physical memory consumption still *unchanged*!
+ }
+ return output;
+}
+
+
+std::string zen::decompress(const std::string_view& stream) //throw SysError
+{
+ std::string output;
+ if (!stream.empty()) //don't dereference iterator into empty container!
+ {
+ //retrieve size of uncompressed data
+ uint64_t uncompressedSize = 0; //use portable number type!
+ if (stream.size() < sizeof(uncompressedSize))
+ throw SysError(L"zlib error: stream size < 8");
+
+ std::memcpy(&uncompressedSize, stream.data(), sizeof(uncompressedSize));
+
+ //attention: output MUST NOT be empty! Else it will pass a nullptr to zlib_decompress() => Z_STREAM_ERROR although "uncompressedSize == 0"!!!
+ if (uncompressedSize == 0) //cannot be 0: compress() directly maps empty -> empty container skipping zlib!
+ throw SysError(L"zlib error: uncompressed size == 0");
+
+ try
+ {
+ output.resize(static_cast<size_t>(uncompressedSize)); //throw std::bad_alloc
+ }
+ //most likely this is due to data corruption:
+ catch (const std::length_error& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); }
+ catch (const std::bad_alloc& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); }
+
+ const size_t bytesWritten = zlib_decompress(stream.data() + sizeof(uncompressedSize),
+ stream.size() - sizeof(uncompressedSize),
+ output.data(),
+ static_cast<size_t>(uncompressedSize)); //throw SysError
+ if (bytesWritten != static_cast<size_t>(uncompressedSize))
+ throw SysError(formatSystemError("zlib_decompress", L"", L"bytes written != uncompressed size."));
+ }
+ return output;
+}
class InputStreamAsGzip::Impl
{
public:
- Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : //throw SysError; returning 0 signals EOF: Posix read() semantics
- readBlock_(readBlock)
+ Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X; may return short, only 0 means EOF!*/,
+ size_t blockSize) : //throw SysError
+ tryReadBlock_(tryReadBlock),
+ blockSize_(blockSize)
{
const int windowBits = MAX_WBITS + 16; //"add 16 to windowBits to write a simple gzip header"
@@ -105,6 +172,7 @@ public:
{
[[maybe_unused]] const int rv = ::deflateEnd(&gzipStream_);
assert(rv == Z_OK);
+ warn_static("log on error")
}
size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream!
@@ -117,20 +185,18 @@ public:
for (;;)
{
+ //refill input buffer once avail_in == 0: https://www.zlib.net/manual.html
if (gzipStream_.avail_in == 0 && !eof_)
{
- if (bufIn_.size() < bytesToRead)
- bufIn_.resize(bytesToRead);
-
- const size_t bytesRead = readBlock_(&bufIn_[0], bufIn_.size()); //throw X; returning 0 signals EOF: Posix read() semantics
- gzipStream_.next_in = reinterpret_cast<z_const Bytef*>(&bufIn_[0]);
+ const size_t bytesRead = tryReadBlock_(bufIn_.data(), blockSize_); //throw X; may return short, only 0 means EOF!
+ gzipStream_.next_in = reinterpret_cast<z_const Bytef*>(bufIn_.data());
gzipStream_.avail_in = static_cast<uInt>(bytesRead);
if (bytesRead == 0)
eof_ = true;
}
const int rv = ::deflate(&gzipStream_, eof_ ? Z_FINISH : Z_NO_FLUSH);
- if (rv == Z_STREAM_END)
+ if (eof_ && rv == Z_STREAM_END)
return bytesToRead - gzipStream_.avail_out;
if (rv != Z_OK)
throw SysError(formatSystemError("zlib deflate", getZlibErrorLiteral(rv), L""));
@@ -140,34 +206,41 @@ public:
}
}
+ size_t getBlockSize() const { return blockSize_; } //returning input blockSize_ makes sense for low compression ratio
+
private:
- const std::function<size_t(void* buffer, size_t bytesToRead)> readBlock_; //throw X
+ const std::function<size_t(void* buffer, size_t bytesToRead)> tryReadBlock_; //throw X
+ const size_t blockSize_;
bool eof_ = false;
- std::vector<std::byte> bufIn_;
+ std::vector<std::byte> bufIn_{blockSize_};
z_stream gzipStream_ = {};
};
-zen::InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : pimpl_(std::make_unique<Impl>(readBlock)) {} //throw SysError
-zen::InputStreamAsGzip::~InputStreamAsGzip() {}
-size_t zen::InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X
+InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X*/, size_t blockSize) :
+ pimpl_(std::make_unique<Impl>(tryReadBlock, blockSize)) {} //throw SysError
+
+InputStreamAsGzip::~InputStreamAsGzip() {}
+
+size_t InputStreamAsGzip::getBlockSize() const { return pimpl_->getBlockSize(); }
+size_t InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X
-std::string zen::compressAsGzip(const void* buffer, size_t bufSize) //throw SysError
+
+std::string zen::compressAsGzip(const std::string_view& stream) //throw SysError
{
- struct MemoryStreamAsGzip : InputStreamAsGzip
+ MemoryStreamIn memStream(stream);
+
+ auto tryReadBlock = [&](void* buffer, size_t bytesToRead) //may return short, only 0 means EOF!
{
- explicit MemoryStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : InputStreamAsGzip(readBlock) {} //throw SysError
- static size_t getBlockSize() { return 128 * 1024; } //InputStreamAsGzip has no idea what it's wrapping => has no getBlockSize() member!
+ return memStream.read(buffer, bytesToRead); //return "bytesToRead" bytes unless end of stream!
};
- MemoryStreamAsGzip gzipStream([&](void* bufIn, size_t bytesToRead) //throw SysError
+ InputStreamAsGzip gzipStream(tryReadBlock, 1024 * 1024 /*blockSize*/); //throw SysError
+
+ return unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead)
{
- const size_t bytesRead = std::min(bufSize, bytesToRead);
- std::memcpy(bufIn, buffer, bytesRead);
- buffer = static_cast<const char*>(buffer) + bytesRead;
- bufSize -= bytesRead;
- return bytesRead; //returning 0 signals EOF: Posix read() semantics
- });
- return bufferedLoad<std::string>(gzipStream); //throw SysError
+ return gzipStream.read(buffer, bytesToRead); //throw SysError; return "bytesToRead" bytes unless end of stream!
+ },
+ gzipStream.getBlockSize()); //throw SysError
}
bgstack15