summaryrefslogtreecommitdiff
path: root/zen/zlib_wrap.cpp
blob: 7e6801313056aedccbe51bf54b2b5127f987a574 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
// *****************************************************************************
// * This file is part of the FreeFileSync project. It is distributed under    *
// * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0          *
// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved *
// *****************************************************************************

#include "zlib_wrap.h"
//Windows:     use the SAME zlib version that wxWidgets is linking against! //C:\Data\Projects\wxWidgets\Source\src\zlib\zlib.h
//Linux/macOS: use zlib system header for wxWidgets, libcurl (HTTP), libssh2 (SFTP)
//             => don't compile wxWidgets with: --with-zlib=builtin
#include <zlib.h>
#include "scope_guard.h"
#include "serialize.h"

using namespace zen;


namespace
{
std::wstring getZlibErrorLiteral(int sc)
{
    switch (sc)
    {
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_NEED_DICT);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_ERRNO);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_ERROR);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_DATA_ERROR);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_MEM_ERROR);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_BUF_ERROR);
            ZEN_CHECK_CASE_FOR_CONSTANT(Z_VERSION_ERROR);

        default:
            return replaceCpy<std::wstring>(L"zlib error %x", L"%x", numberTo<std::wstring>(sc));
    }
}


size_t zlib_compressBound(size_t len)
{
    return ::compressBound(static_cast<uLong>(len)); //upper limit for buffer size, larger than input size!!!
}


size_t zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError
{
    uLongf bufSize = static_cast<uLong>(trgLen);
    const int rv = ::compress2(static_cast<Bytef*>(trg),       //Bytef* dest
                               &bufSize,                       //uLongf* destLen
                               static_cast<const Bytef*>(src), //const Bytef* source
                               static_cast<uLong>(srcLen),     //uLong sourceLen
                               level);                         //int level
    // Z_OK: success
    // Z_MEM_ERROR: not enough memory
    // Z_BUF_ERROR: not enough room in the output buffer
    if (rv != Z_OK || bufSize > trgLen)
        throw SysError(formatSystemError("zlib compress2", getZlibErrorLiteral(rv), L""));

    return bufSize;
}


size_t zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError
{
    uLongf bufSize = static_cast<uLong>(trgLen);
    const int rv = ::uncompress(static_cast<Bytef*>(trg),       //Bytef* dest
                                &bufSize,                       //uLongf* destLen
                                static_cast<const Bytef*>(src), //const Bytef* source
                                static_cast<uLong>(srcLen));    //uLong sourceLen
    // Z_OK: success
    // Z_MEM_ERROR: not enough memory
    // Z_BUF_ERROR: not enough room in the output buffer
    // Z_DATA_ERROR: input data was corrupted or incomplete
    if (rv != Z_OK || bufSize > trgLen)
        throw SysError(formatSystemError("zlib uncompress", getZlibErrorLiteral(rv), L""));

    return bufSize;
}
}


#undef compress //mitigate zlib macro shit...

//Compress "stream" with zlib at the given compression level.
//Output layout: 8-byte uncompressed size (raw memcpy of a uint64_t), followed by the zlib-compressed payload.
//An empty input maps to an empty output without involving zlib.
std::string zen::compress(const std::string_view& stream, int level) //throw SysError
{
    if (stream.empty()) //don't dereference iterator into empty container!
        return {};

    //size header: needed by decompress() to allocate its output buffer
    const uint64_t sizeHeader = stream.size(); //use portable number type!
    constexpr size_t headerLen = sizeof(sizeHeader);

    const size_t payloadCapacity = zlib_compressBound(stream.size()); //upper limit for buffer size, larger than input size!!!

    std::string output(headerLen + payloadCapacity, '\0');
    std::memcpy(output.data(), &sizeHeader, headerLen);

    const size_t payloadLen = zlib_compress(stream.data(),
                                            stream.size(),
                                            output.data() + headerLen,
                                            payloadCapacity,
                                            level); //throw SysError

    //zlib_compress() never reports more than payloadCapacity => this only ever shrinks (or keeps) the size
    output.resize(headerLen + payloadLen);
    //caveat: physical memory consumption still *unchanged*!

    return output;
}


//Reverse of zen::compress(): read the 8-byte uncompressed-size header, then inflate the remaining bytes with zlib.
//An empty input maps to an empty output; throws SysError if the header is missing/zero, memory allocation fails,
//zlib reports an error, or the number of decompressed bytes differs from the size stated in the header.
std::string zen::decompress(const std::string_view& stream) //throw SysError
{
    if (stream.empty()) //don't dereference iterator into empty container!
        return {};

    //retrieve size of uncompressed data
    uint64_t uncompressedSize = 0; //use portable number type!
    constexpr size_t headerLen = sizeof(uncompressedSize);

    if (stream.size() < headerLen)
        throw SysError(L"zlib error: stream size < 8");

    std::memcpy(&uncompressedSize, stream.data(), headerLen);

    //attention: output MUST NOT be empty! Else it will pass a nullptr to zlib_decompress() => Z_STREAM_ERROR although "uncompressedSize == 0"!!!
    if (uncompressedSize == 0) //cannot be 0: compress() directly maps empty -> empty container skipping zlib!
        throw SysError(L"zlib error: uncompressed size == 0");

    const size_t outputLen = static_cast<size_t>(uncompressedSize);

    std::string output;
    try
    {
        output.resize(outputLen); //throw std::bad_alloc
    }
    //most likely this is due to data corruption:
    catch (const std::length_error& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); }
    catch (const    std::bad_alloc& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); }

    const std::string_view payload = stream.substr(headerLen); //everything after the size header

    const size_t bytesWritten = zlib_decompress(payload.data(),
                                                payload.size(),
                                                output.data(),
                                                outputLen); //throw SysError
    if (bytesWritten != outputLen)
        throw SysError(formatSystemError("zlib_decompress", L"", L"bytes written != uncompressed size."));

    return output;
}


//Pull-style gzip compressor: fetches uncompressed data from a caller-supplied callback in blocks of "blockSize" bytes
//and hands out the deflate()-compressed bytes through read().
class InputStreamAsGzip::Impl
{
public:
    //- tryReadBlock: source of uncompressed data; stored as a copy and called whenever zlib's input buffer is empty
    //- blockSize:    number of bytes requested per tryReadBlock call; also the size of the internal input buffer
    Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X; may return short, only 0 means EOF!*/,
         size_t blockSize) : //throw SysError
        tryReadBlock_(tryReadBlock),
        blockSize_(blockSize)
    {
        const int windowBits = MAX_WBITS + 16; //"add 16 to windowBits to write a simple gzip header"

        //"memLevel=1 uses minimum memory but is slow and reduces compression ratio; memLevel=9 uses maximum memory for optimal speed.
        const int memLevel = 9; //test; 280 MB installer file: level 9 shrinks runtime by ~8% compared to level 8 (==DEF_MEM_LEVEL) at the cost of 128 KB extra memory
        static_assert(memLevel <= MAX_MEM_LEVEL);

        const int rv = ::deflateInit2(&gzipStream_,          //z_streamp strm
                                      3 /*see db_file.cpp*/, //int level
                                      Z_DEFLATED,            //int method
                                      windowBits,            //int windowBits
                                      memLevel,              //int memLevel
                                      Z_DEFAULT_STRATEGY);   //int strategy
        if (rv != Z_OK)
            throw SysError(formatSystemError("zlib deflateInit2", getZlibErrorLiteral(rv), L""));
    }

    ~Impl()
    {
        [[maybe_unused]] const int rv = ::deflateEnd(&gzipStream_);
        assert(rv == Z_OK);
        warn_static("log on error")
    }

    //gzipStream_ is initialized by deflateInit2() and released by deflateEnd() in the destructor:
    //a member-wise copy would have both objects call deflateEnd() on the same zlib state => not copyable
    Impl           (const Impl&) = delete;
    Impl& operator=(const Impl&) = delete;

    //Fill "buffer" with up to "bytesToRead" compressed bytes; returns the number of bytes written.
    //A short count (including 0) is only returned once the compressed stream has been finished.
    //Throws std::logic_error for bytesToRead == 0, SysError on zlib failure, X from the input callback.
    size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream!
    {
        if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check!
            throw std::logic_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Contract violation!");

        //z_stream::avail_out is a uInt, which may be narrower than size_t:
        //offer the output buffer to zlib in chunks instead of truncating "bytesToRead" (which would over-report the bytes written)
        constexpr size_t availOutMax = static_cast<uInt>(-1); //largest uInt value
        size_t bytesNotOffered = bytesToRead; //tail of "buffer" that zlib has not been told about yet

        auto offerOutputChunk = [&]
        {
            const size_t chunkSize = bytesNotOffered < availOutMax ? bytesNotOffered : availOutMax;
            gzipStream_.avail_out = static_cast<uInt>(chunkSize);
            bytesNotOffered -= chunkSize;
        };

        gzipStream_.next_out = static_cast<Bytef*>(buffer); //advanced by deflate() as output is produced
        offerOutputChunk();

        for (;;)
        {
            //refill input buffer once avail_in == 0: https://www.zlib.net/manual.html
            if (gzipStream_.avail_in == 0 && !eof_)
            {
                const size_t bytesRead = tryReadBlock_(bufIn_.data(), blockSize_); //throw X; may return short, only 0 means EOF!
                gzipStream_.next_in  = reinterpret_cast<z_const Bytef*>(bufIn_.data());
                gzipStream_.avail_in = static_cast<uInt>(bytesRead);
                if (bytesRead == 0)
                    eof_ = true;
            }

            //after the input is exhausted, Z_FINISH makes zlib flush pending output and complete the stream
            const int rv = ::deflate(&gzipStream_, eof_ ? Z_FINISH : Z_NO_FLUSH);
            if (eof_ && rv == Z_STREAM_END)
                return bytesToRead - bytesNotOffered - gzipStream_.avail_out; //bytes offered so far minus what zlib left unused
            if (rv != Z_OK)
                throw SysError(formatSystemError("zlib deflate", getZlibErrorLiteral(rv), L""));

            if (gzipStream_.avail_out == 0) //current chunk is full
            {
                if (bytesNotOffered == 0)
                    return bytesToRead;
                offerOutputChunk(); //next_out already points behind the bytes written so far
            }
        }
    }

    size_t getBlockSize() const { return blockSize_; } //returning input blockSize_ makes sense for low compression ratio

private:
    const std::function<size_t(void* buffer, size_t bytesToRead)> tryReadBlock_; //throw X
    const size_t blockSize_;
    bool eof_ = false; //set once tryReadBlock_ has returned 0
    std::vector<std::byte> bufIn_{blockSize_}; //input buffer for zlib; relies on blockSize_ being declared (=> initialized) first
    z_stream gzipStream_ = {};
};


//Public InputStreamAsGzip members: every call is forwarded to the Impl instance held in pimpl_

InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X*/, size_t blockSize) : //throw SysError
    pimpl_(std::make_unique<Impl>(tryReadBlock, blockSize))
{
}


//defined here rather than in the header: this file contains the full definition of Impl
InputStreamAsGzip::~InputStreamAsGzip() = default;


size_t InputStreamAsGzip::getBlockSize() const
{
    return pimpl_->getBlockSize();
}


size_t InputStreamAsGzip::read(void* buffer, size_t bytesToRead) //throw SysError, X
{
    return pimpl_->read(buffer, bytesToRead);
}


std::string zen::compressAsGzip(const std::string_view& stream) //throw SysError
{
    MemoryStreamIn memStream(stream);

    auto tryReadBlock = [&](void* buffer, size_t bytesToRead) //may return short, only 0 means EOF!
    {
        return memStream.read(buffer, bytesToRead); //return "bytesToRead" bytes unless end of stream!
    };

    InputStreamAsGzip gzipStream(tryReadBlock, 1024 * 1024 /*blockSize*/); //throw SysError

    return unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead)
    {
        return gzipStream.read(buffer, bytesToRead); //throw SysError;  return "bytesToRead" bytes unless end of stream!
    },
    gzipStream.getBlockSize()); //throw SysError
}
bgstack15