summary | refs | log | tree | commit | diff
path: root/zen
diff options
context:
space:
mode:
author: B. Stack <bgstack15@gmail.com>, 2022-09-07 18:55:24 +0000
committer: B. Stack <bgstack15@gmail.com>, 2022-09-07 18:55:24 +0000
commit: 1e582c4e99fe08c70c75fef7cd8ed22343253297 (patch)
tree: b0047c655d52e4e479ceb73c713414f8d0744c38 /zen
parent: Merge branch 'b11.24' into 'master' (diff)
parent: add upstream 11.25 (diff)
download: FreeFileSync-1e582c4e99fe08c70c75fef7cd8ed22343253297.tar.gz
download: FreeFileSync-1e582c4e99fe08c70c75fef7cd8ed22343253297.tar.bz2
download: FreeFileSync-1e582c4e99fe08c70c75fef7cd8ed22343253297.zip
Merge branch 'b11.25' into 'master' (11.25)

add upstream 11.25

See merge request opensource-tracking/FreeFileSync!48
Diffstat (limited to 'zen')
-rw-r--r-- zen/json.h 2
-rw-r--r-- zen/process_exec.cpp 4
-rw-r--r-- zen/serialize.h 86
-rw-r--r-- zen/string_tools.h 2
-rw-r--r-- zen/utf.h 21
-rw-r--r-- zen/zstring.cpp 70
-rw-r--r-- zen/zstring.h 5
7 files changed, 109 insertions, 81 deletions
diff --git a/zen/json.h b/zen/json.h
index 3a9d73f3..be2cfbab 100644
--- a/zen/json.h
+++ b/zen/json.h
@@ -140,7 +140,7 @@ namespace
{
UtfDecoder<impl::Char16> decoder(utf16Buf.c_str(), utf16Buf.size());
while (std::optional<impl::CodePoint> cp = decoder.getNext())
- impl::codePointToUtf<char>(*cp, [&](char c) { output += c; });
+ codePointToUtf<char>(*cp, [&](char c) { output += c; });
utf16Buf.clear();
}
};
diff --git a/zen/process_exec.cpp b/zen/process_exec.cpp
index df41a627..fb691151 100644
--- a/zen/process_exec.cpp
+++ b/zen/process_exec.cpp
@@ -19,7 +19,7 @@ using namespace zen;
Zstring zen::escapeCommandArg(const Zstring& arg)
{
-//*INDENT-OFF*
+//*INDENT-OFF* if not put exactly here, Astyle will seriously mess this .cpp file up!
Zstring output;
for (const Zchar c : arg)
switch (c)
@@ -27,7 +27,7 @@ Zstring zen::escapeCommandArg(const Zstring& arg)
case '"': output += "\\\""; break; //Windows: not needed; " cannot be used as file name
case '\\': output += "\\\\"; break; //Windows: path separator! => don't escape
case '`': output += "\\`"; break; //yes, used in some paths => Windows: no escaping required
- default: output += c; break;
+ default: output += c; break;
}
//*INDENT-ON*
if (contains(output, Zstr(' ')))
diff --git a/zen/serialize.h b/zen/serialize.h
index b2561808..26202d96 100644
--- a/zen/serialize.h
+++ b/zen/serialize.h
@@ -8,9 +8,6 @@
#define SERIALIZE_H_839405783574356
#include <functional>
-//#include <cstdint>
-//#include <stdexcept>
-//#include "string_base.h"
#include "sys_error.h"
//keep header clean from specific stream implementations! (e.g.file_io.h)! used by abstract.h!
@@ -19,36 +16,35 @@ namespace zen
{
/* high-performance unformatted serialization (avoiding wxMemoryOutputStream/wxMemoryInputStream inefficiencies)
---------------------------
-|Binary Container Concept|
---------------------------
-binary container for data storage: must support "basic" std::vector interface (e.g. std::vector<std::byte>, std::string, Zbase<char>)
+ ----------------------------
+ | Binary Container Concept |
+ ----------------------------
+ binary container for data storage: must support "basic" std::vector interface (e.g. std::vector<std::byte>, std::string, Zbase<char>)
+ ---------------------------------
+ | Buffered Input Stream Concept |
+ ---------------------------------
+ struct BufferedInputStream
+ {
+ size_t read(void* buffer, size_t bytesToRead); //throw X; return "bytesToRead" bytes unless end of stream!
+
+ Optional: support stream-copying
+ --------------------------------
+ size_t getBlockSize() const;
+ const IoCallback& notifyUnbufferedIO
+ };
+
+ ----------------------------------
+ | Buffered Output Stream Concept |
+ ----------------------------------
+ struct BufferedOutputStream
+ {
+ void write(const void* buffer, size_t bytesToWrite); //throw X
--------------------------------
-|Buffered Input Stream Concept|
--------------------------------
-struct BufferedInputStream
-{
- size_t read(void* buffer, size_t bytesToRead); //throw X; return "bytesToRead" bytes unless end of stream!
-
-Optional: support stream-copying
---------------------------------
- size_t getBlockSize() const;
- const IoCallback& notifyUnbufferedIO
-};
-
---------------------------------
-|Buffered Output Stream Concept|
---------------------------------
-struct BufferedOutputStream
-{
- void write(const void* buffer, size_t bytesToWrite); //throw X
-
-Optional: support stream-copying
---------------------------------
- const IoCallback& notifyUnbufferedIO
-}; */
+ Optional: support stream-copying
+ --------------------------------
+ const IoCallback& notifyUnbufferedIO
+ }; */
using IoCallback = std::function<void(int64_t bytesDelta)>; //throw X
@@ -116,6 +112,7 @@ private:
size_t pos_ = 0;
};
+
template <class BinContainer>
struct MemoryStreamOut
{
@@ -144,9 +141,6 @@ private:
-
-
-
//-----------------------implementation-------------------------------
template <class BufferedInputStream, class BufferedOutputStream> inline
void bufferedStreamCopy(BufferedInputStream& streamIn, //throw X
@@ -214,10 +208,13 @@ void writeNumber(BufferedOutputStream& stream, const N& num)
template <class C, class BufferedOutputStream> inline
void writeContainer(BufferedOutputStream& stream, const C& cont) //don't even consider UTF8 conversions here, we're handling arbitrary binary data!
{
- const auto len = cont.size();
- writeNumber(stream, static_cast<uint32_t>(len));
- if (len > 0)
- writeArray(stream, &cont[0], sizeof(typename C::value_type) * len); //don't use c_str(), but access uniformly via STL interface
+ const auto size = cont.size();
+
+ assert(size <= INT32_MAX);
+ writeNumber(stream, static_cast<int32_t>(size)); //use *signed* integer to help catch data corruption
+
+ if (size > 0)
+ writeArray(stream, &cont[0], sizeof(typename C::value_type) * size); //don't use c_str(), but access uniformly via STL interface
}
@@ -244,18 +241,21 @@ N readNumber(BufferedInputStream& stream) //throw SysErrorUnexpectedEos
template <class C, class BufferedInputStream> inline
C readContainer(BufferedInputStream& stream) //throw SysErrorUnexpectedEos
{
+ const auto size = readNumber<int32_t>(stream); //throw SysErrorUnexpectedEos
+ if (size < 0) //most likely due to data corruption!
+ throw SysErrorUnexpectedEos();
+
C cont;
- auto strLength = readNumber<uint32_t>(stream); //throw SysErrorUnexpectedEos
- if (strLength > 0)
+ if (size > 0)
{
try
{
- cont.resize(strLength); //throw std::length_error, std::bad_alloc
+ cont.resize(size); //throw std::length_error, std::bad_alloc
}
- catch (std::length_error&) { throw SysErrorUnexpectedEos(); } //most likely this is due to data corruption!
+ catch (std::length_error&) { throw SysErrorUnexpectedEos(); } //most likely due to data corruption!
catch ( std::bad_alloc&) { throw SysErrorUnexpectedEos(); } //
- readArray(stream, &cont[0], sizeof(typename C::value_type) * strLength); //throw SysErrorUnexpectedEos
+ readArray(stream, &cont[0], sizeof(typename C::value_type) * size); //throw SysErrorUnexpectedEos
}
return cont;
}
diff --git a/zen/string_tools.h b/zen/string_tools.h
index cafff3d5..181a3951 100644
--- a/zen/string_tools.h
+++ b/zen/string_tools.h
@@ -263,7 +263,7 @@ bool equalString(const S& lhs, const T& rhs)
template <class S, class T> inline
bool equalAsciiNoCase(const S& lhs, const T& rhs)
{
- assert(isAsciiString(lhs) || isAsciiString(rhs));
+ //assert(isAsciiString(lhs) || isAsciiString(rhs));
const size_t lhsLen = strLength(lhs);
return lhsLen == strLength(rhs) && impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == std::weak_ordering::equivalent;
}
diff --git a/zen/utf.h b/zen/utf.h
index ca231602..56b1ff55 100644
--- a/zen/utf.h
+++ b/zen/utf.h
@@ -222,15 +222,9 @@ private:
//----------------------------------------------------------------------------------------------------------------
-template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, std::integral_constant<int, 1>) { codePointToUtf8 (cp, writeOutput); } //UTF8-char
-template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, std::integral_constant<int, 2>) { codePointToUtf16(cp, writeOutput); } //Windows: UTF16-wchar_t
-template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, std::integral_constant<int, 4>) { writeOutput(cp); } //other OS: UTF32-wchar_t
-
-template <class CharType, class Function> inline
-void codePointToUtf(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a CharType
-{
- return codePointToUtf(cp, writeOutput, std::integral_constant<int, sizeof(CharType)>());
-}
+template <class Function> inline void codePointToUtfImpl(CodePoint cp, Function writeOutput, std::integral_constant<int, 1>) { codePointToUtf8 (cp, writeOutput); } //UTF8-char
+template <class Function> inline void codePointToUtfImpl(CodePoint cp, Function writeOutput, std::integral_constant<int, 2>) { codePointToUtf16(cp, writeOutput); } //Windows: UTF16-wchar_t
+template <class Function> inline void codePointToUtfImpl(CodePoint cp, Function writeOutput, std::integral_constant<int, 4>) { writeOutput(cp); } //other OS: UTF32-wchar_t
//----------------------------------------------------------------------------------------------------------------
@@ -277,9 +271,18 @@ private:
};
}
+
template <class CharType>
using UtfDecoder = impl::UtfDecoderImpl<CharType, sizeof(CharType)>;
+
+template <class CharType, class Function> inline
+void codePointToUtf(impl::CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a CharType
+{
+ return impl::codePointToUtfImpl(cp, writeOutput, std::integral_constant<int, sizeof(CharType)>());
+}
+
+
//-------------------------------------------------------------------------------------------
template <class UtfString> inline
diff --git a/zen/zstring.cpp b/zen/zstring.cpp
index 1e29e461..3f5328f7 100644
--- a/zen/zstring.cpp
+++ b/zen/zstring.cpp
@@ -15,7 +15,7 @@ Zstring getUnicodeNormalFormNonAscii(const Zstring& str)
{
//Example: const char* decomposed = "\x6f\xcc\x81";
// const char* precomposed = "\xc3\xb3";
- assert(!isAsciiString(str));
+ assert(!isAsciiString(str)); //includes "not-empty" check
assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls!
try
@@ -51,14 +51,14 @@ Zstring getUpperCaseNonAscii(const Zstring& str)
Zstring strNorm = getUnicodeNormalFormNonAscii(str);
try
{
- static_assert(sizeof(impl::CodePoint) == sizeof(gunichar));
Zstring output;
output.reserve(strNorm.size());
UtfDecoder<char> decoder(strNorm.c_str(), strNorm.size());
while (const std::optional<impl::CodePoint> cp = decoder.getNext())
- impl::codePointToUtf<char>(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent!
+ codePointToUtf<char>(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent!
+ static_assert(sizeof(impl::CodePoint) == sizeof(gunichar));
return output;
}
@@ -89,6 +89,10 @@ namespace
{
std::weak_ordering compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen)
{
+ //expect Unicode normalized strings!
+ assert(std::string(lhs, lhsLen) == getUnicodeNormalForm(std::string(lhs, lhsLen)));
+ assert(std::string(rhs, rhsLen) == getUnicodeNormalForm(std::string(rhs, rhsLen)));
+
//- strncasecmp implements ASCII CI-comparsion only! => signature is broken for UTF8-input; toupper() similarly doesn't support Unicode
//- wcsncasecmp: https://opensource.apple.com/source/Libc/Libc-763.12/string/wcsncasecmp-fbsd.c
// => re-implement comparison based on g_unichar_tolower() to avoid memory allocations
@@ -103,12 +107,13 @@ std::weak_ordering compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char*
return !cpR <=> !cpL;
static_assert(sizeof(gunichar) == sizeof(impl::CodePoint));
+ static_assert(std::is_unsigned_v<gunichar>, "unsigned char-comparison is the convention!");
//ordering: "to lower" converts to higher code points than "to upper"
const gunichar charL = ::g_unichar_toupper(*cpL); //note: tolower can be ambiguous, so don't use:
const gunichar charR = ::g_unichar_toupper(*cpR); //e.g. "Σ" (upper case) can be lower-case "ς" in the end of the word or "σ" in the middle.
if (charL != charR)
- return makeUnsigned(charL) <=> makeUnsigned(charR); //unsigned char-comparison is the convention!
+ return charL <=> charR;
}
}
}
@@ -206,25 +211,48 @@ std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs)
std::weak_ordering compareNoCase(const Zstring& lhs, const Zstring& rhs)
{
- //fast path: no need for extra memory allocations => ~ 6x speedup
- const size_t minSize = std::min(lhs.size(), rhs.size());
+ //fast path: no memory allocations => ~ 6x speedup
+ if (isAsciiString(lhs) && isAsciiString(rhs))
+ {
+ const size_t minSize = std::min(lhs.size(), rhs.size());
+ for (size_t i = 0; i < minSize; ++i)
+ {
+ //ordering: do NOT call compareAsciiNoCase(), which uses asciiToLower()!
+ const Zchar lUp = asciiToUpper(lhs[i]); //
+ const Zchar rUp = asciiToUpper(rhs[i]); //no surprises: emulate getUpperCase() [verified!]
+ if (lUp != rUp) //
+ return lUp <=> rUp; //
+ }
+ return lhs.size() <=> rhs.size();
+ }
+ //--------------------------------------
+
+ //can't we instead skip isAsciiString() and compare chars as long as isAsciiChar()?
+ // => NOPE! e.g. decomposed Unicode! A seemingly single isAsciiChar() might be followed by a combining character!!!
+
+ return getUpperCase(lhs) <=> getUpperCase(rhs);
+}
+
+
+bool equalNoCase(const Zstring& lhs, const Zstring& rhs)
+{
+ //fast-path: no need for extra memory allocations
+ const bool isAsciiL = isAsciiString(lhs);
+ const bool isAsciiR = isAsciiString(rhs);
+ if (isAsciiL != isAsciiR)
+ return false;
- size_t i = 0;
- for (; i < minSize; ++i)
+ if (isAsciiL)
{
- const Zchar l = lhs[i];
- const Zchar r = rhs[i];
- if (!isAsciiChar(l) || !isAsciiChar(r))
- goto slowPath; //=> let's NOT make assumptions how getUpperCase() compares "ASCII <=> non-ASCII"
-
- const Zchar lUp = asciiToUpper(l); //
- const Zchar rUp = asciiToUpper(r); //no surprises: emulate getUpperCase() [verified!]
- if (lUp != rUp) //
- return lUp <=> rUp; //
+ if (lhs.size() != rhs.size())
+ return false;
+
+ for (size_t i = 0; i < lhs.size(); ++i)
+ if (asciiToUpper(lhs[i]) !=
+ asciiToUpper(rhs[i]))
+ return false;
+ return true;
}
- return lhs.size() <=> rhs.size();
-slowPath: //--------------------------------------
- return compareNoCaseUtf8(lhs.c_str() + i, lhs.size() - i,
- rhs.c_str() + i, rhs.size() - i);
+ return getUpperCaseNonAscii(lhs) == getUpperCaseNonAscii(rhs);
}
diff --git a/zen/zstring.h b/zen/zstring.h
index 70b9f448..692217c1 100644
--- a/zen/zstring.h
+++ b/zen/zstring.h
@@ -63,10 +63,7 @@ template<> struct std::hash<ZstringNoCase> { size_t operator()(const ZstringNoCa
std::weak_ordering compareNoCase(const Zstring& lhs, const Zstring& rhs);
-inline
-bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return compareNoCase(lhs, rhs) == std::weak_ordering::equivalent; }
-//note: the "lhs.size() != rhs.size()" short-cut would require two isAsciiString() checks
-//=> generally SLOWER than starting comparison directly during first pass and breaking on first difference!
+bool equalNoCase(const Zstring& lhs, const Zstring& rhs);
//------------------------------------------------------------------------------------------
std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs);
bgstack15