8.10

author: Daniel Wilhelm <shieldwed@outlook.com> 2017-03-12 22:00:35 -0600
committer: Daniel Wilhelm <shieldwed@outlook.com> 2017-03-12 22:00:35 -0600
commit: 3ba62ef1de77153e5a8c7bad4451b96f6a1678b0 (patch)
tree: e6e69717e394a528a2e2aca3af036d4befaa9658 /zen
parent: 8.9 (diff)
download: FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.tar.gz
FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.tar.bz2
FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.zip
18 files changed, 480 insertions, 450 deletions
diff --git a/zen/file_access.cpp b/zen/file_access.cpp
index 61a003bb..71d00386 100755
--- a/zen/file_access.cpp
+++ b/zen/file_access.cpp
@@ -27,7 +27,7 @@
 using namespace zen;
 
 
-Opt<PathComponents> zen::getPathComponents(const Zstring& itemPath)
+Opt<PathComponents> zen::parsePathComponents(const Zstring& itemPath)
 {
     if (startsWith(itemPath, "/"))
     {
@@ -44,7 +44,7 @@ Opt<PathComponents> zen::getPathComponents(const Zstring& itemPath)
 
 Opt<Zstring> zen::getParentFolderPath(const Zstring& itemPath)
 {
-    if (const Opt<PathComponents> comp = getPathComponents(itemPath))
+    if (const Opt<PathComponents> comp = parsePathComponents(itemPath))
     {
         if (comp->relPath.empty())
             return NoValue();
@@ -73,7 +73,7 @@ ItemType zen::getItemType(const Zstring& itemPath) //throw FileError
 }
 
 
-PathDetails zen::getPathDetails(const Zstring& itemPath) //throw FileError
+PathStatus zen::getPathStatus(const Zstring& itemPath) //throw FileError
 {
     const Opt<Zstring> parentPath = getParentFolderPath(itemPath);
     try
@@ -91,7 +91,7 @@ PathDetails zen::getPathDetails(const Zstring& itemPath) //throw FileError
     const Zstring itemName = afterLast(itemPath, FILE_NAME_SEPARATOR, IF_MISSING_RETURN_ALL);
     assert(!itemName.empty());
 
-    PathDetails pd = getPathDetails(*parentPath); //throw FileError
+    PathStatus pd = getPathStatus(*parentPath); //throw FileError
     if (!pd.relPath.empty())
     {
         pd.relPath.push_back(itemName);
@@ -115,7 +115,7 @@ PathDetails zen::getPathDetails(const Zstring& itemPath) //throw FileError
 
 Opt<ItemType> zen::getItemTypeIfExists(const Zstring& itemPath) //throw FileError
 {
-    const PathDetails pd = getPathDetails(itemPath); //throw FileError
+    const PathStatus pd = getPathStatus(itemPath); //throw FileError
     if (pd.relPath.empty())
         return pd.existingType;
     return NoValue();
@@ -502,8 +502,8 @@ void zen::createDirectoryIfMissingRecursion(const Zstring& dirPath) //throw File
     }
     catch (FileError&)
     {
-        Opt<PathDetails> pd;
-        try { pd = getPathDetails(dirPath); /*throw FileError*/ }
+        Opt<PathStatus> pd;
+        try { pd = getPathStatus(dirPath); /*throw FileError*/ }
         catch (FileError&) {} //previous exception is more relevant
 
         if (pd && pd->existingType != ItemType::FILE)
diff --git a/zen/file_access.h b/zen/file_access.h
index c3a52f8a..a6b221e5 100755
--- a/zen/file_access.h
+++ b/zen/file_access.h
@@ -22,7 +22,7 @@ struct PathComponents
     Zstring rootPath; //itemPath = rootPath + (FILE_NAME_SEPARATOR?) + relPath
     Zstring relPath;  //
 };
-Opt<PathComponents> getPathComponents(const Zstring& itemPath); //no value on failure
+Opt<PathComponents> parsePathComponents(const Zstring& itemPath); //no value on failure
 
 Opt<Zstring> getParentFolderPath(const Zstring& itemPath);
 
@@ -43,13 +43,13 @@ ItemType      getItemType        (const Zstring& itemPath); //throw FileError
 //execute potentially SLOW folder traversal but distinguish error/not existing
 Opt<ItemType> getItemTypeIfExists(const Zstring& itemPath); //throw FileError
 
-struct PathDetails
+struct PathStatus
 {
     ItemType existingType;
     Zstring existingPath;         //itemPath =: existingPath + relPath
     std::vector<Zstring> relPath; //
 };
-PathDetails getPathDetails(const Zstring& itemPath); //throw FileError
+PathStatus getPathStatus(const Zstring& itemPath); //throw FileError
 
 enum class ProcSymlink
 {
diff --git a/zen/file_error.h b/zen/file_error.h
index 87f9525b..949c644f 100755
--- a/zen/file_error.h
+++ b/zen/file_error.h
@@ -46,7 +46,7 @@ DEFINE_NEW_FILE_ERROR(ErrorDifferentVolume);
 //----------- facilitate usage of std::wstring for error messages --------------------
 
 inline std::wstring fmtPath(const std::wstring& displayPath) { return L'\"' + displayPath + L'\"'; }
-inline std::wstring fmtPath(const Zstring& displayPath) { return fmtPath(utfCvrtTo<std::wstring>(displayPath)); }
+inline std::wstring fmtPath(const Zstring& displayPath) { return fmtPath(utfTo<std::wstring>(displayPath)); }
 inline std::wstring fmtPath(const wchar_t* displayPath) { return fmtPath(std::wstring(displayPath)); } //resolve overload ambiguity
 }
 
diff --git a/zen/file_io.cpp b/zen/file_io.cpp
index b4affd37..0c5ff490 100755
--- a/zen/file_io.cpp
+++ b/zen/file_io.cpp
@@ -140,7 +140,7 @@ size_t FileInput::read(void* buffer, size_t bytesToRead) //throw FileError, X; r
         if (notifyUnbufferedIO_) notifyUnbufferedIO_(bytesRead); //throw X
 
         if (bytesRead == 0) //end of file
-            bytesToRead = memBuf_.size();
+            bytesToRead = std::min(bytesToRead, memBuf_.size());
     }
 
     std::copy(memBuf_.begin(), memBuf_.begin() + bytesToRead, static_cast<char*>(buffer));
@@ -185,9 +185,10 @@ FileOutput::FileOutput(const Zstring& filePath, AccessFlag access, const IOCallb
 
 FileOutput::~FileOutput()
 {
+    notifyUnbufferedIO_ = nullptr; //no call-backs during destruction!!!
     try
     {
-        flushBuffers(); //throw FileError, X
+        flushBuffers(); //throw FileError, (X)
     }
     catch (...) { assert(false); }
 }
diff --git a/zen/file_io.h b/zen/file_io.h
index 8a5e0f7f..827abd9e 100755
--- a/zen/file_io.h
+++ b/zen/file_io.h
@@ -90,7 +90,7 @@ private:
     size_t tryWrite(const void* buffer, size_t bytesToWrite); //throw FileError; may return short! CONTRACT: bytesToWrite > 0
 
     std::vector<char> memBuf_;
-    const IOCallback notifyUnbufferedIO_; //throw X
+    IOCallback notifyUnbufferedIO_; //throw X
 };
 
 //-----------------------------------------------------------------------------------------------
diff --git a/zen/format_unit.cpp b/zen/format_unit.cpp
index cf17c8d4..a2208b3e 100755
--- a/zen/format_unit.cpp
+++ b/zen/format_unit.cpp
@@ -5,7 +5,7 @@
 // *****************************************************************************
 
 #include "format_unit.h"
-#include <cwchar> //swprintf
+//#include <cwchar> //swprintf
 #include <ctime>
 #include <cstdio>
 #include "basic_math.h"
@@ -168,7 +168,7 @@ std::wstring zen::ffs_Impl::includeNumberSeparator(const std::wstring& number)
 
     //::setlocale (LC_ALL, ""); -> implicitly called by wxLocale
     const lconv* localInfo = ::localeconv(); //always bound according to doc
-    const std::wstring& thousandSep = utfCvrtTo<std::wstring>(localInfo->thousands_sep);
+    const std::wstring& thousandSep = utfTo<std::wstring>(localInfo->thousands_sep);
 
     // THOUSANDS_SEPARATOR = std::use_facet<std::numpunct<wchar_t>>(std::locale("")).thousands_sep(); - why not working?
     // DECIMAL_POINT       = std::use_facet<std::numpunct<wchar_t>>(std::locale("")).decimal_point();
diff --git a/zen/globals.h b/zen/globals.h
index a1fd2764..b6c5dd28 100755
--- a/zen/globals.h
+++ b/zen/globals.h
@@ -18,7 +18,11 @@ template <class T>
 class Global
 {
 public:
-    Global() { static_assert(std::is_trivially_destructible<Pod>::value, "this memory needs to live forever"); }
+    Global()
+    {
+        static_assert(std::is_trivially_destructible<Pod>::value, "this memory needs to live forever");
+        assert(!pod.inst && !pod.spinLock); //we depend on static zero-initialization!
+    }
     explicit Global(std::unique_ptr<T>&& newInst) { set(std::move(newInst)); }
     ~Global() { set(nullptr); }
 
@@ -50,9 +54,9 @@ private:
     //=> use trivially-destructible POD only!!!
     struct Pod
     {
-        std::shared_ptr<T>* inst = nullptr;
+        std::shared_ptr<T>* inst;   // = nullptr;
+        std::atomic<bool> spinLock; // { false }; rely entirely on static zero-initialization! => avoid potential contention with worker thread during Global<> construction!
         //serialize access; can't use std::mutex: has non-trival destructor
-        std::atomic<bool> spinLock { false };
     } pod;
 };
 
diff --git a/zen/recycler.cpp b/zen/recycler.cpp
index 02ea026a..0c71bf3b 100755
--- a/zen/recycler.cpp
+++ b/zen/recycler.cpp
@@ -45,7 +45,7 @@ bool zen::recycleOrDeleteIfExists(const Zstring& itemPath) //throw FileError
             return true;
         }
 
-        throw FileError(errorMsg, replaceCpy<std::wstring>(L"Glib Error Code %x:", L"%x", numberTo<std::wstring>(error->code)) + L" " + utfCvrtTo<std::wstring>(error->message));
+        throw FileError(errorMsg, formatSystemError(L"g_file_trash", L"Glib Error Code " + numberTo<std::wstring>(error->code), utfTo<std::wstring>(error->message)));
         //g_quark_to_string(error->domain)
     }
     return true;
diff --git a/zen/scope_guard.h b/zen/scope_guard.h
index 09a7fbdb..62552f7b 100755
--- a/zen/scope_guard.h
+++ b/zen/scope_guard.h
@@ -13,7 +13,7 @@
 
 
 //std::uncaught_exceptions() currently unsupported on GCC and Clang => clean up ASAP
-    static_assert(__GNUC__ < 6 || (__GNUC__ == 6 && (__GNUC_MINOR__ < 2 || (__GNUC_MINOR__ == 2 && __GNUC_PATCHLEVEL__ <= 1))), "check std::uncaught_exceptions support");
+    static_assert(__GNUC__ < 6 || (__GNUC__ == 6 && (__GNUC_MINOR__ < 3 || (__GNUC_MINOR__ == 3 && __GNUC_PATCHLEVEL__ <= 1))), "check std::uncaught_exceptions support");
 
 namespace __cxxabiv1
 {
diff --git a/zen/serialize.h b/zen/serialize.h
index bb2f7a45..c8dfb96d 100755
--- a/zen/serialize.h
+++ b/zen/serialize.h
@@ -241,6 +241,7 @@ template <class BufferedInputStream> inline
 void readArray(BufferedInputStream& stream, void* buffer, size_t len) //throw UnexpectedEndOfStreamError
 {
     const size_t bytesRead = stream.read(buffer, len);
+    assert(bytesRead <= len); //buffer overflow otherwise not always detected!
     if (bytesRead < len)
         throw UnexpectedEndOfStreamError();
 }
diff --git a/zen/shell_execute.h b/zen/shell_execute.h
index 9ba0aef0..5e4ddf1a 100755
--- a/zen/shell_execute.h
+++ b/zen/shell_execute.h
@@ -41,7 +41,7 @@ void shellExecute(const Zstring& command, ExecutionType type) //throw FileError
         //Posix::system - execute a shell command
         int rv = ::system(command.c_str()); //do NOT use std::system as its documentation says nothing about "WEXITSTATUS(rv)", ect...
         if (rv == -1 || WEXITSTATUS(rv) == 127) //http://linux.die.net/man/3/system    "In case /bin/sh could not be executed, the exit status will be that of a command that does exit(127)"
-            throw FileError(_("Incorrect command line:") + L"\n" + utfCvrtTo<std::wstring>(command));
+            throw FileError(_("Incorrect command line:") + L"\n" + utfTo<std::wstring>(command));
     }
     else
         runAsync([=] { int rv = ::system(command.c_str()); (void)rv; });
diff --git a/zen/string_base.h b/zen/string_base.h
index 3afa66c6..b5e45c0e 100755
--- a/zen/string_base.h
+++ b/zen/string_base.h
@@ -264,8 +264,8 @@ public:
     void push_back(Char val) { operator+=(val); } //STL access
     void pop_back();
 
-    Zbase& operator=(const Zbase& str);
     Zbase& operator=(Zbase&& tmp) noexcept;
+    Zbase& operator=(const Zbase& str);
     Zbase& operator=(const Char* str)   { return assign(str, strLength(str)); }
     Zbase& operator=(Char ch)           { return assign(&ch, 1); }
     Zbase& operator+=(const Zbase& str) { return append(str.c_str(), str.length()); }
@@ -573,11 +573,14 @@ template <class InputIterator> inline
 Zbase<Char, SP>& Zbase<Char, SP>::append(InputIterator first, InputIterator last)
 {
     const size_t len = std::distance(first, last);
-    const size_t thisLen = length();
-    reserve(thisLen + len); //make unshared and check capacity
-
-    *std::copy(first, last, rawStr_ + thisLen) = 0;
-    this->setLength(rawStr_, thisLen + len);
+	if (len > 0) //avoid making this string unshared for no reason
+	{
+		const size_t thisLen = length();
+		reserve(thisLen + len); //make unshared and check capacity
+
+		*std::copy(first, last, rawStr_ + thisLen) = 0;
+		this->setLength(rawStr_, thisLen + len);
+	}
     return *this;
 }
 
diff --git a/zen/string_tools.h b/zen/string_tools.h
index 5a82e0ed..236f8df6 100755
--- a/zen/string_tools.h
+++ b/zen/string_tools.h
@@ -25,11 +25,31 @@ namespace zen
 template <class Char> bool isWhiteSpace(Char ch);
 template <class Char> bool isDigit     (Char ch); //not exactly the same as "std::isdigit" -> we consider '0'-'9' only!
 template <class Char> bool isHexDigit  (Char ch);
-template <class Char> bool isAlpha     (Char ch);
+template <class Char> bool isAsciiAlpha(Char ch);
 
-template <class S, class T> bool startsWith(const S& str, const T& prefix);  //
-template <class S, class T> bool endsWith  (const S& str, const T& postfix); //both S and T can be strings or char/wchar_t arrays or simple char/wchar_t
-template <class S, class T> bool contains  (const S& str, const T& term);    //
+//case-sensitive comparison (compile-time correctness:  use different number of arguments as STL comparison predicates!)
+struct CmpBinary { template <class Char> int operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const; };
+
+//basic case-insensitive comparison (considering A-Z only!)
+struct CmpAsciiNoCase { template <class Char> int operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const; };
+
+struct LessAsciiNoCase
+{
+    template <class S> //don't support heterogeneous input! => use as container predicate only!
+    bool operator()(const S& lhs, const S& rhs) const { return CmpAsciiNoCase()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
+};
+
+//both S and T can be strings or char/wchar_t arrays or simple char/wchar_t
+template <class S, class T> bool contains(const S& str, const T& term);
+
+template <class S, class T>                 bool startsWith(const S& str, const T& prefix);
+template <class S, class T, class Function> bool startsWith(const S& str, const T& prefix,  Function cmpStringFun);
+
+template <class S, class T>                 bool endsWith  (const S& str, const T& postfix);
+template <class S, class T, class Function> bool endsWith  (const S& str, const T& postfix, Function cmpStringFun);
+
+template <class S, class T>                 bool strEqual(const S& lhs, const T& rhs);
+template <class S, class T, class Function> bool strEqual(const S& lhs, const T& rhs, Function cmpStringFun);
 
 enum FailureReturnVal
 {
@@ -42,16 +62,23 @@ template <class S, class T> S beforeLast (const S& str, const T& term, FailureRe
 template <class S, class T> S afterFirst (const S& str, const T& term, FailureReturnVal rv);
 template <class S, class T> S beforeFirst(const S& str, const T& term, FailureReturnVal rv);
 
-template <class S, class T> std::vector<S> split(const S& str, const T& delimiter);
-template <class S> S    trimCpy(S  str, bool fromLeft = true, bool fromRight = true);
-template <class S> void trim   (S& str, bool fromLeft = true, bool fromRight = true);
+enum class SplitType
+{
+    ALLOW_EMPTY,
+    SKIP_EMPTY
+};
+template <class S, class T> std::vector<S> split(const S& str, const T& delimiter, SplitType st);
+
+template <class S>                 S    trimCpy(S  str, bool fromLeft = true, bool fromRight = true);
+template <class S>                 void trim   (S& str, bool fromLeft = true, bool fromRight = true);
 template <class S, class Function> void trim(S& str, bool fromLeft, bool fromRight, Function trimThisChar);
+
 template <class S, class T, class U> void replace   (      S& str, const T& oldTerm, const U& newTerm, bool replaceAll = true);
 template <class S, class T, class U> S    replaceCpy(const S& str, const T& oldTerm, const U& newTerm, bool replaceAll = true);
 
 //high-performance conversion between numbers and strings
 template <class S,   class Num> S   numberTo(const Num& number);
-template <class Num, class S  > Num stringTo(const S&   str);
+template <class Num, class S>   Num stringTo(const S&   str);
 
 std::pair<char, char> hexify  (unsigned char c, bool upperCase = true);
 char                  unhexify(char high, char low);
@@ -61,9 +88,6 @@ template <class S, class T, class Num> S printNumber(const T& format, const Num&
 //string to string conversion: converts string-like type into char-compatible target string class
 template <class T, class S> T copyStringTo(S&& str);
 
-//case-sensitive comparison
-template <class S, class T> int cmpString(const S& lhs, const T& rhs);
-
 
 
 
@@ -99,7 +123,7 @@ bool isWhiteSpace(wchar_t ch)
 
 
 template <class Char> inline
-bool isDigit(Char ch) //similar to implmenetation of std::::isdigit()!
+bool isDigit(Char ch) //similar to implementation of std::isdigit()!
 {
     static_assert(IsSameType<Char, char>::value || IsSameType<Char, wchar_t>::value, "");
     return static_cast<Char>('0') <= ch && ch <= static_cast<Char>('9');
@@ -116,40 +140,52 @@ bool isHexDigit(Char c)
 }
 
 
-template <> bool isAlpha(char ch) = delete; //probably not a good idea with UTF-8 anyway...
-
-template <> inline bool isAlpha(wchar_t ch) { return std::iswalpha(ch) != 0; }
+template <class Char> inline
+bool isAsciiAlpha(Char c)
+{
+    static_assert(IsSameType<Char, char>::value || IsSameType<Char, wchar_t>::value, "");
+    return (static_cast<Char>('A') <= c && c <= static_cast<Char>('Z')) ||
+           (static_cast<Char>('a') <= c && c <= static_cast<Char>('z'));
+}
 
 
-template <class S, class T> inline
-bool startsWith(const S& str, const T& prefix)
+template <class S, class T, class Function> inline
+bool startsWith(const S& str, const T& prefix, Function cmpStringFun)
 {
-    static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
     const size_t pfLen = strLength(prefix);
     if (strLength(str) < pfLen)
         return false;
 
-    const auto* const cmpFirst = strBegin(str);
-    return std::equal(cmpFirst, cmpFirst + pfLen,
-                      strBegin(prefix));
+    return cmpStringFun(strBegin(str),    pfLen,
+                        strBegin(prefix), pfLen) == 0;
 }
 
 
-template <class S, class T> inline
-bool endsWith(const S& str, const T& postfix)
+template <class S, class T, class Function> inline
+bool endsWith(const S& str, const T& postfix, Function cmpStringFun)
 {
-    static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
     const size_t strLen = strLength(str);
     const size_t pfLen  = strLength(postfix);
     if (strLen < pfLen)
         return false;
 
-    const auto* const cmpFirst = strBegin(str) + strLen - pfLen;
-    return std::equal(cmpFirst, cmpFirst + pfLen,
-                      strBegin(postfix));
+    return cmpStringFun(strBegin(str) + strLen - pfLen, pfLen,
+                        strBegin(postfix), pfLen) == 0;
 }
 
 
+template <class S, class T, class Function> inline
+bool strEqual(const S& lhs, const T& rhs, Function cmpStringFun)
+{
+    return cmpStringFun(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) == 0;
+}
+
+
+template <class S, class T> inline bool startsWith(const S& str, const T& prefix ) { return startsWith(str, prefix,  CmpBinary()); }
+template <class S, class T> inline bool endsWith  (const S& str, const T& postfix) { return endsWith  (str, postfix, CmpBinary()); }
+template <class S, class T> inline bool strEqual  (const S& lhs, const T& rhs    ) { return strEqual  (lhs, rhs,     CmpBinary()); }
+
+
 template <class S, class T> inline
 bool contains(const S& str, const T& term)
 {
@@ -173,6 +209,7 @@ S afterLast(const S& str, const T& term, FailureReturnVal rv)
 {
     static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
     const size_t termLen = strLength(term);
+    assert(termLen > 0);
 
     const auto* const strFirst  = strBegin(str);
     const auto* const strLast   = strFirst + strLength(str);
@@ -192,12 +229,15 @@ template <class S, class T> inline
 S beforeLast(const S& str, const T& term, FailureReturnVal rv)
 {
     static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
+    const size_t termLen = strLength(term);
+    assert(termLen > 0);
+
     const auto* const strFirst  = strBegin(str);
     const auto* const strLast   = strFirst + strLength(str);
     const auto* const termFirst = strBegin(term);
 
     const auto* it = search_last(strFirst, strLast,
-                                 termFirst, termFirst + strLength(term));
+                                 termFirst, termFirst + termLen);
     if (it == strLast)
         return rv == IF_MISSING_RETURN_ALL ? str : S();
 
@@ -210,6 +250,8 @@ S afterFirst(const S& str, const T& term, FailureReturnVal rv)
 {
     static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
     const size_t termLen = strLength(term);
+    assert(termLen > 0);
+
     const auto* const strFirst  = strBegin(str);
     const auto* const strLast   = strFirst + strLength(str);
     const auto* const termFirst = strBegin(term);
@@ -228,12 +270,15 @@ template <class S, class T> inline
 S beforeFirst(const S& str, const T& term, FailureReturnVal rv)
 {
     static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
+    const size_t termLen = strLength(term);
+    assert(termLen > 0);
+
     const auto* const strFirst  = strBegin(str);
     const auto* const strLast   = strFirst + strLength(str);
     const auto* const termFirst = strBegin(term);
 
     auto it = std::search(strFirst, strLast,
-                          termFirst,  termFirst  + strLength(term));
+                          termFirst,  termFirst  + termLen);
     if (it == strLast)
         return rv == IF_MISSING_RETURN_ALL ? str : S();
 
@@ -242,34 +287,35 @@ S beforeFirst(const S& str, const T& term, FailureReturnVal rv)
 
 
 template <class S, class T> inline
-std::vector<S> split(const S& str, const T& delimiter)
+std::vector<S> split(const S& str, const T& delimiter, SplitType st)
 {
     static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
-
     const size_t delimLen = strLength(delimiter);
-
+    assert(delimLen > 0);
     if (delimLen == 0)
-        return { str };
-    else
     {
-        const auto* const delimFirst = strBegin(delimiter);
-        const auto* const delimLast  = delimFirst + delimLen;
+        if (str.empty() && st == SplitType::SKIP_EMPTY)
+            return {};
+        return { str };
+    }
 
-        const auto* blockStart    = strBegin(str);
-        const auto* const strLast = blockStart + strLength(str);
+    const auto* const delimFirst = strBegin(delimiter);
+    const auto* const delimLast  = delimFirst + delimLen;
 
-        std::vector<S> output;
-
-        for (;;)
-        {
-            const auto* const blockEnd = std::search(blockStart, strLast,
-                                                     delimFirst, delimLast);
+    const auto* blockStart    = strBegin(str);
+    const auto* const strLast = blockStart + strLength(str);
 
+    std::vector<S> output;
+    for (;;)
+    {
+        const auto* const blockEnd = std::search(blockStart, strLast,
+                                                 delimFirst, delimLast);
+        if (blockStart != blockEnd || st == SplitType::ALLOW_EMPTY)
             output.emplace_back(blockStart, blockEnd - blockStart);
-            if (blockEnd == strLast) //clients expect: if delimiter not found, return str
-                return output;
-            blockStart = blockEnd + delimLen;
-        }
+
+        if (blockEnd == strLast)
+            return output;
+        blockStart = blockEnd + delimLen;
     }
 }
 
@@ -389,33 +435,47 @@ struct CopyStringToString<T, T> //perf: we don't need a deep copy if string type
     template <class S>
     T copy(S&& str) const { return std::forward<S>(str); }
 };
+
+inline int strcmpWithNulls(const char*    ptr1, const char*    ptr2, size_t num) { return std::memcmp (ptr1, ptr2, num); }
+inline int strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num); }
 }
 
 template <class T, class S> inline
 T copyStringTo(S&& str) { return impl::CopyStringToString<std::decay_t<S>, T>().copy(std::forward<S>(str)); }
 
 
-template <class S, class T> inline
-int cmpString(const S& lhs, const T& rhs)
+template <class Char> inline
+int CmpBinary::operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const
 {
-    static_assert(IsSameType<typename GetCharType<S>::Type, typename GetCharType<T>::Type>::value, "");
+    //support embedded 0, unlike strncmp/wcsncmp!
+    const int rv = impl::strcmpWithNulls(lhs, rhs, std::min(lhsLen, rhsLen));
+    if (rv != 0)
+        return rv;
+    return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
+}
 
-    const size_t lenL = strLength(lhs);
-    const size_t lenR = strLength(rhs);
 
-    const auto* strPosL = strBegin(lhs);
-    const auto* strPosR = strBegin(rhs);
+template <class Char> inline
+int CmpAsciiNoCase::operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const
+{
+    auto asciiToLower = [](Char c) //ordering: lower-case chars have higher code points than upper-case
+    {
+        if (static_cast<Char>('A') <= c && c <= static_cast<Char>('Z'))
+            return static_cast<Char>(c - static_cast<Char>('A') + static_cast<Char>('a'));
+        return c;
+    };
 
-    const auto* const strPosLLast = strPosL + std::min(lenL, lenR);
+    const auto* const lhsLast = lhs + std::min(lhsLen, rhsLen);
 
-    while (strPosL != strPosLLast)
+    while (lhs != lhsLast)
     {
-        const auto charL = static_cast<unsigned int>(*strPosL++); //unsigned char-comparison is the convention!
-        const auto charR = static_cast<unsigned int>(*strPosR++);
+        const Char charL = asciiToLower(*lhs++);
+        const Char charR = asciiToLower(*rhs++);
         if (charL != charR)
-            return static_cast<int>(charL) - static_cast<int>(charR);
+            return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention!
+        //unsigned underflow is well-defined!
     }
-    return static_cast<int>(lenL) - static_cast<int>(lenR);
+    return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
 }
 
 
@@ -424,13 +484,13 @@ namespace impl
 template <class Num> inline
 int saferPrintf(char* buffer, size_t bufferSize, const char* format, const Num& number) //there is no such thing as a "safe" printf ;)
 {
-    return std::snprintf(buffer, bufferSize, format, number); //C99
+    return std::snprintf(buffer, bufferSize, format, number); //C99: returns number of chars written if successful, < 0 or >= bufferSize on failure
 }
 
 template <class Num> inline
 int saferPrintf(wchar_t* buffer, size_t bufferSize, const wchar_t* format, const Num& number)
 {
-    return std::swprintf(buffer, bufferSize, format, number); //C99
+    return std::swprintf(buffer, bufferSize, format, number); //C99: returns number of chars written if successful, < 0 on failure (including buffer too small)
 }
 }
 
@@ -444,7 +504,7 @@ S printNumber(const T& format, const Num& number) //format a single number using
     CharType buffer[BUFFER_SIZE]; //zero-initialize?
     const int charsWritten = impl::saferPrintf(buffer, BUFFER_SIZE, strBegin(format), number);
 
-    return charsWritten > 0 ? S(buffer, charsWritten) : S();
+    return 0 < charsWritten && charsWritten < BUFFER_SIZE ? S(buffer, charsWritten) : S();
 }
 
 
@@ -607,12 +667,8 @@ Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to i
             number *= 10;
             number += c - static_cast<CharType>('0');
         }
-        else
-        {
-            //rest of string should contain whitespace only, it's NOT a bug if there is something else!
-            //assert(std::all_of(iter, last, &isWhiteSpace<CharType>)); -> this is NO assert situation
-            break;
-        }
+        else //rest of string should contain whitespace only, it's NOT a bug if there is something else!
+            break; //assert(std::all_of(iter, last, &isWhiteSpace<CharType>)); -> this is NO assert situation
     }
     return number;
 }
diff --git a/zen/sys_error.h b/zen/sys_error.h
index a19409ab..f7c128ef 100755
--- a/zen/sys_error.h
+++ b/zen/sys_error.h
@@ -67,7 +67,7 @@ std::wstring formatSystemErrorRaw(ErrorCode ec) //return empty string on error
     std::wstring errorMsg;
     ZEN_ON_SCOPE_EXIT(errno = currentError);
 
-    errorMsg = utfCvrtTo<std::wstring>(::strerror(ec));
+    errorMsg = utfTo<std::wstring>(::strerror(ec));
     trim(errorMsg); //Windows messages seem to end with a blank...
 
     return errorMsg;
diff --git a/zen/thread.h b/zen/thread.h
index a59f3807..ae4c347e 100755
--- a/zen/thread.h
+++ b/zen/thread.h
@@ -28,26 +28,26 @@ public:
     template <class Function>
     InterruptibleThread(Function&& f);
 
-    bool joinable () const { return stdThread.joinable(); }
+    bool joinable () const { return stdThread_.joinable(); }
     void interrupt();
-    void join     () { stdThread.join(); }
-    void detach   () { stdThread.detach(); }
+    void join     () { stdThread_.join(); }
+    void detach   () { stdThread_.detach(); }
 
     template <class Rep, class Period>
     bool tryJoinFor(const std::chrono::duration<Rep, Period>& relTime)
     {
-        if (threadCompleted.wait_for(relTime) == std::future_status::ready)
+        if (threadCompleted_.wait_for(relTime) == std::future_status::ready)
         {
-            stdThread.join(); //runs thread-local destructors => this better be fast!!!
+            stdThread_.join(); //runs thread-local destructors => this better be fast!!!
             return true;
         }
         return false;
     }
 
 private:
-    std::thread stdThread;
+    std::thread stdThread_;
     std::shared_ptr<InterruptionStatus> intStatus_;
-    std::future<void> threadCompleted;
+    std::future<void> threadCompleted_;
 };
 
 //context of worker thread:
@@ -376,9 +376,9 @@ template <class Function> inline
 InterruptibleThread::InterruptibleThread(Function&& f) : intStatus_(std::make_shared<InterruptionStatus>())
 {
     std::promise<void> pFinished;
-    threadCompleted = pFinished.get_future();
+    threadCompleted_ = pFinished.get_future();
 
-    stdThread = std::thread([f = std::forward<Function>(f),
+    stdThread_ = std::thread([f = std::forward<Function>(f),
                                intStatus = this->intStatus_,
                                pFinished = std::move(pFinished)]() mutable
     {
diff --git a/zen/utf.h b/zen/utf.h
index 41fdf58c..ab8fda50 100755
--- a/zen/utf.h
+++ b/zen/utf.h
@@ -10,40 +10,25 @@
 #include <cstdint>
 #include <iterator>
 #include "string_tools.h" //copyStringTo
+#include "optional.h"
 
 namespace zen
 {
 //convert all(!) char- and wchar_t-based "string-like" objects applying a UTF8 conversions (but only if necessary!)
 template <class TargetString, class SourceString>
-TargetString utfCvrtTo(const SourceString& str);
+TargetString utfTo(const SourceString& str);
 
 const char BYTE_ORDER_MARK_UTF8[] = "\xEF\xBB\xBF";
 
-template <class CharString>
-bool isValidUtf8(const CharString& str); //check for UTF-8 encoding errors
-
-//---- explicit conversion: wide <-> utf8 ----
-template <class CharString, class WideString>
-CharString wideToUtf8(const WideString& str); //example: std::string tmp = wideToUtf8<std::string>(L"abc");
-
-template <class WideString, class CharString>
-WideString utf8ToWide(const CharString& str); //std::wstring tmp = utf8ToWide<std::wstring>("abc");
+template <class UtfString>
+bool isValidUtf(const UtfString& str); //check for UTF-8 encoding errors
 
 //access unicode characters in UTF-encoded string (char- or wchar_t-based)
 template <class UtfString>
 size_t unicodeLength(const UtfString& str); //return number of code points for UTF-encoded string
 
 template <class UtfString>
-size_t findUnicodePos(const UtfString& str, size_t unicodePos); //return position of unicode char in UTF-encoded string
-
-
-
-
-
-
-
-
-
+UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t uniPosLast);
 
 
 
@@ -58,7 +43,7 @@ namespace implementation
 {
 using CodePoint = uint32_t;
 using Char16    = uint16_t;
-using Char8     = unsigned char;
+using Char8     = uint8_t;
 
 const CodePoint LEAD_SURROGATE      = 0xd800;
 const CodePoint TRAIL_SURROGATE     = 0xdc00; //== LEAD_SURROGATE_MAX + 1
@@ -72,7 +57,6 @@ template <class Function> inline
 void codePointToUtf16(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char16
 {
     //http://en.wikipedia.org/wiki/UTF-16
-
     if (cp < LEAD_SURROGATE)
         writeOutput(static_cast<Char16>(cp));
     else if (cp <= TRAIL_SURROGATE_MAX) //invalid code point
@@ -82,8 +66,8 @@ void codePointToUtf16(CodePoint cp, Function writeOutput) //"writeOutput" is a u
     else if (cp <= CODE_POINT_MAX)
     {
         cp -= 0x10000;
-        writeOutput(LEAD_SURROGATE  + static_cast<Char16>(cp >> 10));
-        writeOutput(TRAIL_SURROGATE + static_cast<Char16>(cp & 0x3ff));
+        writeOutput(static_cast<Char16>( LEAD_SURROGATE + (cp >> 10)));
+        writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0x3ff)));
     }
     else //invalid code point
         codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
@@ -104,15 +88,19 @@ size_t getUtf16Len(Char16 ch) //ch must be first code unit! returns 0 on error!
 }
 
 
-template <class CharIterator, class Function> inline
-void utf16ToCodePoint(CharIterator first, CharIterator last, Function writeOutput) //"writeOutput" is a unary function taking a CodePoint
+class Utf16Decoder
 {
-    static_assert(sizeof(typename std::iterator_traits<CharIterator>::value_type) == 2, "");
+public:
+    Utf16Decoder(const Char16* str, size_t len) : it_(str), last_(str + len) {}
 
-    for ( ; first != last; ++first)
+    Opt<CodePoint> getNext()
     {
-        CodePoint cp = static_cast<Char16>(*first);
-        switch (getUtf16Len(static_cast<Char16>(cp)))
+        if (it_ == last_)
+            return NoValue();
+
+        const Char16 ch = *it_++;
+        CodePoint cp = ch;
+        switch (getUtf16Len(ch))
         {
             case 0: //invalid utf16 character
                 cp = REPLACEMENT_CHAR;
@@ -120,23 +108,33 @@ void utf16ToCodePoint(CharIterator first, CharIterator last, Function writeOutpu
             case 1:
                 break;
             case 2:
-                if (++first != last) //trail surrogate expected!
-                {
-                    const Char16 ch = static_cast<Char16>(*first);
-                    if (TRAIL_SURROGATE <= ch && ch <= TRAIL_SURROGATE_MAX) //trail surrogate expected!
-                    {
-                        cp = ((cp - LEAD_SURROGATE) << 10) + (ch - TRAIL_SURROGATE) + 0x10000;
-                        break;
-                    }
-                }
-                --first;
-                cp = REPLACEMENT_CHAR;
+                decodeTrail(cp);
                 break;
         }
-        writeOutput(cp);
+        return cp;
+    }
+
+private:
+    void decodeTrail(CodePoint& cp)
+    {
+        if (it_ != last_) //trail surrogate expected!
+        {
+            const Char16 ch = *it_;
+            if (TRAIL_SURROGATE <= ch && ch <= TRAIL_SURROGATE_MAX) //trail surrogate expected!
+            {
+                cp = ((cp - LEAD_SURROGATE) << 10) + (ch - TRAIL_SURROGATE) + 0x10000;
+                ++it_;
+                return;
+            }
+        }
+        cp = REPLACEMENT_CHAR;
     }
-}
 
+    const Char16* it_;
+    const Char16* const last_;
+};
+
+//----------------------------------------------------------------------------------------------------------------
 
 template <class Function> inline
 void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char8
@@ -155,14 +153,14 @@ void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a un
     {
         writeOutput(static_cast<Char8>( (cp >> 12       ) | 0xe0));
         writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80));
-        writeOutput(static_cast<Char8>( (cp & 0x3f      ) | 0x80));
+        writeOutput(static_cast<Char8>( (cp       & 0x3f) | 0x80));
     }
     else if (cp <= CODE_POINT_MAX)
     {
         writeOutput(static_cast<Char8>( (cp >> 18        ) | 0xf0));
         writeOutput(static_cast<Char8>(((cp >> 12) & 0x3f) | 0x80));
         writeOutput(static_cast<Char8>(((cp >> 6)  & 0x3f) | 0x80));
-        writeOutput(static_cast<Char8>( (cp & 0x3f       ) | 0x80));
+        writeOutput(static_cast<Char8>( (cp        & 0x3f) | 0x80));
     }
     else //invalid code point
         codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte utf8
@@ -170,7 +168,7 @@ void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a un
 
 
 inline
-size_t getUtf8Len(unsigned char ch) //ch must be first code unit! returns 0 on error!
+size_t getUtf8Len(Char8 ch) //ch must be first code unit! returns 0 on error!
 {
     if (ch < 0x80)
         return 1;
@@ -184,32 +182,19 @@ size_t getUtf8Len(unsigned char ch) //ch must be first code unit! returns 0 on e
 }
 
 
-template <class CharIterator> inline
-bool decodeTrail(CharIterator& first, CharIterator last, CodePoint& cp) //decode trailing surrogate byte
-{
-    if (++first != last) //trail surrogate expected!
-    {
-        const Char8 ch = static_cast<Char8>(*first);
-        if (ch >> 6 == 0x2) //trail surrogate expected!
-        {
-            cp = (cp << 6) + (ch & 0x3f);
-            return true;
-        }
-    }
-    --first;
-    cp = REPLACEMENT_CHAR;
-    return false;
-}
-
-template <class CharIterator, class Function> inline
-void utf8ToCodePoint(CharIterator first, CharIterator last, Function writeOutput) //"writeOutput" is a unary function taking a CodePoint
+class Utf8Decoder
 {
-    static_assert(sizeof(typename std::iterator_traits<CharIterator>::value_type) == 1, "");
+public:
+    Utf8Decoder(const Char8* str, size_t len) : it_(str), last_(str + len) {}
 
-    for ( ; first != last; ++first)
+    Opt<CodePoint> getNext()
     {
-        CodePoint cp = static_cast<Char8>(*first);
-        switch (getUtf8Len(static_cast<Char8>(cp)))
+        if (it_ == last_)
+            return NoValue();
+
+        const Char8 ch = *it_++;
+        CodePoint cp = ch;
+        switch (getUtf8Len(ch))
         {
             case 0: //invalid utf8 character
                 cp = REPLACEMENT_CHAR;
@@ -218,258 +203,184 @@ void utf8ToCodePoint(CharIterator first, CharIterator last, Function writeOutput
                 break;
             case 2:
                 cp &= 0x1f;
-                decodeTrail(first, last, cp);
+                decodeTrail(cp);
                 break;
             case 3:
                 cp &= 0xf;
-                if (decodeTrail(first, last, cp))
-                    decodeTrail(first, last, cp);
+                if (decodeTrail(cp))
+                    decodeTrail(cp);
                 break;
             case 4:
                 cp &= 0x7;
-                if (decodeTrail(first, last, cp))
-                    if (decodeTrail(first, last, cp))
-                        decodeTrail(first, last, cp);
+                if (decodeTrail(cp))
+                    if (decodeTrail(cp))
+                        decodeTrail(cp);
                 if (cp > CODE_POINT_MAX) cp = REPLACEMENT_CHAR;
                 break;
         }
-        writeOutput(cp);
+        return cp;
     }
-}
-
-
-template <class CharString> inline
-size_t unicodeLength(const CharString& str, char) //utf8
-{
-    using CharType = typename GetCharType<CharString>::Type;
 
-    const CharType*       strFirst  = strBegin(str);
-    const CharType* const strLast   = strFirst + strLength(str);
-
-    size_t len = 0;
-    while (strFirst < strLast) //[!]
+private:
+    bool decodeTrail(CodePoint& cp)
     {
-        ++len;
-        size_t utf8len = getUtf8Len(*strFirst);
-        if (utf8len == 0) ++utf8len; //invalid utf8 character
-        strFirst += utf8len;
+        if (it_ != last_) //continuation byte expected!
+        {
+            const Char8 ch = *it_;
+            if (ch >> 6 == 0x2) //continuation byte (10xxxxxx) expected!
+            {
+                cp = (cp << 6) + (ch & 0x3f);
+                ++it_;
+                return true;
+            }
+        }
+        cp = REPLACEMENT_CHAR;
+        return false;
     }
-    return len;
-}
 
+    const Char8* it_;
+    const Char8* const last_;
+};
 
-template <class WideString> inline
-size_t unicodeLengthWide(const WideString& str, Int2Type<2>) //windows: utf16-wchar_t
-{
-    using CharType = typename GetCharType<WideString>::Type;
+//----------------------------------------------------------------------------------------------------------------
 
-    const CharType*       strFirst = strBegin(str);
-    const CharType* const strLast  = strFirst + strLength(str);
+template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<1>) { codePointToUtf8 (cp, writeOutput); } //UTF8-char
+template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<2>) { codePointToUtf16(cp, writeOutput); } //Windows: UTF16-wchar_t
+template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<4>) { writeOutput(cp); } //other OS: UTF32-wchar_t
 
-    size_t len = 0;
-    while (strFirst < strLast) //[!]
-    {
-        ++len;
-        size_t utf16len = getUtf16Len(*strFirst);
-        if (utf16len == 0) ++utf16len; //invalid utf16 character
-        strFirst += utf16len;
-    }
-    return len;
-}
-
-
-template <class WideString> inline
-size_t unicodeLengthWide(const WideString& str, Int2Type<4>) //other OS: utf32-wchar_t
+template <class CharType, class Function> inline
+void codePointToUtf(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a CharType
 {
-    return strLength(str);
+    return codePointToUtf(cp, writeOutput, Int2Type<sizeof(CharType)>());
 }
 
+//----------------------------------------------------------------------------------------------------------------
 
-template <class WideString> inline
-size_t unicodeLength(const WideString& str, wchar_t)
-{
-    return unicodeLengthWide(str, Int2Type<sizeof(wchar_t)>());
-}
-}
+template <class CharType, int charSize>
+class UtfDecoderImpl;
 
 
-template <class UtfString> inline
-size_t unicodeLength(const UtfString& str) //return number of code points
+template <class CharType>
+class UtfDecoderImpl<CharType, 1> //UTF8-char
 {
-    return implementation::unicodeLength(str, typename GetCharType<UtfString>::Type());
-}
+public:
+    UtfDecoderImpl(const CharType* str, size_t len) : decoder_(reinterpret_cast<const Char8*>(str), len) {}
+    Opt<CodePoint> getNext() { return decoder_.getNext(); }
+private:
+    Utf8Decoder decoder_;
+};
 
 
-namespace implementation
-{
-template <class CharString> inline
-size_t findUnicodePos(const CharString& str, size_t unicodePos, char) //utf8-char
+template <class CharType>
+class UtfDecoderImpl<CharType, 2> //Windows: UTF16-wchar_t
 {
-    using CharType = typename GetCharType<CharString>::Type;
-
-    const CharType* strFirst = strBegin(str);
-    const size_t strLen = strLength(str);
-
-    size_t utfPos = 0;
-    while (unicodePos-- > 0)
-    {
-        if (utfPos >= strLen)
-            return strLen;
+public:
+    UtfDecoderImpl(const CharType* str, size_t len) : decoder_(reinterpret_cast<const Char16*>(str), len) {}
+    Opt<CodePoint> getNext() { return decoder_.getNext(); }
+private:
+    Utf16Decoder decoder_;
+};
 
-        size_t utf8len = getUtf8Len(strFirst[utfPos]);
-        if (utf8len == 0) ++utf8len; //invalid utf8 character
-        utfPos += utf8len;
-    }
-    if (utfPos >= strLen)
-        return strLen;
-    return utfPos;
-}
 
-
-template <class WideString> inline
-size_t findUnicodePosWide(const WideString& str, size_t unicodePos, Int2Type<2>) //windows: utf16-wchar_t
+template <class CharType>
+class UtfDecoderImpl<CharType, 4> //other OS: UTF32-wchar_t
 {
-    using CharType = typename GetCharType<WideString>::Type;
-
-    const CharType* strFirst = strBegin(str);
-    const size_t strLen = strLength(str);
-
-    size_t utfPos = 0;
-    while (unicodePos-- > 0)
+public:
+    UtfDecoderImpl(const CharType* str, size_t len) : it_(reinterpret_cast<const CodePoint*>(str)), last_(it_ + len) {}
+    Opt<CodePoint> getNext()
     {
-        if (utfPos >= strLen)
-            return strLen;
-
-        size_t utf16len = getUtf16Len(strFirst[utfPos]);
-        if (utf16len == 0) ++utf16len; //invalid utf16 character
-        utfPos += utf16len;
+        if (it_ == last_)
+            return NoValue();
+        return *it_++;
     }
-    if (utfPos >= strLen)
-        return strLen;
-    return utfPos;
-}
+private:
+    const CodePoint* it_;
+    const CodePoint* last_;
+};
 
 
-template <class WideString> inline
-size_t findUnicodePosWide(const WideString& str, size_t unicodePos, Int2Type<4>) //other OS: utf32-wchar_t
-{
-    return std::min(strLength(str), unicodePos);
-}
-
-
-template <class UtfString> inline
-size_t findUnicodePos(const UtfString& str, size_t unicodePos, wchar_t)
-{
-    return findUnicodePosWide(str, unicodePos, Int2Type<sizeof(wchar_t)>());
-}
-}
-
-
-template <class UtfString> inline
-size_t findUnicodePos(const UtfString& str, size_t unicodePos) //return position of unicode char in UTF-encoded string
-{
-    return implementation::findUnicodePos(str, unicodePos, typename GetCharType<UtfString>::Type());
+template <class CharType>
+using UtfDecoder = UtfDecoderImpl<CharType, sizeof(CharType)>;
 }
 
 //-------------------------------------------------------------------------------------------
 
-namespace implementation
-{
-template <class WideString, class CharString> inline
-WideString utf8ToWide(const CharString& str, Int2Type<2>) //windows: convert utf8 to utf16-wchar_t
+template <class UtfString> inline
+bool isValidUtf(const UtfString& str)
 {
-    WideString output;
-    utf8ToCodePoint(strBegin(str), strBegin(str) + strLength(str),
-    [&](CodePoint cp) { codePointToUtf16(cp, [&](Char16 c) { output += static_cast<wchar_t>(c); }); });
-    return output;
-}
+    using namespace implementation;
 
+    UtfDecoder<typename GetCharType<UtfString>::Type> decoder(strBegin(str), strLength(str));
+    while (Opt<CodePoint> cp = decoder.getNext())
+        if (*cp == REPLACEMENT_CHAR)
+            return false;
 
-template <class WideString, class CharString> inline
-WideString utf8ToWide(const CharString& str, Int2Type<4>) //other OS: convert utf8 to utf32-wchar_t
-{
-    WideString output;
-    utf8ToCodePoint(strBegin(str), strBegin(str) + strLength(str),
-    [&](CodePoint cp) { output += static_cast<wchar_t>(cp); });
-    return output;
+    return true;
 }
 
 
-template <class CharString, class WideString> inline
-CharString wideToUtf8(const WideString& str, Int2Type<2>) //windows: convert utf16-wchar_t to utf8
+template <class UtfString> inline
+size_t unicodeLength(const UtfString& str) //return number of code points (+ correctly handle broken UTF encoding)
 {
-    CharString output;
-    utf16ToCodePoint(strBegin(str), strBegin(str) + strLength(str),
-    [&](CodePoint cp) { codePointToUtf8(cp, [&](char c) { output += c; }); });
-    return output;
+    size_t uniLen = 0;
+    implementation::UtfDecoder<typename GetCharType<UtfString>::Type> decoder(strBegin(str), strLength(str));
+    while (decoder.getNext())
+        ++uniLen;
+    return uniLen;
 }
 
 
-template <class CharString, class WideString> inline
-CharString wideToUtf8(const WideString& str, Int2Type<4>) //other OS: convert utf32-wchar_t to utf8
+template <class UtfString> inline
+UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t uniPosLast) //return code points in range [uniPosFirst, uniPosLast) as UTF-encoded string
 {
-    CharString output;
-    std::for_each(strBegin(str), strBegin(str) + strLength(str),
-    [&](CodePoint cp) { codePointToUtf8(cp, [&](char c) { output += c; }); });
+    assert(uniPosFirst <= uniPosLast && uniPosLast <= unicodeLength(str));
+    using namespace implementation;
+    using CharType = typename GetCharType<UtfString>::Type;
+    UtfString output;
+    if (uniPosFirst >= uniPosLast) //optimize for empty range
+        return output;
+
+    UtfDecoder<CharType> decoder(strBegin(str), strLength(str));
+    for (size_t uniPos = 0; Opt<CodePoint> cp = decoder.getNext(); ++uniPos) //[!] declaration in condition part of the for-loop
+        if (uniPosFirst <= uniPos)
+        {
+            if (uniPos >= uniPosLast)
+                break;
+            codePointToUtf<CharType>(*cp, [&](CharType c) { output += c; });
+        }
     return output;
 }
-}
 
+//-------------------------------------------------------------------------------------------
 
-template <class CharString> inline
-bool isValidUtf8(const CharString& str)
+namespace implementation
 {
-    using namespace implementation;
-    bool valid = true;
-    utf8ToCodePoint(strBegin(str), strBegin(str) + strLength(str),
-                    [&](CodePoint cp)
-    {
-        if (cp == REPLACEMENT_CHAR)
-            valid = false; //perf: should we use an (expensive) exception for iteration break?
-    });
-    return valid;
-}
-
-
-template <class WideString, class CharString> inline
-WideString utf8ToWide(const CharString& str)
+template <class TargetString, class SourceString> inline
+TargetString utfTo(const SourceString& str, FalseType)
 {
-    static_assert(IsSameType<typename GetCharType<CharString>::Type, char   >::value, "");
-    static_assert(IsSameType<typename GetCharType<WideString>::Type, wchar_t>::value, "");
+    using CharSrc = typename GetCharType<SourceString>::Type;
+    using CharTrg = typename GetCharType<TargetString>::Type;
+    static_assert(sizeof(CharSrc) != sizeof(CharTrg), "no UTF-conversion needed");
 
-    return implementation::utf8ToWide<WideString>(str, Int2Type<sizeof(wchar_t)>());
-}
+    TargetString output;
 
+    UtfDecoder<CharSrc> decoder(strBegin(str), strLength(str));
+    while (Opt<CodePoint> cp = decoder.getNext())
+        codePointToUtf<CharTrg>(*cp, [&](CharTrg c) { output += c; });
 
-template <class CharString, class WideString> inline
-CharString wideToUtf8(const WideString& str)
-{
-    static_assert(IsSameType<typename GetCharType<CharString>::Type, char   >::value, "");
-    static_assert(IsSameType<typename GetCharType<WideString>::Type, wchar_t>::value, "");
-
-    return implementation::wideToUtf8<CharString>(str, Int2Type<sizeof(wchar_t)>());
+    return output;
 }
 
-//-------------------------------------------------------------------------------------------
 
 template <class TargetString, class SourceString> inline
-TargetString utfCvrtTo(const SourceString& str, char, wchar_t) { return utf8ToWide<TargetString>(str); }
-
-template <class TargetString, class SourceString> inline
-TargetString utfCvrtTo(const SourceString& str, wchar_t, char) { return wideToUtf8<TargetString>(str); }
-
-template <class TargetString, class SourceString> inline
-TargetString utfCvrtTo(const SourceString& str, char, char) { return copyStringTo<TargetString>(str); }
+TargetString utfTo(const SourceString& str, TrueType) { return copyStringTo<TargetString>(str); }
+}
 
-template <class TargetString, class SourceString> inline
-TargetString utfCvrtTo(const SourceString& str, wchar_t, wchar_t) { return copyStringTo<TargetString>(str); }
 
 template <class TargetString, class SourceString> inline
-TargetString utfCvrtTo(const SourceString& str)
+TargetString utfTo(const SourceString& str)
 {
-    return utfCvrtTo<TargetString>(str,
-                                   typename GetCharType<SourceString>::Type(),
-                                   typename GetCharType<TargetString>::Type());
+    return implementation::utfTo<TargetString>(str, StaticBool<sizeof(typename GetCharType<SourceString>::Type) == sizeof(typename GetCharType<TargetString>::Type)>());
 }
 }
 
diff --git a/zen/zstring.cpp b/zen/zstring.cpp
index 5f5b1ec8..a936efb5 100755
--- a/zen/zstring.cpp
+++ b/zen/zstring.cpp
@@ -6,11 +6,14 @@
 
 #include "zstring.h"
 #include <stdexcept>
+#include "utf.h"
 
 
 using namespace zen;
 
 /*
+MSDN "Handling Sorting in Your Applications": https://msdn.microsoft.com/en-us/library/windows/desktop/dd318144
+
 Perf test: compare strings 10 mio times; 64 bit build
 -----------------------------------------------------
     string a = "Fjk84$%kgfj$%T\\\\Gffg\\gsdgf\\fgsx----------d-"
@@ -32,3 +35,117 @@ time per call | function
 */
 
 
+
+
+namespace
+{
+int compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen)
+{
+    //- strncasecmp implements ASCII CI-comparison only! => signature is broken for UTF8-input; toupper() similarly doesn't support Unicode
+    //- wcsncasecmp: https://opensource.apple.com/source/Libc/Libc-763.12/string/wcsncasecmp-fbsd.c
+    // => re-implement comparison based on towlower() to avoid memory allocations
+    using namespace zen::implementation;
+
+    UtfDecoder<char> decL(lhs, lhsLen);
+    UtfDecoder<char> decR(rhs, rhsLen);
+    for (;;)
+    {
+        const Opt<CodePoint> cpL = decL.getNext();
+        const Opt<CodePoint> cpR = decR.getNext();
+        if (!cpL || !cpR)
+            return static_cast<int>(!cpR) - static_cast<int>(!cpL);
+
+        static_assert(sizeof(wchar_t) == sizeof(CodePoint), "");
+        const wchar_t charL = ::towlower(static_cast<wchar_t>(*cpL)); //ordering: towlower() converts to higher code points than towupper()
+        const wchar_t charR = ::towlower(static_cast<wchar_t>(*cpR)); //uses LC_CTYPE category of current locale
+        if (charL != charR)
+            return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention!
+        //unsigned underflow is well-defined!
+    }
+}
+}
+
+
+int cmpStringNaturalLinux(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen)
+{
+    const char* const lhsEnd = lhs + lhsLen;
+    const char* const rhsEnd = rhs + rhsLen;
+    /*
+        - compare strings after conceptually creating blocks of whitespace/numbers/text
+        - implement strict weak ordering!
+        - don't follow broken "strnatcasecmp": https://github.com/php/php-src/blob/master/ext/standard/strnatcmp.c
+                1. incorrect non-ASCII CI-comparison 2. incorrect bounds checks
+                3. incorrect trimming of *all* whitespace 4. arbitrary handling of leading 0 only at string begin
+                5. incorrect handling of whitespace following a number 6. code is a mess
+    */
+    for (;;)
+    {
+        if (lhs == lhsEnd || rhs == rhsEnd)
+            return static_cast<int>(lhs != lhsEnd) - static_cast<int>(rhs != rhsEnd); //"nothing" before "something"
+        //note: "something" never would have been condensed to "nothing" further below => can finish evaluation here
+
+        const bool wsL = isWhiteSpace(*lhs);
+        const bool wsR = isWhiteSpace(*rhs);
+        if (wsL != wsR)
+            return static_cast<int>(!wsL) - static_cast<int>(!wsR); //whitespace before non-ws!
+        if (wsL)
+        {
+            ++lhs, ++rhs;
+            while (lhs != lhsEnd && isWhiteSpace(*lhs)) ++lhs;
+            while (rhs != rhsEnd && isWhiteSpace(*rhs)) ++rhs;
+            continue;
+        }
+
+        const bool digitL = isDigit(*lhs);
+        const bool digitR = isDigit(*rhs);
+        if (digitL != digitR)
+            return static_cast<int>(!digitL) - static_cast<int>(!digitR); //number before chars!
+        if (digitL)
+        {
+            while (lhs != lhsEnd && *lhs == '0') ++lhs;
+            while (rhs != rhsEnd && *rhs == '0') ++rhs;
+
+            int rv = 0;
+            for (;; ++lhs, ++rhs)
+            {
+                const bool endL = lhs == lhsEnd || !isDigit(*lhs);
+                const bool endR = rhs == rhsEnd || !isDigit(*rhs);
+                if (endL != endR)
+                    return static_cast<int>(!endL) - static_cast<int>(!endR); //more digits means bigger number
+                if (endL)
+                    break; //same number of digits
+
+                if (rv == 0 && *lhs != *rhs)
+                    rv = *lhs - *rhs; //found first digit difference comparing from left
+            }
+            if (rv != 0)
+                return rv;
+            continue;
+        }
+
+        //compare full chunks of text: consider unicode encoding!
+        const char* textBeginL = lhs++;
+        const char* textBeginR = rhs++; //current char is neither white space nor digit at this point!
+        while (lhs != lhsEnd && !isWhiteSpace(*lhs) && !isDigit(*lhs)) ++lhs;
+        while (rhs != rhsEnd && !isWhiteSpace(*rhs) && !isDigit(*rhs)) ++rhs;
+
+        const int rv = compareNoCaseUtf8(textBeginL, lhs - textBeginL, textBeginR, rhs - textBeginR);
+        if (rv != 0)
+            return rv;
+    }
+}
+
+
+namespace
+{
+}
+
+
+int CmpNaturalSort::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const
+{
+    //auto strL = utfTo<std::string>(Zstring(lhs, lhsLen));
+    //auto strR = utfTo<std::string>(Zstring(rhs, rhsLen));
+    //return cmpStringNaturalLinux(strL.c_str(), strL.size(), strR.c_str(), strR.size());
+    return cmpStringNaturalLinux(lhs, lhsLen, rhs, rhsLen);
+
+}
+\ No newline at end of file
diff --git a/zen/zstring.h b/zen/zstring.h
index 12bda29f..fdb71da0 100755
--- a/zen/zstring.h
+++ b/zen/zstring.h
@@ -19,35 +19,39 @@
 using Zstring = zen::Zbase<Zchar>;
 
 
-int cmpStringNoCase(const wchar_t* lhs, size_t lhsLen, const wchar_t* rhs, size_t rhsLen);
-    int cmpStringNoCase(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen);
-
-template <class S>
-S makeUpperCopy(S str);
-
-
 //Compare filepaths: Windows/OS X does NOT distinguish between upper/lower-case, while Linux DOES
-int cmpFilePath(const wchar_t* lhs, size_t lhsLen, const wchar_t* rhs, size_t rhsLen);
-    int cmpFilePath(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen);
+struct CmpFilePath
+{
+    int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const;
+};
 
+struct CmpNaturalSort
+{
+    int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const;
+};
 
-template <class S, class T> inline
-bool equalFilePath(const S& lhs, const T& rhs) { using namespace zen; return cmpFilePath(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) == 0;  }
 
 struct LessFilePath
 {
-    template <class S, class T>
-    bool operator()(const S& lhs, const T& rhs) const { using namespace zen; return cmpFilePath(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
+    template <class S> //don't support heterogeneous input! => use as container predicate only!
+    bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpFilePath()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
 };
 
-
-struct LessNoCase
+struct LessNaturalSort
 {
-    template <class S, class T>
-    bool operator()(const S& lhs, const T& rhs) const { using namespace zen; return cmpStringNoCase(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
+    template <class S> //don't support heterogeneous input! => use as container predicate only!
+    bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpNaturalSort()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
 };
 
 
+template <class S>
+S makeUpperCopy(S str);
+
+
+template <class S, class T> inline
+bool equalFilePath(const S& lhs, const T& rhs) { using namespace zen; return strEqual(lhs, rhs, CmpFilePath());  }
+
+
 inline
 Zstring appendSeparator(Zstring path) //support rvalue references!
 {
@@ -63,35 +67,6 @@ Zstring getFileExtension(const Zstring& filePath)
 }
 
 
-template <class S, class T> inline
-bool ciEqual(const S& lhs, const T& rhs) { using namespace zen; return cmpStringNoCase(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) == 0;  }
-
-
-template <class S, class T> inline
-bool ciStartsWith(const S& str, const T& prefix)
-{
-    using namespace zen;
-    const size_t pfLen = strLength(prefix);
-    if (strLength(str) < pfLen)
-        return false;
-
-    return cmpStringNoCase(strBegin(str), pfLen, strBegin(prefix), pfLen) == 0;
-}
-
-
-template <class S, class T> inline
-bool ciEndsWith(const S& str, const T& postfix)
-{
-    using namespace zen;
-    const size_t strLen = strLength(str);
-    const size_t pfLen  = strLength(postfix);
-    if (strLen < pfLen)
-        return false;
-
-    return cmpStringNoCase(strBegin(str) + strLen - pfLen, pfLen, strBegin(postfix), pfLen) == 0;
-}
-
-
 template <class S, class T, class U>
 S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm);
 
@@ -110,37 +85,11 @@ inline
 void makeUpperInPlace(char* str, size_t strLen)
 {
     std::for_each(str, str + strLen, [](char& c) { c = std::toupper(static_cast<unsigned char>(c)); }); //locale-dependent!
-    //result of toupper() is an unsigned char mapped to int range, so the char representation is in the last 8 bits and we need not care about signedness!
+    //result of toupper() is an unsigned char mapped to int range: the char representation is in the last 8 bits and we need not care about signedness!
     //this should work for UTF-8, too: all chars >= 128 are mapped upon themselves!
 }
 
 
-inline
-int cmpStringNoCase(const wchar_t* lhs, size_t lhsLen, const wchar_t* rhs, size_t rhsLen)
-{
-    assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls!
-    assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); //
-
-    const int rv = ::wcsncasecmp(lhs, rhs, std::min(lhsLen, rhsLen)); //locale-dependent!
-    if (rv != 0)
-        return rv;
-    return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
-inline
-int cmpStringNoCase(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen)
-{
-    assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls!
-    assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); //
-
-    const int rv = ::strncasecmp(lhs, rhs, std::min(lhsLen, rhsLen)); //locale-dependent!
-    if (rv != 0)
-        return rv;
-    return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
 template <class S> inline
 S makeUpperCopy(S str)
 {
@@ -153,20 +102,7 @@ S makeUpperCopy(S str)
 
 
 inline
-int cmpFilePath(const wchar_t* lhs, size_t lhsLen, const wchar_t* rhs, size_t rhsLen)
-{
-    assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls!
-    assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); //
-
-    const int rv = std::wcsncmp(lhs, rhs, std::min(lhsLen, rhsLen));
-    if (rv != 0)
-        return rv;
-    return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
-inline
-int cmpFilePath(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen)
+int CmpFilePath::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const
 {
     assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls!
     assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); //
@@ -214,6 +150,7 @@ S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm)
     }
 }
 
+    int cmpStringNaturalLinux(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen);
 
 //---------------------------------------------------------------------------
 //ZEN macro consistency checks:
author	Daniel Wilhelm <shieldwed@outlook.com>	2017-03-12 22:00:35 -0600
committer	Daniel Wilhelm <shieldwed@outlook.com>	2017-03-12 22:00:35 -0600
commit	3ba62ef1de77153e5a8c7bad4451b96f6a1678b0 (patch)
tree	e6e69717e394a528a2e2aca3af036d4befaa9658 /zen
parent	8.9 (diff)
download	FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.tar.gz FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.tar.bz2 FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.zip