add upstream 11.24

author: B. Stack <bgstack15@gmail.com> 2022-09-07 14:49:22 -0400
committer: B. Stack <bgstack15@gmail.com> 2022-09-07 14:49:22 -0400
commit: 47c88c433d17948fab1d8e1d76121a72fe5938cb (patch)
tree: fbc1dea58a6b28f1af4a9e9b2bc8e3e1d23b2103 /zen
parent: Merge branch 'b11.23' into 'master' (diff)
download: FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.tar.gz
FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.tar.bz2
FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.zip
20 files changed, 345 insertions, 316 deletions
diff --git a/zen/build_info.h b/zen/build_info.h
index b06c1302..86ff303c 100644
--- a/zen/build_info.h
+++ b/zen/build_info.h
@@ -26,6 +26,7 @@ enum class BuildArch
 static_assert((BuildArch::program == BuildArch::bit32 ? 32 : 64) == sizeof(void*) * 8);
 
 
+//harmonize with os_arch enum in update_checks table:
 constexpr const char* cpuArchName = BuildArch::program == BuildArch::bit32 ? "i686": "x86-64";
 
 }
diff --git a/zen/file_access.cpp b/zen/file_access.cpp
index 6a62f671..2e119e87 100644
--- a/zen/file_access.cpp
+++ b/zen/file_access.cpp
@@ -70,7 +70,7 @@ std::optional<ItemType> zen::itemStillExists(const Zstring& itemPath) //throw Fi
             try
             {
                 traverseFolder(*parentPath,
-                [&](const    FileInfo& fi) { if (fi.itemName == itemName) throw ItemType::file;    },
+                [&](const    FileInfo& fi) { if (fi.itemName == itemName) throw ItemType::file;    }, //case-sensitive! itemPath must be normalized!
                 [&](const  FolderInfo& fi) { if (fi.itemName == itemName) throw ItemType::folder;  },
                 [&](const SymlinkInfo& si) { if (si.itemName == itemName) throw ItemType::symlink; },
                 [](const std::wstring& errorMsg) { throw FileError(errorMsg); });
@@ -233,7 +233,6 @@ void zen::removeDirectoryPlainRecursion(const Zstring& dirPath) //throw FileErro
 
 namespace
 {
-
 /* Usage overview: (avoid circular pattern!)
 
   moveAndRenameItem() --> moveAndRenameFileSub()
@@ -319,18 +318,20 @@ void setWriteTimeNative(const Zstring& itemPath, const timespec& modTime, ProcSy
             => utimens: https://github.com/coreutils/gnulib/blob/master/lib/utimens.c
         touch: https://github.com/coreutils/coreutils/blob/master/src/touch.c
             => fdutimensat: https://github.com/coreutils/gnulib/blob/master/lib/fdutimensat.c                  */
-    timespec newTimes[2] = {};
-    newTimes[0].tv_sec = ::time(nullptr); //access time; don't use UTIME_NOW/UTIME_OMIT: more bugs! https://freefilesync.org/forum/viewtopic.php?t=1701
-    newTimes[1] = modTime; //modification time
+    const timespec newTimes[2]
+    {
+        {.tv_sec = ::time(nullptr)}, //access time; don't use UTIME_NOW/UTIME_OMIT: more bugs! https://freefilesync.org/forum/viewtopic.php?t=1701
+        modTime,
+    };
     //test: even modTime == 0 is correctly applied (no NOOP!) test2: same behavior for "utime()"
 
     //hell knows why files on gvfs-mounted Samba shares fail to open(O_WRONLY) returning EOPNOTSUPP:
     //https://freefilesync.org/forum/viewtopic.php?t=2803 => utimensat() works (but not for gvfs SFTP)
-    if (::utimensat(AT_FDCWD, itemPath.c_str(), newTimes, procSl == ProcSymlink::direct ? AT_SYMLINK_NOFOLLOW : 0) == 0)
+    if (::utimensat(AT_FDCWD, itemPath.c_str(), newTimes, procSl == ProcSymlink::asLink ? AT_SYMLINK_NOFOLLOW : 0) == 0)
         return;
     try
     {
-        if (procSl == ProcSymlink::direct)
+        if (procSl == ProcSymlink::asLink)
             try
             {
                 if (getItemType(itemPath) == ItemType::symlink) //throw FileError
@@ -554,7 +555,7 @@ void zen::copySymlink(const Zstring& sourcePath, const Zstring& targetPath) //th
     if (::lstat(sourcePath.c_str(), &sourceInfo) != 0)
         THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file attributes of %x."), L"%x", fmtPath(sourcePath)), "lstat");
 
-    setWriteTimeNative(targetPath, sourceInfo.st_mtim, ProcSymlink::direct); //throw FileError
+    setWriteTimeNative(targetPath, sourceInfo.st_mtim, ProcSymlink::asLink); //throw FileError
 }
 
 
diff --git a/zen/file_access.h b/zen/file_access.h
index 17c47731..f6a02edc 100644
--- a/zen/file_access.h
+++ b/zen/file_access.h
@@ -29,12 +29,7 @@ using FileIndex = ino_t;
 using FileTimeNative = timespec;
 
 inline time_t nativeFileTimeToTimeT(const timespec& ft) { return ft.tv_sec; } //follow Windows Explorer and always round down!
-inline timespec timetToNativeFileTime(time_t utcTime)
-{
-    timespec natTime = {};
-    natTime.tv_sec = utcTime;
-    return natTime;
-}
+inline timespec timetToNativeFileTime(time_t utcTime) { return {.tv_sec = utcTime}; }
 
 enum class ItemType
 {
@@ -44,15 +39,14 @@ enum class ItemType
 };
 //(hopefully) fast: does not distinguish between error/not existing
 ItemType getItemType(const Zstring& itemPath); //throw FileError
-//execute potentially SLOW folder traversal but distinguish error/not existing
-//  assumes: - base path still exists
-//           - all child item path parts must correspond to folder traversal
+//execute potentially SLOW folder traversal but distinguish error/not existing:
+//  - all child item path parts must correspond to folder traversal
 //  => we can conclude whether an item is *not* existing anymore by doing a *case-sensitive* name search => potentially SLOW!
 std::optional<ItemType> itemStillExists(const Zstring& itemPath); //throw FileError
 
 enum class ProcSymlink
 {
-    direct,
+    asLink,
     follow
 };
 void setFileTime(const Zstring& filePath, time_t modTime, ProcSymlink procSl); //throw FileError
diff --git a/zen/file_path.cpp b/zen/file_path.cpp
index 716dd8de..f5c207f3 100644
--- a/zen/file_path.cpp
+++ b/zen/file_path.cpp
@@ -13,11 +13,12 @@ std::optional<PathComponents> zen::parsePathComponents(const Zstring& itemPath)
 {
     auto doParse = [&](int sepCountVolumeRoot, bool rootWithSep) -> std::optional<PathComponents>
     {
+        assert(sepCountVolumeRoot > 0);
         const Zstring itemPathPf = appendSeparator(itemPath); //simplify analysis of root without separator, e.g. \\server-name\share
-        int sepCount = 0;
+
         for (auto it = itemPathPf.begin(); it != itemPathPf.end(); ++it)
             if (*it == FILE_NAME_SEPARATOR)
-                if (++sepCount == sepCountVolumeRoot)
+                if (--sepCountVolumeRoot == 0)
                 {
                     Zstring rootPath(itemPathPf.begin(), rootWithSep ? it + 1 : it);
 
@@ -89,7 +90,7 @@ bool zen::isValidRelPath(const Zstring& relPath)
     if constexpr (FILE_NAME_SEPARATOR != Zstr('\\')) if (contains(relPath, Zstr('\\'))) return false;
 
     const Zchar doubleSep[] = {FILE_NAME_SEPARATOR, FILE_NAME_SEPARATOR, 0};
-    return !startsWith(relPath, FILE_NAME_SEPARATOR)&& !endsWith(relPath, FILE_NAME_SEPARATOR)&&
+    return !startsWith(relPath, FILE_NAME_SEPARATOR) && !endsWith(relPath, FILE_NAME_SEPARATOR) &&
            !contains(relPath, doubleSep);
 }
 
diff --git a/zen/file_path.h b/zen/file_path.h
index 4a85514b..85af251d 100644
--- a/zen/file_path.h
+++ b/zen/file_path.h
@@ -40,7 +40,7 @@ std::weak_ordering compareNativePath(const Zstring& lhs, const Zstring& rhs);
 
 inline bool equalNativePath(const Zstring& lhs, const Zstring& rhs) { return compareNativePath(lhs, rhs) == std::weak_ordering::equivalent; }
 
-struct LessNativePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNativePath(lhs, rhs)); } };
+struct LessNativePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNativePath(lhs, rhs) < 0; } };
 //------------------------------------------------------------------------------------------
 
 
diff --git a/zen/file_traverser.h b/zen/file_traverser.h
index cb7782d6..11c3eaa0 100644
--- a/zen/file_traverser.h
+++ b/zen/file_traverser.h
@@ -17,7 +17,7 @@ struct FileInfo
     Zstring itemName;
     Zstring fullPath;
     uint64_t fileSize = 0; //[bytes]
-    time_t modTime = 0; //number of seconds since Jan. 1st 1970 UTC
+    time_t modTime = 0; //number of seconds since Jan. 1st 1970 GMT
 };
 
 struct FolderInfo
@@ -30,7 +30,7 @@ struct SymlinkInfo
 {
     Zstring itemName;
     Zstring fullPath;
-    time_t modTime = 0; //number of seconds since Jan. 1st 1970 UTC
+    time_t modTime = 0; //number of seconds since Jan. 1st 1970 GMT
 };
 
 //- non-recursive
diff --git a/zen/format_unit.cpp b/zen/format_unit.cpp
index 2aa6e094..8b3fccfe 100644
--- a/zen/format_unit.cpp
+++ b/zen/format_unit.cpp
@@ -168,12 +168,27 @@ std::wstring zen::formatNumber(int64_t n)
 
 std::wstring zen::formatUtcToLocalTime(time_t utcTime)
 {
-    auto errorMsg = [&] { return _("Error") + L" (time_t: " + numberTo<std::wstring>(utcTime) + L')'; };
+    auto fmtFallback = [utcTime] //don't take "no" for an answer!
+    {
+        if (const TimeComp tc = getUtcTime(utcTime);
+            tc != TimeComp())
+        {
+            wchar_t buf[128] = {}; //the only way to format abnormally large or invalid modTime: std::strftime() will fail!
+            if (const int rv = std::swprintf(buf, std::size(buf), L"%d-%02d-%02d  %02d:%02d:%02d GMT", tc.year, tc.month, tc.day, tc.hour, tc.minute, tc.second);
+                0 < rv && rv < std::ssize(buf))
+                return std::wstring(buf, rv);
+        }
+
+        return L"time_t = " + numberTo<std::wstring>(utcTime);
+    };
 
     const TimeComp& loc = getLocalTime(utcTime); //returns TimeComp() on error
 
-    std::wstring dateString = utfTo<std::wstring>(formatTime(Zstr("%x  %X"), loc));
-    return !dateString.empty() ? dateString : errorMsg();
+    /*const*/ std::wstring dateTimeFmt = utfTo<std::wstring>(formatTime(Zstr("%x  %X"), loc));
+    if (dateTimeFmt.empty())
+        return fmtFallback();
+
+    return dateTimeFmt;
 }
 
 
@@ -188,9 +203,9 @@ WeekDay impl::getFirstDayOfWeekImpl() //throw SysError
     const char* firstDay = ::nl_langinfo(_NL_TIME_FIRST_WEEKDAY); //[1-Sunday, 7-Saturday]
     ASSERT_SYSERROR(firstDay && 1 <= *firstDay && *firstDay <= 7);
 
-    const int weekDayStartSunday = *firstDay;
-    const int weekDayStartMonday = (weekDayStartSunday - 1 + 6) % 7; //+6 == -1 in Z_7
-    // [0-Monday, 6-Sunday]
+    const int weekDayStartSunday = *firstDay;                        //[1-Sunday, 7-Saturday]
+    const int weekDayStartMonday = (weekDayStartSunday - 2 + 7) % 7; //[0-Monday, 6-Sunday]  7 == 0 in Z_7
+
     return static_cast<WeekDay>(weekDayStartMonday);
 }
 
diff --git a/zen/process_exec.cpp b/zen/process_exec.cpp
index 6b670508..df41a627 100644
--- a/zen/process_exec.cpp
+++ b/zen/process_exec.cpp
@@ -176,8 +176,7 @@ std::pair<int /*exit code*/, std::string> processExecuteImpl(const Zstring& file
 
             const auto waitTimeMs = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - now).count();
 
-            timeval tv = {};
-            tv.tv_sec  = static_cast<long>(waitTimeMs / 1000);
+            timeval tv{.tv_sec = static_cast<long>(waitTimeMs / 1000)};
             tv.tv_usec = static_cast<long>(waitTimeMs - tv.tv_sec * 1000) * 1000;
 
             fd_set rfd = {}; //includes FD_ZERO
diff --git a/zen/resolve_path.cpp b/zen/resolve_path.cpp
index 357dab6a..99e2f6c6 100644
--- a/zen/resolve_path.cpp
+++ b/zen/resolve_path.cpp
@@ -9,7 +9,7 @@
 #include "thread.h"
 #include "file_access.h"
 
-#include <zen/sys_info.h>
+    #include <zen/sys_info.h>
     //    #include <stdlib.h> //getenv()
     #include <unistd.h> //getuid()
     #include <pwd.h>    //getpwuid_r()
@@ -63,16 +63,16 @@ Zstring resolveRelativePath(const Zstring& relativePath)
             https://www.gnu.org/software/bash/manual/html_node/Tilde-Expansion.html               */
         if (startsWith(pathTmp, "~/") || pathTmp == "~")
         {
-                try
-                {
-                    const Zstring& homePath = getUserHome(); //throw FileError
+            try
+            {
+                const Zstring& homePath = getUserHome(); //throw FileError
 
                 if (startsWith(pathTmp, "~/"))
                     pathTmp = appendPath(homePath, pathTmp.c_str() + 2);
                 else //pathTmp == "~"
                     pathTmp = homePath;
-                }
-                catch (FileError&) {}
+            }
+            catch (FileError&) {}
             //else: error! no further processing!
         }
         else
diff --git a/zen/socket.h b/zen/socket.h
index 5ece29f8..d9517bd8 100644
--- a/zen/socket.h
+++ b/zen/socket.h
@@ -33,11 +33,13 @@ class Socket //throw SysError
 public:
     Socket(const Zstring& server, const Zstring& serviceName) //throw SysError
     {
-        ::addrinfo hints = {};
-        hints.ai_socktype = SOCK_STREAM; //we *do* care about this one!
-        hints.ai_flags = AI_ADDRCONFIG; //save a AAAA lookup on machines that can't use the returned data anyhow
+        const addrinfo hints
+        {
+            .ai_flags = AI_ADDRCONFIG, //save a AAAA lookup on machines that can't use the returned data anyhow
+            .ai_socktype = SOCK_STREAM, //we *do* care about this one!
+        };
 
-        ::addrinfo* servinfo = nullptr;
+        addrinfo* servinfo = nullptr;
         ZEN_ON_SCOPE_EXIT(if (servinfo) ::freeaddrinfo(servinfo));
 
         const int rcGai = ::getaddrinfo(server.c_str(), serviceName.c_str(), &hints, &servinfo);
diff --git a/zen/stl_tools.h b/zen/stl_tools.h
index 2726a09d..66af8551 100644
--- a/zen/stl_tools.h
+++ b/zen/stl_tools.h
@@ -68,10 +68,10 @@ template <class Iterator, class T, class CompLess>
 Iterator binarySearch(Iterator first, Iterator last, const T& value, CompLess less);
 
 //read-only variant of std::merge; input: two sorted ranges
-template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly>
+template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly, class Compare>
 void mergeTraversal(Iterator first1, Iterator last1,
                     Iterator first2, Iterator last2,
-                    FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro);
+                    FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro, Compare compare);
 
 //why, oh why is there no std::optional<T>::get()???
 template <class T> inline       T* get(      std::optional<T>& opt) { return opt ? &*opt : nullptr; }
@@ -255,31 +255,32 @@ BidirectionalIterator1 searchLast(const BidirectionalIterator1 first1,       Bid
 //---------------------------------------------------------------------------------------
 
 //read-only variant of std::merge; input: two sorted ranges
-template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly> inline
-void mergeTraversal(Iterator first1, Iterator last1,
-                    Iterator first2, Iterator last2,
-                    FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro)
+template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly, class Compare> inline
+void mergeTraversal(Iterator firstL, Iterator lastL,
+                    Iterator firstR, Iterator lastR,
+                    FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro, Compare compare)
 {
-    auto itL = first1;
-    auto itR = first2;
+    auto itL = firstL;
+    auto itR = firstR;
 
-    auto finishLeft  = [&] { std::for_each(itL, last1, lo); };
-    auto finishRight = [&] { std::for_each(itR, last2, ro); };
+    auto finishLeft  = [&] { std::for_each(itL, lastL, lo); };
+    auto finishRight = [&] { std::for_each(itR, lastR, ro); };
 
-    if (itL == last1) return finishRight();
-    if (itR == last2) return finishLeft ();
+    if (itL == lastL) return finishRight();
+    if (itR == lastR) return finishLeft ();
 
     for (;;)
-        if (itL->first < itR->first)
+        if (const std::weak_ordering cmp = compare(*itL, *itR);
+            cmp < 0)
         {
             lo(*itL);
-            if (++itL == last1)
+            if (++itL == lastL)
                 return finishRight();
         }
-        else if (itR->first < itL->first)
+        else if (cmp > 0)
         {
             ro(*itR);
-            if (++itR == last2)
+            if (++itR == lastR)
                 return finishLeft();
         }
         else
@@ -287,8 +288,8 @@ void mergeTraversal(Iterator first1, Iterator last1,
             bo(*itL, *itR);
             ++itL; //
             ++itR; //increment BOTH before checking for end of range!
-            if (itL == last1) return finishRight();
-            if (itR == last2) return finishLeft ();
+            if (itL == lastL) return finishRight();
+            if (itR == lastR) return finishLeft ();
             //simplify loop by placing both EOB checks at the beginning? => slightly slower
         }
 }
diff --git a/zen/string_base.h b/zen/string_base.h
index ace870b9..e18a0f16 100644
--- a/zen/string_base.h
+++ b/zen/string_base.h
@@ -312,9 +312,10 @@ template <class Char, template <class> class SP>        bool operator==(const Zb
 template <class Char, template <class> class SP>        bool operator==(const Zbase<Char, SP>& lhs, const Char*            rhs);
 template <class Char, template <class> class SP> inline bool operator==(const Char*            lhs, const Zbase<Char, SP>& rhs) { return operator==(rhs, lhs); }
 
-template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs);
-template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Char*            rhs);
-template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Char*            lhs, const Zbase<Char, SP>& rhs);
+//follow convention + compare by unsigned char; alternative: std::lexicographical_compare_three_way + reinterpret_cast<const std::make_unsigned_t<Char>*>()
+template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs) { return compareString(lhs, rhs); }
+template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Char*            rhs) { return compareString(lhs, rhs); }
+template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Char*            lhs, const Zbase<Char, SP>& rhs) { return compareString(lhs, rhs); }
 
 template <class Char, template <class> class SP> inline Zbase<Char, SP> operator+(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs) { return Zbase<Char, SP>(lhs) += rhs; }
 template <class Char, template <class> class SP> inline Zbase<Char, SP> operator+(const Zbase<Char, SP>& lhs, const Char*            rhs) { return Zbase<Char, SP>(lhs) += rhs; }
@@ -495,30 +496,6 @@ bool operator==(const Zbase<Char, SP>& lhs, const Char* rhs)
 
 
 template <class Char, template <class> class SP> inline
-std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs)
-{
-    return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(),  //respect embedded 0
-                                                  rhs.begin(), rhs.end()); //
-}
-
-
-template <class Char, template <class> class SP> inline
-std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Char* rhs)
-{
-    return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), //respect embedded 0
-                                                  rhs, rhs + strLength(rhs));
-}
-
-
-template <class Char, template <class> class SP> inline
-std::strong_ordering operator<=>(const Char* lhs, const Zbase<Char, SP>& rhs)
-{
-    return std::lexicographical_compare_three_way(lhs, lhs + strLength(lhs),
-                                                  rhs.begin(), rhs.end()); //respect embedded 0
-}
-
-
-template <class Char, template <class> class SP> inline
 size_t Zbase<Char, SP>::length() const
 {
     return SP<Char>::length(rawStr_);
diff --git a/zen/string_tools.h b/zen/string_tools.h
index d3f35ce8..cafff3d5 100644
--- a/zen/string_tools.h
+++ b/zen/string_tools.h
@@ -41,7 +41,7 @@ template <class S, class T> bool endsWithAsciiNoCase(const S& str, const T& post
 template <class S, class T> bool equalString     (const S& lhs, const T& rhs);
 template <class S, class T> bool equalAsciiNoCase(const S& lhs, const T& rhs);
 
-//template <class S, class T> std::strong_ordering compareString(const S& lhs, const T& rhs);
+template <class S, class T> std::strong_ordering compareString(const S& lhs, const T& rhs);
 template <class S, class T> std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!)
 
 //STL container predicates for std::map, std::unordered_set/map
@@ -269,10 +269,12 @@ bool equalAsciiNoCase(const S& lhs, const T& rhs)
 }
 
 
-#if 0
-//support embedded 0, unlike strncmp/wcsncmp:
+namespace impl
+{
+//support embedded 0 (unlike strncmp/wcsncmp) + compare unsigned[!] char
 inline std::strong_ordering strcmpWithNulls(const char*    ptr1, const char*    ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; }
 inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; }
+}
 
 template <class S, class T> inline
 std::strong_ordering compareString(const S& lhs, const T& rhs)
@@ -280,13 +282,12 @@ std::strong_ordering compareString(const S& lhs, const T& rhs)
     const size_t lhsLen = strLength(lhs);
     const size_t rhsLen = strLength(rhs);
 
-    //length check *after* strcmpWithNulls(): we DO care about natural ordering: e.g. for "compareString(getUpperCase(lhs), getUpperCase(rhs))"
+    //length check *after* strcmpWithNulls(): we DO care about natural ordering
     if (const std::strong_ordering cmp = impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), std::min(lhsLen, rhsLen));
         cmp != std::strong_ordering::equal)
         return cmp;
     return lhsLen <=> rhsLen;
 }
-#endif
 
 
 template <class S, class T> inline
@@ -587,7 +588,7 @@ struct CopyStringToString
     T copy(const S& src) const
     {
         static_assert(!std::is_same_v<std::decay_t<S>, std::decay_t<T>>);
-        return T(strBegin(src), strLength(src));
+        return {strBegin(src), strLength(src)};
     }
 };
 
@@ -626,11 +627,10 @@ S printNumber(const T& format, const Num& number) //format a single number using
 #endif
     static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>);
 
-    const int BUFFER_SIZE = 128;
-    GetCharTypeT<S> buffer[BUFFER_SIZE]; //zero-initialize?
-    const int charsWritten = impl::saferPrintf(buffer, BUFFER_SIZE, strBegin(format), number);
+    GetCharTypeT<S> buf[128]; //zero-initialize?
+    const int charsWritten = impl::saferPrintf(buf, std::size(buf), strBegin(format), number);
 
-    return 0 < charsWritten && charsWritten < BUFFER_SIZE ? S(buffer, charsWritten) : S();
+    return 0 < charsWritten && charsWritten < std::ssize(buf) ? S(buf, charsWritten) : S();
 }
 
 
@@ -944,7 +944,7 @@ Num hashString(const S& str)
 
 struct StringHash
 {
-    using is_transparent = int; //allow heterogenous lookup!
+    using is_transparent = int; //enable heterogeneous lookup!
 
     template <class String>
     size_t operator()(const String& str) const { return hashString<size_t>(str); }
@@ -953,7 +953,7 @@ struct StringHash
 
 struct StringEqual
 {
-    using is_transparent = int; //allow heterogenous lookup!
+    using is_transparent = int; //enable heterogeneous lookup!
 
     template <class String1, class String2>
     bool operator()(const String1& lhs, const String2& rhs) const { return equalString(lhs, rhs); }
@@ -963,7 +963,7 @@ struct StringEqual
 struct LessAsciiNoCase
 {
     template <class String>
-    bool operator()(const String& lhs, const String& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); }
+    bool operator()(const String& lhs, const String& rhs) const { return compareAsciiNoCase(lhs, rhs) < 0; }
 };
 
 
diff --git a/zen/string_traits.h b/zen/string_traits.h
index 1a4f4740..31c8c12c 100644
--- a/zen/string_traits.h
+++ b/zen/string_traits.h
@@ -105,8 +105,8 @@ class StringTraits
 public:
     enum
     {
-        isStringClass = hasMemberType_value_type<CleanType> &&
-                        hasMember_c_str         <CleanType> &&
+        isStringClass = hasMemberType_value_type<CleanType>&&
+                        hasMember_c_str         <CleanType>&&
                         hasMember_length        <CleanType>
     };
 
diff --git a/zen/sys_info.cpp b/zen/sys_info.cpp
index bc1bfe62..c57464bc 100644
--- a/zen/sys_info.cpp
+++ b/zen/sys_info.cpp
@@ -111,16 +111,20 @@ ComputerModel zen::getComputerModel() //throw FileError
     {
         auto tryGetInfo = [](const Zstring& filePath)
         {
-            if (!fileAvailable(filePath))
-                return std::wstring();
             try
             {
                 const std::string stream = getFileContent(filePath, nullptr /*notifyUnbufferedIO*/); //throw FileError
                 return utfTo<std::wstring>(trimCpy(stream));
             }
-            catch (const FileError& e) { throw SysError(replaceCpy(e.toString(), L"\n\n", L'\n')); } //errors should be further enriched by context info => SysError
+            catch (FileError&)
+            {
+                if (!itemStillExists(filePath)) //throw FileError
+                    return std::wstring();
+
+                throw;
+            }
         };
-        cm.model  = tryGetInfo("/sys/devices/virtual/dmi/id/product_name"); //throw SysError
+        cm.model  = tryGetInfo("/sys/devices/virtual/dmi/id/product_name"); //throw FileError
         cm.vendor = tryGetInfo("/sys/devices/virtual/dmi/id/sys_vendor");   //
 
         //clean up:
diff --git a/zen/thread.h b/zen/thread.h
index 42fba281..abdc6da0 100644
--- a/zen/thread.h
+++ b/zen/thread.h
@@ -445,7 +445,7 @@ private:
         activeCondition_ = cv;
     }
 
-    std::atomic<bool> stopRequested_{false}; //std:atomic is uninitialized by default!!!
+    std::atomic<bool> stopRequested_{false}; //std::atomic is uninitialized by default!!!
     //"The default constructor is trivial: no initialization takes place other than zero initialization of static and thread-local objects."
 
     std::condition_variable* activeCondition_ = nullptr;
diff --git a/zen/time.h b/zen/time.h
index c2c10fd5..376765be 100644
--- a/zen/time.h
+++ b/zen/time.h
@@ -83,30 +83,32 @@ std::tm toClibTimeComponents(const TimeComp& tc)
            0 <= tc.minute && tc.minute <= 59 &&
            0 <= tc.second && tc.second <= 61);
 
-    std::tm ctc = {};
-    ctc.tm_year  = tc.year - 1900; //years since 1900
-    ctc.tm_mon   = tc.month - 1;   //0-11
-    ctc.tm_mday  = tc.day;         //1-31
-    ctc.tm_hour  = tc.hour;        //0-23
-    ctc.tm_min   = tc.minute;      //0-59
-    ctc.tm_sec   = tc.second;      //0-60 (including leap second)
-    ctc.tm_isdst = -1;             //> 0 if DST is active, == 0 if DST is not active, < 0 if the information is not available
-    //ctc.tm_wday
-    //ctc.tm_yday
-    return ctc;
+    return
+    {
+        .tm_sec   = tc.second,      //0-60 (including leap second)
+        .tm_min   = tc.minute,      //0-59
+        .tm_hour  = tc.hour,        //0-23
+        .tm_mday  = tc.day,         //1-31
+        .tm_mon   = tc.month - 1,   //0-11
+        .tm_year  = tc.year - 1900, //years since 1900
+        .tm_isdst = -1,             //> 0 if DST is active, == 0 if DST is not active, < 0 if the information is not available
+        //.tm_wday
+        //.tm_yday
+    };
 }
 
 inline
 TimeComp toZenTimeComponents(const std::tm& ctc)
 {
-    TimeComp tc;
-    tc.year   = ctc.tm_year + 1900;
-    tc.month  = ctc.tm_mon + 1;
-    tc.day    = ctc.tm_mday;
-    tc.hour   = ctc.tm_hour;
-    tc.minute = ctc.tm_min;
-    tc.second = ctc.tm_sec;
-    return tc;
+    return
+    {
+        .year   = ctc.tm_year + 1900,
+        .month  = ctc.tm_mon + 1,
+        .day    = ctc.tm_mday,
+        .hour   = ctc.tm_hour,
+        .minute = ctc.tm_min,
+        .second = ctc.tm_sec,
+    };
 }
 
 
@@ -235,12 +237,12 @@ std::pair<time_t, bool /*success*/> localToTimeT(const TimeComp& tc) //convert l
 
     const int cycles400 = numeric::intDivFloor(ctc.tm_year + 1900 - 1971/*[!]*/, 400); //see utcToTimeT()
     //1971: ensures resulting time_t >= 0 after time zone, DST adaption, or std::mktime will fail on Windows!
-    ctc.tm_year -= 400 * cycles400;                                       
+    ctc.tm_year -= 400 * cycles400;
 
     const time_t locTime = std::mktime(&ctc);
     if (locTime == -1)
         return {};
-    
+
     assert(locTime > 0);
     return {locTime + secsPer400Years * cycles400, true};
 }
diff --git a/zen/utf.h b/zen/utf.h
index 9c9cf7d1..ca231602 100644
--- a/zen/utf.h
+++ b/zen/utf.h
@@ -7,8 +7,6 @@
 #ifndef UTF_H_01832479146991573473545
 #define UTF_H_01832479146991573473545
 
-//#include <cstdint>
-//#include <iterator>
 #include "string_tools.h" //copyStringTo
 
 
@@ -45,8 +43,8 @@ using CodePoint = uint32_t;
 using Char16    = uint16_t;
 using Char8     = uint8_t;
 
-const CodePoint LEAD_SURROGATE      = 0xd800;
-const CodePoint TRAIL_SURROGATE     = 0xdc00; //== LEAD_SURROGATE_MAX + 1
+const CodePoint LEAD_SURROGATE      = 0xd800; //1101 1000 0000 0000    LEAD_SURROGATE_MAX = TRAIL_SURROGATE - 1
+const CodePoint TRAIL_SURROGATE     = 0xdc00; //1101 1100 0000 0000
 const CodePoint TRAIL_SURROGATE_MAX = 0xdfff;
 
 const CodePoint REPLACEMENT_CHAR    = 0xfffd;
@@ -62,31 +60,17 @@ void codePointToUtf16(CodePoint cp, Function writeOutput) //"writeOutput" is a u
     if (cp < LEAD_SURROGATE)
         writeOutput(static_cast<Char16>(cp));
     else if (cp <= TRAIL_SURROGATE_MAX) //invalid code point
-        codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
-    else if (cp < 0x10000)
+        writeOutput(static_cast<Char16>(REPLACEMENT_CHAR));
+    else if (cp <= 0xffff)
         writeOutput(static_cast<Char16>(cp));
     else if (cp <= CODE_POINT_MAX)
     {
         cp -= 0x10000;
         writeOutput(static_cast<Char16>( LEAD_SURROGATE + (cp >> 10)));
-        writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0x3ff)));
+        writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0b11'1111'1111)));
     }
     else //invalid code point
-        codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
-}
-
-
-inline
-size_t getUtf16Len(Char16 ch) //ch must be first code unit! returns 0 on error!
-{
-    if (ch < LEAD_SURROGATE)
-        return 1;
-    else if (ch < TRAIL_SURROGATE)
-        return 2;
-    else if (ch <= TRAIL_SURROGATE_MAX)
-        return 0; //unexpected trail surrogate!
-    else
-        return 1;
+        writeOutput(static_cast<Char16>(REPLACEMENT_CHAR));
 }
 
 
@@ -102,17 +86,14 @@ public:
 
         const Char16 ch = *it_++;
         CodePoint cp = ch;
-        switch (getUtf16Len(ch))
-        {
-            case 0: //invalid utf16 character
-                cp = REPLACEMENT_CHAR;
-                break;
-            case 1:
-                break;
-            case 2:
-                decodeTrail(cp);
-                break;
-        }
+
+        if (ch < LEAD_SURROGATE || ch > TRAIL_SURROGATE_MAX) //single Char16, no surrogates
+            ;
+        else if (ch < TRAIL_SURROGATE) //two Char16: lead and trail surrogates
+            decodeTrail(cp); //no range check needed: cp is inside [U+010000, U+10FFFF] by construction
+        else //unexpected trail surrogate
+            cp = REPLACEMENT_CHAR;
+
         return cp;
     }
 
@@ -141,46 +122,37 @@ private:
 template <class Function> inline
 void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char8
 {
-    //https://en.wikipedia.org/wiki/UTF-8
-    //assert(cp < LEAD_SURROGATE || TRAIL_SURROGATE_MAX < cp); //code points [0xd800, 0xdfff] are reserved for UTF-16 and *should* not be encoded in UTF-8
+    /* https://en.wikipedia.org/wiki/UTF-8
+      "high and low surrogate halves used by UTF-16 (U+D800 through U+DFFF) and
+       code points not encodable by UTF-16 (those after U+10FFFF) [...] must be treated as an invalid byte sequence" */
 
-    if (cp < 0x80)
+    if (cp <= 0b111'1111)
         writeOutput(static_cast<Char8>(cp));
-    else if (cp < 0x800)
+    else if (cp <= 0b0111'1111'1111)
     {
-        writeOutput(static_cast<Char8>((cp >> 6  ) | 0xc0));
-        writeOutput(static_cast<Char8>((cp & 0x3f) | 0x80));
+        writeOutput(static_cast<Char8>((cp >> 6)        | 0b1100'0000)); //110x xxxx
+        writeOutput(static_cast<Char8>((cp & 0b11'1111) | 0b1000'0000)); //10xx xxxx
     }
-    else if (cp < 0x10000)
+    else if (cp <= 0b1111'1111'1111'1111)
     {
-        writeOutput(static_cast<Char8>( (cp >> 12       ) | 0xe0));
-        writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80));
-        writeOutput(static_cast<Char8>( (cp       & 0x3f) | 0x80));
+        if (LEAD_SURROGATE <= cp && cp <= TRAIL_SURROGATE_MAX) //[0xd800, 0xdfff]
+            codePointToUtf8(REPLACEMENT_CHAR, writeOutput);
+        else
+        {
+            writeOutput(static_cast<Char8>( (cp >> 12)             | 0b1110'0000)); //1110 xxxx
+            writeOutput(static_cast<Char8>(((cp >> 6) & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+            writeOutput(static_cast<Char8>( (cp       & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+        }
     }
     else if (cp <= CODE_POINT_MAX)
     {
-        writeOutput(static_cast<Char8>( (cp >> 18        ) | 0xf0));
-        writeOutput(static_cast<Char8>(((cp >> 12) & 0x3f) | 0x80));
-        writeOutput(static_cast<Char8>(((cp >> 6)  & 0x3f) | 0x80));
-        writeOutput(static_cast<Char8>( (cp        & 0x3f) | 0x80));
+        writeOutput(static_cast<Char8>( (cp >> 18)              | 0b1111'0000)); //1111 0xxx
+        writeOutput(static_cast<Char8>(((cp >> 12) & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+        writeOutput(static_cast<Char8>(((cp >> 6)  & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+        writeOutput(static_cast<Char8>( (cp        & 0b11'1111) | 0b1000'0000)); //10xx xxxx
     }
     else //invalid code point
-        codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte utf8
-}
-
-
-inline
-size_t getUtf8Len(Char8 ch) //ch must be first code unit! returns 0 on error!
-{
-    if (ch < 0x80)
-        return 1;
-    if (ch >> 5 == 0x6)
-        return 2;
-    if (ch >> 4 == 0xe)
-        return 3;
-    if (ch >> 3 == 0x1e)
-        return 4;
-    return 0; //invalid begin of UTF8 encoding
+        codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte UTF8
 }
 
 
@@ -196,30 +168,34 @@ public:
 
         const Char8 ch = *it_++;
         CodePoint cp = ch;
-        switch (getUtf8Len(ch))
+
+        if (ch < 0x80) //1 byte
+            ;
+        else if (ch >> 5 == 0b110) //2 bytes
         {
-            case 0: //invalid utf8 character
-                cp = REPLACEMENT_CHAR;
-                break;
-            case 1:
-                break;
-            case 2:
-                cp &= 0x1f;
-                decodeTrail(cp);
-                break;
-            case 3:
-                cp &= 0xf;
-                if (decodeTrail(cp))
-                    decodeTrail(cp);
-                break;
-            case 4:
-                cp &= 0x7;
-                if (decodeTrail(cp))
-                    if (decodeTrail(cp))
-                        decodeTrail(cp);
-                if (cp > CODE_POINT_MAX) cp = REPLACEMENT_CHAR;
-                break;
+            cp &= 0b1'1111;
+            if (decodeTrail(cp))
+                if (cp <= 0b111'1111) //overlong encoding: "correct encoding of a code point uses only the minimum number of bytes required"
+                    cp = REPLACEMENT_CHAR;
         }
+        else if (ch >> 4 == 0b1110) //3 bytes
+        {
+            cp &= 0b1111;
+            if (decodeTrail(cp) && decodeTrail(cp))
+                if (cp <= 0b0111'1111'1111 ||
+                    (LEAD_SURROGATE <= cp && cp <= TRAIL_SURROGATE_MAX)) //[0xd800, 0xdfff] are invalid code points
+                    cp = REPLACEMENT_CHAR;
+        }
+        else if (ch >> 3 == 0b11110) //4 bytes
+        {
+            cp &= 0b111;
+            if (decodeTrail(cp) && decodeTrail(cp) && decodeTrail(cp))
+                if (cp <= 0b1111'1111'1111'1111 || cp > CODE_POINT_MAX)
+                    cp = REPLACEMENT_CHAR;
+        }
+        else //invalid begin of UTF8 encoding
+            cp = REPLACEMENT_CHAR;
+
         return cp;
     }
 
@@ -229,9 +205,9 @@ private:
         if (it_ != last_) //trail surrogate expected!
         {
             const Char8 ch = *it_;
-            if (ch >> 6 == 0x2) //trail surrogate expected!
+            if (ch >> 6 == 0b10) //continuation byte (10xx xxxx) expected!
             {
-                cp = (cp << 6) + (ch & 0x3f);
+                cp = (cp << 6) + (ch & 0b11'1111);
                 ++it_;
                 return true;
             }
@@ -337,7 +313,9 @@ UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t u
     assert(uniPosFirst <= uniPosLast && uniPosLast <= unicodeLength(str));
     using namespace impl;
     using CharType = GetCharTypeT<UtfString>;
+
     UtfString output;
+    assert(uniPosFirst <= uniPosLast);
     if (uniPosFirst >= uniPosLast) //optimize for empty range
         return output;
 
@@ -357,6 +335,10 @@ UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t u
 namespace impl
 {
 template <class TargetString, class SourceString> inline
+TargetString utfTo(const SourceString& str, std::true_type) { return copyStringTo<TargetString>(str); }
+
+
+template <class TargetString, class SourceString> inline
 TargetString utfTo(const SourceString& str, std::false_type)
 {
     using CharSrc = GetCharTypeT<SourceString>;
@@ -371,10 +353,6 @@ TargetString utfTo(const SourceString& str, std::false_type)
 
     return output;
 }
-
-
-template <class TargetString, class SourceString> inline
-TargetString utfTo(const SourceString& str, std::true_type) { return copyStringTo<TargetString>(str); }
 }
 
 
diff --git a/zen/zstring.cpp b/zen/zstring.cpp
index 76c0a81f..1e29e461 100644
--- a/zen/zstring.cpp
+++ b/zen/zstring.cpp
@@ -11,46 +11,44 @@
 using namespace zen;
 
 
-Zstring getUnicodeNormalForm(const Zstring& str)
+Zstring getUnicodeNormalFormNonAscii(const Zstring& str)
 {
-    //fast pre-check:
-    if (isAsciiString(str)) //perf: in the range of 3.5ns
-        return str;
-    static_assert(std::is_same_v<decltype(str), const Zbase<Zchar>&>, "god bless our ref-counting! => save output string memory consumption!");
-
     //Example: const char* decomposed  = "\x6f\xcc\x81";
     //         const char* precomposed = "\xc3\xb3";
+    assert(!isAsciiString(str));
+    assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls!
+
     try
     {
         gchar* outStr = ::g_utf8_normalize(str.c_str(), str.length(), G_NORMALIZE_DEFAULT_COMPOSE);
         if (!outStr)
-            throw SysError(formatSystemError("g_utf8_normalize(" + utfTo<std::string>(str) + ')', L"", L"Conversion failed."));
+            throw SysError(formatSystemError("g_utf8_normalize", L"", L"Conversion failed."));
         ZEN_ON_SCOPE_EXIT(::g_free(outStr));
         return outStr;
 
     }
-    catch ([[maybe_unused]] const SysError& e)
+    catch (const SysError& e)
     {
-        assert(false);
-        return str;
+        throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error normalizing string:" +
+                                 '\n' + utfTo<std::string>(str)  + "\n\n" + utfTo<std::string>(e.toString()));
     }
 }
 
 
-Zstring getUpperCase(const Zstring& str)
+Zstring getUnicodeNormalForm(const Zstring& str)
 {
-    assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls!
-
     //fast pre-check:
     if (isAsciiString(str)) //perf: in the range of 3.5ns
-    {
-        Zstring output = str;
-        for (Zchar& c : output)
-            c = asciiToUpper(c);
-        return output;
-    }
+        return str;
+    static_assert(std::is_same_v<decltype(str), const Zbase<Zchar>&>, "god bless our ref-counting! => save output string memory consumption!");
 
-    Zstring strNorm = getUnicodeNormalForm(str);
+    return getUnicodeNormalFormNonAscii(str);
+}
+
+
+Zstring getUpperCaseNonAscii(const Zstring& str)
+{
+    Zstring strNorm = getUnicodeNormalFormNonAscii(str);
     try
     {
         static_assert(sizeof(impl::CodePoint) == sizeof(gunichar));
@@ -64,11 +62,26 @@ Zstring getUpperCase(const Zstring& str)
         return output;
 
     }
-    catch (SysError&)
+    catch (const SysError& e)
     {
-        assert(false);
-        return str;
+        throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error converting string to upper case:" +
+                                 '\n' + utfTo<std::string>(str)  + "\n\n" + utfTo<std::string>(e.toString()));
+    }
+}
+
+
+Zstring getUpperCase(const Zstring& str)
+{
+    if (isAsciiString(str)) //fast path: in the range of 3.5ns
+    {
+        Zstring output = str;
+        for (Zchar& c : output)  //identical to LCMapStringEx(), g_unichar_toupper(), CFStringUppercase() [verified!]
+            c = asciiToUpper(c); //
+        return output;
     }
+    //else: slow path --------------------------------------
+
+    return getUpperCaseNonAscii(str);
 }
 
 
@@ -91,10 +104,10 @@ std::weak_ordering compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char*
 
         static_assert(sizeof(gunichar) == sizeof(impl::CodePoint));
 
+        //ordering: "to lower" converts to higher code points than "to upper"
         const gunichar charL = ::g_unichar_toupper(*cpL); //note: tolower can be ambiguous, so don't use:
         const gunichar charR = ::g_unichar_toupper(*cpR); //e.g. "Σ" (upper case) can be lower-case "ς" in the end of the word or "σ" in the middle.
         if (charL != charR)
-            //ordering: "to lower" converts to higher code points than "to upper"
             return makeUnsigned(charL) <=> makeUnsigned(charR); //unsigned char-comparison is the convention!
     }
 }
@@ -107,78 +120,111 @@ std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs)
           Windows: CompareString() already ignores NFD/NFC differences: nice...
           Linux:  g_unichar_toupper() can't ignore differences
           macOS:  CFStringCompare() considers differences */
-
-    const Zstring& lhsNorm = getUnicodeNormalForm(lhs);
-    const Zstring& rhsNorm = getUnicodeNormalForm(rhs);
-
-    const char* strL = lhsNorm.c_str();
-    const char* strR = rhsNorm.c_str();
-
-    const char* const strEndL = strL + lhsNorm.size();
-    const char* const strEndR = strR + rhsNorm.size();
-    /*  - compare strings after conceptually creating blocks of whitespace/numbers/text
-        - implement strict weak ordering!
-        - don't follow broken "strnatcasecmp": https://github.com/php/php-src/blob/master/ext/standard/strnatcmp.c
-                1. incorrect non-ASCII CI-comparison
-                2. incorrect bounds checks
-                3. incorrect trimming of *all* whitespace
-                4. arbitrary handling of leading 0 only at string begin
-                5. incorrect handling of whitespace following a number
-                6. code is a mess                                          */
-    for (;;)
+    try
     {
-        if (strL == strEndL || strR == strEndR)
-            return (strL != strEndL) <=> (strR != strEndR); //"nothing" before "something"
-        //note: "something" never would have been condensed to "nothing" further below => can finish evaluation here
-
-        const bool wsL = isWhiteSpace(*strL);
-        const bool wsR = isWhiteSpace(*strR);
-        if (wsL != wsR)
-            return !wsL <=> !wsR; //whitespace before non-ws!
-        if (wsL)
-        {
-            ++strL, ++strR;
-            while (strL != strEndL && isWhiteSpace(*strL)) ++strL;
-            while (strR != strEndR && isWhiteSpace(*strR)) ++strR;
-            continue;
-        }
-
-        const bool digitL = isDigit(*strL);
-        const bool digitR = isDigit(*strR);
-        if (digitL != digitR)
-            return !digitL <=> !digitR; //numbers before chars!
-        if (digitL)
+        const Zstring& lhsNorm = getUnicodeNormalForm(lhs);
+        const Zstring& rhsNorm = getUnicodeNormalForm(rhs);
+
+        const char* strL = lhsNorm.c_str();
+        const char* strR = rhsNorm.c_str();
+
+        const char* const strEndL = strL + lhsNorm.size();
+        const char* const strEndR = strR + rhsNorm.size();
+        /*  - compare strings after conceptually creating blocks of whitespace/numbers/text
+            - implement strict weak ordering!
+            - don't follow broken "strnatcasecmp": https://github.com/php/php-src/blob/master/ext/standard/strnatcmp.c
+                    1. incorrect non-ASCII CI-comparison
+                    2. incorrect bounds checks
+                    3. incorrect trimming of *all* whitespace
+                    4. arbitrary handling of leading 0 only at string begin
+                    5. incorrect handling of whitespace following a number
+                    6. code is a mess                                          */
+        for (;;)
         {
-            while (strL != strEndL && *strL == '0') ++strL;
-            while (strR != strEndR && *strR == '0') ++strR;
+            if (strL == strEndL || strR == strEndR)
+                return (strL != strEndL) <=> (strR != strEndR); //"nothing" before "something"
+            //note: "something" never would have been condensed to "nothing" further below => can finish evaluation here
+
+            const bool wsL = isWhiteSpace(*strL);
+            const bool wsR = isWhiteSpace(*strR);
+            if (wsL != wsR)
+                return !wsL <=> !wsR; //whitespace before non-ws!
+            if (wsL)
+            {
+                ++strL, ++strR;
+                while (strL != strEndL && isWhiteSpace(*strL)) ++strL;
+                while (strR != strEndR && isWhiteSpace(*strR)) ++strR;
+                continue;
+            }
 
-            int rv = 0;
-            for (;; ++strL, ++strR)
+            const bool digitL = isDigit(*strL);
+            const bool digitR = isDigit(*strR);
+            if (digitL != digitR)
+                return !digitL <=> !digitR; //numbers before chars!
+            if (digitL)
             {
-                const bool endL = strL == strEndL || !isDigit(*strL);
-                const bool endR = strR == strEndR || !isDigit(*strR);
-                if (endL != endR)
-                    return !endL <=> !endR; //more digits means bigger number
-                if (endL)
-                    break; //same number of digits
-
-                if (rv == 0 && *strL != *strR)
-                    rv = *strL - *strR; //found first digit difference comparing from left
+                while (strL != strEndL && *strL == '0') ++strL;
+                while (strR != strEndR && *strR == '0') ++strR;
+
+                int rv = 0;
+                for (;; ++strL, ++strR)
+                {
+                    const bool endL = strL == strEndL || !isDigit(*strL);
+                    const bool endR = strR == strEndR || !isDigit(*strR);
+                    if (endL != endR)
+                        return !endL <=> !endR; //more digits means bigger number
+                    if (endL)
+                        break; //same number of digits
+
+                    if (rv == 0 && *strL != *strR)
+                        rv = *strL - *strR; //found first digit difference comparing from left
+                }
+                if (rv != 0)
+                    return rv <=> 0;
+                continue;
             }
-            if (rv != 0)
-                return rv <=> 0;
-            continue;
+
+            //compare full chunks of text: consider Unicode encoding!
+            const char* textBeginL = strL++;
+            const char* textBeginR = strR++; //current char is neither white space nor digit at this point!
+            while (strL != strEndL && !isWhiteSpace(*strL) && !isDigit(*strL)) ++strL;
+            while (strR != strEndR && !isWhiteSpace(*strR) && !isDigit(*strR)) ++strR;
+
+            if (const std::weak_ordering cmp = compareNoCaseUtf8(textBeginL, strL - textBeginL, textBeginR, strR - textBeginR);
+                cmp != std::weak_ordering::equivalent)
+                return cmp;
         }
 
-        //compare full junks of text: consider unicode encoding!
-        const char* textBeginL = strL++;
-        const char* textBeginR = strR++; //current char is neither white space nor digit at this point!
-        while (strL != strEndL && !isWhiteSpace(*strL) && !isDigit(*strL)) ++strL;
-        while (strR != strEndR && !isWhiteSpace(*strR) && !isDigit(*strR)) ++strR;
+    }
+    catch (const SysError& e)
+    {
+        throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error comparing strings:" + '\n' +
+                                 utfTo<std::string>(lhs) + '\n' + utfTo<std::string>(rhs) + "\n\n" + utfTo<std::string>(e.toString()));
+    }
+}
+
 
-        if (const std::weak_ordering cmp = compareNoCaseUtf8(textBeginL, strL - textBeginL, textBeginR, strR - textBeginR);
-            cmp != std::weak_ordering::equivalent)
-            return cmp;
+std::weak_ordering compareNoCase(const Zstring& lhs, const Zstring& rhs)
+{
+    //fast path: no need for extra memory allocations => ~ 6x speedup
+    const size_t minSize = std::min(lhs.size(), rhs.size());
+
+    size_t i = 0;
+    for (; i < minSize; ++i)
+    {
+        const Zchar l = lhs[i];
+        const Zchar r = rhs[i];
+        if (!isAsciiChar(l) || !isAsciiChar(r))
+            goto slowPath; //=> let's NOT make assumptions how getUpperCase() compares "ASCII <=> non-ASCII"
+
+        const Zchar lUp = asciiToUpper(l); //
+        const Zchar rUp = asciiToUpper(r); //no surprises: emulate getUpperCase() [verified!]
+        if (lUp != rUp)                    //
+            return lUp <=> rUp;            //
     }
+    return lhs.size() <=> rhs.size();
+slowPath: //--------------------------------------
 
+    return compareNoCaseUtf8(lhs.c_str() + i, lhs.size() - i, 
+                             rhs.c_str() + i, rhs.size() - i);
 }
diff --git a/zen/zstring.h b/zen/zstring.h
index bc7cfb06..70b9f448 100644
--- a/zen/zstring.h
+++ b/zen/zstring.h
@@ -39,7 +39,7 @@ Zstring getUnicodeNormalForm(const Zstring& str);
 Zstring getUpperCase(const Zstring& str);
 
 //------------------------------------------------------------------------------------------
-struct ZstringNorm //use as STL container key: avoid needless Unicode normalizations during std::map<>::find()
+struct ZstringNorm //use as STL container key: better than repeated Unicode normalizations during std::map<>::find()
 {
     /*explicit*/ ZstringNorm(const Zstring& str) : normStr(getUnicodeNormalForm(str)) {}
     Zstring normStr;
@@ -51,7 +51,7 @@ template<> struct std::hash<ZstringNorm> { size_t operator()(const ZstringNorm&
 //struct LessUnicodeNormal { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return getUnicodeNormalForm(lhs) < getUnicodeNormalForm(rhs); } };
 
 //------------------------------------------------------------------------------------------
-struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find()
+struct ZstringNoCase //use as STL container key: better than repeated upper-case conversions during std::map<>::find()
 {
     /*explicit*/ ZstringNoCase(const Zstring& str) : upperCase(getUpperCase(str)) {}
     Zstring upperCase;
@@ -60,12 +60,18 @@ struct ZstringNoCase //use as STL container key: avoid needless upper-case conve
 };
 template<> struct std::hash<ZstringNoCase> { size_t operator()(const ZstringNoCase& str) const { return std::hash<Zstring>()(str.upperCase); } };
 
-inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return getUpperCase(lhs) == getUpperCase(rhs); }
+
+std::weak_ordering compareNoCase(const Zstring& lhs, const Zstring& rhs);
+
+inline
+bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return compareNoCase(lhs, rhs) == std::weak_ordering::equivalent;  }
+//note: the "lhs.size() != rhs.size()" short-cut would require two isAsciiString() checks
+//=> generally SLOWER than starting comparison directly during first pass and breaking on first difference!
 
 //------------------------------------------------------------------------------------------
 std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs);
 
-struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNatural(lhs, rhs)); } };
+struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNatural(lhs, rhs) < 0; } };
 
 
 //------------------------------------------------------------------------------------------
@@ -73,16 +79,18 @@ struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs)
 const wchar_t EN_DASH = L'\u2013';
 const wchar_t EM_DASH = L'\u2014';
     const wchar_t* const SPACED_DASH = L" \u2014 "; //using 'EM DASH'
-const wchar_t LTR_MARK = L'\u200E'; //UTF-8: E2 80 8E
 const wchar_t* const ELLIPSIS = L"\u2026"; //"..."
 const wchar_t MULT_SIGN = L'\u00D7'; //fancy "x"
 //const wchar_t NOBREAK_SPACE = L'\u00A0';
 const wchar_t ZERO_WIDTH_SPACE = L'\u200B';
 
+const wchar_t LTR_MARK = L'\u200E'; //UTF-8: E2 80 8E
 const wchar_t RTL_MARK = L'\u200F'; //UTF-8: E2 80 8F https://www.w3.org/International/questions/qa-bidi-unicode-controls
-const wchar_t BIDI_DIR_ISOLATE_RTL    = L'\u2067'; //UTF-8: E2 81 A7 => not working on Win 10
-const wchar_t BIDI_POP_DIR_ISOLATE    = L'\u2069'; //UTF-8: E2 81 A9 => not working on Win 10
-const wchar_t BIDI_DIR_EMBEDDING_RTL  = L'\u202B'; //UTF-8: E2 80 AB => not working on Win 10
-const wchar_t BIDI_POP_DIR_FORMATTING = L'\u202C'; //UTF-8: E2 80 AC => not working on Win 10
+//const wchar_t BIDI_DIR_ISOLATE_RTL    = L'\u2067'; //=> not working on Win 10
+//const wchar_t BIDI_POP_DIR_ISOLATE    = L'\u2069'; //=> not working on Win 10
+//const wchar_t BIDI_DIR_EMBEDDING_RTL  = L'\u202B'; //=> not working on Win 10
+//const wchar_t BIDI_POP_DIR_FORMATTING = L'\u202C'; //=> not working on Win 10
+
+const wchar_t* const TAB_SPACE = L"    "; //4: the only sensible space count for tabs
 
 #endif //ZSTRING_H_73425873425789
author	B. Stack <bgstack15@gmail.com>	2022-09-07 14:49:22 -0400
committer	B. Stack <bgstack15@gmail.com>	2022-09-07 14:49:22 -0400
commit	47c88c433d17948fab1d8e1d76121a72fe5938cb (patch)
tree	fbc1dea58a6b28f1af4a9e9b2bc8e3e1d23b2103 /zen
parent	Merge branch 'b11.23' into 'master' (diff)
download	FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.tar.gz FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.tar.bz2 FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.zip