summary | refs | log | tree | commit | diff
path: root/zen
diff options
context:
space:
mode:
author: B. Stack <bgstack15@gmail.com> 2022-09-07 14:49:22 -0400
committer: B. Stack <bgstack15@gmail.com> 2022-09-07 14:49:22 -0400
commit: 47c88c433d17948fab1d8e1d76121a72fe5938cb (patch)
tree: fbc1dea58a6b28f1af4a9e9b2bc8e3e1d23b2103 /zen
parent: Merge branch 'b11.23' into 'master' (diff)
download: FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.tar.gz
FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.tar.bz2
FreeFileSync-47c88c433d17948fab1d8e1d76121a72fe5938cb.zip
add upstream 11.24
Diffstat (limited to 'zen')
-rw-r--r-- zen/build_info.h 1
-rw-r--r-- zen/file_access.cpp 17
-rw-r--r-- zen/file_access.h 14
-rw-r--r-- zen/file_path.cpp 7
-rw-r--r-- zen/file_path.h 2
-rw-r--r-- zen/file_traverser.h 4
-rw-r--r-- zen/format_unit.cpp 27
-rw-r--r-- zen/process_exec.cpp 3
-rw-r--r-- zen/resolve_path.cpp 12
-rw-r--r-- zen/socket.h 10
-rw-r--r-- zen/stl_tools.h 37
-rw-r--r-- zen/string_base.h 31
-rw-r--r-- zen/string_tools.h 26
-rw-r--r-- zen/string_traits.h 4
-rw-r--r-- zen/sys_info.cpp 12
-rw-r--r-- zen/thread.h 2
-rw-r--r-- zen/time.h 44
-rw-r--r-- zen/utf.h 160
-rw-r--r-- zen/zstring.cpp 222
-rw-r--r-- zen/zstring.h 26
20 files changed, 345 insertions, 316 deletions
diff --git a/zen/build_info.h b/zen/build_info.h
index b06c1302..86ff303c 100644
--- a/zen/build_info.h
+++ b/zen/build_info.h
@@ -26,6 +26,7 @@ enum class BuildArch
static_assert((BuildArch::program == BuildArch::bit32 ? 32 : 64) == sizeof(void*) * 8);
+//harmonize with os_arch enum in update_checks table:
constexpr const char* cpuArchName = BuildArch::program == BuildArch::bit32 ? "i686": "x86-64";
}
diff --git a/zen/file_access.cpp b/zen/file_access.cpp
index 6a62f671..2e119e87 100644
--- a/zen/file_access.cpp
+++ b/zen/file_access.cpp
@@ -70,7 +70,7 @@ std::optional<ItemType> zen::itemStillExists(const Zstring& itemPath) //throw Fi
try
{
traverseFolder(*parentPath,
- [&](const FileInfo& fi) { if (fi.itemName == itemName) throw ItemType::file; },
+ [&](const FileInfo& fi) { if (fi.itemName == itemName) throw ItemType::file; }, //case-sensitive! itemPath must be normalized!
[&](const FolderInfo& fi) { if (fi.itemName == itemName) throw ItemType::folder; },
[&](const SymlinkInfo& si) { if (si.itemName == itemName) throw ItemType::symlink; },
[](const std::wstring& errorMsg) { throw FileError(errorMsg); });
@@ -233,7 +233,6 @@ void zen::removeDirectoryPlainRecursion(const Zstring& dirPath) //throw FileErro
namespace
{
-
/* Usage overview: (avoid circular pattern!)
moveAndRenameItem() --> moveAndRenameFileSub()
@@ -319,18 +318,20 @@ void setWriteTimeNative(const Zstring& itemPath, const timespec& modTime, ProcSy
=> utimens: https://github.com/coreutils/gnulib/blob/master/lib/utimens.c
touch: https://github.com/coreutils/coreutils/blob/master/src/touch.c
=> fdutimensat: https://github.com/coreutils/gnulib/blob/master/lib/fdutimensat.c */
- timespec newTimes[2] = {};
- newTimes[0].tv_sec = ::time(nullptr); //access time; don't use UTIME_NOW/UTIME_OMIT: more bugs! https://freefilesync.org/forum/viewtopic.php?t=1701
- newTimes[1] = modTime; //modification time
+ const timespec newTimes[2]
+ {
+ {.tv_sec = ::time(nullptr)}, //access time; don't use UTIME_NOW/UTIME_OMIT: more bugs! https://freefilesync.org/forum/viewtopic.php?t=1701
+ modTime,
+ };
//test: even modTime == 0 is correctly applied (no NOOP!) test2: same behavior for "utime()"
//hell knows why files on gvfs-mounted Samba shares fail to open(O_WRONLY) returning EOPNOTSUPP:
//https://freefilesync.org/forum/viewtopic.php?t=2803 => utimensat() works (but not for gvfs SFTP)
- if (::utimensat(AT_FDCWD, itemPath.c_str(), newTimes, procSl == ProcSymlink::direct ? AT_SYMLINK_NOFOLLOW : 0) == 0)
+ if (::utimensat(AT_FDCWD, itemPath.c_str(), newTimes, procSl == ProcSymlink::asLink ? AT_SYMLINK_NOFOLLOW : 0) == 0)
return;
try
{
- if (procSl == ProcSymlink::direct)
+ if (procSl == ProcSymlink::asLink)
try
{
if (getItemType(itemPath) == ItemType::symlink) //throw FileError
@@ -554,7 +555,7 @@ void zen::copySymlink(const Zstring& sourcePath, const Zstring& targetPath) //th
if (::lstat(sourcePath.c_str(), &sourceInfo) != 0)
THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file attributes of %x."), L"%x", fmtPath(sourcePath)), "lstat");
- setWriteTimeNative(targetPath, sourceInfo.st_mtim, ProcSymlink::direct); //throw FileError
+ setWriteTimeNative(targetPath, sourceInfo.st_mtim, ProcSymlink::asLink); //throw FileError
}
diff --git a/zen/file_access.h b/zen/file_access.h
index 17c47731..f6a02edc 100644
--- a/zen/file_access.h
+++ b/zen/file_access.h
@@ -29,12 +29,7 @@ using FileIndex = ino_t;
using FileTimeNative = timespec;
inline time_t nativeFileTimeToTimeT(const timespec& ft) { return ft.tv_sec; } //follow Windows Explorer and always round down!
-inline timespec timetToNativeFileTime(time_t utcTime)
-{
- timespec natTime = {};
- natTime.tv_sec = utcTime;
- return natTime;
-}
+inline timespec timetToNativeFileTime(time_t utcTime) { return {.tv_sec = utcTime}; }
enum class ItemType
{
@@ -44,15 +39,14 @@ enum class ItemType
};
//(hopefully) fast: does not distinguish between error/not existing
ItemType getItemType(const Zstring& itemPath); //throw FileError
-//execute potentially SLOW folder traversal but distinguish error/not existing
-// assumes: - base path still exists
-// - all child item path parts must correspond to folder traversal
+//execute potentially SLOW folder traversal but distinguish error/not existing:
+// - all child item path parts must correspond to folder traversal
// => we can conclude whether an item is *not* existing anymore by doing a *case-sensitive* name search => potentially SLOW!
std::optional<ItemType> itemStillExists(const Zstring& itemPath); //throw FileError
enum class ProcSymlink
{
- direct,
+ asLink,
follow
};
void setFileTime(const Zstring& filePath, time_t modTime, ProcSymlink procSl); //throw FileError
diff --git a/zen/file_path.cpp b/zen/file_path.cpp
index 716dd8de..f5c207f3 100644
--- a/zen/file_path.cpp
+++ b/zen/file_path.cpp
@@ -13,11 +13,12 @@ std::optional<PathComponents> zen::parsePathComponents(const Zstring& itemPath)
{
auto doParse = [&](int sepCountVolumeRoot, bool rootWithSep) -> std::optional<PathComponents>
{
+ assert(sepCountVolumeRoot > 0);
const Zstring itemPathPf = appendSeparator(itemPath); //simplify analysis of root without separator, e.g. \\server-name\share
- int sepCount = 0;
+
for (auto it = itemPathPf.begin(); it != itemPathPf.end(); ++it)
if (*it == FILE_NAME_SEPARATOR)
- if (++sepCount == sepCountVolumeRoot)
+ if (--sepCountVolumeRoot == 0)
{
Zstring rootPath(itemPathPf.begin(), rootWithSep ? it + 1 : it);
@@ -89,7 +90,7 @@ bool zen::isValidRelPath(const Zstring& relPath)
if constexpr (FILE_NAME_SEPARATOR != Zstr('\\')) if (contains(relPath, Zstr('\\'))) return false;
const Zchar doubleSep[] = {FILE_NAME_SEPARATOR, FILE_NAME_SEPARATOR, 0};
- return !startsWith(relPath, FILE_NAME_SEPARATOR)&& !endsWith(relPath, FILE_NAME_SEPARATOR)&&
+ return !startsWith(relPath, FILE_NAME_SEPARATOR) && !endsWith(relPath, FILE_NAME_SEPARATOR) &&
!contains(relPath, doubleSep);
}
diff --git a/zen/file_path.h b/zen/file_path.h
index 4a85514b..85af251d 100644
--- a/zen/file_path.h
+++ b/zen/file_path.h
@@ -40,7 +40,7 @@ std::weak_ordering compareNativePath(const Zstring& lhs, const Zstring& rhs);
inline bool equalNativePath(const Zstring& lhs, const Zstring& rhs) { return compareNativePath(lhs, rhs) == std::weak_ordering::equivalent; }
-struct LessNativePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNativePath(lhs, rhs)); } };
+struct LessNativePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNativePath(lhs, rhs) < 0; } };
//------------------------------------------------------------------------------------------
diff --git a/zen/file_traverser.h b/zen/file_traverser.h
index cb7782d6..11c3eaa0 100644
--- a/zen/file_traverser.h
+++ b/zen/file_traverser.h
@@ -17,7 +17,7 @@ struct FileInfo
Zstring itemName;
Zstring fullPath;
uint64_t fileSize = 0; //[bytes]
- time_t modTime = 0; //number of seconds since Jan. 1st 1970 UTC
+ time_t modTime = 0; //number of seconds since Jan. 1st 1970 GMT
};
struct FolderInfo
@@ -30,7 +30,7 @@ struct SymlinkInfo
{
Zstring itemName;
Zstring fullPath;
- time_t modTime = 0; //number of seconds since Jan. 1st 1970 UTC
+ time_t modTime = 0; //number of seconds since Jan. 1st 1970 GMT
};
//- non-recursive
diff --git a/zen/format_unit.cpp b/zen/format_unit.cpp
index 2aa6e094..8b3fccfe 100644
--- a/zen/format_unit.cpp
+++ b/zen/format_unit.cpp
@@ -168,12 +168,27 @@ std::wstring zen::formatNumber(int64_t n)
std::wstring zen::formatUtcToLocalTime(time_t utcTime)
{
- auto errorMsg = [&] { return _("Error") + L" (time_t: " + numberTo<std::wstring>(utcTime) + L')'; };
+ auto fmtFallback = [utcTime] //don't take "no" for an answer!
+ {
+ if (const TimeComp tc = getUtcTime(utcTime);
+ tc != TimeComp())
+ {
+ wchar_t buf[128] = {}; //the only way to format abnormally large or invalid modTime: std::strftime() will fail!
+ if (const int rv = std::swprintf(buf, std::size(buf), L"%d-%02d-%02d %02d:%02d:%02d GMT", tc.year, tc.month, tc.day, tc.hour, tc.minute, tc.second);
+ 0 < rv && rv < std::ssize(buf))
+ return std::wstring(buf, rv);
+ }
+
+ return L"time_t = " + numberTo<std::wstring>(utcTime);
+ };
const TimeComp& loc = getLocalTime(utcTime); //returns TimeComp() on error
- std::wstring dateString = utfTo<std::wstring>(formatTime(Zstr("%x %X"), loc));
- return !dateString.empty() ? dateString : errorMsg();
+ /*const*/ std::wstring dateTimeFmt = utfTo<std::wstring>(formatTime(Zstr("%x %X"), loc));
+ if (dateTimeFmt.empty())
+ return fmtFallback();
+
+ return dateTimeFmt;
}
@@ -188,9 +203,9 @@ WeekDay impl::getFirstDayOfWeekImpl() //throw SysError
const char* firstDay = ::nl_langinfo(_NL_TIME_FIRST_WEEKDAY); //[1-Sunday, 7-Saturday]
ASSERT_SYSERROR(firstDay && 1 <= *firstDay && *firstDay <= 7);
- const int weekDayStartSunday = *firstDay;
- const int weekDayStartMonday = (weekDayStartSunday - 1 + 6) % 7; //+6 == -1 in Z_7
- // [0-Monday, 6-Sunday]
+ const int weekDayStartSunday = *firstDay; //[1-Sunday, 7-Saturday]
+ const int weekDayStartMonday = (weekDayStartSunday - 2 + 7) % 7; //[0-Monday, 6-Sunday] 7 == 0 in Z_7
+
return static_cast<WeekDay>(weekDayStartMonday);
}
diff --git a/zen/process_exec.cpp b/zen/process_exec.cpp
index 6b670508..df41a627 100644
--- a/zen/process_exec.cpp
+++ b/zen/process_exec.cpp
@@ -176,8 +176,7 @@ std::pair<int /*exit code*/, std::string> processExecuteImpl(const Zstring& file
const auto waitTimeMs = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - now).count();
- timeval tv = {};
- tv.tv_sec = static_cast<long>(waitTimeMs / 1000);
+ timeval tv{.tv_sec = static_cast<long>(waitTimeMs / 1000)};
tv.tv_usec = static_cast<long>(waitTimeMs - tv.tv_sec * 1000) * 1000;
fd_set rfd = {}; //includes FD_ZERO
diff --git a/zen/resolve_path.cpp b/zen/resolve_path.cpp
index 357dab6a..99e2f6c6 100644
--- a/zen/resolve_path.cpp
+++ b/zen/resolve_path.cpp
@@ -9,7 +9,7 @@
#include "thread.h"
#include "file_access.h"
-#include <zen/sys_info.h>
+ #include <zen/sys_info.h>
// #include <stdlib.h> //getenv()
#include <unistd.h> //getuid()
#include <pwd.h> //getpwuid_r()
@@ -63,16 +63,16 @@ Zstring resolveRelativePath(const Zstring& relativePath)
https://www.gnu.org/software/bash/manual/html_node/Tilde-Expansion.html */
if (startsWith(pathTmp, "~/") || pathTmp == "~")
{
- try
- {
- const Zstring& homePath = getUserHome(); //throw FileError
+ try
+ {
+ const Zstring& homePath = getUserHome(); //throw FileError
if (startsWith(pathTmp, "~/"))
pathTmp = appendPath(homePath, pathTmp.c_str() + 2);
else //pathTmp == "~"
pathTmp = homePath;
- }
- catch (FileError&) {}
+ }
+ catch (FileError&) {}
//else: error! no further processing!
}
else
diff --git a/zen/socket.h b/zen/socket.h
index 5ece29f8..d9517bd8 100644
--- a/zen/socket.h
+++ b/zen/socket.h
@@ -33,11 +33,13 @@ class Socket //throw SysError
public:
Socket(const Zstring& server, const Zstring& serviceName) //throw SysError
{
- ::addrinfo hints = {};
- hints.ai_socktype = SOCK_STREAM; //we *do* care about this one!
- hints.ai_flags = AI_ADDRCONFIG; //save a AAAA lookup on machines that can't use the returned data anyhow
+ const addrinfo hints
+ {
+ .ai_flags = AI_ADDRCONFIG, //save a AAAA lookup on machines that can't use the returned data anyhow
+ .ai_socktype = SOCK_STREAM, //we *do* care about this one!
+ };
- ::addrinfo* servinfo = nullptr;
+ addrinfo* servinfo = nullptr;
ZEN_ON_SCOPE_EXIT(if (servinfo) ::freeaddrinfo(servinfo));
const int rcGai = ::getaddrinfo(server.c_str(), serviceName.c_str(), &hints, &servinfo);
diff --git a/zen/stl_tools.h b/zen/stl_tools.h
index 2726a09d..66af8551 100644
--- a/zen/stl_tools.h
+++ b/zen/stl_tools.h
@@ -68,10 +68,10 @@ template <class Iterator, class T, class CompLess>
Iterator binarySearch(Iterator first, Iterator last, const T& value, CompLess less);
//read-only variant of std::merge; input: two sorted ranges
-template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly>
+template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly, class Compare>
void mergeTraversal(Iterator first1, Iterator last1,
Iterator first2, Iterator last2,
- FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro);
+ FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro, Compare compare);
//why, oh why is there no std::optional<T>::get()???
template <class T> inline T* get( std::optional<T>& opt) { return opt ? &*opt : nullptr; }
@@ -255,31 +255,32 @@ BidirectionalIterator1 searchLast(const BidirectionalIterator1 first1, Bid
//---------------------------------------------------------------------------------------
//read-only variant of std::merge; input: two sorted ranges
-template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly> inline
-void mergeTraversal(Iterator first1, Iterator last1,
- Iterator first2, Iterator last2,
- FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro)
+template <class Iterator, class FunctionLeftOnly, class FunctionBoth, class FunctionRightOnly, class Compare> inline
+void mergeTraversal(Iterator firstL, Iterator lastL,
+ Iterator firstR, Iterator lastR,
+ FunctionLeftOnly lo, FunctionBoth bo, FunctionRightOnly ro, Compare compare)
{
- auto itL = first1;
- auto itR = first2;
+ auto itL = firstL;
+ auto itR = firstR;
- auto finishLeft = [&] { std::for_each(itL, last1, lo); };
- auto finishRight = [&] { std::for_each(itR, last2, ro); };
+ auto finishLeft = [&] { std::for_each(itL, lastL, lo); };
+ auto finishRight = [&] { std::for_each(itR, lastR, ro); };
- if (itL == last1) return finishRight();
- if (itR == last2) return finishLeft ();
+ if (itL == lastL) return finishRight();
+ if (itR == lastR) return finishLeft ();
for (;;)
- if (itL->first < itR->first)
+ if (const std::weak_ordering cmp = compare(*itL, *itR);
+ cmp < 0)
{
lo(*itL);
- if (++itL == last1)
+ if (++itL == lastL)
return finishRight();
}
- else if (itR->first < itL->first)
+ else if (cmp > 0)
{
ro(*itR);
- if (++itR == last2)
+ if (++itR == lastR)
return finishLeft();
}
else
@@ -287,8 +288,8 @@ void mergeTraversal(Iterator first1, Iterator last1,
bo(*itL, *itR);
++itL; //
++itR; //increment BOTH before checking for end of range!
- if (itL == last1) return finishRight();
- if (itR == last2) return finishLeft ();
+ if (itL == lastL) return finishRight();
+ if (itR == lastR) return finishLeft ();
//simplify loop by placing both EOB checks at the beginning? => slightly slower
}
}
diff --git a/zen/string_base.h b/zen/string_base.h
index ace870b9..e18a0f16 100644
--- a/zen/string_base.h
+++ b/zen/string_base.h
@@ -312,9 +312,10 @@ template <class Char, template <class> class SP> bool operator==(const Zb
template <class Char, template <class> class SP> bool operator==(const Zbase<Char, SP>& lhs, const Char* rhs);
template <class Char, template <class> class SP> inline bool operator==(const Char* lhs, const Zbase<Char, SP>& rhs) { return operator==(rhs, lhs); }
-template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs);
-template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Char* rhs);
-template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Char* lhs, const Zbase<Char, SP>& rhs);
+//follow convention + compare by unsigned char; alternative: std::lexicographical_compare_three_way + reinterpret_cast<const std::make_unsigned_t<Char>*>()
+template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs) { return compareString(lhs, rhs); }
+template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Char* rhs) { return compareString(lhs, rhs); }
+template <class Char, template <class> class SP> std::strong_ordering operator<=>(const Char* lhs, const Zbase<Char, SP>& rhs) { return compareString(lhs, rhs); }
template <class Char, template <class> class SP> inline Zbase<Char, SP> operator+(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs) { return Zbase<Char, SP>(lhs) += rhs; }
template <class Char, template <class> class SP> inline Zbase<Char, SP> operator+(const Zbase<Char, SP>& lhs, const Char* rhs) { return Zbase<Char, SP>(lhs) += rhs; }
@@ -495,30 +496,6 @@ bool operator==(const Zbase<Char, SP>& lhs, const Char* rhs)
template <class Char, template <class> class SP> inline
-std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Zbase<Char, SP>& rhs)
-{
- return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), //respect embedded 0
- rhs.begin(), rhs.end()); //
-}
-
-
-template <class Char, template <class> class SP> inline
-std::strong_ordering operator<=>(const Zbase<Char, SP>& lhs, const Char* rhs)
-{
- return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), //respect embedded 0
- rhs, rhs + strLength(rhs));
-}
-
-
-template <class Char, template <class> class SP> inline
-std::strong_ordering operator<=>(const Char* lhs, const Zbase<Char, SP>& rhs)
-{
- return std::lexicographical_compare_three_way(lhs, lhs + strLength(lhs),
- rhs.begin(), rhs.end()); //respect embedded 0
-}
-
-
-template <class Char, template <class> class SP> inline
size_t Zbase<Char, SP>::length() const
{
return SP<Char>::length(rawStr_);
diff --git a/zen/string_tools.h b/zen/string_tools.h
index d3f35ce8..cafff3d5 100644
--- a/zen/string_tools.h
+++ b/zen/string_tools.h
@@ -41,7 +41,7 @@ template <class S, class T> bool endsWithAsciiNoCase(const S& str, const T& post
template <class S, class T> bool equalString (const S& lhs, const T& rhs);
template <class S, class T> bool equalAsciiNoCase(const S& lhs, const T& rhs);
-//template <class S, class T> std::strong_ordering compareString(const S& lhs, const T& rhs);
+template <class S, class T> std::strong_ordering compareString(const S& lhs, const T& rhs);
template <class S, class T> std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!)
//STL container predicates for std::map, std::unordered_set/map
@@ -269,10 +269,12 @@ bool equalAsciiNoCase(const S& lhs, const T& rhs)
}
-#if 0
-//support embedded 0, unlike strncmp/wcsncmp:
+namespace impl
+{
+//support embedded 0 (unlike strncmp/wcsncmp) + compare unsigned[!] char
inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; }
inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; }
+}
template <class S, class T> inline
std::strong_ordering compareString(const S& lhs, const T& rhs)
@@ -280,13 +282,12 @@ std::strong_ordering compareString(const S& lhs, const T& rhs)
const size_t lhsLen = strLength(lhs);
const size_t rhsLen = strLength(rhs);
- //length check *after* strcmpWithNulls(): we DO care about natural ordering: e.g. for "compareString(getUpperCase(lhs), getUpperCase(rhs))"
+ //length check *after* strcmpWithNulls(): we DO care about natural ordering
if (const std::strong_ordering cmp = impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), std::min(lhsLen, rhsLen));
cmp != std::strong_ordering::equal)
return cmp;
return lhsLen <=> rhsLen;
}
-#endif
template <class S, class T> inline
@@ -587,7 +588,7 @@ struct CopyStringToString
T copy(const S& src) const
{
static_assert(!std::is_same_v<std::decay_t<S>, std::decay_t<T>>);
- return T(strBegin(src), strLength(src));
+ return {strBegin(src), strLength(src)};
}
};
@@ -626,11 +627,10 @@ S printNumber(const T& format, const Num& number) //format a single number using
#endif
static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>);
- const int BUFFER_SIZE = 128;
- GetCharTypeT<S> buffer[BUFFER_SIZE]; //zero-initialize?
- const int charsWritten = impl::saferPrintf(buffer, BUFFER_SIZE, strBegin(format), number);
+ GetCharTypeT<S> buf[128]; //zero-initialize?
+ const int charsWritten = impl::saferPrintf(buf, std::size(buf), strBegin(format), number);
- return 0 < charsWritten && charsWritten < BUFFER_SIZE ? S(buffer, charsWritten) : S();
+ return 0 < charsWritten && charsWritten < std::ssize(buf) ? S(buf, charsWritten) : S();
}
@@ -944,7 +944,7 @@ Num hashString(const S& str)
struct StringHash
{
- using is_transparent = int; //allow heterogenous lookup!
+ using is_transparent = int; //enable heterogenous lookup!
template <class String>
size_t operator()(const String& str) const { return hashString<size_t>(str); }
@@ -953,7 +953,7 @@ struct StringHash
struct StringEqual
{
- using is_transparent = int; //allow heterogenous lookup!
+ using is_transparent = int; //enable heterogenous lookup!
template <class String1, class String2>
bool operator()(const String1& lhs, const String2& rhs) const { return equalString(lhs, rhs); }
@@ -963,7 +963,7 @@ struct StringEqual
struct LessAsciiNoCase
{
template <class String>
- bool operator()(const String& lhs, const String& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); }
+ bool operator()(const String& lhs, const String& rhs) const { return compareAsciiNoCase(lhs, rhs) < 0; }
};
diff --git a/zen/string_traits.h b/zen/string_traits.h
index 1a4f4740..31c8c12c 100644
--- a/zen/string_traits.h
+++ b/zen/string_traits.h
@@ -105,8 +105,8 @@ class StringTraits
public:
enum
{
- isStringClass = hasMemberType_value_type<CleanType> &&
- hasMember_c_str <CleanType> &&
+ isStringClass = hasMemberType_value_type<CleanType>&&
+ hasMember_c_str <CleanType>&&
hasMember_length <CleanType>
};
diff --git a/zen/sys_info.cpp b/zen/sys_info.cpp
index bc1bfe62..c57464bc 100644
--- a/zen/sys_info.cpp
+++ b/zen/sys_info.cpp
@@ -111,16 +111,20 @@ ComputerModel zen::getComputerModel() //throw FileError
{
auto tryGetInfo = [](const Zstring& filePath)
{
- if (!fileAvailable(filePath))
- return std::wstring();
try
{
const std::string stream = getFileContent(filePath, nullptr /*notifyUnbufferedIO*/); //throw FileError
return utfTo<std::wstring>(trimCpy(stream));
}
- catch (const FileError& e) { throw SysError(replaceCpy(e.toString(), L"\n\n", L'\n')); } //errors should be further enriched by context info => SysError
+ catch (FileError&)
+ {
+ if (!itemStillExists(filePath)) //throw FileError
+ return std::wstring();
+
+ throw;
+ }
};
- cm.model = tryGetInfo("/sys/devices/virtual/dmi/id/product_name"); //throw SysError
+ cm.model = tryGetInfo("/sys/devices/virtual/dmi/id/product_name"); //throw FileError
cm.vendor = tryGetInfo("/sys/devices/virtual/dmi/id/sys_vendor"); //
//clean up:
diff --git a/zen/thread.h b/zen/thread.h
index 42fba281..abdc6da0 100644
--- a/zen/thread.h
+++ b/zen/thread.h
@@ -445,7 +445,7 @@ private:
activeCondition_ = cv;
}
- std::atomic<bool> stopRequested_{false}; //std:atomic is uninitialized by default!!!
+ std::atomic<bool> stopRequested_{false}; //std::atomic is uninitialized by default!!!
//"The default constructor is trivial: no initialization takes place other than zero initialization of static and thread-local objects."
std::condition_variable* activeCondition_ = nullptr;
diff --git a/zen/time.h b/zen/time.h
index c2c10fd5..376765be 100644
--- a/zen/time.h
+++ b/zen/time.h
@@ -83,30 +83,32 @@ std::tm toClibTimeComponents(const TimeComp& tc)
0 <= tc.minute && tc.minute <= 59 &&
0 <= tc.second && tc.second <= 61);
- std::tm ctc = {};
- ctc.tm_year = tc.year - 1900; //years since 1900
- ctc.tm_mon = tc.month - 1; //0-11
- ctc.tm_mday = tc.day; //1-31
- ctc.tm_hour = tc.hour; //0-23
- ctc.tm_min = tc.minute; //0-59
- ctc.tm_sec = tc.second; //0-60 (including leap second)
- ctc.tm_isdst = -1; //> 0 if DST is active, == 0 if DST is not active, < 0 if the information is not available
- //ctc.tm_wday
- //ctc.tm_yday
- return ctc;
+ return
+ {
+ .tm_sec = tc.second, //0-60 (including leap second)
+ .tm_min = tc.minute, //0-59
+ .tm_hour = tc.hour, //0-23
+ .tm_mday = tc.day, //1-31
+ .tm_mon = tc.month - 1, //0-11
+ .tm_year = tc.year - 1900, //years since 1900
+ .tm_isdst = -1, //> 0 if DST is active, == 0 if DST is not active, < 0 if the information is not available
+ //.tm_wday
+ //.tm_yday
+ };
}
inline
TimeComp toZenTimeComponents(const std::tm& ctc)
{
- TimeComp tc;
- tc.year = ctc.tm_year + 1900;
- tc.month = ctc.tm_mon + 1;
- tc.day = ctc.tm_mday;
- tc.hour = ctc.tm_hour;
- tc.minute = ctc.tm_min;
- tc.second = ctc.tm_sec;
- return tc;
+ return
+ {
+ .year = ctc.tm_year + 1900,
+ .month = ctc.tm_mon + 1,
+ .day = ctc.tm_mday,
+ .hour = ctc.tm_hour,
+ .minute = ctc.tm_min,
+ .second = ctc.tm_sec,
+ };
}
@@ -235,12 +237,12 @@ std::pair<time_t, bool /*success*/> localToTimeT(const TimeComp& tc) //convert l
const int cycles400 = numeric::intDivFloor(ctc.tm_year + 1900 - 1971/*[!]*/, 400); //see utcToTimeT()
//1971: ensures resulting time_t >= 0 after time zone, DST adaption, or std::mktime will fail on Windows!
- ctc.tm_year -= 400 * cycles400;
+ ctc.tm_year -= 400 * cycles400;
const time_t locTime = std::mktime(&ctc);
if (locTime == -1)
return {};
-
+
assert(locTime > 0);
return {locTime + secsPer400Years * cycles400, true};
}
diff --git a/zen/utf.h b/zen/utf.h
index 9c9cf7d1..ca231602 100644
--- a/zen/utf.h
+++ b/zen/utf.h
@@ -7,8 +7,6 @@
#ifndef UTF_H_01832479146991573473545
#define UTF_H_01832479146991573473545
-//#include <cstdint>
-//#include <iterator>
#include "string_tools.h" //copyStringTo
@@ -45,8 +43,8 @@ using CodePoint = uint32_t;
using Char16 = uint16_t;
using Char8 = uint8_t;
-const CodePoint LEAD_SURROGATE = 0xd800;
-const CodePoint TRAIL_SURROGATE = 0xdc00; //== LEAD_SURROGATE_MAX + 1
+const CodePoint LEAD_SURROGATE = 0xd800; //1101 1000 0000 0000 LEAD_SURROGATE_MAX = TRAIL_SURROGATE - 1
+const CodePoint TRAIL_SURROGATE = 0xdc00; //1101 1100 0000 0000
const CodePoint TRAIL_SURROGATE_MAX = 0xdfff;
const CodePoint REPLACEMENT_CHAR = 0xfffd;
@@ -62,31 +60,17 @@ void codePointToUtf16(CodePoint cp, Function writeOutput) //"writeOutput" is a u
if (cp < LEAD_SURROGATE)
writeOutput(static_cast<Char16>(cp));
else if (cp <= TRAIL_SURROGATE_MAX) //invalid code point
- codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
- else if (cp < 0x10000)
+ writeOutput(static_cast<Char16>(REPLACEMENT_CHAR));
+ else if (cp <= 0xffff)
writeOutput(static_cast<Char16>(cp));
else if (cp <= CODE_POINT_MAX)
{
cp -= 0x10000;
writeOutput(static_cast<Char16>( LEAD_SURROGATE + (cp >> 10)));
- writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0x3ff)));
+ writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0b11'1111'1111)));
}
else //invalid code point
- codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
-}
-
-
-inline
-size_t getUtf16Len(Char16 ch) //ch must be first code unit! returns 0 on error!
-{
- if (ch < LEAD_SURROGATE)
- return 1;
- else if (ch < TRAIL_SURROGATE)
- return 2;
- else if (ch <= TRAIL_SURROGATE_MAX)
- return 0; //unexpected trail surrogate!
- else
- return 1;
+ writeOutput(static_cast<Char16>(REPLACEMENT_CHAR));
}
@@ -102,17 +86,14 @@ public:
const Char16 ch = *it_++;
CodePoint cp = ch;
- switch (getUtf16Len(ch))
- {
- case 0: //invalid utf16 character
- cp = REPLACEMENT_CHAR;
- break;
- case 1:
- break;
- case 2:
- decodeTrail(cp);
- break;
- }
+
+ if (ch < LEAD_SURROGATE || ch > TRAIL_SURROGATE_MAX) //single Char16, no surrogates
+ ;
+ else if (ch < TRAIL_SURROGATE) //two Char16: lead and trail surrogates
+ decodeTrail(cp); //no range check needed: cp is inside [U+010000, U+10FFFF] by construction
+ else //unexpected trail surrogate
+ cp = REPLACEMENT_CHAR;
+
return cp;
}
@@ -141,46 +122,37 @@ private:
template <class Function> inline
void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char8
{
- //https://en.wikipedia.org/wiki/UTF-8
- //assert(cp < LEAD_SURROGATE || TRAIL_SURROGATE_MAX < cp); //code points [0xd800, 0xdfff] are reserved for UTF-16 and *should* not be encoded in UTF-8
+ /* https://en.wikipedia.org/wiki/UTF-8
+ "high and low surrogate halves used by UTF-16 (U+D800 through U+DFFF) and
+ code points not encodable by UTF-16 (those after U+10FFFF) [...] must be treated as an invalid byte sequence" */
- if (cp < 0x80)
+ if (cp <= 0b111'1111)
writeOutput(static_cast<Char8>(cp));
- else if (cp < 0x800)
+ else if (cp <= 0b0111'1111'1111)
{
- writeOutput(static_cast<Char8>((cp >> 6 ) | 0xc0));
- writeOutput(static_cast<Char8>((cp & 0x3f) | 0x80));
+ writeOutput(static_cast<Char8>((cp >> 6) | 0b1100'0000)); //110x xxxx
+ writeOutput(static_cast<Char8>((cp & 0b11'1111) | 0b1000'0000)); //10xx xxxx
}
- else if (cp < 0x10000)
+ else if (cp <= 0b1111'1111'1111'1111)
{
- writeOutput(static_cast<Char8>( (cp >> 12 ) | 0xe0));
- writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80));
- writeOutput(static_cast<Char8>( (cp & 0x3f) | 0x80));
+ if (LEAD_SURROGATE <= cp && cp <= TRAIL_SURROGATE_MAX) //[0xd800, 0xdfff]
+ codePointToUtf8(REPLACEMENT_CHAR, writeOutput);
+ else
+ {
+ writeOutput(static_cast<Char8>( (cp >> 12) | 0b1110'0000)); //1110 xxxx
+ writeOutput(static_cast<Char8>(((cp >> 6) & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+ writeOutput(static_cast<Char8>( (cp & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+ }
}
else if (cp <= CODE_POINT_MAX)
{
- writeOutput(static_cast<Char8>( (cp >> 18 ) | 0xf0));
- writeOutput(static_cast<Char8>(((cp >> 12) & 0x3f) | 0x80));
- writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80));
- writeOutput(static_cast<Char8>( (cp & 0x3f) | 0x80));
+ writeOutput(static_cast<Char8>( (cp >> 18) | 0b1111'0000)); //1111 0xxx
+ writeOutput(static_cast<Char8>(((cp >> 12) & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+ writeOutput(static_cast<Char8>(((cp >> 6) & 0b11'1111) | 0b1000'0000)); //10xx xxxx
+ writeOutput(static_cast<Char8>( (cp & 0b11'1111) | 0b1000'0000)); //10xx xxxx
}
else //invalid code point
- codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte utf8
-}
-
-
-inline
-size_t getUtf8Len(Char8 ch) //ch must be first code unit! returns 0 on error!
-{
- if (ch < 0x80)
- return 1;
- if (ch >> 5 == 0x6)
- return 2;
- if (ch >> 4 == 0xe)
- return 3;
- if (ch >> 3 == 0x1e)
- return 4;
- return 0; //invalid begin of UTF8 encoding
+ codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte UTF8
}
@@ -196,30 +168,34 @@ public:
const Char8 ch = *it_++;
CodePoint cp = ch;
- switch (getUtf8Len(ch))
+
+ if (ch < 0x80) //1 byte
+ ;
+ else if (ch >> 5 == 0b110) //2 bytes
{
- case 0: //invalid utf8 character
- cp = REPLACEMENT_CHAR;
- break;
- case 1:
- break;
- case 2:
- cp &= 0x1f;
- decodeTrail(cp);
- break;
- case 3:
- cp &= 0xf;
- if (decodeTrail(cp))
- decodeTrail(cp);
- break;
- case 4:
- cp &= 0x7;
- if (decodeTrail(cp))
- if (decodeTrail(cp))
- decodeTrail(cp);
- if (cp > CODE_POINT_MAX) cp = REPLACEMENT_CHAR;
- break;
+ cp &= 0b1'1111;
+ if (decodeTrail(cp))
+ if (cp <= 0b111'1111) //overlong encoding: "correct encoding of a code point uses only the minimum number of bytes required"
+ cp = REPLACEMENT_CHAR;
}
+ else if (ch >> 4 == 0b1110) //3 bytes
+ {
+ cp &= 0b1111;
+ if (decodeTrail(cp) && decodeTrail(cp))
+ if (cp <= 0b0111'1111'1111 ||
+ (LEAD_SURROGATE <= cp && cp <= TRAIL_SURROGATE_MAX)) //[0xd800, 0xdfff] are invalid code points
+ cp = REPLACEMENT_CHAR;
+ }
+ else if (ch >> 3 == 0b11110) //4 bytes
+ {
+ cp &= 0b111;
+ if (decodeTrail(cp) && decodeTrail(cp) && decodeTrail(cp))
+ if (cp <= 0b1111'1111'1111'1111 || cp > CODE_POINT_MAX)
+ cp = REPLACEMENT_CHAR;
+ }
+ else //invalid begin of UTF8 encoding
+ cp = REPLACEMENT_CHAR;
+
return cp;
}
@@ -229,9 +205,9 @@ private:
if (it_ != last_) //trail surrogate expected!
{
const Char8 ch = *it_;
- if (ch >> 6 == 0x2) //trail surrogate expected!
+ if (ch >> 6 == 0b10) //UTF-8 continuation byte (10xx xxxx) expected!
{
- cp = (cp << 6) + (ch & 0x3f);
+ cp = (cp << 6) + (ch & 0b11'1111);
++it_;
return true;
}
@@ -337,7 +313,9 @@ UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t u
assert(uniPosFirst <= uniPosLast && uniPosLast <= unicodeLength(str));
using namespace impl;
using CharType = GetCharTypeT<UtfString>;
+
UtfString output;
+ assert(uniPosFirst <= uniPosLast);
if (uniPosFirst >= uniPosLast) //optimize for empty range
return output;
@@ -357,6 +335,10 @@ UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t u
namespace impl
{
template <class TargetString, class SourceString> inline
+TargetString utfTo(const SourceString& str, std::true_type) { return copyStringTo<TargetString>(str); }
+
+
+template <class TargetString, class SourceString> inline
TargetString utfTo(const SourceString& str, std::false_type)
{
using CharSrc = GetCharTypeT<SourceString>;
@@ -371,10 +353,6 @@ TargetString utfTo(const SourceString& str, std::false_type)
return output;
}
-
-
-template <class TargetString, class SourceString> inline
-TargetString utfTo(const SourceString& str, std::true_type) { return copyStringTo<TargetString>(str); }
}
diff --git a/zen/zstring.cpp b/zen/zstring.cpp
index 76c0a81f..1e29e461 100644
--- a/zen/zstring.cpp
+++ b/zen/zstring.cpp
@@ -11,46 +11,44 @@
using namespace zen;
-Zstring getUnicodeNormalForm(const Zstring& str)
+Zstring getUnicodeNormalFormNonAscii(const Zstring& str)
{
- //fast pre-check:
- if (isAsciiString(str)) //perf: in the range of 3.5ns
- return str;
- static_assert(std::is_same_v<decltype(str), const Zbase<Zchar>&>, "god bless our ref-counting! => save output string memory consumption!");
-
//Example: const char* decomposed = "\x6f\xcc\x81";
// const char* precomposed = "\xc3\xb3";
+ assert(!isAsciiString(str));
+ assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls!
+
try
{
gchar* outStr = ::g_utf8_normalize(str.c_str(), str.length(), G_NORMALIZE_DEFAULT_COMPOSE);
if (!outStr)
- throw SysError(formatSystemError("g_utf8_normalize(" + utfTo<std::string>(str) + ')', L"", L"Conversion failed."));
+ throw SysError(formatSystemError("g_utf8_normalize", L"", L"Conversion failed."));
ZEN_ON_SCOPE_EXIT(::g_free(outStr));
return outStr;
}
- catch ([[maybe_unused]] const SysError& e)
+ catch (const SysError& e)
{
- assert(false);
- return str;
+ throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error normalizing string:" +
+ '\n' + utfTo<std::string>(str) + "\n\n" + utfTo<std::string>(e.toString()));
}
}
-Zstring getUpperCase(const Zstring& str)
+Zstring getUnicodeNormalForm(const Zstring& str)
{
- assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls!
-
//fast pre-check:
if (isAsciiString(str)) //perf: in the range of 3.5ns
- {
- Zstring output = str;
- for (Zchar& c : output)
- c = asciiToUpper(c);
- return output;
- }
+ return str;
+ static_assert(std::is_same_v<decltype(str), const Zbase<Zchar>&>, "god bless our ref-counting! => save output string memory consumption!");
- Zstring strNorm = getUnicodeNormalForm(str);
+ return getUnicodeNormalFormNonAscii(str);
+}
+
+
+Zstring getUpperCaseNonAscii(const Zstring& str)
+{
+ Zstring strNorm = getUnicodeNormalFormNonAscii(str);
try
{
static_assert(sizeof(impl::CodePoint) == sizeof(gunichar));
@@ -64,11 +62,26 @@ Zstring getUpperCase(const Zstring& str)
return output;
}
- catch (SysError&)
+ catch (const SysError& e)
{
- assert(false);
- return str;
+ throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error converting string to upper case:" +
+ '\n' + utfTo<std::string>(str) + "\n\n" + utfTo<std::string>(e.toString()));
+ }
+}
+
+
+Zstring getUpperCase(const Zstring& str) //returns an upper-case copy of str: pure-ASCII input is converted inline, anything else is forwarded to getUpperCaseNonAscii()
+{
+ if (isAsciiString(str)) //fast path: in the range of 3.5ns
+ {
+ Zstring output = str; //convert a copy: str is const
+ for (Zchar& c : output) //identical to LCMapStringEx(), g_unichar_toupper(), CFStringUppercase() [verified!]
+ c = asciiToUpper(c); //
+ return output;
}
+ //else: slow path --------------------------------------
+
+ return getUpperCaseNonAscii(str); //only reached when isAsciiString(str) is false
}
@@ -91,10 +104,10 @@ std::weak_ordering compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char*
static_assert(sizeof(gunichar) == sizeof(impl::CodePoint));
+ //ordering: "to lower" converts to higher code points than "to upper"
const gunichar charL = ::g_unichar_toupper(*cpL); //note: tolower can be ambiguous, so don't use:
const gunichar charR = ::g_unichar_toupper(*cpR); //e.g. "Σ" (upper case) can be lower-case "ς" in the end of the word or "σ" in the middle.
if (charL != charR)
- //ordering: "to lower" converts to higher code points than "to upper"
return makeUnsigned(charL) <=> makeUnsigned(charR); //unsigned char-comparison is the convention!
}
}
@@ -107,78 +120,111 @@ std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs)
Windows: CompareString() already ignores NFD/NFC differences: nice...
Linux: g_unichar_toupper() can't ignore differences
macOS: CFStringCompare() considers differences */
-
- const Zstring& lhsNorm = getUnicodeNormalForm(lhs);
- const Zstring& rhsNorm = getUnicodeNormalForm(rhs);
-
- const char* strL = lhsNorm.c_str();
- const char* strR = rhsNorm.c_str();
-
- const char* const strEndL = strL + lhsNorm.size();
- const char* const strEndR = strR + rhsNorm.size();
- /* - compare strings after conceptually creating blocks of whitespace/numbers/text
- - implement strict weak ordering!
- - don't follow broken "strnatcasecmp": https://github.com/php/php-src/blob/master/ext/standard/strnatcmp.c
- 1. incorrect non-ASCII CI-comparison
- 2. incorrect bounds checks
- 3. incorrect trimming of *all* whitespace
- 4. arbitrary handling of leading 0 only at string begin
- 5. incorrect handling of whitespace following a number
- 6. code is a mess */
- for (;;)
+ try
{
- if (strL == strEndL || strR == strEndR)
- return (strL != strEndL) <=> (strR != strEndR); //"nothing" before "something"
- //note: "something" never would have been condensed to "nothing" further below => can finish evaluation here
-
- const bool wsL = isWhiteSpace(*strL);
- const bool wsR = isWhiteSpace(*strR);
- if (wsL != wsR)
- return !wsL <=> !wsR; //whitespace before non-ws!
- if (wsL)
- {
- ++strL, ++strR;
- while (strL != strEndL && isWhiteSpace(*strL)) ++strL;
- while (strR != strEndR && isWhiteSpace(*strR)) ++strR;
- continue;
- }
-
- const bool digitL = isDigit(*strL);
- const bool digitR = isDigit(*strR);
- if (digitL != digitR)
- return !digitL <=> !digitR; //numbers before chars!
- if (digitL)
+ const Zstring& lhsNorm = getUnicodeNormalForm(lhs);
+ const Zstring& rhsNorm = getUnicodeNormalForm(rhs);
+
+ const char* strL = lhsNorm.c_str();
+ const char* strR = rhsNorm.c_str();
+
+ const char* const strEndL = strL + lhsNorm.size();
+ const char* const strEndR = strR + rhsNorm.size();
+ /* - compare strings after conceptually creating blocks of whitespace/numbers/text
+ - implement strict weak ordering!
+ - don't follow broken "strnatcasecmp": https://github.com/php/php-src/blob/master/ext/standard/strnatcmp.c
+ 1. incorrect non-ASCII CI-comparison
+ 2. incorrect bounds checks
+ 3. incorrect trimming of *all* whitespace
+ 4. arbitrary handling of leading 0 only at string begin
+ 5. incorrect handling of whitespace following a number
+ 6. code is a mess */
+ for (;;)
{
- while (strL != strEndL && *strL == '0') ++strL;
- while (strR != strEndR && *strR == '0') ++strR;
+ if (strL == strEndL || strR == strEndR)
+ return (strL != strEndL) <=> (strR != strEndR); //"nothing" before "something"
+ //note: "something" never would have been condensed to "nothing" further below => can finish evaluation here
+
+ const bool wsL = isWhiteSpace(*strL);
+ const bool wsR = isWhiteSpace(*strR);
+ if (wsL != wsR)
+ return !wsL <=> !wsR; //whitespace before non-ws!
+ if (wsL)
+ {
+ ++strL, ++strR;
+ while (strL != strEndL && isWhiteSpace(*strL)) ++strL;
+ while (strR != strEndR && isWhiteSpace(*strR)) ++strR;
+ continue;
+ }
- int rv = 0;
- for (;; ++strL, ++strR)
+ const bool digitL = isDigit(*strL);
+ const bool digitR = isDigit(*strR);
+ if (digitL != digitR)
+ return !digitL <=> !digitR; //numbers before chars!
+ if (digitL)
{
- const bool endL = strL == strEndL || !isDigit(*strL);
- const bool endR = strR == strEndR || !isDigit(*strR);
- if (endL != endR)
- return !endL <=> !endR; //more digits means bigger number
- if (endL)
- break; //same number of digits
-
- if (rv == 0 && *strL != *strR)
- rv = *strL - *strR; //found first digit difference comparing from left
+ while (strL != strEndL && *strL == '0') ++strL;
+ while (strR != strEndR && *strR == '0') ++strR;
+
+ int rv = 0;
+ for (;; ++strL, ++strR)
+ {
+ const bool endL = strL == strEndL || !isDigit(*strL);
+ const bool endR = strR == strEndR || !isDigit(*strR);
+ if (endL != endR)
+ return !endL <=> !endR; //more digits means bigger number
+ if (endL)
+ break; //same number of digits
+
+ if (rv == 0 && *strL != *strR)
+ rv = *strL - *strR; //found first digit difference comparing from left
+ }
+ if (rv != 0)
+ return rv <=> 0;
+ continue;
}
- if (rv != 0)
- return rv <=> 0;
- continue;
+
+ //compare full chunks of text: consider unicode encoding!
+ const char* textBeginL = strL++;
+ const char* textBeginR = strR++; //current char is neither white space nor digit at this point!
+ while (strL != strEndL && !isWhiteSpace(*strL) && !isDigit(*strL)) ++strL;
+ while (strR != strEndR && !isWhiteSpace(*strR) && !isDigit(*strR)) ++strR;
+
+ if (const std::weak_ordering cmp = compareNoCaseUtf8(textBeginL, strL - textBeginL, textBeginR, strR - textBeginR);
+ cmp != std::weak_ordering::equivalent)
+ return cmp;
}
- //compare full junks of text: consider unicode encoding!
- const char* textBeginL = strL++;
- const char* textBeginR = strR++; //current char is neither white space nor digit at this point!
- while (strL != strEndL && !isWhiteSpace(*strL) && !isDigit(*strL)) ++strL;
- while (strR != strEndR && !isWhiteSpace(*strR) && !isDigit(*strR)) ++strR;
+ }
+ catch (const SysError& e)
+ {
+ throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error comparing strings:" + '\n' +
+ utfTo<std::string>(lhs) + '\n' + utfTo<std::string>(rhs) + "\n\n" + utfTo<std::string>(e.toString()));
+ }
+}
+
- if (const std::weak_ordering cmp = compareNoCaseUtf8(textBeginL, strL - textBeginL, textBeginR, strR - textBeginR);
- cmp != std::weak_ordering::equivalent)
- return cmp;
+std::weak_ordering compareNoCase(const Zstring& lhs, const Zstring& rhs) //case-insensitive three-way comparison: the leading ASCII part is compared in place, the remainder is delegated to compareNoCaseUtf8()
+{
+ //fast path: no need for extra memory allocations => ~ 6x speedup
+ const size_t minSize = std::min(lhs.size(), rhs.size()); //indexes below minSize are valid in both strings
+
+ size_t i = 0; //declared outside the loop: slowPath resumes the comparison at this index
+ for (; i < minSize; ++i)
+ {
+ const Zchar l = lhs[i];
+ const Zchar r = rhs[i];
+ if (!isAsciiChar(l) || !isAsciiChar(r)) //first non-ASCII char on either side ends the fast path
+ goto slowPath; //=> let's NOT make assumptions how getUpperCase() compares "ASCII <=> non-ASCII"
+
+ const Zchar lUp = asciiToUpper(l); //
+ const Zchar rUp = asciiToUpper(r); //no surprises: emulate getUpperCase() [verified!]
+ if (lUp != rUp) //
+ return lUp <=> rUp; //
}
+ return lhs.size() <=> rhs.size(); //common prefix is pure ASCII and equal ignoring case => the shorter string orders first
+slowPath: //--------------------------------------
+ return compareNoCaseUtf8(lhs.c_str() + i, lhs.size() - i, //chars before index i were ASCII and compared equal => skip them
+ rhs.c_str() + i, rhs.size() - i);
}
diff --git a/zen/zstring.h b/zen/zstring.h
index bc7cfb06..70b9f448 100644
--- a/zen/zstring.h
+++ b/zen/zstring.h
@@ -39,7 +39,7 @@ Zstring getUnicodeNormalForm(const Zstring& str);
Zstring getUpperCase(const Zstring& str);
//------------------------------------------------------------------------------------------
-struct ZstringNorm //use as STL container key: avoid needless Unicode normalizations during std::map<>::find()
+struct ZstringNorm //use as STL container key: better than repeated Unicode normalizations during std::map<>::find()
{
/*explicit*/ ZstringNorm(const Zstring& str) : normStr(getUnicodeNormalForm(str)) {}
Zstring normStr;
@@ -51,7 +51,7 @@ template<> struct std::hash<ZstringNorm> { size_t operator()(const ZstringNorm&
//struct LessUnicodeNormal { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return getUnicodeNormalForm(lhs) < getUnicodeNormalForm(rhs); } };
//------------------------------------------------------------------------------------------
-struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find()
+struct ZstringNoCase //use as STL container key: better than repeated upper-case conversions during std::map<>::find()
{
/*explicit*/ ZstringNoCase(const Zstring& str) : upperCase(getUpperCase(str)) {}
Zstring upperCase;
@@ -60,12 +60,18 @@ struct ZstringNoCase //use as STL container key: avoid needless upper-case conve
};
template<> struct std::hash<ZstringNoCase> { size_t operator()(const ZstringNoCase& str) const { return std::hash<Zstring>()(str.upperCase); } };
-inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return getUpperCase(lhs) == getUpperCase(rhs); }
+
+std::weak_ordering compareNoCase(const Zstring& lhs, const Zstring& rhs);
+
+inline
+bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return compareNoCase(lhs, rhs) == std::weak_ordering::equivalent; }
+//note: the "lhs.size() != rhs.size()" short-cut would require two isAsciiString() checks
+//=> generally SLOWER than starting comparison directly during first pass and breaking on first difference!
//------------------------------------------------------------------------------------------
std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs);
-struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNatural(lhs, rhs)); } };
+struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNatural(lhs, rhs) < 0; } };
//------------------------------------------------------------------------------------------
@@ -73,16 +79,18 @@ struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs)
const wchar_t EN_DASH = L'\u2013';
const wchar_t EM_DASH = L'\u2014';
const wchar_t* const SPACED_DASH = L" \u2014 "; //using 'EM DASH'
-const wchar_t LTR_MARK = L'\u200E'; //UTF-8: E2 80 8E
const wchar_t* const ELLIPSIS = L"\u2026"; //"..."
const wchar_t MULT_SIGN = L'\u00D7'; //fancy "x"
//const wchar_t NOBREAK_SPACE = L'\u00A0';
const wchar_t ZERO_WIDTH_SPACE = L'\u200B';
+const wchar_t LTR_MARK = L'\u200E'; //UTF-8: E2 80 8E
const wchar_t RTL_MARK = L'\u200F'; //UTF-8: E2 80 8F https://www.w3.org/International/questions/qa-bidi-unicode-controls
-const wchar_t BIDI_DIR_ISOLATE_RTL = L'\u2067'; //UTF-8: E2 81 A7 => not working on Win 10
-const wchar_t BIDI_POP_DIR_ISOLATE = L'\u2069'; //UTF-8: E2 81 A9 => not working on Win 10
-const wchar_t BIDI_DIR_EMBEDDING_RTL = L'\u202B'; //UTF-8: E2 80 AB => not working on Win 10
-const wchar_t BIDI_POP_DIR_FORMATTING = L'\u202C'; //UTF-8: E2 80 AC => not working on Win 10
+//const wchar_t BIDI_DIR_ISOLATE_RTL = L'\u2067'; //=> not working on Win 10
+//const wchar_t BIDI_POP_DIR_ISOLATE = L'\u2069'; //=> not working on Win 10
+//const wchar_t BIDI_DIR_EMBEDDING_RTL = L'\u202B'; //=> not working on Win 10
+//const wchar_t BIDI_POP_DIR_FORMATTING = L'\u202C'; //=> not working on Win 10
+
+const wchar_t* const TAB_SPACE = L"    "; //4: the only sensible space count for tabs
#endif //ZSTRING_H_73425873425789
bgstack15