From 2c81be72eef5363736cf1892646c74a3311ee4c1 Mon Sep 17 00:00:00 2001 From: "B. Stack" Date: Sun, 22 May 2022 17:03:17 -0400 Subject: add upstream 11.21 --- zen/base64.h | 13 +-- zen/basic_math.h | 19 ++--- zen/build_info.h | 30 +++---- zen/crc.h | 7 +- zen/dir_watcher.cpp | 4 +- zen/dir_watcher.h | 2 +- zen/error_log.h | 12 ++- zen/file_access.cpp | 2 - zen/file_access.h | 2 +- zen/file_error.h | 1 - zen/file_io.h | 3 +- zen/file_path.cpp | 107 +++++++++++++++++++++--- zen/file_path.h | 22 +++++ zen/file_traverser.cpp | 2 +- zen/file_traverser.h | 4 +- zen/format_unit.cpp | 1 - zen/format_unit.h | 5 +- zen/http.cpp | 4 +- zen/http.h | 5 +- zen/i18n.h | 4 +- zen/json.h | 98 +++++++++++----------- zen/open_ssl.cpp | 12 ++- zen/open_ssl.h | 3 +- zen/process_exec.cpp | 4 +- zen/resolve_path.cpp | 105 ++++++++++------------- zen/resolve_path.h | 6 +- zen/scope_guard.h | 4 +- zen/serialize.h | 10 +-- zen/shutdown.cpp | 6 +- zen/socket.h | 1 - zen/stl_tools.h | 94 ++++++++++++--------- zen/string_base.h | 18 ++-- zen/string_tools.h | 221 ++++++++++++++++++++++++++++++++++++------------- zen/string_traits.h | 24 +++--- zen/symlink_target.h | 2 - zen/sys_error.h | 4 +- zen/thread.h | 12 ++- zen/type_traits.h | 62 +++++++------- zen/utf.h | 4 +- zen/zlib_wrap.cpp | 14 ++-- zen/zstring.cpp | 111 ++++++------------------- zen/zstring.h | 110 +++++------------------- 42 files changed, 627 insertions(+), 547 deletions(-) (limited to 'zen') diff --git a/zen/base64.h b/zen/base64.h index f03a1433..48cf2230 100644 --- a/zen/base64.h +++ b/zen/base64.h @@ -7,6 +7,7 @@ #ifndef BASE64_H_08473021856321840873021487213453214 #define BASE64_H_08473021856321840873021487213453214 +#include #include #include "type_traits.h" @@ -55,7 +56,7 @@ constexpr signed char DECODING_MIME[] = -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1 }; -const unsigned char INDEX_PAD = 64; //"=" +const unsigned char INDEX_PAD = 64; //index of "=" } @@ -64,8 +65,8 @@ OutputIterator encodeBase64(InputIterator first, InputIterator last, OutputItera { using namespace impl; static_assert(sizeof(typename std::iterator_traits::value_type) == 1); - static_assert(arraySize(ENCODING_MIME) == 64 + 1 + 1); - static_assert(arrayAccumulate(ENCODING_MIME) + INDEX_PAD == 5602); + static_assert(std::size(ENCODING_MIME) == 64 + 1 + 1); + static_assert(arrayHash(ENCODING_MIME) == 1616767125); while (first != last) { @@ -101,8 +102,8 @@ OutputIterator decodeBase64(InputIterator first, InputIterator last, OutputItera { using namespace impl; static_assert(sizeof(typename std::iterator_traits::value_type) == 1); - static_assert(arraySize(DECODING_MIME) == 128); - static_assert(arrayAccumulate(DECODING_MIME) + INDEX_PAD == 2081); + static_assert(std::size(DECODING_MIME) == 128); + static_assert(arrayHash(DECODING_MIME)== 1169145114); const unsigned char INDEX_END = INDEX_PAD + 1; @@ -114,7 +115,7 @@ OutputIterator decodeBase64(InputIterator first, InputIterator last, OutputItera return INDEX_END; const unsigned char ch = static_cast(*first++); - if (ch < arraySize(DECODING_MIME)) //we're in lower ASCII table half + if (ch < std::size(DECODING_MIME)) //we're in lower ASCII table half { const int index = DECODING_MIME[ch]; if (0 <= index && index <= static_cast(INDEX_PAD)) //skip all unknown characters (including carriage return, line-break, tab) diff --git a/zen/basic_math.h b/zen/basic_math.h index 944a0f53..c8a06b78 100644 --- a/zen/basic_math.h +++ b/zen/basic_math.h @@ -8,7 +8,6 @@ #define BASIC_MATH_H_3472639843265675 #include -#include #include #include #include "type_traits.h" @@ -152,10 +151,10 @@ template inline auto intDivRound(N num, D den) { using namespace zen; - static_assert(IsIntegerV&& IsIntegerV); - static_assert(IsSignedIntV == IsSignedIntV); //until further + static_assert(isInteger&& isInteger); + static_assert(isSignedInt == isSignedInt); //until further assert(den != 0); - if constexpr (IsSignedIntV) + if constexpr (isSignedInt) { if ((num < 0) != (den < 0)) return (num - den / 2) / den; @@ -168,10 +167,10 @@ template inline auto intDivCeil(N num, D den) { using namespace zen; - static_assert(IsIntegerV&& IsIntegerV); - static_assert(IsSignedIntV == IsSignedIntV); //until further + static_assert(isInteger&& isInteger); + static_assert(isSignedInt == isSignedInt); //until further assert(den != 0); - if constexpr (IsSignedIntV) + if constexpr (isSignedInt) { if ((num < 0) != (den < 0)) return num / den; @@ -187,10 +186,10 @@ template inline auto intDivFloor(N num, D den) { using namespace zen; - static_assert(IsIntegerV&& IsIntegerV); - static_assert(IsSignedIntV == IsSignedIntV); //until further + static_assert(isInteger&& isInteger); + static_assert(isSignedInt == isSignedInt); //until further assert(den != 0); - if constexpr (IsSignedIntV) + if constexpr (isSignedInt) { if ((num < 0) != (den < 0)) { diff --git a/zen/build_info.h b/zen/build_info.h index 5a1d1635..b06c1302 100644 --- a/zen/build_info.h +++ b/zen/build_info.h @@ -8,25 +8,25 @@ #define BUILD_INFO_H_5928539285603428657 -#define ZEN_ARCH_32BIT 32 -#define ZEN_ARCH_64BIT 64 - #ifdef __LP64__ - #define ZEN_BUILD_ARCH ZEN_ARCH_64BIT - #else - #define ZEN_BUILD_ARCH ZEN_ARCH_32BIT - #endif +namespace zen +{ +enum class BuildArch +{ + bit32, + bit64, -static_assert(ZEN_BUILD_ARCH == sizeof(void*) * 8); +#ifdef __LP64__ + program = bit64 +#else + program = bit32 +#endif +}; +static_assert((BuildArch::program == BuildArch::bit32 ? 32 : 64) == sizeof(void*) * 8); -namespace zen -{ - #if ZEN_BUILD_ARCH == ZEN_ARCH_32BIT - const char cpuArchName[] = "i686"; - #else - const char cpuArchName[] = "x86-64"; - #endif + +constexpr const char* cpuArchName = BuildArch::program == BuildArch::bit32 ? "i686": "x86-64"; } diff --git a/zen/crc.h b/zen/crc.h index 1ff22999..c65ce2e7 100644 --- a/zen/crc.h +++ b/zen/crc.h @@ -52,7 +52,9 @@ uint16_t getCrc16(ByteIterator first, ByteIterator last) //http://www.sunshine2k 0x8801, 0x48c0, 0x4980, 0x8941, 0x4b00, 0x8bc1, 0x8a81, 0x4a40, 0x4e00, 0x8ec1, 0x8f81, 0x4f40, 0x8d01, 0x4dc0, 0x4c80, 0x8c41, 0x4400, 0x84c1, 0x8581, 0x4540, 0x8701, 0x47c0, 0x4680, 0x8641, 0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040 }; - static_assert(arraySize(crcTable) == 256 && arrayAccumulate(crcTable) == 8380544); + static_assert(std::size(crcTable) == 256); + static_assert(arrayHash(crcTable) == 728085957); + crc = (crc >> 8) ^ crcTable[(crc ^ b) & 0xFF]; }); return crc; @@ -96,7 +98,8 @@ uint32_t getCrc32(ByteIterator first, ByteIterator last) //https://en.wikipedia. 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; - static_assert(arraySize(crcTable) == 256 && arrayAccumulate(crcTable) == 549755813760); + static_assert(std::size(crcTable) == 256); + static_assert(arrayHash(crcTable) == 2988069445); crc = (crc >> 8) ^ crcTable[(crc ^ b) & 0xFF]; }); diff --git a/zen/dir_watcher.cpp b/zen/dir_watcher.cpp index 191ffd64..c48928a3 100644 --- a/zen/dir_watcher.cpp +++ b/zen/dir_watcher.cpp @@ -24,7 +24,7 @@ using namespace zen; struct DirWatcher::Impl { int notifDescr = 0; - std::map watchedPaths; //watch descriptor and (sub-)directory paths -> owned by "notifDescr" + std::unordered_map watchedPaths; //watch descriptor and (sub-)directory paths -> owned by "notifDescr" }; @@ -133,7 +133,7 @@ std::vector DirWatcher::fetchChanges(const std::functionsecond) + evt.name; + const Zstring itemPath = appendPath(it->second, evt.name); if ((evt.mask & IN_CREATE) || (evt.mask & IN_MOVED_TO)) diff --git a/zen/dir_watcher.h b/zen/dir_watcher.h index bf71fda9..a4a061e8 100644 --- a/zen/dir_watcher.h +++ b/zen/dir_watcher.h @@ -8,7 +8,7 @@ #define DIR_WATCHER_348577025748023458 #include -#include +//#include #include #include #include "file_error.h" diff --git a/zen/error_log.h b/zen/error_log.h index a24dfe5a..357232f3 100644 --- a/zen/error_log.h +++ b/zen/error_log.h @@ -11,7 +11,6 @@ #include #include "time.h" #include "i18n.h" -#include "utf.h" #include "zstring.h" @@ -37,7 +36,7 @@ std::string formatMessage(const LogEntry& entry); class ErrorLog { public: - void logMsg(const std::wstring& msg, MessageType type); + void logMsg(const std::wstring& msg, MessageType type, time_t time = std::time(nullptr)); struct Stats { @@ -66,9 +65,9 @@ private: //######################## implementation ########################## inline -void ErrorLog::logMsg(const std::wstring& msg, MessageType type) +void ErrorLog::logMsg(const std::wstring& msg, MessageType type, time_t time) { - entries_.push_back({std::time(nullptr), type, utfTo(msg)}); + entries_.push_back({time, type, utfTo(msg)}); } @@ -119,14 +118,13 @@ std::string formatMessage(const LogEntry& entry) const size_t prefixLen = unicodeLength(msgFmt); //consider Unicode! const Zstringc msg = trimCpy(entry.message); - static_assert(std::is_same_v, "don't worry about copying as long as we're using a ref-counted string!"); + static_assert(std::is_same_v, "no worries about copying as long as we're using a ref-counted string!"); for (auto it = msg.begin(); it != msg.end(); ) if (*it == '\n') { - msgFmt += '\n'; + msgFmt += *it++; msgFmt.append(prefixLen, ' '); - ++it; //skip duplicate newlines for (; it != msg.end() && *it == '\n'; ++it) ; diff --git a/zen/file_access.cpp b/zen/file_access.cpp index 2fbcf803..6a62f671 100644 --- a/zen/file_access.cpp +++ b/zen/file_access.cpp @@ -7,7 +7,6 @@ #include "file_access.h" #include #include -#include #include #include "file_traverser.h" #include "scope_guard.h" @@ -17,7 +16,6 @@ #include "guid.h" #include //statfs - //#include //lutimes #ifdef HAVE_SELINUX #include #endif diff --git a/zen/file_access.h b/zen/file_access.h index 691a8df9..17c47731 100644 --- a/zen/file_access.h +++ b/zen/file_access.h @@ -8,7 +8,7 @@ #define FILE_ACCESS_H_8017341345614857 #include -#include "zstring.h" +#include "file_path.h" #include "file_error.h" #include "serialize.h" //IoCallback #include diff --git a/zen/file_error.h b/zen/file_error.h index b9ce9419..168ea806 100644 --- a/zen/file_error.h +++ b/zen/file_error.h @@ -7,7 +7,6 @@ #ifndef FILE_ERROR_H_839567308565656789 #define FILE_ERROR_H_839567308565656789 -#include "zstring.h" #include "sys_error.h" //we'll need this later anyway! diff --git a/zen/file_io.h b/zen/file_io.h index 3d1dfee7..f1e4200d 100644 --- a/zen/file_io.h +++ b/zen/file_io.h @@ -7,9 +7,8 @@ #ifndef FILE_IO_H_89578342758342572345 #define FILE_IO_H_89578342758342572345 -#include "file_error.h" #include "file_access.h" -#include "serialize.h" +//#include "serialize.h" #include "crc.h" #include "guid.h" diff --git a/zen/file_path.cpp b/zen/file_path.cpp index d846e804..926b5c89 100644 --- a/zen/file_path.cpp +++ b/zen/file_path.cpp @@ -13,18 +13,18 @@ std::optional zen::parsePathComponents(const Zstring& itemPath) { auto doParse = [&](int sepCountVolumeRoot, bool rootWithSep) -> std::optional { - const Zstring itemPathFmt = appendSeparator(itemPath); //simplify analysis of root without separator, e.g. \\server-name\share + const Zstring itemPathPf = appendSeparator(itemPath); //simplify analysis of root without separator, e.g. \\server-name\share int sepCount = 0; - for (auto it = itemPathFmt.begin(); it != itemPathFmt.end(); ++it) + for (auto it = itemPathPf.begin(); it != itemPathPf.end(); ++it) if (*it == FILE_NAME_SEPARATOR) if (++sepCount == sepCountVolumeRoot) { - Zstring rootPath(itemPathFmt.begin(), rootWithSep ? it + 1 : it); + Zstring rootPath(itemPathPf.begin(), rootWithSep ? it + 1 : it); - Zstring relPath(it + 1, itemPathFmt.end()); + Zstring relPath(it + 1, itemPathPf.end()); trim(relPath, true, true, [](Zchar c) { return c == FILE_NAME_SEPARATOR; }); - return PathComponents({rootPath, relPath}); + return PathComponents{std::move(rootPath), std::move(relPath)}; } return {}; }; @@ -62,18 +62,103 @@ std::optional zen::parsePathComponents(const Zstring& itemPath) std::optional zen::getParentFolderPath(const Zstring& itemPath) { - if (const std::optional comp = parsePathComponents(itemPath)) + if (const std::optional pc = parsePathComponents(itemPath)) { - if (comp->relPath.empty()) + if (pc->relPath.empty()) return std::nullopt; - const Zstring parentRelPath = beforeLast(comp->relPath, FILE_NAME_SEPARATOR, IfNotFoundReturn::none); - if (parentRelPath.empty()) - return comp->rootPath; - return appendSeparator(comp->rootPath) + parentRelPath; + return appendPath(pc->rootPath, beforeLast(pc->relPath, FILE_NAME_SEPARATOR, IfNotFoundReturn::none)); } assert(false); return std::nullopt; } +Zstring zen::appendSeparator(Zstring path) //support rvalue references! +{ + if (!endsWith(path, FILE_NAME_SEPARATOR)) + path += FILE_NAME_SEPARATOR; + return path; //returning a by-value parameter => RVO if possible, r-value otherwise! +} + + +bool zen::isValidRelPath(const Zstring& relPath) +{ + //relPath is expected to use FILE_NAME_SEPARATOR! + if constexpr (FILE_NAME_SEPARATOR != Zstr('/' )) if (contains(relPath, Zstr('/' ))) return false; + if constexpr (FILE_NAME_SEPARATOR != Zstr('\\')) if (contains(relPath, Zstr('\\'))) return false; + + const Zchar doubleSep[] = {FILE_NAME_SEPARATOR, FILE_NAME_SEPARATOR, 0}; + return !startsWith(relPath, FILE_NAME_SEPARATOR)&& !endsWith(relPath, FILE_NAME_SEPARATOR)&& + !contains(relPath, doubleSep); +} + + +Zstring zen::appendPath(const Zstring& basePath, const Zstring& relPath) +{ + assert(isValidRelPath(relPath)); + if (relPath.empty()) + return basePath; //with or without path separator, e.g. C:\ or C:\folder + + if (basePath.empty()) //basePath might be a relative path, too! + return relPath; + + if (endsWith(basePath, FILE_NAME_SEPARATOR)) + return basePath + relPath; + + Zstring output = basePath; + output.reserve(basePath.size() + 1 + relPath.size()); //append all three strings using a single memory allocation + return std::move(output) + FILE_NAME_SEPARATOR + relPath; // +} + + +Zstring zen::getFileExtension(const Zstring& filePath) +{ + //const Zstring fileName = afterLast(filePath, FILE_NAME_SEPARATOR, IfNotFoundReturn::all); + //return afterLast(fileName, Zstr('.'), IfNotFoundReturn::none); + + auto it = zen::findLast(filePath.begin(), filePath.end(), FILE_NAME_SEPARATOR); + if (it == filePath.end()) + it = filePath.begin(); + else + ++it; + + auto it2 = zen::findLast(it, filePath.end(), Zstr('.')); + if (it2 != filePath.end()) + ++it2; + + return Zstring(it2, filePath.end()); +} + + +/* https://docs.microsoft.com/de-de/windows/desktop/Intl/handling-sorting-in-your-applications + + Perf test: compare strings 10 mio times; 64 bit build + ----------------------------------------------------- + string a = "Fjk84$%kgfj$%T\\\\Gffg\\gsdgf\\fgsx----------d-" + string b = "fjK84$%kgfj$%T\\\\gfFg\\gsdgf\\fgSy----------dfdf" + + Windows (UTF16 wchar_t) + 4 ns | wcscmp + 67 ns | CompareStringOrdinalFunc+ + bIgnoreCase + 314 ns | LCMapString + wmemcmp + + OS X (UTF8 char) + 6 ns | strcmp + 98 ns | strcasecmp + 120 ns | strncasecmp + std::min(sizeLhs, sizeRhs); + 856 ns | CFStringCreateWithCString + CFStringCompare(kCFCompareCaseInsensitive) + 1110 ns | CFStringCreateWithCStringNoCopy + CFStringCompare(kCFCompareCaseInsensitive) + ________________________ + time per call | function */ + +std::weak_ordering zen::compareNativePath(const Zstring& lhs, const Zstring& rhs) +{ + assert(lhs.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! + assert(rhs.find(Zchar('\0')) == Zstring::npos); // + + return lhs <=> rhs; + +} + + diff --git a/zen/file_path.h b/zen/file_path.h index e328fa8e..4a85514b 100644 --- a/zen/file_path.h +++ b/zen/file_path.h @@ -12,6 +12,8 @@ namespace zen { + const Zchar FILE_NAME_SEPARATOR = '/'; + struct PathComponents { Zstring rootPath; //itemPath = rootPath + (FILE_NAME_SEPARATOR?) + relPath @@ -21,6 +23,26 @@ std::optional parsePathComponents(const Zstring& itemPath); //no std::optional getParentFolderPath(const Zstring& itemPath); +Zstring appendSeparator(Zstring path); //support rvalue references! + +bool isValidRelPath(const Zstring& relPath); + +Zstring appendPath(const Zstring& basePath, const Zstring& relPath); + +Zstring getFileExtension(const Zstring& filePath); + +//------------------------------------------------------------------------------------------ +/* Compare *local* file paths: + Windows: igore case (but distinguish Unicode normalization forms!) + Linux: byte-wise comparison + macOS: ignore case + Unicode normalization forms */ +std::weak_ordering compareNativePath(const Zstring& lhs, const Zstring& rhs); + +inline bool equalNativePath(const Zstring& lhs, const Zstring& rhs) { return compareNativePath(lhs, rhs) == std::weak_ordering::equivalent; } + +struct LessNativePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNativePath(lhs, rhs)); } }; +//------------------------------------------------------------------------------------------ + } diff --git a/zen/file_traverser.cpp b/zen/file_traverser.cpp index 515580ae..ff588562 100644 --- a/zen/file_traverser.cpp +++ b/zen/file_traverser.cpp @@ -51,7 +51,7 @@ void zen::traverseFolder(const Zstring& dirPath, if (itemName.empty()) //checks result of normalizeUtfForPosix, too! throw FileError(replaceCpy(_("Cannot read directory %x."), L"%x", fmtPath(dirPath)), formatSystemError("readdir", L"", L"Folder contains an item without name.")); - const Zstring& itemPath = appendSeparator(dirPath) + itemName; + const Zstring& itemPath = appendPath(dirPath, itemName); struct stat statData = {}; try diff --git a/zen/file_traverser.h b/zen/file_traverser.h index e49219f9..cb7782d6 100644 --- a/zen/file_traverser.h +++ b/zen/file_traverser.h @@ -7,10 +7,8 @@ #ifndef FILER_TRAVERSER_H_127463214871234 #define FILER_TRAVERSER_H_127463214871234 -#include #include -#include "zstring.h" - +#include "file_path.h" namespace zen { diff --git a/zen/format_unit.cpp b/zen/format_unit.cpp index 6803523a..13280b68 100644 --- a/zen/format_unit.cpp +++ b/zen/format_unit.cpp @@ -7,7 +7,6 @@ #include "format_unit.h" #include #include -#include #include "basic_math.h" #include "sys_error.h" #include "i18n.h" diff --git a/zen/format_unit.h b/zen/format_unit.h index 72a5e91b..d1ebc28c 100644 --- a/zen/format_unit.h +++ b/zen/format_unit.h @@ -8,8 +8,9 @@ #define FMT_UNIT_8702184019487324 #include -#include -#include "string_tools.h" +#include +//#include +//#include "string_tools.h" namespace zen diff --git a/zen/http.cpp b/zen/http.cpp index 4fe43ede..a26bb3a5 100644 --- a/zen/http.cpp +++ b/zen/http.cpp @@ -47,7 +47,7 @@ public: throw SysError(L"URL uses unexpected protocol."); }(); - std::map headers; + std::unordered_map headers; assert(postBuf || contentType.empty()); if (postBuf && !contentType.empty()) @@ -196,7 +196,7 @@ private: InterruptibleThread worker_; int64_t totalBytesReported_ = 0; int statusCode_ = 0; - std::map responseHeaders_; + std::unordered_map responseHeaders_; const IoCallback notifyUnbufferedIO_; //throw X }; diff --git a/zen/http.h b/zen/http.h index 9457309c..5e40db22 100644 --- a/zen/http.h +++ b/zen/http.h @@ -7,9 +7,8 @@ #ifndef HTTP_H_879083425703425702 #define HTTP_H_879083425703425702 -#include -#include -#include +#include "sys_error.h" +#include "serialize.h" namespace zen { diff --git a/zen/i18n.h b/zen/i18n.h index 31bb3df8..5695cb65 100644 --- a/zen/i18n.h +++ b/zen/i18n.h @@ -7,8 +7,8 @@ #ifndef I18_N_H_3843489325044253425456 #define I18_N_H_3843489325044253425456 -#include -#include +//#include +//#include #include "globals.h" #include "string_tools.h" #include "format_unit.h" diff --git a/zen/json.h b/zen/json.h index f6458d6a..6cfd3bb3 100644 --- a/zen/json.h +++ b/zen/json.h @@ -40,9 +40,9 @@ struct JsonValue Type type = Type::null; - std::string primVal; //for primitive types - std::map objectVal; //"[...] most implementations of JSON libraries do not accept duplicate keys [...]" => fine! - std::vector arrayVal; + std::string primVal; //for primitive types + std::unordered_map objectVal; //"[...] most implementations of JSON libraries do not accept duplicate keys [...]" => fine! + std::vector arrayVal; }; @@ -303,26 +303,26 @@ std::string serializeJson(const JsonValue& jval, namespace json_impl { -struct Token +enum class TokenType { - enum class Type - { - eof, - curlyOpen, - curlyClose, - squareOpen, - squareClose, - colon, - comma, - string, // - number, //primitive types - boolean, // - null, // - }; + eof, + curlyOpen, + curlyClose, + squareOpen, + squareClose, + colon, + comma, + string, // + number, //primitive types + boolean, // + null, // +}; - Token(Type t) : type(t) {} +struct Token +{ + Token(TokenType t) : type(t) {} - Type type; + TokenType type; std::string primVal; //for primitive types }; @@ -341,27 +341,27 @@ public: pos_ = std::find_if_not(pos_, stream_.end(), isJsonWhiteSpace); if (pos_ == stream_.end()) - return Token::Type::eof; + return TokenType::eof; - if (*pos_ == '{') return ++pos_, Token::Type::curlyOpen; - if (*pos_ == '}') return ++pos_, Token::Type::curlyClose; - if (*pos_ == '[') return ++pos_, Token::Type::squareOpen; - if (*pos_ == ']') return ++pos_, Token::Type::squareClose; - if (*pos_ == ':') return ++pos_, Token::Type::colon; - if (*pos_ == ',') return ++pos_, Token::Type::comma; - if (startsWith("null")) return pos_ += 4, Token(Token::Type::null); + if (*pos_ == '{') return ++pos_, TokenType::curlyOpen; + if (*pos_ == '}') return ++pos_, TokenType::curlyClose; + if (*pos_ == '[') return ++pos_, TokenType::squareOpen; + if (*pos_ == ']') return ++pos_, TokenType::squareClose; + if (*pos_ == ':') return ++pos_, TokenType::colon; + if (*pos_ == ',') return ++pos_, TokenType::comma; + if (startsWith("null")) return pos_ += 4, Token(TokenType::null); if (startsWith("true")) { pos_ += 4; - Token tk(Token::Type::boolean); + Token tk(TokenType::boolean); tk.primVal = "true"; return tk; } if (startsWith("false")) { pos_ += 5; - Token tk(Token::Type::boolean); + Token tk(TokenType::boolean); tk.primVal = "false"; return tk; } @@ -371,7 +371,7 @@ public: for (auto it = ++pos_; it != stream_.end(); ++it) if (*it == '"') { - Token tk(Token::Type::string); + Token tk(TokenType::string); tk.primVal = jsonUnescape({pos_, it}); pos_ = ++it; return tk; @@ -388,7 +388,7 @@ public: if (itNumEnd == pos_) throw JsonParsingError(posRow(), posCol()); - Token tk(Token::Type::number); + Token tk(TokenType::number); tk.primVal.assign(pos_, itNumEnd); pos_ = itNumEnd; return tk; @@ -441,7 +441,7 @@ public: JsonValue parse() //throw JsonParsingError { JsonValue jval = parseValue(); //throw JsonParsingError - expectToken(Token::Type::eof); // + expectToken(TokenType::eof); // return jval; } @@ -451,73 +451,73 @@ private: JsonValue parseValue() //throw JsonParsingError { - if (token().type == Token::Type::curlyOpen) + if (token().type == TokenType::curlyOpen) { nextToken(); //throw JsonParsingError JsonValue jval(JsonValue::Type::object); - if (token().type != Token::Type::curlyClose) + if (token().type != TokenType::curlyClose) for (;;) { - expectToken(Token::Type::string); //throw JsonParsingError + expectToken(TokenType::string); //throw JsonParsingError std::string name = token().primVal; nextToken(); //throw JsonParsingError - consumeToken(Token::Type::colon); //throw JsonParsingError + consumeToken(TokenType::colon); //throw JsonParsingError JsonValue value = parseValue(); //throw JsonParsingError jval.objectVal.emplace(std::move(name), std::move(value)); - if (token().type != Token::Type::comma) + if (token().type != TokenType::comma) break; nextToken(); //throw JsonParsingError } - consumeToken(Token::Type::curlyClose); //throw JsonParsingError + consumeToken(TokenType::curlyClose); //throw JsonParsingError return jval; } - else if (token().type == Token::Type::squareOpen) + else if (token().type == TokenType::squareOpen) { nextToken(); //throw JsonParsingError JsonValue jval(JsonValue::Type::array); - if (token().type != Token::Type::squareClose) + if (token().type != TokenType::squareClose) for (;;) { JsonValue value = parseValue(); //throw JsonParsingError jval.arrayVal.emplace_back(std::move(value)); - if (token().type != Token::Type::comma) + if (token().type != TokenType::comma) break; nextToken(); //throw JsonParsingError } - consumeToken(Token::Type::squareClose); //throw JsonParsingError + consumeToken(TokenType::squareClose); //throw JsonParsingError return jval; } - else if (token().type == Token::Type::string) + else if (token().type == TokenType::string) { JsonValue jval(token().primVal); nextToken(); //throw JsonParsingError return jval; } - else if (token().type == Token::Type::number) + else if (token().type == TokenType::number) { JsonValue jval(JsonValue::Type::number); jval.primVal = token().primVal; nextToken(); //throw JsonParsingError return jval; } - else if (token().type == Token::Type::boolean) + else if (token().type == TokenType::boolean) { JsonValue jval(JsonValue::Type::boolean); jval.primVal = token().primVal; nextToken(); //throw JsonParsingError return jval; } - else if (token().type == Token::Type::null) + else if (token().type == TokenType::null) { nextToken(); //throw JsonParsingError return JsonValue(); @@ -530,13 +530,13 @@ private: void nextToken() { tk_ = scn_.getNextToken(); } //throw JsonParsingError - void expectToken(Token::Type t) //throw JsonParsingError + void expectToken(TokenType t) //throw JsonParsingError { if (token().type != t) throw JsonParsingError(scn_.posRow(), scn_.posCol()); } - void consumeToken(Token::Type t) //throw JsonParsingError + void consumeToken(TokenType t) //throw JsonParsingError { expectToken(t); //throw JsonParsingError nextToken(); // diff --git a/zen/open_ssl.cpp b/zen/open_ssl.cpp index 99d7582e..6dc13d3d 100644 --- a/zen/open_ssl.cpp +++ b/zen/open_ssl.cpp @@ -419,15 +419,13 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: { std::vector lines; - for (auto it = keyStream.begin();;) //=> keep local: "warning: declaration of ‘it’ shadows a previous local" + split2(keyStream, isLineBreak, + [&lines](const char* blockFirst, const char* blockLast) { - auto itLineBegin = std::find_if_not(it, keyStream.end(), isLineBreak); - if (itLineBegin == keyStream.end()) - break; + if (blockFirst != blockLast) //consider Windows' + lines.emplace_back(blockFirst, blockLast); + }); - it = std::find_if(itLineBegin + 1, keyStream.end(), isLineBreak); - lines.emplace_back(itLineBegin, it); - } //----------- parse PuTTY ppk structure ---------------------------------- auto itLine = lines.begin(); if (itLine == lines.end() || !startsWith(*itLine, "PuTTY-User-Key-File-2: ")) diff --git a/zen/open_ssl.h b/zen/open_ssl.h index d1b823de..c66ad9c0 100644 --- a/zen/open_ssl.h +++ b/zen/open_ssl.h @@ -7,8 +7,7 @@ #ifndef OPEN_SSL_H_801974580936508934568792347506 #define OPEN_SSL_H_801974580936508934568792347506 -#include -#include +#include "sys_error.h" namespace zen diff --git a/zen/process_exec.cpp b/zen/process_exec.cpp index 0c5789d5..6b670508 100644 --- a/zen/process_exec.cpp +++ b/zen/process_exec.cpp @@ -44,8 +44,8 @@ namespace std::pair processExecuteImpl(const Zstring& filePath, const std::vector& arguments, std::optional timeoutMs) //throw SysError, SysErrorTimeOut { - const Zstring tempFilePath = appendSeparator(getTempFolderPath()) + //throw FileError - Zstr("FFS-") + utfTo(formatAsHexString(generateGUID())); + const Zstring tempFilePath = appendPath(getTempFolderPath(), //throw FileError + Zstr("FFS-") + utfTo(formatAsHexString(generateGUID()))); /* can't use popen(): does NOT return the exit code on Linux (despite the documentation!), although it works correctly on macOS => use pipes instead: https://linux.die.net/man/2/waitpid bonus: no need for "2>&1" to redirect STDERR to STDOUT diff --git a/zen/resolve_path.cpp b/zen/resolve_path.cpp index f0a49976..2b1a82d3 100644 --- a/zen/resolve_path.cpp +++ b/zen/resolve_path.cpp @@ -8,7 +8,6 @@ #include "time.h" #include "thread.h" #include "file_access.h" -#include "file_path.h" #include //getenv() #include //getcwd() @@ -18,11 +17,11 @@ using namespace zen; namespace { -std::optional getEnvironmentVar(const Zstring& name) +std::optional getEnvironmentVar(const Zchar* name) { assert(runningOnMainThread()); //getenv() is not thread-safe! - const char* buffer = ::getenv(name.c_str()); //no extended error reporting + const char* buffer = ::getenv(name); //no extended error reporting if (!buffer) return {}; Zstring value(buffer); @@ -69,7 +68,7 @@ Zstring resolveRelativePath(const Zstring& relativePath) if (const std::optional homeDir = getEnvironmentVar("HOME")) { if (startsWith(pathTmp, "~/")) - pathTmp = appendSeparator(*homeDir) + afterFirst(pathTmp, '/', IfNotFoundReturn::none); + pathTmp = appendPath(*homeDir, pathTmp.c_str() + 2); else //pathTmp == "~" pathTmp = *homeDir; } @@ -81,7 +80,7 @@ Zstring resolveRelativePath(const Zstring& relativePath) if (char* dirPath = ::getcwd(nullptr, 0)) { ZEN_ON_SCOPE_EXIT(::free(dirPath)); - pathTmp = appendSeparator(dirPath) + pathTmp; + pathTmp = appendPath(dirPath, pathTmp); } } } @@ -142,7 +141,7 @@ std::optional tryResolveMacro(const Zstring& macro) //macro without %-c } //try to resolve as environment variables - if (std::optional value = getEnvironmentVar(macro)) + if (std::optional value = getEnvironmentVar(macro.c_str())) return *value; return {}; @@ -190,57 +189,45 @@ Zstring expandVolumeName(Zstring pathPhrase) // [volname]:\folder [volname]\ } return pathPhrase; } +} -void getFolderAliasesRecursive(const Zstring& pathPhrase, std::set& output) +std::vector zen::getPathPhraseAliases(const Zstring& itemPath) { + assert(!itemPath.empty()); + std::vector pathAliases{makePathPhrase(itemPath)}; - //3. environment variables: C:\Users\ -> %UserProfile% { - std::vector> macroList; - //get list of useful variables - auto addEnvVar = [&](const Zstring& envName) - { - if (std::optional value = getEnvironmentVar(envName)) - macroList.emplace_back(envName, *value); - }; - addEnvVar("HOME"); //Linux: /home/ Mac: /Users/ - //addEnvVar("USER"); -> any benefit? - //substitute paths by symbolic names - for (const auto& [macroName, macroPath] : macroList) + //environment variables: C:\Users\ -> %UserProfile% + auto substByMacro = [&](const Zchar* macroName, const Zstring& macroPath) { //should use a replaceCpy() that considers "local path" case-sensitivity (if only we had one...) - const Zstring pathSubst = replaceCpyAsciiNoCase(pathPhrase, macroPath, MACRO_SEP + macroName + MACRO_SEP); - if (pathSubst != pathPhrase) - output.insert(pathSubst); - } - } + if (contains(itemPath, macroPath)) + pathAliases.push_back(makePathPhrase(replaceCpyAsciiNoCase(itemPath, macroPath, MACRO_SEP + Zstring(macroName) + MACRO_SEP))); + }; + + for (const Zchar* envName : + { + "HOME", //Linux: /home/ Mac: /Users/ + //"USER", -> any benefit? + }) + if (const std::optional envPath = getEnvironmentVar(envName)) + substByMacro(envName, *envPath); - //4. replace (all) macros: %UserProfile% -> C:\Users\ - { - const Zstring pathExp = expandMacros(pathPhrase); - if (pathExp != pathPhrase) - if (output.insert(pathExp).second) - getFolderAliasesRecursive(pathExp, output); //recurse! } -} + //removeDuplicates()? should not be needed... + + std::sort(pathAliases.begin(), pathAliases.end(), LessNaturalSort() /*even on Linux*/); + return pathAliases; } -std::vector zen::getFolderPathAliases(const Zstring& folderPathPhrase) +Zstring zen::makePathPhrase(const Zstring& itemPath) { - const Zstring dirPath = trimCpy(folderPathPhrase); - if (dirPath.empty()) - return {}; - - std::set tmp; - getFolderAliasesRecursive(dirPath, tmp); - - tmp.erase(dirPath); - tmp.erase(Zstring()); - - return {tmp.begin(), tmp.end()}; + if (endsWith(itemPath, Zstr(' '))) //path phrase concept must survive trimming! + return itemPath + FILE_NAME_SEPARATOR; + return itemPath; } @@ -254,26 +241,22 @@ Zstring zen::getResolvedFilePath(const Zstring& pathPhrase) //noexcept //remove leading/trailing whitespace before allowing misinterpretation in applyLongPathPrefix() trim(path); //attention: don't remove all whitespace from right, e.g. 0xa0 may be used as part of a folder name - - path = expandVolumeName(path); //may block for slow USB sticks and idle HDDs! - - /* need to resolve relative paths: - WINDOWS: - - \\?\-prefix requires absolute names - - Volume Shadow Copy: volume name needs to be part of each file path - - file icon buffer (at least for extensions that are actually read from disk, like "exe") - WINDOWS/LINUX: - - detection of dependent directories, e.g. "\" and "C:\test" */ - path = resolveRelativePath(path); + { + path = expandVolumeName(path); //may block for slow USB sticks and idle HDDs! + + /* need to resolve relative paths: + WINDOWS: + - \\?\-prefix requires absolute names + - Volume Shadow Copy: volume name needs to be part of each file path + - file icon buffer (at least for extensions that are actually read from disk, like "exe") + WINDOWS/LINUX: + - detection of dependent directories, e.g. "\" and "C:\test" */ + path = resolveRelativePath(path); + } //remove trailing slash, unless volume root: - if (std::optional pc = parsePathComponents(path)) - { - if (pc->relPath.empty()) - path = pc->rootPath; - else - path = appendSeparator(pc->rootPath) + pc->relPath; - } //keep this brace for GCC: -Wparentheses + if (const std::optional pc = parsePathComponents(path)) + path = appendPath(pc->rootPath, pc->relPath); return path; } diff --git a/zen/resolve_path.h b/zen/resolve_path.h index 4a5fc8fe..bfef087b 100644 --- a/zen/resolve_path.h +++ b/zen/resolve_path.h @@ -7,8 +7,7 @@ #ifndef RESOLVE_PATH_H_817402834713454 #define RESOLVE_PATH_H_817402834713454 -#include -#include "zstring.h" +#include "file_error.h" namespace zen @@ -24,7 +23,8 @@ Zstring getResolvedFilePath(const Zstring& pathPhrase); //noexcept //macro substitution only Zstring expandMacros(const Zstring& text); -std::vector getFolderPathAliases(const Zstring& folderPathPhrase); //may block for slow USB sticks when resolving [] +std::vector getPathPhraseAliases(const Zstring& itemPath); +Zstring makePathPhrase(const Zstring& itemPath); } diff --git a/zen/scope_guard.h b/zen/scope_guard.h index 61422eb4..1e4165be 100644 --- a/zen/scope_guard.h +++ b/zen/scope_guard.h @@ -8,7 +8,7 @@ #define SCOPE_GUARD_H_8971632487321434 #include -#include +//#include #include "type_traits.h" #include "legacy_compiler.h" //std::uncaught_exceptions @@ -91,7 +91,7 @@ private: ScopeGuard (const ScopeGuard&) = delete; ScopeGuard& operator=(const ScopeGuard&) = delete; - F fun_; + const F fun_; const int exeptionCount_ = std::uncaught_exceptions(); bool dismissed_ = false; }; diff --git a/zen/serialize.h b/zen/serialize.h index f9677630..b2561808 100644 --- a/zen/serialize.h +++ b/zen/serialize.h @@ -8,9 +8,9 @@ #define SERIALIZE_H_839405783574356 #include -#include -#include -#include "string_base.h" +//#include +//#include +//#include "string_base.h" #include "sys_error.h" //keep header clean from specific stream implementations! (e.g.file_io.h)! used by abstract.h! @@ -206,7 +206,7 @@ void writeArray(BufferedOutputStream& stream, const void* buffer, size_t len) template inline void writeNumber(BufferedOutputStream& stream, const N& num) { - static_assert(IsArithmeticV || std::is_same_v || std::is_enum_v); + static_assert(isArithmetic || std::is_same_v || std::is_enum_v); writeArray(stream, &num, sizeof(N)); } @@ -234,7 +234,7 @@ void readArray(BufferedInputStream& stream, void* buffer, size_t len) //throw Sy template inline N readNumber(BufferedInputStream& stream) //throw SysErrorUnexpectedEos { - static_assert(IsArithmeticV || std::is_same_v || std::is_enum_v); + static_assert(isArithmetic || std::is_same_v || std::is_enum_v); N num{}; readArray(stream, &num, sizeof(N)); //throw SysErrorUnexpectedEos return num; diff --git a/zen/shutdown.cpp b/zen/shutdown.cpp index a812d6ae..e64e1e70 100644 --- a/zen/shutdown.cpp +++ b/zen/shutdown.cpp @@ -16,9 +16,9 @@ using namespace zen; void zen::shutdownSystem() //throw FileError { - assert(runningOnMainThread()); - if (runningOnMainThread()) - onSystemShutdownRunTasks(); + assert(runningOnMainThread()); + if (runningOnMainThread()) + onSystemShutdownRunTasks(); try { //https://linux.die.net/man/2/reboot => needs admin rights! diff --git a/zen/socket.h b/zen/socket.h index f9813852..5ece29f8 100644 --- a/zen/socket.h +++ b/zen/socket.h @@ -7,7 +7,6 @@ #ifndef SOCKET_H_23498325972583947678456437 #define SOCKET_H_23498325972583947678456437 -#include #include "sys_error.h" #include //close #include diff --git a/zen/stl_tools.h b/zen/stl_tools.h index 0d359641..9f7977db 100644 --- a/zen/stl_tools.h +++ b/zen/stl_tools.h @@ -10,16 +10,28 @@ #include #include #include +#include +#include #include #include #include #include -#include "string_traits.h" +#include "type_traits.h" //enhancements for namespace zen { +//unfortunately std::erase_if is useless garbage on GCC 12 (requires non-modifying predicate) +template +void eraseIf(std::vector& v, Predicate p); + +template +void eraseIf(std::set& s, Predicate p); + +template +void eraseIf(std::map& m, Predicate p); + //append STL containers template void append(std::vector& v, const C& c); @@ -104,6 +116,44 @@ SharedRef makeSharedRef(Args&& ... args) { return SharedRef(std::make_shar //######################## implementation ######################## + +template inline +void eraseIf(std::vector& v, Predicate p) +{ + v.erase(std::remove_if(v.begin(), v.end(), p), v.end()); +} + + +namespace impl +{ +template inline +void setOrMapEraseIf(S& s, Predicate p) +{ + for (auto it = s.begin(); it != s.end();) + if (p(*it)) + s.erase(it++); + else + ++it; +} +} + + +template inline +void eraseIf(std::set& s, Predicate p) { impl::setOrMapEraseIf(s, p); } //don't make this any more generic! e.g. must not compile for std::vector!!! + + +template inline +void eraseIf(std::map& m, Predicate p) { impl::setOrMapEraseIf(m, p); } + + +template inline +void eraseIf(std::unordered_set& s, Predicate p) { impl::setOrMapEraseIf(s, p); } + + +template inline +void eraseIf(std::unordered_map& m, Predicate p) { impl::setOrMapEraseIf(m, p); } + + template inline void append(std::vector& v, const C& c) { v.insert(v.end(), c.begin(), c.end()); } @@ -249,9 +299,8 @@ void mergeTraversal(Iterator first1, Iterator last1, } -//FNV-1a: https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function template -class FNV1aHash +class FNV1aHash //FNV-1a: https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function { public: FNV1aHash() {} @@ -266,50 +315,13 @@ public: Num get() const { return hashVal_; } private: - static_assert(IsUnsignedIntV); + static_assert(isUnsignedInt); static_assert(sizeof(Num) == 4 || sizeof(Num) == 8); static constexpr Num base_ = sizeof(Num) == 4 ? 2166136261U : 14695981039346656037ULL; static constexpr Num prime_ = sizeof(Num) == 4 ? 16777619U : 1099511628211ULL; Num hashVal_ = base_; }; - - -template inline -Num hashArray(ByteIterator first, ByteIterator last) -{ - using ValType = typename std::iterator_traits::value_type; - static_assert(sizeof(ValType) <= sizeof(Num)); - static_assert(IsIntegerV || std::is_same_v || std::is_same_v); - - FNV1aHash hash; - std::for_each(first, last, [&hash](ValType v) { hash.add(v); }); - return hash.get(); -} - - -struct StringHash //support for custom string classes with std::unordered_set/map -{ - using is_transparent = int; //allow heterogenous lookup! - - template - size_t operator()(const String& str) const - { - const auto* const strFirst = strBegin(str); - return hashArray(strFirst, strFirst + strLength(str)); - } -}; - -struct StringEqual -{ - using is_transparent = int; //allow heterogenous lookup! - - template - bool operator()(const String1& lhs, const String2& rhs) const - { - return equalString(lhs, rhs); - } -}; } #endif //STL_TOOLS_H_84567184321434 diff --git a/zen/string_base.h b/zen/string_base.h index 693ce118..ace870b9 100644 --- a/zen/string_base.h +++ b/zen/string_base.h @@ -7,8 +7,8 @@ #ifndef STRING_BASE_H_083217454562342526 #define STRING_BASE_H_083217454562342526 -#include #include +#include //std::exchange #include "string_tools.h" @@ -377,8 +377,8 @@ Zbase::Zbase(const Zbase& str) template class SP> inline Zbase::Zbase(Zbase&& tmp) noexcept { - rawStr_ = tmp.rawStr_; - tmp.rawStr_ = nullptr; //usually nullptr would violate the class invarants, but it is good enough for the destructor! + rawStr_ = std::exchange(tmp.rawStr_, nullptr); + //usually nullptr would violate the class invarants, but it is good enough for the destructor! //caveat: do not increment ref-count of an unshared string! We'd lose optimization opportunity of reusing its memory! } @@ -637,8 +637,8 @@ Zbase& Zbase::operator=(Zbase&& tmp) noexcept { //don't use swap() but end rawStr_ life time immediately this->destroy(rawStr_); - rawStr_ = tmp.rawStr_; - tmp.rawStr_ = nullptr; + + rawStr_ = std::exchange(tmp.rawStr_, nullptr); return *this; } @@ -653,4 +653,12 @@ void Zbase::pop_back() } } + +//std::hash specialization in global namespace +template class SP> +struct std::hash> +{ + size_t operator()(const zen::Zbase& str) const { return zen::hashString(str); } +}; + #endif //STRING_BASE_H_083217454562342526 diff --git a/zen/string_tools.h b/zen/string_tools.h index ee4e5613..d3f35ce8 100644 --- a/zen/string_tools.h +++ b/zen/string_tools.h @@ -11,9 +11,6 @@ #include //iswspace #include //sprintf #include //swprintf -#include -#include -#include #include "stl_tools.h" #include "string_traits.h" #include "legacy_compiler.h" // but without the compiler crashes :> @@ -33,25 +30,30 @@ template Char asciiToLower(Char c); template Char asciiToUpper(Char c); //both S and T can be strings or char/wchar_t arrays or single char/wchar_t -template >> bool contains(const S& str, const T& term); +template /*Astyle hates tripe >*/ >> bool contains(const S& str, const T& term); - template bool startsWith (const S& str, const T& prefix); - template bool startsWithAsciiNoCase(const S& str, const T& prefix); +template bool startsWith (const S& str, const T& prefix); +template bool startsWithAsciiNoCase(const S& str, const T& prefix); - template bool endsWith (const S& str, const T& postfix); - template bool endsWithAsciiNoCase(const S& str, const T& postfix); +template bool endsWith (const S& str, const T& postfix); +template bool endsWithAsciiNoCase(const S& str, const T& postfix); - template bool equalString (const S& lhs, const T& rhs); - template bool equalAsciiNoCase(const S& lhs, const T& rhs); +template bool equalString (const S& lhs, const T& rhs); +template bool equalAsciiNoCase(const S& lhs, const T& rhs); - // template std::strong_ordering compareString (const S& lhs, const T& rhs); - template std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!) +//template std::strong_ordering compareString(const S& lhs, const T& rhs); +template std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!) - struct LessAsciiNoCase //STL container predicate -{ - template bool operator()(const S& lhs, const S& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); } -}; +//STL container predicates for std::map, std::unordered_set/map +struct StringHash; +struct StringEqual; + +struct LessAsciiNoCase; +struct StringHashAsciiNoCase; +struct StringEqualAsciiNoCase; +template Num hashString(const S& str); +template Num appendHashString(Num hashVal, const S& str); enum class IfNotFoundReturn { @@ -77,8 +79,11 @@ template void trim (S& str, bool fromLeft = true, bo template void trim(S& str, bool fromLeft, bool fromRight, Function trimThisChar); -template [[nodiscard]] S replaceCpy(S str, const T& oldTerm, const U& newTerm, bool replaceAll = true); -template void replace (S& str, const T& oldTerm, const U& newTerm, bool replaceAll = true); +template [[nodiscard]] S replaceCpy(S str, const T& oldTerm, const U& newTerm); +template void replace (S& str, const T& oldTerm, const U& newTerm); + +template [[nodiscard]] S replaceCpyAsciiNoCase(S str, const T& oldTerm, const U& newTerm); +template void replaceAsciiNoCase (S& str, const T& oldTerm, const U& newTerm); //high-performance conversion between numbers and strings template S numberTo(const Num& number); @@ -173,25 +178,28 @@ template inline Char asciiToLower(Char c) { if (static_cast('A') <= c && c <= static_cast('Z')) - return static_cast(c - static_cast('A') + static_cast('a')); - return c; + return static_cast(c - static_cast('A') + static_cast('a')); + return c; } - template inline - Char asciiToUpper(Char c) +template inline +Char asciiToUpper(Char c) { if (static_cast('a') <= c && c <= static_cast('z')) - return static_cast(c - static_cast('a') + static_cast('A')); - return c; + return static_cast(c - static_cast('a') + static_cast('A')); + return c; } - namespace impl +namespace impl { -//support embedded 0, unlike strncmp/wcsncmp: -inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; } -inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; } +template inline +bool equalSubstring(const Char* lhs, const Char* rhs, size_t len) +{ + //support embedded 0, unlike strncmp/wcsncmp: + return std::equal(lhs, lhs + len, rhs); +} template inline @@ -213,13 +221,14 @@ template inline bool startsWith(const S& str, const T& prefix) { const size_t pfLen = strLength(prefix); - return strLength(str) >= pfLen && impl::strcmpWithNulls(strBegin(str), strBegin(prefix), pfLen) == std::strong_ordering::equal; + return strLength(str) >= pfLen && impl::equalSubstring(strBegin(str), strBegin(prefix), pfLen); } template inline bool startsWithAsciiNoCase(const S& str, const T& prefix) { + assert(isAsciiString(str) || isAsciiString(prefix)); const size_t pfLen = strLength(prefix); return strLength(str) >= pfLen && impl::strcmpAsciiNoCase(strBegin(str), strBegin(prefix), pfLen) == std::weak_ordering::equivalent; } @@ -230,7 +239,7 @@ bool endsWith(const S& str, const T& postfix) { const size_t strLen = strLength(str); const size_t pfLen = strLength(postfix); - return strLen >= pfLen && impl::strcmpWithNulls(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == std::strong_ordering::equal; + return strLen >= pfLen && impl::equalSubstring(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen); } @@ -247,19 +256,24 @@ template inline bool equalString(const S& lhs, const T& rhs) { const size_t lhsLen = strLength(lhs); - return lhsLen == strLength(rhs) && impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), lhsLen) == std::strong_ordering::equal; + return lhsLen == strLength(rhs) && impl::equalSubstring(strBegin(lhs), strBegin(rhs), lhsLen); } template inline bool equalAsciiNoCase(const S& lhs, const T& rhs) { + assert(isAsciiString(lhs) || isAsciiString(rhs)); const size_t lhsLen = strLength(lhs); return lhsLen == strLength(rhs) && impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == std::weak_ordering::equivalent; } #if 0 +//support embedded 0, unlike strncmp/wcsncmp: +inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; } +inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; } + template inline std::strong_ordering compareString(const S& lhs, const T& rhs) { @@ -427,29 +441,22 @@ namespace impl ZEN_INIT_DETECT_MEMBER(append) //either call operator+=(S(str, len)) or append(str, len) -template >> inline +template >> inline void stringAppend(S& str, InputIterator first, InputIterator last) { str.append(first, last); } //inefficient append: keep disabled until really needed -//template >> inline +//template >> inline //void stringAppend(S& str, InputIterator first, InputIterator last) { str += S(first, last); } -} - - -template inline -S replaceCpy(S str, const T& oldTerm, const U& newTerm, bool replaceAll) -{ - replace(str, oldTerm, newTerm, replaceAll); - return str; -} -template inline -void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) +template inline +void replace(S& str, const T& oldTerm, const U& newTerm, CharEq charEqual) { static_assert(std::is_same_v, GetCharTypeT>); static_assert(std::is_same_v, GetCharTypeT>); const size_t oldLen = strLength(oldTerm); + const size_t newLen = strLength(newTerm); + //assert(oldLen != 0); -> reasonable check, but challenged by unit-test if (oldLen == 0) return; @@ -457,13 +464,17 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) const auto* const oldEnd = oldBegin + oldLen; const auto* const newBegin = strBegin(newTerm); - const auto* const newEnd = newBegin + strLength(newTerm); + const auto* const newEnd = newBegin + newLen; + + using CharType = GetCharTypeT; + if (oldLen == 1 && newLen == 1) //don't use expensive std::search unless required! + return std::replace_if(str.begin(), str.end(), [charEqual, charOld = *oldBegin](CharType c) { return charEqual(c, charOld); }, *newBegin); - auto it = strBegin(str); //don't use str.begin() or wxString will return this wxUni* nonsense! - const auto* const strEnd = it + strLength(str); + auto* it = strBegin(str); //don't use str.begin() or wxString will return this wxUni* nonsense! + auto* const strEnd = it + strLength(str); auto itFound = std::search(it, strEnd, - oldBegin, oldEnd); + oldBegin, oldEnd, charEqual); if (itFound == strEnd) return; //optimize "oldTerm not found" @@ -472,12 +483,13 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) { impl::stringAppend(output, newBegin, newEnd); it = itFound + oldLen; - +#if 0 if (!replaceAll) itFound = strEnd; else +#endif itFound = std::search(it, strEnd, - oldBegin, oldEnd); + oldBegin, oldEnd, charEqual); impl::stringAppend(output, it, itFound); } @@ -485,6 +497,37 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) str = std::move(output); } +} + + +template inline +void replace(S& str, const T& oldTerm, const U& newTerm) +{ impl::replace(str, oldTerm, newTerm, std::equal_to()); } + + +template inline +S replaceCpy(S str, const T& oldTerm, const U& newTerm) +{ + replace(str, oldTerm, newTerm); + return str; +} + + +template inline +void replaceAsciiNoCase(S& str, const T& oldTerm, const U& newTerm) +{ + using CharType = GetCharTypeT; + impl::replace(str, oldTerm, newTerm, + [](CharType charL, CharType charR) { return asciiToLower(charL) == asciiToLower(charR); }); +} + + +template inline +S replaceCpyAsciiNoCase(S str, const T& oldTerm, const U& newTerm) +{ + replaceAsciiNoCase(str, oldTerm, newTerm); + return str; +} template inline @@ -813,9 +856,9 @@ template inline S numberTo(const Num& number) { using TypeTag = std::integral_constant ? impl::NumberType::signedInt : - IsUnsignedIntV ? impl::NumberType::unsignedInt : - IsFloatV ? impl::NumberType::floatingPoint : + isSignedInt ? impl::NumberType::signedInt : + isUnsignedInt ? impl::NumberType::unsignedInt : + isFloat ? impl::NumberType::floatingPoint : impl::NumberType::other>; return impl::numberTo(number, TypeTag()); @@ -826,9 +869,9 @@ template inline Num stringTo(const S& str) { using TypeTag = std::integral_constant ? impl::NumberType::signedInt : - IsUnsignedIntV ? impl::NumberType::unsignedInt : - IsFloatV ? impl::NumberType::floatingPoint : + isSignedInt ? impl::NumberType::signedInt : + isUnsignedInt ? impl::NumberType::unsignedInt : + isFloat ? impl::NumberType::floatingPoint : impl::NumberType::other>; return impl::stringTo(str, TypeTag()); @@ -885,6 +928,72 @@ std::string formatAsHexString(const std::string_view& blob) } + + +template inline +Num hashString(const S& str) +{ + using CharType = GetCharTypeT; + const auto* const strFirst = strBegin(str); + + FNV1aHash hash; + std::for_each(strFirst, strFirst + strLength(str), [&hash](CharType c) { hash.add(c); }); + return hash.get(); +} + + +struct StringHash +{ + using is_transparent = int; //allow heterogenous lookup! + + template + size_t operator()(const String& str) const { return hashString(str); } +}; + + +struct StringEqual +{ + using is_transparent = int; //allow heterogenous lookup! + + template + bool operator()(const String1& lhs, const String2& rhs) const { return equalString(lhs, rhs); } +}; + + +struct LessAsciiNoCase +{ + template + bool operator()(const String& lhs, const String& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); } +}; + + +struct StringHashAsciiNoCase +{ + using is_transparent = int; //allow heterogenous lookup! + + template + size_t operator()(const String& str) const + { + using CharType = GetCharTypeT; + const auto* const strFirst = strBegin(str); + + FNV1aHash hash; + std::for_each(strFirst, strFirst + strLength(str), [&hash](CharType c) { hash.add(asciiToLower(c)); }); + return hash.get(); + } +}; + + +struct StringEqualAsciiNoCase +{ + using is_transparent = int; //allow heterogenous lookup! + + template + bool operator()(const String1& lhs, const String2& rhs) const + { + return equalAsciiNoCase(lhs, rhs); + } +}; } #endif //STRING_TOOLS_H_213458973046 diff --git a/zen/string_traits.h b/zen/string_traits.h index ca40f7d6..1a4f4740 100644 --- a/zen/string_traits.h +++ b/zen/string_traits.h @@ -15,9 +15,9 @@ //uniform access to string-like types, both classes and character arrays namespace zen { -/* IsStringLikeV<>: - IsStringLikeV //equals "true" - IsStringLikeV //equals "false" +/* isStringLike<>: + isStringLike //equals "true" + isStringLike //equals "false" GetCharTypeT<>: GetCharTypeT //equals wchar_t @@ -51,7 +51,7 @@ template auto makeStringView(Iterator first, size_t len); //---------------------- implementation ---------------------- namespace impl { -template //test if result of S::c_str() can convert to const Char* +template //test if result of S::c_str() can convert to const Char* class HasConversion { using Yes = char[1]; @@ -105,9 +105,9 @@ class StringTraits public: enum { - isStringClass = HasMemberTypeV_value_type&& - HasMemberV_c_str && - HasMemberV_length + isStringClass = hasMemberType_value_type && + hasMember_c_str && + hasMember_length }; using CharType = typename GetCharTypeImpl::Type; @@ -121,10 +121,10 @@ public: } -template -constexpr bool IsStringLikeV = impl::StringTraits::isStringLike; +template +constexpr bool isStringLike = impl::StringTraits::isStringLike; -template +template using GetCharTypeT = typename impl::StringTraits::CharType; @@ -184,7 +184,7 @@ inline size_t strLength(const std::basic_string_view& ref) { retu template inline auto strBegin(S&& str) { - static_assert(IsStringLikeV); + static_assert(isStringLike); return impl::strBegin(std::forward(str)); } @@ -192,7 +192,7 @@ auto strBegin(S&& str) template inline size_t strLength(S&& str) { - static_assert(IsStringLikeV); + static_assert(isStringLike); return impl::strLength(std::forward(str)); } diff --git a/zen/symlink_target.h b/zen/symlink_target.h index 4f007047..ada4e358 100644 --- a/zen/symlink_target.h +++ b/zen/symlink_target.h @@ -7,9 +7,7 @@ #ifndef SYMLINK_TARGET_H_801783470198357483 #define SYMLINK_TARGET_H_801783470198357483 -#include "scope_guard.h" #include "file_error.h" -#include "file_path.h" #include #include //realpath diff --git a/zen/sys_error.h b/zen/sys_error.h index 99cf9316..6d03f299 100644 --- a/zen/sys_error.h +++ b/zen/sys_error.h @@ -8,8 +8,8 @@ #define SYS_ERROR_H_3284791347018951324534 #include "scope_guard.h" // -#include "utf.h" //not used by this header, but the "rest of the world" needs it! -#include "i18n.h" // +#include "i18n.h" //not used by this header, but the "rest of the world" needs it! +#include "zstring.h" // #include #include diff --git a/zen/thread.h b/zen/thread.h index 136c7a5c..42fba281 100644 --- a/zen/thread.h +++ b/zen/thread.h @@ -9,9 +9,7 @@ #include #include -#include "scope_guard.h" #include "ring_buffer.h" -#include "string_tools.h" #include "zstring.h" @@ -65,7 +63,7 @@ class ThreadStopRequest {}; //context of worker thread: void interruptionPoint(); //throw ThreadStopRequest -template +template void interruptibleWait(std::condition_variable& cv, std::unique_lock& lock, Predicate pred); //throw ThreadStopRequest template @@ -90,7 +88,7 @@ auto runAsync(Function&& fun); //wait for all with a time limit: return true if *all* results are available! //TODO: use std::when_all when available -template +template bool waitForAllTimed(InputIterator first, InputIterator last, const Duration& wait_duration); template inline @@ -311,7 +309,7 @@ auto runAsync(Function&& fun) } -template inline +template inline bool waitForAllTimed(InputIterator first, InputIterator last, const Duration& duration) { const std::chrono::steady_clock::time_point stopTime = std::chrono::steady_clock::now() + duration; @@ -417,7 +415,7 @@ public: } //context of worker thread: - template + template void interruptibleWait(std::condition_variable& cv, std::unique_lock& lock, Predicate pred) //throw ThreadStopRequest { setConditionVar(&cv); @@ -476,7 +474,7 @@ void interruptionPoint() //throw ThreadStopRequest //context of worker thread: -template inline +template inline void interruptibleWait(std::condition_variable& cv, std::unique_lock& lock, Predicate pred) //throw ThreadStopRequest { assert(impl::threadLocalInterruptionStatus); diff --git a/zen/type_traits.h b/zen/type_traits.h index aca80393..28817aad 100644 --- a/zen/type_traits.h +++ b/zen/type_traits.h @@ -7,18 +7,19 @@ #ifndef TYPE_TRAITS_H_3425628658765467 #define TYPE_TRAITS_H_3425628658765467 +#include #include //https://en.cppreference.com/w/cpp/header/type_traits namespace zen { -template +template struct GetFirstOf { using Type = T; }; -template using GetFirstOfT = typename GetFirstOf::Type; +template using GetFirstOfT = typename GetFirstOf::Type; template @@ -28,61 +29,64 @@ class FunctionReturnType public: using Type = decltype(dummyFun(F())); }; -template using FunctionReturnTypeT = typename FunctionReturnType::Type; +template using FunctionReturnTypeT = typename FunctionReturnType::Type; //============================================================================= -template -constexpr size_t arraySize(T (&)[N]) { return N; } - -template -constexpr S arrayAccumulate(T (&arr)[N]) +template +constexpr uint32_t arrayHash(T (&arr)[N]) //don't bother making FNV1aHash constexpr instead { - S sum = 0; - for (size_t i = 0; i < N; ++i) - sum += arr[i]; - return sum; + uint32_t hashVal = 2166136261U; //FNV-1a base + + std::for_each(&arr[0], &arr[N], [&hashVal](T n) + { + //static_assert(isInteger || std::is_same_v || std::is_same_v); + static_assert(sizeof(T) <= sizeof(hashVal)); + hashVal ^= static_cast(n); + hashVal *= 16777619U; //prime + }); + return hashVal; } //Herb Sutter's signedness conversion helpers: https://herbsutter.com/2013/06/13/gotw-93-solution-auto-variables-part-2/ -template inline auto makeSigned (T t) { return static_cast>(t); } -template inline auto makeUnsigned(T t) { return static_cast>(t); } +template inline auto makeSigned (T t) { return static_cast>(t); } +template inline auto makeUnsigned(T t) { return static_cast>(t); } //################# Built-in Types ######################## //unfortunate standardized nonsense: std::is_integral<> includes bool, char, wchar_t! => roll our own: -template constexpr bool IsUnsignedIntV = std::is_same_v, unsigned char> || +template constexpr bool isUnsignedInt = std::is_same_v, unsigned char> || std::is_same_v, unsigned short int> || std::is_same_v, unsigned int> || std::is_same_v, unsigned long int> || std::is_same_v, unsigned long long int>; -template constexpr bool IsSignedIntV = std::is_same_v, signed char> || +template constexpr bool isSignedInt = std::is_same_v, signed char> || std::is_same_v, short int> || std::is_same_v, int> || std::is_same_v, long int> || std::is_same_v, long long int>; -template constexpr bool IsIntegerV = IsUnsignedIntV || IsSignedIntV; -template constexpr bool IsFloatV = std::is_floating_point_v; -template constexpr bool IsArithmeticV = IsIntegerV || IsFloatV; +template constexpr bool isInteger = isUnsignedInt || isSignedInt; +template constexpr bool isFloat = std::is_floating_point_v; +template constexpr bool isArithmetic = isInteger || isFloat; //################# Class Members ######################## -/* Detect data or function members of a class by name: ZEN_INIT_DETECT_MEMBER + HasMember_ +/* Detect data or function members of a class by name: ZEN_INIT_DETECT_MEMBER + hasMember_ Example: 1. ZEN_INIT_DETECT_MEMBER(c_str); - 2. HasMemberV_c_str -> use boolean + 2. hasMember_c_str -> use boolean Detect data or function members of a class by name *and* type: ZEN_INIT_DETECT_MEMBER2 + HasMember_ Example: 1. ZEN_INIT_DETECT_MEMBER2(size, size_t (T::*)() const); - 2. HasMember_size::value -> use as boolean + 2. hasMember_size::value -> use as boolean - Detect member type of a class: ZEN_INIT_DETECT_MEMBER_TYPE + HasMemberType_ + Detect member type of a class: ZEN_INIT_DETECT_MEMBER_TYPE + hasMemberType_ Example: 1. ZEN_INIT_DETECT_MEMBER_TYPE(value_type); - 2. HasMemberTypeV_value_type -> use as boolean */ + 2. hasMemberType_value_type -> use as boolean */ //########## Sorting ############################## /* @@ -103,7 +107,7 @@ private: }; template inline -/**/ Predicate makeSortDirection(Predicate pred, std::true_type) { return pred; } +/**/ Predicate makeSortDirection(Predicate pred, std::true_type) { return pred; } template inline LessDescending makeSortDirection(Predicate pred, std::false_type) { return pred; } @@ -138,10 +142,10 @@ LessDescending makeSortDirection(Predicate pred, std::false_type) { r enum { value = sizeof(hasMember(nullptr)) == sizeof(Yes) }; \ }; \ \ - template \ + template \ struct HasMemberImpl_##NAME : std::false_type {}; \ \ - template constexpr bool HasMemberV_##NAME = HasMemberImpl_##NAME, T>::value; + template constexpr bool hasMember_##NAME = HasMemberImpl_##NAME, T>::value; //#################################################################### @@ -161,7 +165,7 @@ LessDescending makeSortDirection(Predicate pred, std::false_type) { r enum { value = sizeof(hasMember(nullptr)) == sizeof(Yes) }; \ }; \ \ - template constexpr bool HasMemberV_##NAME = HasMember_##NAME::value; + template constexpr bool hasMember_##NAME = HasMember_##NAME::value; //#################################################################### @@ -181,7 +185,7 @@ LessDescending makeSortDirection(Predicate pred, std::false_type) { r enum { value = sizeof(hasMemberType(nullptr)) == sizeof(Yes) }; \ }; \ \ - template constexpr bool HasMemberTypeV_##TYPENAME = HasMemberType_##TYPENAME::value; + template constexpr bool hasMemberType_##TYPENAME = HasMemberType_##TYPENAME::value; } diff --git a/zen/utf.h b/zen/utf.h index 541bc785..9c9cf7d1 100644 --- a/zen/utf.h +++ b/zen/utf.h @@ -7,8 +7,8 @@ #ifndef UTF_H_01832479146991573473545 #define UTF_H_01832479146991573473545 -#include -#include +//#include +//#include #include "string_tools.h" //copyStringTo diff --git a/zen/zlib_wrap.cpp b/zen/zlib_wrap.cpp index 6f17dc08..e87a284f 100644 --- a/zen/zlib_wrap.cpp +++ b/zen/zlib_wrap.cpp @@ -46,10 +46,10 @@ size_t zen::impl::zlib_compressBound(size_t len) size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError { uLongf bufSize = static_cast(trgLen); - const int rv = ::compress2(static_cast(trg), //Bytef* dest, - &bufSize, //uLongf* destLen, - static_cast(src), //const Bytef* source, - static_cast(srcLen), //uLong sourceLen, + const int rv = ::compress2(static_cast(trg), //Bytef* dest + &bufSize, //uLongf* destLen + static_cast(src), //const Bytef* source + static_cast(srcLen), //uLong sourceLen level); //int level // Z_OK: success // Z_MEM_ERROR: not enough memory @@ -64,9 +64,9 @@ size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_ size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError { uLongf bufSize = static_cast(trgLen); - const int rv = ::uncompress(static_cast(trg), //Bytef* dest, - &bufSize, //uLongf* destLen, - static_cast(src), //const Bytef* source, + const int rv = ::uncompress(static_cast(trg), //Bytef* dest + &bufSize, //uLongf* destLen + static_cast(src), //const Bytef* source static_cast(srcLen)); //uLong sourceLen // Z_OK: success // Z_MEM_ERROR: not enough memory diff --git a/zen/zstring.cpp b/zen/zstring.cpp index 635fb47d..76c0a81f 100644 --- a/zen/zstring.cpp +++ b/zen/zstring.cpp @@ -5,50 +5,12 @@ // ***************************************************************************** #include "zstring.h" -#include -#include "utf.h" - #include #include "sys_error.h" using namespace zen; -Zstring getUpperCase(const Zstring& str) -{ - assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! - - //fast pre-check: - if (isAsciiString(str)) //perf: in the range of 3.5ns - { - Zstring output = str; - for (Zchar& c : output) - c = asciiToUpper(c); - return output; - } - - Zstring strNorm = getUnicodeNormalForm(str); - try - { - static_assert(sizeof(impl::CodePoint) == sizeof(gunichar)); - Zstring output; - output.reserve(strNorm.size()); - - UtfDecoder decoder(strNorm.c_str(), strNorm.size()); - while (const std::optional cp = decoder.getNext()) - impl::codePointToUtf(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent! - - return output; - - } - catch (SysError&) - { - assert(false); - return str; - } -} - - Zstring getUnicodeNormalForm(const Zstring& str) { //fast pre-check: @@ -75,63 +37,38 @@ Zstring getUnicodeNormalForm(const Zstring& str) } -Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm) +Zstring getUpperCase(const Zstring& str) { - if (oldTerm.empty()) - return str; - - //assert(isAsciiString(oldTerm)); - Zstring output; + assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! - for (size_t pos = 0;;) + //fast pre-check: + if (isAsciiString(str)) //perf: in the range of 3.5ns { - const size_t posFound = std::search(str.begin() + pos, str.end(), //can't use getUpperCase(): input/output sizes may differ! - oldTerm.begin(), oldTerm.end(), - [](Zchar charL, Zchar charR) { return asciiToUpper(charL) == asciiToUpper(charR); }) - str.begin(); - - if (posFound == str.size()) - { - if (pos == 0) //optimize "oldTerm not found": return ref-counted copy - return str; - output.append(str.begin() + pos, str.end()); - return output; - } - - output.append(str.begin() + pos, str.begin() + posFound); - output += newTerm; - pos = posFound + oldTerm.size(); + Zstring output = str; + for (Zchar& c : output) + c = asciiToUpper(c); + return output; } -} - - -/* https://docs.microsoft.com/de-de/windows/desktop/Intl/handling-sorting-in-your-applications - - Perf test: compare strings 10 mio times; 64 bit build - ----------------------------------------------------- - string a = "Fjk84$%kgfj$%T\\\\Gffg\\gsdgf\\fgsx----------d-" - string b = "fjK84$%kgfj$%T\\\\gfFg\\gsdgf\\fgSy----------dfdf" - Windows (UTF16 wchar_t) - 4 ns | wcscmp - 67 ns | CompareStringOrdinalFunc+ + bIgnoreCase - 314 ns | LCMapString + wmemcmp - - OS X (UTF8 char) - 6 ns | strcmp - 98 ns | strcasecmp - 120 ns | strncasecmp + std::min(sizeLhs, sizeRhs); - 856 ns | CFStringCreateWithCString + CFStringCompare(kCFCompareCaseInsensitive) - 1110 ns | CFStringCreateWithCStringNoCopy + CFStringCompare(kCFCompareCaseInsensitive) - ________________________ - time per call | function */ + Zstring strNorm = getUnicodeNormalForm(str); + try + { + static_assert(sizeof(impl::CodePoint) == sizeof(gunichar)); + Zstring output; + output.reserve(strNorm.size()); -std::weak_ordering compareNativePath(const Zstring& lhs, const Zstring& rhs) -{ - assert(lhs.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! - assert(rhs.find(Zchar('\0')) == Zstring::npos); // + UtfDecoder decoder(strNorm.c_str(), strNorm.size()); + while (const std::optional cp = decoder.getNext()) + impl::codePointToUtf(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent! - return lhs <=> rhs; + return output; + } + catch (SysError&) + { + assert(false); + return str; + } } diff --git a/zen/zstring.h b/zen/zstring.h index 15735cb0..bc7cfb06 100644 --- a/zen/zstring.h +++ b/zen/zstring.h @@ -7,12 +7,13 @@ #ifndef ZSTRING_H_73425873425789 #define ZSTRING_H_73425873425789 +#include //not used by this header, but the "rest of the world" needs it! +#include "utf.h" // #include "string_base.h" using Zchar = char; #define Zstr(x) x - const Zchar FILE_NAME_SEPARATOR = '/'; //"The reason for all the fuss above" - Loki/SmartPtr @@ -24,12 +25,6 @@ using Zstringc = zen::Zbase; //using Zstringw = zen::Zbase; -/* Caveat: don't expect input/output string sizes to match: - - different UTF-8 encoding length of upper-case chars - - different number of upper case chars (e.g. ß => "SS" on macOS) - - output is Unicode-normalized */ -Zstring getUpperCase(const Zstring& str); - //Windows, Linux: precomposed //macOS: decomposed Zstring getUnicodeNormalForm(const Zstring& str); @@ -37,105 +32,44 @@ Zstring getUnicodeNormalForm(const Zstring& str); and conformant software should not treat canonically equivalent sequences, whether composed or decomposed or something in between, as different." https://www.win.tue.nl/~aeb/linux/uc/nfc_vs_nfd.html */ -struct LessUnicodeNormal { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return getUnicodeNormalForm(lhs) < getUnicodeNormalForm(rhs); } }; - -Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm); +/* Caveat: don't expect input/output string sizes to match: + - different UTF-8 encoding length of upper-case chars + - different number of upper case chars (e.g. ß => "SS" on macOS) + - output is Unicode-normalized */ +Zstring getUpperCase(const Zstring& str); //------------------------------------------------------------------------------------------ +struct ZstringNorm //use as STL container key: avoid needless Unicode normalizations during std::map<>::find() +{ + /*explicit*/ ZstringNorm(const Zstring& str) : normStr(getUnicodeNormalForm(str)) {} + Zstring normStr; -inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return getUpperCase(lhs) == getUpperCase(rhs); } + std::strong_ordering operator<=>(const ZstringNorm&) const = default; +}; +template<> struct std::hash { size_t operator()(const ZstringNorm& str) const { return std::hash()(str.normStr); } }; + +//struct LessUnicodeNormal { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return getUnicodeNormalForm(lhs) < getUnicodeNormalForm(rhs); } }; +//------------------------------------------------------------------------------------------ struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find() { - ZstringNoCase(const Zstring& str) : upperCase(getUpperCase(str)) {} + /*explicit*/ ZstringNoCase(const Zstring& str) : upperCase(getUpperCase(str)) {} Zstring upperCase; std::strong_ordering operator<=>(const ZstringNoCase&) const = default; }; +template<> struct std::hash { size_t operator()(const ZstringNoCase& str) const { return std::hash()(str.upperCase); } }; -//------------------------------------------------------------------------------------------ - -/* Compare *local* file paths: - Windows: igore case - Linux: byte-wise comparison - macOS: ignore case + Unicode normalization forms */ -std::weak_ordering compareNativePath(const Zstring& lhs, const Zstring& rhs); - -inline bool equalNativePath(const Zstring& lhs, const Zstring& rhs) { return compareNativePath(lhs, rhs) == std::weak_ordering::equivalent; } - -struct LessNativePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNativePath(lhs, rhs)); } }; +inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return getUpperCase(lhs) == getUpperCase(rhs); } //------------------------------------------------------------------------------------------ std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs); struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return std::is_lt(compareNatural(lhs, rhs)); } }; -//------------------------------------------------------------------------------------------ - - - -inline -Zstring appendSeparator(Zstring path) //support rvalue references! -{ - if (!zen::endsWith(path, FILE_NAME_SEPARATOR)) - path += FILE_NAME_SEPARATOR; - return path; //returning a by-value parameter => RVO if possible, r-value otherwise! -} - - -inline -Zstring appendPaths(const Zstring& basePath, const Zstring& relPath, Zchar pathSep) -{ - using namespace zen; - - assert(!startsWith(relPath, pathSep) && !endsWith(relPath, pathSep)); - if (relPath.empty()) - return basePath; - if (basePath.empty()) - return relPath; - - if (startsWith(relPath, pathSep)) - { - if (relPath.size() == 1) - return basePath; - - if (endsWith(basePath, pathSep)) - return basePath + (relPath.c_str() + 1); - } - else if (!endsWith(basePath, pathSep)) - { - Zstring output = basePath; - output.reserve(basePath.size() + 1 + relPath.size()); //append all three strings using a single memory allocation - return std::move(output) + pathSep + relPath; // - } - return basePath + relPath; -} - -inline Zstring nativeAppendPaths(const Zstring& basePath, const Zstring& relPath) { return appendPaths(basePath, relPath, FILE_NAME_SEPARATOR); } - - -inline -Zstring getFileExtension(const Zstring& filePath) -{ - //const Zstring fileName = afterLast(filePath, FILE_NAME_SEPARATOR, IfNotFoundReturn::all); - //return afterLast(fileName, Zstr('.'), zen::IfNotFoundReturn::none); - - auto it = zen::findLast(filePath.begin(), filePath.end(), FILE_NAME_SEPARATOR); - if (it == filePath.end()) - it = filePath.begin(); - else - ++it; - - auto it2 = zen::findLast(it, filePath.end(), Zstr('.')); - if (it2 != filePath.end()) - ++it2; - - return Zstring(it2, filePath.end()); -} - - -//common unicode characters +//------------------------------------------------------------------------------------------ +//common Unicode characters const wchar_t EN_DASH = L'\u2013'; const wchar_t EM_DASH = L'\u2014'; const wchar_t* const SPACED_DASH = L" \u2014 "; //using 'EM DASH' -- cgit