diff options
author | B Stack <bgstack15@gmail.com> | 2018-10-17 02:11:26 +0000 |
---|---|---|
committer | B Stack <bgstack15@gmail.com> | 2018-10-17 02:11:26 +0000 |
commit | f70f8f961ef8f4d909266f71310e3515f25928e6 (patch) | |
tree | 89b2a018482c164bdd8ecac5c76b19a08f420dec /zen | |
parent | Merge branch '10.4' into 'master' (diff) | |
parent | 10.5 (diff) | |
download | FreeFileSync-f70f8f961ef8f4d909266f71310e3515f25928e6.tar.gz FreeFileSync-f70f8f961ef8f4d909266f71310e3515f25928e6.tar.bz2 FreeFileSync-f70f8f961ef8f4d909266f71310e3515f25928e6.zip |
Merge branch '10.5' into 'master' 10.5
10.5
See merge request opensource-tracking/FreeFileSync!2
Diffstat (limited to 'zen')
-rwxr-xr-x | zen/file_access.cpp | 2 | ||||
-rwxr-xr-x | zen/file_error.h | 2 | ||||
-rwxr-xr-x | zen/file_traverser.cpp | 2 | ||||
-rwxr-xr-x | zen/globals.h | 135 | ||||
-rwxr-xr-x | zen/guid.h | 1 | ||||
-rwxr-xr-x | zen/http.cpp | 57 | ||||
-rwxr-xr-x | zen/i18n.h | 4 | ||||
-rwxr-xr-x | zen/legacy_compiler.h | 3 | ||||
-rwxr-xr-x | zen/scope_guard.h | 7 | ||||
-rwxr-xr-x | zen/shell_execute.h | 4 | ||||
-rwxr-xr-x | zen/socket.h | 74 | ||||
-rwxr-xr-x | zen/stl_tools.h | 1 | ||||
-rwxr-xr-x | zen/string_tools.h | 171 | ||||
-rwxr-xr-x | zen/thread.cpp | 12 | ||||
-rwxr-xr-x | zen/time.h | 6 | ||||
-rwxr-xr-x | zen/type_traits.h | 1 | ||||
-rwxr-xr-x | zen/utf.h | 6 | ||||
-rwxr-xr-x | zen/zstring.cpp | 184 | ||||
-rwxr-xr-x | zen/zstring.h | 163 |
19 files changed, 536 insertions, 299 deletions
diff --git a/zen/file_access.cpp b/zen/file_access.cpp index a81fdae0..88b70b14 100755 --- a/zen/file_access.cpp +++ b/zen/file_access.cpp @@ -381,7 +381,7 @@ void zen::renameFile(const Zstring& pathSource, const Zstring& pathTarget) //thr const Zstring parentPathSrc = beforeLast(pathSource, FILE_NAME_SEPARATOR, IF_MISSING_RETURN_NONE); const Zstring parentPathTrg = beforeLast(pathTarget, FILE_NAME_SEPARATOR, IF_MISSING_RETURN_NONE); //some (broken) devices may fail to rename case directly: - if (equalFilePath(parentPathSrc, parentPathTrg)) + if (equalLocalPath(parentPathSrc, parentPathTrg)) { if (fileNameSrc == fileNameTrg) return; //non-sensical request diff --git a/zen/file_error.h b/zen/file_error.h index 086d0998..101d6543 100755 --- a/zen/file_error.h +++ b/zen/file_error.h @@ -45,7 +45,7 @@ DEFINE_NEW_FILE_ERROR(ErrorDifferentVolume); //----------- facilitate usage of std::wstring for error messages -------------------- -inline std::wstring fmtPath(const std::wstring& displayPath) { return L'\"' + displayPath + L'\"'; } +inline std::wstring fmtPath(const std::wstring& displayPath) { return L'"' + displayPath + L'"'; } inline std::wstring fmtPath(const Zstring& displayPath) { return fmtPath(utfTo<std::wstring>(displayPath)); } inline std::wstring fmtPath(const wchar_t* displayPath) { return fmtPath(std::wstring(displayPath)); } //resolve overload ambiguity } diff --git a/zen/file_traverser.cpp b/zen/file_traverser.cpp index e342c8ec..cc6e0c0b 100755 --- a/zen/file_traverser.cpp +++ b/zen/file_traverser.cpp @@ -78,7 +78,7 @@ void zen::traverseFolder(const Zstring& dirPath, if (onFolder) onFolder({ itemName, itemPath }); } - else //a file or named pipe, ect. + else //a file or named pipe, etc. 
{ if (onFile) onFile({ itemName, itemPath, makeUnsigned(statData.st_size), statData.st_mtime }); diff --git a/zen/globals.h b/zen/globals.h index 10975414..024147fa 100755 --- a/zen/globals.h +++ b/zen/globals.h @@ -14,14 +14,22 @@ namespace zen { -//solve static destruction order fiasco by providing shared ownership and serialized access to global variables +/* +Solve static destruction order fiasco by providing shared ownership and serialized access to global variables + +=>there may be accesses to "Global<T>::get()" during process shutdown e.g. _("") used by message in debug_minidump.cpp or by some detached thread assembling an error message! +=> use trivially-destructible POD only!!! + +ATTENTION: function-static globals have the compiler generate "magic statics" == compiler-genenerated locking code which will crash or leak memory when accessed after global is "dead" + => "solved" by FunStatGlobal, but we can't have "too many" of these... +*/ template <class T> -class Global +class Global //don't use for function-scope statics! { public: Global() { - static_assert(std::is_trivially_destructible_v<Pod>, "this memory needs to live forever"); + static_assert(std::is_trivially_constructible_v<Pod>&& std::is_trivially_destructible_v<Pod>, "this memory needs to live forever"); assert(!pod_.inst && !pod_.spinLock); //we depend on static zero-initialization! } @@ -52,16 +60,131 @@ public: } private: - //avoid static destruction order fiasco: there may be accesses to "Global<T>::get()" during process shutdown - //e.g. _("") used by message in debug_minidump.cpp or by some detached thread assembling an error message! - //=> use trivially-destructible POD only!!! struct Pod { + std::atomic<bool> spinLock; // { false }; rely entirely on static zero-initialization! => avoid potential contention with worker thread during Global<> construction! 
+ //serialize access; can't use std::mutex: has non-trival destructor std::shared_ptr<T>* inst; // = nullptr; + } pod_; +}; + +//=================================================================================================================== +//=================================================================================================================== + +struct CleanUpEntry +{ + using CleanUpFunction = void (*)(void* callbackData); + CleanUpFunction cleanUpFun; + void* callbackData; + CleanUpEntry* prev; +}; +void registerGlobalForDestruction(CleanUpEntry& entry); + + +template <class T> +class FunStatGlobal +{ +public: + //No FunStatGlobal() or ~FunStatGlobal()! + + std::shared_ptr<T> get() + { + static_assert(std::is_trivially_constructible_v<FunStatGlobal>&& + std::is_trivially_destructible_v<FunStatGlobal>, "this class must not generate code for magic statics!"); + + while (pod_.spinLock.exchange(true)) ; + ZEN_ON_SCOPE_EXIT(pod_.spinLock = false); + if (pod_.inst) + return *pod_.inst; + return nullptr; + } + + void set(std::unique_ptr<T>&& newInst) + { + std::shared_ptr<T>* tmpInst = nullptr; + if (newInst) + tmpInst = new std::shared_ptr<T>(std::move(newInst)); + { + while (pod_.spinLock.exchange(true)) ; + ZEN_ON_SCOPE_EXIT(pod_.spinLock = false); + + std::swap(pod_.inst, tmpInst); + registerDestruction(); + } + delete tmpInst; + } + + void initOnce(std::unique_ptr<T> (*getInitialValue)()) + { + while (pod_.spinLock.exchange(true)) ; + ZEN_ON_SCOPE_EXIT(pod_.spinLock = false); + + if (!pod_.cleanUpEntry.cleanUpFun) + { + assert(!pod_.inst); + if (std::unique_ptr<T> newInst = (*getInitialValue)()) + pod_.inst = new std::shared_ptr<T>(std::move(newInst)); + registerDestruction(); + } + } + +private: + //call while holding pod_.spinLock + void registerDestruction() + { + assert(pod_.spinLock); + + if (!pod_.cleanUpEntry.cleanUpFun) + { + pod_.cleanUpEntry.callbackData = this; + pod_.cleanUpEntry.cleanUpFun = [](void* callbackData) + { + auto thisPtr = 
static_cast<FunStatGlobal*>(callbackData); + thisPtr->set(nullptr); + }; + + registerGlobalForDestruction(pod_.cleanUpEntry); + } + } + + struct Pod + { std::atomic<bool> spinLock; // { false }; rely entirely on static zero-initialization! => avoid potential contention with worker thread during Global<> construction! //serialize access; can't use std::mutex: has non-trival destructor + std::shared_ptr<T>* inst; // = nullptr; + CleanUpEntry cleanUpEntry; } pod_; }; + + +inline +void registerGlobalForDestruction(CleanUpEntry& entry) +{ + static struct + { + std::atomic<bool> spinLock; + CleanUpEntry* head; + } cleanUpList; + + static_assert(std::is_trivially_constructible_v<decltype(cleanUpList)>&& + std::is_trivially_destructible_v<decltype(cleanUpList)>, "we must not generate code for magic statics!"); + + while (cleanUpList.spinLock.exchange(true)) ; + ZEN_ON_SCOPE_EXIT(cleanUpList.spinLock = false); + + std::atexit([] + { + while (cleanUpList.spinLock.exchange(true)) ; + ZEN_ON_SCOPE_EXIT(cleanUpList.spinLock = false); + + (*cleanUpList.head->cleanUpFun)(cleanUpList.head->callbackData); + cleanUpList.head = cleanUpList.head->prev; //nicely clean up in reverse order of construction + }); + + entry.prev = cleanUpList.head; + cleanUpList.head = &entry; + +} } #endif //GLOBALS_H_8013740213748021573485 @@ -9,6 +9,7 @@ #include <fcntl.h> //open #include <unistd.h> //close + #include <zen/sys_error.h> //#include <uuid/uuid.h> -> uuid_generate(), uuid_unparse(); avoid additional dependency for "sudo apt-get install uuid-dev" diff --git a/zen/http.cpp b/zen/http.cpp index d06d3309..1f89bf20 100755 --- a/zen/http.cpp +++ b/zen/http.cpp @@ -26,9 +26,9 @@ public: const bool useTls = [&] { - if (startsWith(url, Zstr("http://"), CmpAsciiNoCase())) + if (startsWithAsciiNoCase(url, Zstr("http://"))) return false; - if (startsWith(url, Zstr("https://"), CmpAsciiNoCase())) + if (startsWithAsciiNoCase(url, Zstr("https://"))) return true; throw SysError(L"URL uses unexpected 
protocol."); }(); @@ -57,35 +57,16 @@ public: //https://www.w3.org/Protocols/HTTP/1.0/spec.html#Request-Line std::string msg = (postParams ? "POST " : "GET ") + utfTo<std::string>(page) + " HTTP/1.0\r\n"; - for (const auto& item : headers) - msg += item.first + ": " + item.second + "\r\n"; + for (const auto& [name, value] : headers) + msg += name + ": " + value + "\r\n"; msg += "\r\n"; msg += postBuf; //send request for (size_t bytesToSend = msg.size(); bytesToSend > 0;) - { - int bytesSent = 0; - for (;;) - { - bytesSent = ::send(socket_->get(), //_In_ SOCKET s, - &*(msg.end() - bytesToSend), //_In_ const char *buf, - static_cast<int>(bytesToSend), //_In_ int len, - 0); //_In_ int flags - if (bytesSent >= 0 || errno != EINTR) - break; - } - if (bytesSent < 0) - THROW_LAST_SYS_ERROR_WSA(L"send"); - if (bytesSent > static_cast<int>(bytesToSend)) - throw SysError(L"send: buffer overflow."); - if (bytesSent == 0) - throw SysError(L"send: zero bytes processed"); - - bytesToSend -= bytesSent; - } - if (::shutdown(socket_->get(), SHUT_WR) != 0) - THROW_LAST_SYS_ERROR_WSA(L"shutdown"); + bytesToSend -= tryWriteSocket(socket_->get(), &*(msg.end() - bytesToSend), bytesToSend); //throw SysError + + shutdownSocketSend(socket_->get()); //throw SysError //receive response: std::string headBuf; @@ -116,7 +97,7 @@ public: const std::vector<std::string> statusItems = split(statusBuf, ' ', SplitType::ALLOW_EMPTY); //HTTP-Version SP Status-Code SP Reason-Phrase CRLF if (statusItems.size() < 2 || !startsWith(statusItems[0], "HTTP/")) - throw SysError(L"Invalid HTTP response: \"" + utfTo<std::wstring>(statusBuf) + L"\""); + throw SysError(L"Invalid HTTP response: \"" + utfTo<std::wstring>(statusBuf) + L'"'); statusCode_ = stringTo<int>(statusItems[1]); @@ -175,8 +156,6 @@ public: private: size_t tryRead(void* buffer, size_t bytesToRead) //throw SysError; may return short, only 0 means EOF! 
{ - if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check! - throw std::logic_error("Contract violation! " + std::string(__FILE__) + ":" + numberTo<std::string>(__LINE__)); assert(bytesToRead <= getBlockSize()); //block size might be 1000 while reading HTTP header if (contentRemaining_ >= 0) @@ -185,21 +164,7 @@ private: return 0; bytesToRead = static_cast<size_t>(std::min(static_cast<int64_t>(bytesToRead), contentRemaining_)); //[!] contentRemaining_ > 4 GB possible! } - int bytesReceived = 0; - for (;;) - { - bytesReceived = ::recv(socket_->get(), //_In_ SOCKET s, - static_cast<char*>(buffer), //_Out_ char *buf, - static_cast<int>(bytesToRead), //_In_ int len, - 0); //_In_ int flags - if (bytesReceived >= 0 || errno != EINTR) - break; - } - if (bytesReceived < 0) - THROW_LAST_SYS_ERROR_WSA(L"recv"); - if (static_cast<size_t>(bytesReceived) > bytesToRead) //better safe than sorry - throw SysError(L"HttpInputStream::tryRead: buffer overflow."); - + const size_t bytesReceived = tryReadSocket(socket_->get(), buffer, bytesToRead); //throw SysError; may return short, only 0 means EOF! 
if (contentRemaining_ >= 0) contentRemaining_ -= bytesReceived; @@ -325,8 +290,8 @@ std::string urldecode(const std::string& str) std::string zen::xWwwFormUrlEncode(const std::vector<std::pair<std::string, std::string>>& paramPairs) { std::string output; - for (const auto& pair : paramPairs) - output += urlencode(pair.first) + '=' + urlencode(pair.second) + '&'; + for (const auto& [name, value] : paramPairs) + output += urlencode(name) + '=' + urlencode(value) + '&'; //encode both key and value: https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1 if (!output.empty()) output.pop_back(); @@ -59,10 +59,10 @@ std::shared_ptr<const TranslationHandler> getTranslator(); namespace impl { inline -Global<const TranslationHandler>& refGlobalTranslationHandler() +FunStatGlobal<const TranslationHandler>& refGlobalTranslationHandler() { //getTranslator() may be called even after static objects of this translation unit are destroyed! - static Global<const TranslationHandler> inst; //external linkage even in header! + static FunStatGlobal<const TranslationHandler> inst; //external linkage even in header! 
return inst; } } diff --git a/zen/legacy_compiler.h b/zen/legacy_compiler.h index e9d50b97..16d87c53 100755 --- a/zen/legacy_compiler.h +++ b/zen/legacy_compiler.h @@ -7,14 +7,11 @@ #ifndef LEGACY_COMPILER_H_839567308565656789 #define LEGACY_COMPILER_H_839567308565656789 - #include <optional> - namespace std { //https://gcc.gnu.org/onlinedocs/libstdc++/manual/status.html //https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations - } #endif //LEGACY_COMPILER_H_839567308565656789 diff --git a/zen/scope_guard.h b/zen/scope_guard.h index d056bb2a..9eff6c1f 100755 --- a/zen/scope_guard.h +++ b/zen/scope_guard.h @@ -10,6 +10,7 @@ #include <cassert> #include <exception> #include "type_traits.h" +#include "legacy_compiler.h" //std::uncaught_exceptions //best of Zen, Loki and C++17 @@ -103,8 +104,8 @@ auto makeGuard(F&& fun) { return ScopeGuard<runMode, std::decay_t<F>>(std::forwa #define ZEN_CHECK_CASE_FOR_CONSTANT_IMPL(X) L ## X -#define ZEN_ON_SCOPE_EXIT(X) auto ZEN_CONCAT(dummy, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_EXIT >([&]{ X; }); (void)ZEN_CONCAT(dummy, __LINE__); -#define ZEN_ON_SCOPE_FAIL(X) auto ZEN_CONCAT(dummy, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_FAIL >([&]{ X; }); (void)ZEN_CONCAT(dummy, __LINE__); -#define ZEN_ON_SCOPE_SUCCESS(X) auto ZEN_CONCAT(dummy, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_SUCCESS>([&]{ X; }); (void)ZEN_CONCAT(dummy, __LINE__); +#define ZEN_ON_SCOPE_EXIT(X) auto ZEN_CONCAT(scopeGuard, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_EXIT >([&]{ X; }); (void)ZEN_CONCAT(scopeGuard, __LINE__); +#define ZEN_ON_SCOPE_FAIL(X) auto ZEN_CONCAT(scopeGuard, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_FAIL >([&]{ X; }); (void)ZEN_CONCAT(scopeGuard, __LINE__); +#define ZEN_ON_SCOPE_SUCCESS(X) auto ZEN_CONCAT(scopeGuard, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_SUCCESS>([&]{ X; }); (void)ZEN_CONCAT(scopeGuard, __LINE__); #endif 
//SCOPE_GUARD_H_8971632487321434 diff --git a/zen/shell_execute.h b/zen/shell_execute.h index 0802fbcb..98824d70 100755 --- a/zen/shell_execute.h +++ b/zen/shell_execute.h @@ -39,7 +39,7 @@ void shellExecute(const Zstring& command, ExecutionType type) //throw FileError if (type == ExecutionType::SYNC) { //Posix ::system() - execute a shell command - const int rv = ::system(command.c_str()); //do NOT use std::system as its documentation says nothing about "WEXITSTATUS(rv)", ect... + const int rv = ::system(command.c_str()); //do NOT use std::system as its documentation says nothing about "WEXITSTATUS(rv)", etc... if (rv == -1 || WEXITSTATUS(rv) == 127) throw FileError(_("Incorrect command line:") + L"\n" + utfTo<std::wstring>(command)); //http://linux.die.net/man/3/system "In case /bin/sh could not be executed, the exit status will be that of a command that does exit(127)" @@ -73,7 +73,7 @@ void shellExecute(const Zstring& command, ExecutionType type) //throw FileError inline void openWithDefaultApplication(const Zstring& itemPath) //throw FileError { - shellExecute("xdg-open \"" + itemPath + "\"", ExecutionType::ASYNC); // + shellExecute("xdg-open \"" + itemPath + '"', ExecutionType::ASYNC); // } } diff --git a/zen/socket.h b/zen/socket.h index e551a5ba..33ac2e50 100755 --- a/zen/socket.h +++ b/zen/socket.h @@ -20,6 +20,12 @@ namespace zen do { const ErrorCode ecInternal = getLastError(); throw SysError(formatSystemError(functionName, ecInternal)); } while (false) +//patch up socket portability: +using SocketType = int; +const SocketType invalidSocket = -1; +inline void closeSocket(SocketType s) { ::close(s); } + + //Winsock needs to be initialized before calling any of these functions! 
(WSAStartup/WSACleanup) class Socket //throw SysError @@ -67,18 +73,78 @@ public: ~Socket() { closeSocket(socket_); } - using SocketType = int; SocketType get() const { return socket_; } private: Socket (const Socket&) = delete; Socket& operator=(const Socket&) = delete; - static const SocketType invalidSocket = -1; - static void closeSocket(SocketType s) { ::close(s); } - SocketType socket_ = invalidSocket; }; + + +//more socket helper functions: +namespace +{ +size_t tryReadSocket(SocketType socket, void* buffer, size_t bytesToRead) //throw SysError; may return short, only 0 means EOF! +{ + if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check! + throw std::logic_error("Contract violation! " + std::string(__FILE__) + ":" + numberTo<std::string>(__LINE__)); + + int bytesReceived = 0; + for (;;) + { + bytesReceived = ::recv(socket, //_In_ SOCKET s, + static_cast<char*>(buffer), //_Out_ char *buf, + static_cast<int>(bytesToRead), //_In_ int len, + 0); //_In_ int flags + if (bytesReceived >= 0 || errno != EINTR) + break; + } + if (bytesReceived < 0) + THROW_LAST_SYS_ERROR_WSA(L"recv"); + + if (static_cast<size_t>(bytesReceived) > bytesToRead) //better safe than sorry + throw SysError(L"HttpInputStream::tryRead: buffer overflow."); + + return bytesReceived; //"zero indicates end of file" +} + + +size_t tryWriteSocket(SocketType socket, const void* buffer, size_t bytesToWrite) //throw SysError; may return short! CONTRACT: bytesToWrite > 0 +{ + if (bytesToWrite == 0) + throw std::logic_error("Contract violation! 
" + std::string(__FILE__) + ":" + numberTo<std::string>(__LINE__)); + + int bytesWritten = 0; + for (;;) + { + bytesWritten = ::send(socket, //_In_ SOCKET s, + static_cast<const char*>(buffer), //_In_ const char *buf, + static_cast<int>(bytesToWrite), //_In_ int len, + 0); //_In_ int flags + if (bytesWritten >= 0 || errno != EINTR) + break; + } + if (bytesWritten < 0) + THROW_LAST_SYS_ERROR_WSA(L"send"); + if (bytesWritten > static_cast<int>(bytesToWrite)) + throw SysError(L"send: buffer overflow."); + if (bytesWritten == 0) + throw SysError(L"send: zero bytes processed"); + + return bytesWritten; +} +} + + +inline +void shutdownSocketSend(SocketType socket) //throw SysError +{ + if (::shutdown(socket, SHUT_WR) != 0) + THROW_LAST_SYS_ERROR_WSA(L"shutdown"); +} + } #endif //SOCKET_H_23498325972583947678456437 diff --git a/zen/stl_tools.h b/zen/stl_tools.h index be9bf710..c3a9bf8f 100755 --- a/zen/stl_tools.h +++ b/zen/stl_tools.h @@ -12,6 +12,7 @@ #include <vector> #include <memory> #include <algorithm> +#include <optional> #include "string_traits.h" #include "build_info.h" diff --git a/zen/string_tools.h b/zen/string_tools.h index 8746722a..657c70d5 100755 --- a/zen/string_tools.h +++ b/zen/string_tools.h @@ -26,32 +26,30 @@ template <class Char> bool isWhiteSpace(Char c); template <class Char> bool isDigit (Char c); //not exactly the same as "std::isdigit" -> we consider '0'-'9' only! template <class Char> bool isHexDigit (Char c); template <class Char> bool isAsciiAlpha(Char c); +template <class Char> bool isAsciiString(const Char* str); template <class Char> Char asciiToLower(Char c); template <class Char> Char asciiToUpper(Char c); -//case-sensitive comparison (compile-time correctness: use different number of arguments as STL comparison predicates!) 
-struct CmpBinary { template <class Char> int operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const; }; +//both S and T can be strings or char/wchar_t arrays or single char/wchar_t +template <class S, class T> bool contains(const S& str, const T& term); -//basic case-insensitive comparison (considering A-Z only!) -struct CmpAsciiNoCase { template <class Char> int operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const; }; +template <class S, class T> bool startsWith (const S& str, const T& prefix); +template <class S, class T> bool startsWithAsciiNoCase(const S& str, const T& prefix); -struct LessAsciiNoCase -{ - template <class S> //don't support heterogenous input! => use as container predicate only! - bool operator()(const S& lhs, const S& rhs) const { return CmpAsciiNoCase()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; } -}; +template <class S, class T> bool endsWith (const S& str, const T& postfix); +template <class S, class T> bool endsWithAsciiNoCase(const S& str, const T& postfix); -//both S and T can be strings or char/wchar_t arrays or simple char/wchar_t -template <class S, class T> bool contains(const S& str, const T& term); +template <class S, class T> bool equalString (const S& lhs, const T& rhs); +template <class S, class T> bool equalAsciiNoCase(const S& lhs, const T& rhs); -template <class S, class T> bool startsWith(const S& str, const T& prefix); -template <class S, class T, class Function> bool startsWith(const S& str, const T& prefix, Function cmpStringFun); +template <class S, class T> int compareString (const S& lhs, const T& rhs); +template <class S, class T> int compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!) 
-template <class S, class T> bool endsWith (const S& str, const T& postfix); -template <class S, class T, class Function> bool endsWith (const S& str, const T& postfix, Function cmpStringFun); +struct LessAsciiNoCase //STL container predicate +{ + template <class S> bool operator()(const S& lhs, const S& rhs) const { return compareAsciiNoCase(lhs, rhs) < 0; } +}; -template <class S, class T> bool strEqual(const S& lhs, const T& rhs); -template <class S, class T, class Function> bool strEqual(const S& lhs, const T& rhs, Function cmpStringFun); enum FailureReturnVal { @@ -152,6 +150,17 @@ bool isAsciiAlpha(Char c) template <class Char> inline +bool isAsciiString(const Char* str) +{ + static_assert(std::is_same_v<Char, char> || std::is_same_v<Char, wchar_t>); + for (Char c = *str; c != 0; c = *++str) + if (zen::makeUnsigned(c) >= 128) + return false; + return true; +} + + +template <class Char> inline Char asciiToLower(Char c) { if (static_cast<Char>('A') <= c && c <= static_cast<Char>('Z')) @@ -169,41 +178,103 @@ Char asciiToUpper(Char c) } -template <class S, class T, class Function> inline -bool startsWith(const S& str, const T& prefix, Function cmpStringFun) +namespace impl +{ +inline int strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num); } //support embedded 0, unlike strncmp/wcsncmp! +inline int strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num); } // + + +template <class Char> inline +int strcmpAsciiNoCase(const Char* lhs, const Char* rhs, size_t len) +{ + while (len-- > 0) + { + const Char charL = asciiToLower(*lhs++); //ordering: lower-case chars have higher code points than uppper-case + const Char charR = asciiToLower(*rhs++); // + if (charL != charR) + return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention! + //unsigned underflow is well-defined! 
+ } + return 0; +} +} + + +template <class S, class T> inline +bool startsWith(const S& str, const T& prefix) { const size_t pfLen = strLength(prefix); - if (strLength(str) < pfLen) - return false; + return strLength(str) >= pfLen && impl::strcmpWithNulls(strBegin(str), strBegin(prefix), pfLen) == 0; +} + - return cmpStringFun(strBegin(str), pfLen, - strBegin(prefix), pfLen) == 0; +template <class S, class T> inline +bool startsWithAsciiNoCase(const S& str, const T& prefix) +{ + const size_t pfLen = strLength(prefix); + return strLength(str) >= pfLen && impl::strcmpAsciiNoCase(strBegin(str), strBegin(prefix), pfLen) == 0; } -template <class S, class T, class Function> inline -bool endsWith(const S& str, const T& postfix, Function cmpStringFun) +template <class S, class T> inline +bool endsWith(const S& str, const T& postfix) { const size_t strLen = strLength(str); const size_t pfLen = strLength(postfix); - if (strLen < pfLen) - return false; + return strLen >= pfLen && impl::strcmpWithNulls(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == 0; +} + + +template <class S, class T> inline +bool endsWithAsciiNoCase(const S& str, const T& postfix) +{ + const size_t strLen = strLength(str); + const size_t pfLen = strLength(postfix); + return strLen >= pfLen && impl::strcmpAsciiNoCase(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == 0; +} + - return cmpStringFun(strBegin(str) + strLen - pfLen, pfLen, - strBegin(postfix), pfLen) == 0; +template <class S, class T> inline +bool equalString(const S& lhs, const T& rhs) +{ + const size_t lhsLen = strLength(lhs); + return lhsLen == strLength(rhs) && impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), lhsLen) == 0; } -template <class S, class T, class Function> inline -bool strEqual(const S& lhs, const T& rhs, Function cmpStringFun) +template <class S, class T> inline +bool equalAsciiNoCase(const S& lhs, const T& rhs) +{ + const size_t lhsLen = strLength(lhs); + return lhsLen == strLength(rhs) && 
impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == 0; +} + + +template <class S, class T> inline +int compareString(const S& lhs, const T& rhs) { - return cmpStringFun(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) == 0; + const size_t lhsLen = strLength(lhs); + const size_t rhsLen = strLength(rhs); + + //length check *after* strcmpWithNulls(): we do care about natural ordering: e.g. for "compareString(makeUpperCopy(lhs), makeUpperCopy(rhs))" + const int rv = impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), std::min(lhsLen, rhsLen)); + if (rv != 0) + return rv; + return static_cast<int>(lhsLen) - static_cast<int>(rhsLen); } -template <class S, class T> inline bool startsWith(const S& str, const T& prefix ) { return startsWith(str, prefix, CmpBinary()); } -template <class S, class T> inline bool endsWith (const S& str, const T& postfix) { return endsWith (str, postfix, CmpBinary()); } -template <class S, class T> inline bool strEqual (const S& lhs, const T& rhs ) { return strEqual (lhs, rhs, CmpBinary()); } +template <class S, class T> inline +int compareAsciiNoCase(const S& lhs, const T& rhs) +{ + const size_t lhsLen = strLength(lhs); + const size_t rhsLen = strLength(rhs); + + const int rv = impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), std::min(lhsLen, rhsLen)); + if (rv != 0) + return rv; + return static_cast<int>(lhsLen) - static_cast<int>(rhsLen); +} template <class S, class T> inline @@ -464,42 +535,12 @@ struct CopyStringToString<T, T> //perf: we don't need a deep copy if string type template <class S> T copy(S&& str) const { return std::forward<S>(str); } }; - -inline int strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std::memcmp (ptr1, ptr2, num); } -inline int strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num); } } template <class T, class S> inline T copyStringTo(S&& str) { return impl::CopyStringToString<std::decay_t<S>, 
T>().copy(std::forward<S>(str)); } -template <class Char> inline -int CmpBinary::operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const -{ - //support embedded 0, unlike strncmp/wcsncmp! - const int rv = impl::strcmpWithNulls(lhs, rhs, std::min(lhsLen, rhsLen)); - if (rv != 0) - return rv; - return static_cast<int>(lhsLen) - static_cast<int>(rhsLen); -} - - -template <class Char> inline -int CmpAsciiNoCase::operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const -{ - const auto* const lhsLast = lhs + std::min(lhsLen, rhsLen); - while (lhs != lhsLast) - { - const Char charL = asciiToLower(*lhs++); //ordering: lower-case chars have higher code points than uppper-case - const Char charR = asciiToLower(*rhs++); // - if (charL != charR) - return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention! - //unsigned underflow is well-defined! - } - return static_cast<int>(lhsLen) - static_cast<int>(rhsLen); -} - - namespace impl { template <class Num> inline diff --git a/zen/thread.cpp b/zen/thread.cpp index 8016d4a9..08bfaa25 100755 --- a/zen/thread.cpp +++ b/zen/thread.cpp @@ -34,10 +34,7 @@ uint64_t getThreadIdNative() } -struct InitMainThreadIdOnStartup -{ - InitMainThreadIdOnStartup() { getMainThreadId(); } -} startupInitMainThreadId; +const uint64_t globalMainThreadId = getThreadId(); //avoid code-gen for "magic static"! 
} @@ -50,6 +47,9 @@ uint64_t zen::getThreadId() uint64_t zen::getMainThreadId() { - static const uint64_t mainThreadId = getThreadId(); - return mainThreadId; + //don't make this a function-scope static (avoid code-gen for "magic static") + if (globalMainThreadId == 0) //might be called during static initialization + return getThreadId(); + + return globalMainThreadId; } @@ -327,13 +327,13 @@ TimeComp parseTime(const String& format, const String2& str, UserDefinedFormatTa const CharType* itStr = strBegin(str); const CharType* const strLast = itStr + strLength(str); - auto extractNumber = [&](int& result, size_t digitCount) -> bool + auto extractNumber = [&](int& result, size_t digitCount) { if (strLast - itStr < makeSigned(digitCount)) return false; - if (std::any_of(itStr, itStr + digitCount, [](CharType c) { return !isDigit(c); })) - return false; + if (!std::all_of(itStr, itStr + digitCount, isDigit<CharType>)) + return false; result = zen::stringTo<int>(StringRef<const CharType>(itStr, itStr + digitCount)); itStr += digitCount; diff --git a/zen/type_traits.h b/zen/type_traits.h index 2d4e7a97..8783cb6a 100755 --- a/zen/type_traits.h +++ b/zen/type_traits.h @@ -8,7 +8,6 @@ #define TYPE_TRAITS_H_3425628658765467 #include <type_traits> -#include "legacy_compiler.h" //http://en.cppreference.com/w/cpp/header/type_traits @@ -192,7 +192,7 @@ public: std::optional<CodePoint> getNext() { if (it_ == last_) - return std::nullopt; //GCC 8.2 bug: -Wmaybe-uninitialized for "return {};" + return std::nullopt; const Char8 ch = *it_++; CodePoint cp = ch; @@ -313,7 +313,7 @@ bool isValidUtf(const UtfString& str) using namespace impl; UtfDecoder<GetCharTypeT<UtfString>> decoder(strBegin(str), strLength(str)); - while (std::optional<CodePoint> cp = decoder.getNext()) + while (const std::optional<CodePoint> cp = decoder.getNext()) if (*cp == REPLACEMENT_CHAR) return false; @@ -367,7 +367,7 @@ TargetString utfTo(const SourceString& str, std::false_type) TargetString output; 
UtfDecoder<CharSrc> decoder(strBegin(str), strLength(str)); - while (std::optional<CodePoint> cp = decoder.getNext()) + while (const std::optional<CodePoint> cp = decoder.getNext()) codePointToUtf<CharTrg>(*cp, [&](CharTrg c) { output += c; }); return output; diff --git a/zen/zstring.cpp b/zen/zstring.cpp index 8bf77a0b..68609030 100755 --- a/zen/zstring.cpp +++ b/zen/zstring.cpp @@ -8,9 +8,102 @@ #include <stdexcept> #include "utf.h" + #include <gtk/gtk.h> + #include "sys_error.h" using namespace zen; + +Zstring makeUpperCopy(const Zstring& str) +{ + //fast pre-check: + if (isAsciiString(str.c_str())) //perf: in the range of 3.5ns + { + Zstring output = str; + for (Zchar& c : output) c = asciiToUpper(c); + return output; + } + + Zstring strNorm = getUnicodeNormalForm(str); + try + { + static_assert(sizeof(impl::CodePoint) == sizeof(gunichar)); + Zstring output; + output.reserve(strNorm.size()); + + impl::UtfDecoder<char> decoder(strNorm.c_str(), strNorm.size()); + while (const std::optional<impl::CodePoint> cp = decoder.getNext()) + impl::codePointToUtf<char>(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent! + + return output; + + } + catch (const SysError& e) + { + (void)e; + assert(false); + return str; + } +} + + +Zstring getUnicodeNormalForm(const Zstring& str) +{ + //fast pre-check: + if (isAsciiString(str.c_str())) //perf: in the range of 3.5ns + return str; //god bless our ref-counting! => save output string memory consumption! + + //Example: const char* decomposed = "\x6f\xcc\x81"; + // const char* precomposed = "\xc3\xb3"; + try + { + gchar* outStr = ::g_utf8_normalize (str.c_str(), str.length(), G_NORMALIZE_DEFAULT_COMPOSE); + if (!outStr) + throw SysError(L"g_utf8_normalize: conversion failed. 
(" + utfTo<std::wstring>(str) + L")"); + ZEN_ON_SCOPE_EXIT(::g_free(outStr)); + return outStr; + + } + catch (const SysError& e) + { + (void)e; + assert(false); + return str; + } +} + + +Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm) +{ + if (oldTerm.empty()) + return str; + + Zstring strU = str; + Zstring oldU = oldTerm; + + for (Zchar& c : strU) c = asciiToUpper(c); //can't use makeUpperCopy(): input/output sizes may differ! + for (Zchar& c : oldU) c = asciiToUpper(c); // + + Zstring output; + + for (size_t pos = 0;;) + { + const size_t posFound = strU.find(oldU, pos); + if (posFound == Zstring::npos) + { + if (pos == 0) //optimize "oldTerm not found": return ref-counted copy + return str; + output.append(str.begin() + pos, str.end()); + return output; + } + + output.append(str.begin() + pos, str.begin() + posFound); + output += newTerm; + pos = posFound + oldTerm.size(); + } +} + + /* MSDN "Handling Sorting in Your Applications": https://msdn.microsoft.com/en-us/library/windows/desktop/dd318144 @@ -33,8 +126,14 @@ OS X (UTF8 char) ________________________ time per call | function */ +int compareLocalPath(const Zstring& lhs, const Zstring& rhs) +{ + assert(lhs.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! + assert(rhs.find(Zchar('\0')) == Zstring::npos); // + return compareString(lhs, rhs); +} namespace @@ -43,7 +142,7 @@ int compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rh { //- strncasecmp implements ASCII CI-comparsion only! 
=> signature is broken for UTF8-input; toupper() similarly doesn't support Unicode //- wcsncasecmp: https://opensource.apple.com/source/Libc/Libc-763.12/string/wcsncasecmp-fbsd.c - // => re-implement comparison based on towlower() to avoid memory allocations + // => re-implement comparison based on g_unichar_tolower() to avoid memory allocations impl::UtfDecoder<char> decL(lhs, lhsLen); impl::UtfDecoder<char> decR(rhs, rhsLen); @@ -54,23 +153,35 @@ int compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rh if (!cpL || !cpR) return static_cast<int>(!cpR) - static_cast<int>(!cpL); - //support unit-testing on Windows: CodePoint is truncated to wchar_t - static_assert(sizeof(wchar_t) == sizeof(impl::CodePoint)); + static_assert(sizeof(gunichar) == sizeof(impl::CodePoint)); - const wchar_t charL = ::towlower(static_cast<wchar_t>(*cpL)); //ordering: towlower() converts to higher code points than towupper() - const wchar_t charR = ::towlower(static_cast<wchar_t>(*cpR)); //uses LC_CTYPE category of current locale + const gunichar charL = ::g_unichar_toupper(*cpL); //note: tolower can be ambiguous, so don't use: + const gunichar charR = ::g_unichar_toupper(*cpR); //e.g. "Σ" (upper case) can be lower-case "ς" in the end of the word or "σ" in the middle. if (charL != charR) + //ordering: "to lower" converts to higher code points than "to upper" return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention! //unsigned underflow is well-defined! } } + } -int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen) +int compareNatural(const Zstring& lhs, const Zstring& rhs) { - const char* const lhsEnd = lhs + lhsLen; - const char* const rhsEnd = rhs + rhsLen; + //Unicode normal forms: + // Windows: CompareString() already ignores NFD/NFC differences: nice... 
+ // Linux: g_unichar_toupper() can't ignore differences + // macOS: CFStringCompare() considers differences + + const Zstring& lhsNorm = getUnicodeNormalForm(lhs); + const Zstring& rhsNorm = getUnicodeNormalForm(rhs); + + const char* strL = lhsNorm.c_str(); + const char* strR = rhsNorm.c_str(); + + const char* const strEndL = strL + lhsNorm.size(); + const char* const strEndR = strR + rhsNorm.size(); /* - compare strings after conceptually creating blocks of whitespace/numbers/text - implement strict weak ordering! @@ -84,43 +195,43 @@ int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, s */ for (;;) { - if (lhs == lhsEnd || rhs == rhsEnd) - return static_cast<int>(lhs != lhsEnd) - static_cast<int>(rhs != rhsEnd); //"nothing" before "something" + if (strL == strEndL || strR == strEndR) + return static_cast<int>(strL != strEndL) - static_cast<int>(strR != strEndR); //"nothing" before "something" //note: "something" never would have been condensed to "nothing" further below => can finish evaluation here - const bool wsL = isWhiteSpace(*lhs); - const bool wsR = isWhiteSpace(*rhs); + const bool wsL = isWhiteSpace(*strL); + const bool wsR = isWhiteSpace(*strR); if (wsL != wsR) return static_cast<int>(!wsL) - static_cast<int>(!wsR); //whitespace before non-ws! if (wsL) { - ++lhs, ++rhs; - while (lhs != lhsEnd && isWhiteSpace(*lhs)) ++lhs; - while (rhs != rhsEnd && isWhiteSpace(*rhs)) ++rhs; + ++strL, ++strR; + while (strL != strEndL && isWhiteSpace(*strL)) ++strL; + while (strR != strEndR && isWhiteSpace(*strR)) ++strR; continue; } - const bool digitL = isDigit(*lhs); - const bool digitR = isDigit(*rhs); + const bool digitL = isDigit(*strL); + const bool digitR = isDigit(*strR); if (digitL != digitR) return static_cast<int>(!digitL) - static_cast<int>(!digitR); //number before chars! 
if (digitL) { - while (lhs != lhsEnd && *lhs == '0') ++lhs; - while (rhs != rhsEnd && *rhs == '0') ++rhs; + while (strL != strEndL && *strL == '0') ++strL; + while (strR != strEndR && *strR == '0') ++strR; int rv = 0; - for (;; ++lhs, ++rhs) + for (;; ++strL, ++strR) { - const bool endL = lhs == lhsEnd || !isDigit(*lhs); - const bool endR = rhs == rhsEnd || !isDigit(*rhs); + const bool endL = strL == strEndL || !isDigit(*strL); + const bool endR = strR == strEndR || !isDigit(*strR); if (endL != endR) return static_cast<int>(!endL) - static_cast<int>(!endR); //more digits means bigger number if (endL) break; //same number of digits - if (rv == 0 && *lhs != *rhs) - rv = *lhs - *rhs; //found first digit difference comparing from left + if (rv == 0 && *strL != *strR) + rv = *strL - *strR; //found first digit difference comparing from left } if (rv != 0) return rv; @@ -128,28 +239,19 @@ int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, s } //compare full junks of text: consider unicode encoding! - const char* textBeginL = lhs++; - const char* textBeginR = rhs++; //current char is neither white space nor digit at this point! - while (lhs != lhsEnd && !isWhiteSpace(*lhs) && !isDigit(*lhs)) ++lhs; - while (rhs != rhsEnd && !isWhiteSpace(*rhs) && !isDigit(*rhs)) ++rhs; + const char* textBeginL = strL++; + const char* textBeginR = strR++; //current char is neither white space nor digit at this point! 
+ while (strL != strEndL && !isWhiteSpace(*strL) && !isDigit(*strL)) ++strL; + while (strR != strEndR && !isWhiteSpace(*strR) && !isDigit(*strR)) ++strR; - const int rv = compareNoCaseUtf8(textBeginL, lhs - textBeginL, textBeginR, rhs - textBeginR); + const int rv = compareNoCaseUtf8(textBeginL, strL - textBeginL, textBeginR, strR - textBeginR); if (rv != 0) return rv; } -} - -namespace -{ } -int CmpNaturalSort::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const -{ - //auto strL = utfTo<std::string>(Zstring(lhs, lhsLen)); - //auto strR = utfTo<std::string>(Zstring(rhs, rhsLen)); - //return cmpStringNaturalLinux(strL.c_str(), strL.size(), strR.c_str(), strR.size()); - return cmpStringNaturalLinux(lhs, lhsLen, rhs, rhsLen); - -}
\ No newline at end of file +warn_static("clean up implementation of these two:") +//template <> inline bool isWhiteSpace(char c) +//template <> inline bool isWhiteSpace(wchar_t c) diff --git a/zen/zstring.h b/zen/zstring.h index 7fa21335..20cf968d 100755 --- a/zen/zstring.h +++ b/zen/zstring.h @@ -14,6 +14,7 @@ #define Zstr(x) x const Zchar FILE_NAME_SEPARATOR = '/'; + //"The reason for all the fuss above" - Loki/SmartPtr //a high-performance string for interfacing with native OS APIs in multithreaded contexts using Zstring = zen::Zbase<Zchar>; @@ -22,43 +23,71 @@ using Zstring = zen::Zbase<Zchar>; using Zstringw = zen::Zbase<wchar_t>; -//Compare filepaths: Windows/OS X does NOT distinguish between upper/lower-case, while Linux DOES -struct CmpFilePath -{ - int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const; -}; +//Caveat: don't expect input/output string sizes to match: +// - different UTF-8 encoding length of upper-case chars +// - different number of upper case chars (e.g. "ß" => "SS" on macOS) +// - output is Unicode-normalized +Zstring makeUpperCopy(const Zstring& str); -struct CmpNaturalSort -{ - int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const; -}; +//Windows, Linux: precomposed +//macOS: decomposed +Zstring getUnicodeNormalForm(const Zstring& str); +Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm); -struct LessFilePath -{ - template <class S> //don't support heterogenous input! => use as container predicate only! 
- bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpFilePath()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; } -}; +//------------------------------------------------------------------------------------------ +//inline +//int compareNoCase(const Zstring& lhs, const Zstring& rhs) +//{ +// return zen::compareString(makeUpperCopy(lhs), makeUpperCopy(rhs)); +// //avoid eager optimization bugs: e.g. "if (isAsciiString()) compareAsciiNoCase()" might model a different order! +//} + +inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return makeUpperCopy(lhs) == makeUpperCopy(rhs); } -struct LessNaturalSort +struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find() { - template <class S> //don't support heterogenous input! => use as container predicate only! - bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpNaturalSort()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; } + ZstringNoCase(const Zstring& str) : upperCase(makeUpperCopy(str)) {} + Zstring upperCase; }; +inline bool operator<(const ZstringNoCase& lhs, const ZstringNoCase& rhs) { return lhs.upperCase < rhs.upperCase; } + +//struct LessNoCase { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNoCase(lhs, rhs) < 0; } }; + +//------------------------------------------------------------------------------------------ + +//Compare *local* file paths: +// Windows: igore case +// Linux: byte-wise comparison +// macOS: igore case + Unicode normalization forms +int compareLocalPath(const Zstring& lhs, const Zstring& rhs); + +inline bool equalLocalPath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; } +struct LessLocalPath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } }; -template <class S> -S makeUpperCopy(S str); 
+//------------------------------------------------------------------------------------------ +int compareNatural(const Zstring& lhs, const Zstring& rhs); +struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring rhs) const { return compareNatural(lhs, rhs) < 0; } }; +//------------------------------------------------------------------------------------------ + +warn_static("get rid:") +inline int compareFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs); } + +inline bool equalFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; } + +struct LessFilePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } }; +//------------------------------------------------------------------------------------------ -template <class S, class T> inline -bool equalFilePath(const S& lhs, const T& rhs) { using namespace zen; return strEqual(lhs, rhs, CmpFilePath()); } inline Zstring appendSeparator(Zstring path) //support rvalue references! { - return zen::endsWith(path, FILE_NAME_SEPARATOR) ? path : (path += FILE_NAME_SEPARATOR); //returning a by-value parameter implicitly converts to r-value! + if (!zen::endsWith(path, FILE_NAME_SEPARATOR)) + path += FILE_NAME_SEPARATOR; + return path; //returning a by-value parameter => RVO if possible, r-value otherwise! 
} @@ -82,12 +111,7 @@ Zstring getFileExtension(const Zstring& filePath) } -template <class S, class T, class U> -S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm); - - - -//common unicode sequences +//common unicode characters const wchar_t EM_DASH = L'\u2014'; const wchar_t EN_DASH = L'\u2013'; const wchar_t* const SPACED_DASH = L" \u2013 "; //using 'EN DASH' @@ -99,89 +123,6 @@ const wchar_t MULT_SIGN = L'\u00D7'; //fancy "x" - - -//################################# inline implementation ######################################## -inline -void makeUpperInPlace(wchar_t* str, size_t strLen) -{ - std::for_each(str, str + strLen, [](wchar_t& c) { c = std::towupper(c); }); //locale-dependent! -} - - -inline -void makeUpperInPlace(char* str, size_t strLen) -{ - std::for_each(str, str + strLen, [](char& c) { c = std::toupper(static_cast<unsigned char>(c)); }); //locale-dependent! - //result of toupper() is an unsigned char mapped to int range: the char representation is in the last 8 bits and we need not care about signedness! - //this should work for UTF-8, too: all chars >= 128 are mapped upon themselves! -} - - -template <class S> inline -S makeUpperCopy(S str) -{ - const size_t len = str.length(); //we assert S is a string type! - if (len > 0) - makeUpperInPlace(&*str.begin(), len); - - return str; -} - - -inline -int CmpFilePath::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const -{ - assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls! 
- assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); // - - const int rv = std::strncmp(lhs, rhs, std::min(lhsLen, rhsLen)); - if (rv != 0) - return rv; - return static_cast<int>(lhsLen) - static_cast<int>(rhsLen); -} - - -template <class S, class T, class U> inline -S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm) -{ - using namespace zen; - static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>); - static_assert(std::is_same_v<GetCharTypeT<T>, GetCharTypeT<U>>); - const size_t oldLen = strLength(oldTerm); - if (oldLen == 0) - return str; - - const S strU = makeUpperCopy(str); //S required to be a string class - const S oldU = makeUpperCopy<S>(oldTerm); //[!] T not required to be a string class - assert(strLength(strU) == strLength(str )); - assert(strLength(oldU) == strLength(oldTerm)); - - const auto* const newBegin = strBegin(newTerm); - const auto* const newEnd = newBegin + strLength(newTerm); - - S output; - - for (size_t pos = 0;;) - { - const auto itFound = std::search(strU.begin() + pos, strU.end(), - oldU.begin(), oldU.end()); - if (itFound == strU.end() && pos == 0) - return str; //optimize "oldTerm not found": return ref-counted copy - - impl::stringAppend(output, str.begin() + pos, str.begin() + (itFound - strU.begin())); - if (itFound == strU.end()) - return output; - - impl::stringAppend(output, newBegin, newEnd); - pos = (itFound - strU.begin()) + oldLen; - } -} - -//expose for unit tests -int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen); -inline int cmpStringNaturalLinux(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen) { return cmpStringNaturalLinuxTest(lhs, lhsLen, rhs, rhsLen); } - //--------------------------------------------------------------------------- //ZEN macro consistency checks: |