summary refs log tree commit diff
path: root/zen
diff options
context:
space:
mode:
Diffstat (limited to 'zen')
-rwxr-xr-x zen/file_access.cpp 2
-rwxr-xr-x zen/file_error.h 2
-rwxr-xr-x zen/file_traverser.cpp 2
-rwxr-xr-x zen/globals.h 135
-rwxr-xr-x zen/guid.h 1
-rwxr-xr-x zen/http.cpp 57
-rwxr-xr-x zen/i18n.h 4
-rwxr-xr-x zen/legacy_compiler.h 3
-rwxr-xr-x zen/scope_guard.h 7
-rwxr-xr-x zen/shell_execute.h 4
-rwxr-xr-x zen/socket.h 74
-rwxr-xr-x zen/stl_tools.h 1
-rwxr-xr-x zen/string_tools.h 171
-rwxr-xr-x zen/thread.cpp 12
-rwxr-xr-x zen/time.h 6
-rwxr-xr-x zen/type_traits.h 1
-rwxr-xr-x zen/utf.h 6
-rwxr-xr-x zen/zstring.cpp 184
-rwxr-xr-x zen/zstring.h 163
19 files changed, 536 insertions, 299 deletions
diff --git a/zen/file_access.cpp b/zen/file_access.cpp
index a81fdae0..88b70b14 100755
--- a/zen/file_access.cpp
+++ b/zen/file_access.cpp
@@ -381,7 +381,7 @@ void zen::renameFile(const Zstring& pathSource, const Zstring& pathTarget) //thr
const Zstring parentPathSrc = beforeLast(pathSource, FILE_NAME_SEPARATOR, IF_MISSING_RETURN_NONE);
const Zstring parentPathTrg = beforeLast(pathTarget, FILE_NAME_SEPARATOR, IF_MISSING_RETURN_NONE);
//some (broken) devices may fail to rename case directly:
- if (equalFilePath(parentPathSrc, parentPathTrg))
+ if (equalLocalPath(parentPathSrc, parentPathTrg))
{
if (fileNameSrc == fileNameTrg)
return; //non-sensical request
diff --git a/zen/file_error.h b/zen/file_error.h
index 086d0998..101d6543 100755
--- a/zen/file_error.h
+++ b/zen/file_error.h
@@ -45,7 +45,7 @@ DEFINE_NEW_FILE_ERROR(ErrorDifferentVolume);
//----------- facilitate usage of std::wstring for error messages --------------------
-inline std::wstring fmtPath(const std::wstring& displayPath) { return L'\"' + displayPath + L'\"'; }
+inline std::wstring fmtPath(const std::wstring& displayPath) { return L'"' + displayPath + L'"'; }
inline std::wstring fmtPath(const Zstring& displayPath) { return fmtPath(utfTo<std::wstring>(displayPath)); }
inline std::wstring fmtPath(const wchar_t* displayPath) { return fmtPath(std::wstring(displayPath)); } //resolve overload ambiguity
}
diff --git a/zen/file_traverser.cpp b/zen/file_traverser.cpp
index e342c8ec..cc6e0c0b 100755
--- a/zen/file_traverser.cpp
+++ b/zen/file_traverser.cpp
@@ -78,7 +78,7 @@ void zen::traverseFolder(const Zstring& dirPath,
if (onFolder)
onFolder({ itemName, itemPath });
}
- else //a file or named pipe, ect.
+ else //a file or named pipe, etc.
{
if (onFile)
onFile({ itemName, itemPath, makeUnsigned(statData.st_size), statData.st_mtime });
diff --git a/zen/globals.h b/zen/globals.h
index 10975414..024147fa 100755
--- a/zen/globals.h
+++ b/zen/globals.h
@@ -14,14 +14,22 @@
namespace zen
{
-//solve static destruction order fiasco by providing shared ownership and serialized access to global variables
+/*
+Solve static destruction order fiasco by providing shared ownership and serialized access to global variables
+
+=>there may be accesses to "Global<T>::get()" during process shutdown e.g. _("") used by message in debug_minidump.cpp or by some detached thread assembling an error message!
+=> use trivially-destructible POD only!!!
+
+ATTENTION: function-static globals have the compiler generate "magic statics" == compiler-generated locking code which will crash or leak memory when accessed after global is "dead"
+ => "solved" by FunStatGlobal, but we can't have "too many" of these...
+*/
template <class T>
-class Global
+class Global //don't use for function-scope statics!
{
public:
Global()
{
- static_assert(std::is_trivially_destructible_v<Pod>, "this memory needs to live forever");
+ static_assert(std::is_trivially_constructible_v<Pod>&& std::is_trivially_destructible_v<Pod>, "this memory needs to live forever");
assert(!pod_.inst && !pod_.spinLock); //we depend on static zero-initialization!
}
@@ -52,16 +60,131 @@ public:
}
private:
- //avoid static destruction order fiasco: there may be accesses to "Global<T>::get()" during process shutdown
- //e.g. _("") used by message in debug_minidump.cpp or by some detached thread assembling an error message!
- //=> use trivially-destructible POD only!!!
struct Pod
{
+ std::atomic<bool> spinLock; // { false }; rely entirely on static zero-initialization! => avoid potential contention with worker thread during Global<> construction!
+ //serialize access; can't use std::mutex: has non-trivial destructor
std::shared_ptr<T>* inst; // = nullptr;
+ } pod_;
+};
+
+//===================================================================================================================
+//===================================================================================================================
+
+struct CleanUpEntry
+{
+ using CleanUpFunction = void (*)(void* callbackData);
+ CleanUpFunction cleanUpFun;
+ void* callbackData;
+ CleanUpEntry* prev;
+};
+void registerGlobalForDestruction(CleanUpEntry& entry);
+
+
+template <class T>
+class FunStatGlobal
+{
+public:
+ //No FunStatGlobal() or ~FunStatGlobal()!
+
+ std::shared_ptr<T> get()
+ {
+ static_assert(std::is_trivially_constructible_v<FunStatGlobal>&&
+ std::is_trivially_destructible_v<FunStatGlobal>, "this class must not generate code for magic statics!");
+
+ while (pod_.spinLock.exchange(true)) ;
+ ZEN_ON_SCOPE_EXIT(pod_.spinLock = false);
+ if (pod_.inst)
+ return *pod_.inst;
+ return nullptr;
+ }
+
+ void set(std::unique_ptr<T>&& newInst)
+ {
+ std::shared_ptr<T>* tmpInst = nullptr;
+ if (newInst)
+ tmpInst = new std::shared_ptr<T>(std::move(newInst));
+ {
+ while (pod_.spinLock.exchange(true)) ;
+ ZEN_ON_SCOPE_EXIT(pod_.spinLock = false);
+
+ std::swap(pod_.inst, tmpInst);
+ registerDestruction();
+ }
+ delete tmpInst;
+ }
+
+ void initOnce(std::unique_ptr<T> (*getInitialValue)())
+ {
+ while (pod_.spinLock.exchange(true)) ;
+ ZEN_ON_SCOPE_EXIT(pod_.spinLock = false);
+
+ if (!pod_.cleanUpEntry.cleanUpFun)
+ {
+ assert(!pod_.inst);
+ if (std::unique_ptr<T> newInst = (*getInitialValue)())
+ pod_.inst = new std::shared_ptr<T>(std::move(newInst));
+ registerDestruction();
+ }
+ }
+
+private:
+ //call while holding pod_.spinLock
+ void registerDestruction()
+ {
+ assert(pod_.spinLock);
+
+ if (!pod_.cleanUpEntry.cleanUpFun)
+ {
+ pod_.cleanUpEntry.callbackData = this;
+ pod_.cleanUpEntry.cleanUpFun = [](void* callbackData)
+ {
+ auto thisPtr = static_cast<FunStatGlobal*>(callbackData);
+ thisPtr->set(nullptr);
+ };
+
+ registerGlobalForDestruction(pod_.cleanUpEntry);
+ }
+ }
+
+ struct Pod
+ {
std::atomic<bool> spinLock; // { false }; rely entirely on static zero-initialization! => avoid potential contention with worker thread during Global<> construction!
//serialize access; can't use std::mutex: has non-trival destructor
+ std::shared_ptr<T>* inst; // = nullptr;
+ CleanUpEntry cleanUpEntry;
} pod_;
};
+
+
+inline
+void registerGlobalForDestruction(CleanUpEntry& entry)
+{
+ static struct
+ {
+ std::atomic<bool> spinLock;
+ CleanUpEntry* head;
+ } cleanUpList;
+
+ static_assert(std::is_trivially_constructible_v<decltype(cleanUpList)>&&
+ std::is_trivially_destructible_v<decltype(cleanUpList)>, "we must not generate code for magic statics!");
+
+ while (cleanUpList.spinLock.exchange(true)) ;
+ ZEN_ON_SCOPE_EXIT(cleanUpList.spinLock = false);
+
+ std::atexit([]
+ {
+ while (cleanUpList.spinLock.exchange(true)) ;
+ ZEN_ON_SCOPE_EXIT(cleanUpList.spinLock = false);
+
+ (*cleanUpList.head->cleanUpFun)(cleanUpList.head->callbackData);
+ cleanUpList.head = cleanUpList.head->prev; //nicely clean up in reverse order of construction
+ });
+
+ entry.prev = cleanUpList.head;
+ cleanUpList.head = &entry;
+
+}
}
#endif //GLOBALS_H_8013740213748021573485
diff --git a/zen/guid.h b/zen/guid.h
index 89e800b5..a26688f8 100755
--- a/zen/guid.h
+++ b/zen/guid.h
@@ -9,6 +9,7 @@
#include <fcntl.h> //open
#include <unistd.h> //close
+ #include <zen/sys_error.h>
//#include <uuid/uuid.h> -> uuid_generate(), uuid_unparse(); avoid additional dependency for "sudo apt-get install uuid-dev"
diff --git a/zen/http.cpp b/zen/http.cpp
index d06d3309..1f89bf20 100755
--- a/zen/http.cpp
+++ b/zen/http.cpp
@@ -26,9 +26,9 @@ public:
const bool useTls = [&]
{
- if (startsWith(url, Zstr("http://"), CmpAsciiNoCase()))
+ if (startsWithAsciiNoCase(url, Zstr("http://")))
return false;
- if (startsWith(url, Zstr("https://"), CmpAsciiNoCase()))
+ if (startsWithAsciiNoCase(url, Zstr("https://")))
return true;
throw SysError(L"URL uses unexpected protocol.");
}();
@@ -57,35 +57,16 @@ public:
//https://www.w3.org/Protocols/HTTP/1.0/spec.html#Request-Line
std::string msg = (postParams ? "POST " : "GET ") + utfTo<std::string>(page) + " HTTP/1.0\r\n";
- for (const auto& item : headers)
- msg += item.first + ": " + item.second + "\r\n";
+ for (const auto& [name, value] : headers)
+ msg += name + ": " + value + "\r\n";
msg += "\r\n";
msg += postBuf;
//send request
for (size_t bytesToSend = msg.size(); bytesToSend > 0;)
- {
- int bytesSent = 0;
- for (;;)
- {
- bytesSent = ::send(socket_->get(), //_In_ SOCKET s,
- &*(msg.end() - bytesToSend), //_In_ const char *buf,
- static_cast<int>(bytesToSend), //_In_ int len,
- 0); //_In_ int flags
- if (bytesSent >= 0 || errno != EINTR)
- break;
- }
- if (bytesSent < 0)
- THROW_LAST_SYS_ERROR_WSA(L"send");
- if (bytesSent > static_cast<int>(bytesToSend))
- throw SysError(L"send: buffer overflow.");
- if (bytesSent == 0)
- throw SysError(L"send: zero bytes processed");
-
- bytesToSend -= bytesSent;
- }
- if (::shutdown(socket_->get(), SHUT_WR) != 0)
- THROW_LAST_SYS_ERROR_WSA(L"shutdown");
+ bytesToSend -= tryWriteSocket(socket_->get(), &*(msg.end() - bytesToSend), bytesToSend); //throw SysError
+
+ shutdownSocketSend(socket_->get()); //throw SysError
//receive response:
std::string headBuf;
@@ -116,7 +97,7 @@ public:
const std::vector<std::string> statusItems = split(statusBuf, ' ', SplitType::ALLOW_EMPTY); //HTTP-Version SP Status-Code SP Reason-Phrase CRLF
if (statusItems.size() < 2 || !startsWith(statusItems[0], "HTTP/"))
- throw SysError(L"Invalid HTTP response: \"" + utfTo<std::wstring>(statusBuf) + L"\"");
+ throw SysError(L"Invalid HTTP response: \"" + utfTo<std::wstring>(statusBuf) + L'"');
statusCode_ = stringTo<int>(statusItems[1]);
@@ -175,8 +156,6 @@ public:
private:
size_t tryRead(void* buffer, size_t bytesToRead) //throw SysError; may return short, only 0 means EOF!
{
- if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check!
- throw std::logic_error("Contract violation! " + std::string(__FILE__) + ":" + numberTo<std::string>(__LINE__));
assert(bytesToRead <= getBlockSize()); //block size might be 1000 while reading HTTP header
if (contentRemaining_ >= 0)
@@ -185,21 +164,7 @@ private:
return 0;
bytesToRead = static_cast<size_t>(std::min(static_cast<int64_t>(bytesToRead), contentRemaining_)); //[!] contentRemaining_ > 4 GB possible!
}
- int bytesReceived = 0;
- for (;;)
- {
- bytesReceived = ::recv(socket_->get(), //_In_ SOCKET s,
- static_cast<char*>(buffer), //_Out_ char *buf,
- static_cast<int>(bytesToRead), //_In_ int len,
- 0); //_In_ int flags
- if (bytesReceived >= 0 || errno != EINTR)
- break;
- }
- if (bytesReceived < 0)
- THROW_LAST_SYS_ERROR_WSA(L"recv");
- if (static_cast<size_t>(bytesReceived) > bytesToRead) //better safe than sorry
- throw SysError(L"HttpInputStream::tryRead: buffer overflow.");
-
+ const size_t bytesReceived = tryReadSocket(socket_->get(), buffer, bytesToRead); //throw SysError; may return short, only 0 means EOF!
if (contentRemaining_ >= 0)
contentRemaining_ -= bytesReceived;
@@ -325,8 +290,8 @@ std::string urldecode(const std::string& str)
std::string zen::xWwwFormUrlEncode(const std::vector<std::pair<std::string, std::string>>& paramPairs)
{
std::string output;
- for (const auto& pair : paramPairs)
- output += urlencode(pair.first) + '=' + urlencode(pair.second) + '&';
+ for (const auto& [name, value] : paramPairs)
+ output += urlencode(name) + '=' + urlencode(value) + '&';
//encode both key and value: https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1
if (!output.empty())
output.pop_back();
diff --git a/zen/i18n.h b/zen/i18n.h
index 45762861..2ecee45a 100755
--- a/zen/i18n.h
+++ b/zen/i18n.h
@@ -59,10 +59,10 @@ std::shared_ptr<const TranslationHandler> getTranslator();
namespace impl
{
inline
-Global<const TranslationHandler>& refGlobalTranslationHandler()
+FunStatGlobal<const TranslationHandler>& refGlobalTranslationHandler()
{
//getTranslator() may be called even after static objects of this translation unit are destroyed!
- static Global<const TranslationHandler> inst; //external linkage even in header!
+ static FunStatGlobal<const TranslationHandler> inst; //external linkage even in header!
return inst;
}
}
diff --git a/zen/legacy_compiler.h b/zen/legacy_compiler.h
index e9d50b97..16d87c53 100755
--- a/zen/legacy_compiler.h
+++ b/zen/legacy_compiler.h
@@ -7,14 +7,11 @@
#ifndef LEGACY_COMPILER_H_839567308565656789
#define LEGACY_COMPILER_H_839567308565656789
- #include <optional>
-
namespace std
{
//https://gcc.gnu.org/onlinedocs/libstdc++/manual/status.html
//https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations
-
}
#endif //LEGACY_COMPILER_H_839567308565656789
diff --git a/zen/scope_guard.h b/zen/scope_guard.h
index d056bb2a..9eff6c1f 100755
--- a/zen/scope_guard.h
+++ b/zen/scope_guard.h
@@ -10,6 +10,7 @@
#include <cassert>
#include <exception>
#include "type_traits.h"
+#include "legacy_compiler.h" //std::uncaught_exceptions
//best of Zen, Loki and C++17
@@ -103,8 +104,8 @@ auto makeGuard(F&& fun) { return ScopeGuard<runMode, std::decay_t<F>>(std::forwa
#define ZEN_CHECK_CASE_FOR_CONSTANT_IMPL(X) L ## X
-#define ZEN_ON_SCOPE_EXIT(X) auto ZEN_CONCAT(dummy, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_EXIT >([&]{ X; }); (void)ZEN_CONCAT(dummy, __LINE__);
-#define ZEN_ON_SCOPE_FAIL(X) auto ZEN_CONCAT(dummy, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_FAIL >([&]{ X; }); (void)ZEN_CONCAT(dummy, __LINE__);
-#define ZEN_ON_SCOPE_SUCCESS(X) auto ZEN_CONCAT(dummy, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_SUCCESS>([&]{ X; }); (void)ZEN_CONCAT(dummy, __LINE__);
+#define ZEN_ON_SCOPE_EXIT(X) auto ZEN_CONCAT(scopeGuard, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_EXIT >([&]{ X; }); (void)ZEN_CONCAT(scopeGuard, __LINE__);
+#define ZEN_ON_SCOPE_FAIL(X) auto ZEN_CONCAT(scopeGuard, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_FAIL >([&]{ X; }); (void)ZEN_CONCAT(scopeGuard, __LINE__);
+#define ZEN_ON_SCOPE_SUCCESS(X) auto ZEN_CONCAT(scopeGuard, __LINE__) = zen::makeGuard<zen::ScopeGuardRunMode::ON_SUCCESS>([&]{ X; }); (void)ZEN_CONCAT(scopeGuard, __LINE__);
#endif //SCOPE_GUARD_H_8971632487321434
diff --git a/zen/shell_execute.h b/zen/shell_execute.h
index 0802fbcb..98824d70 100755
--- a/zen/shell_execute.h
+++ b/zen/shell_execute.h
@@ -39,7 +39,7 @@ void shellExecute(const Zstring& command, ExecutionType type) //throw FileError
if (type == ExecutionType::SYNC)
{
//Posix ::system() - execute a shell command
- const int rv = ::system(command.c_str()); //do NOT use std::system as its documentation says nothing about "WEXITSTATUS(rv)", ect...
+ const int rv = ::system(command.c_str()); //do NOT use std::system as its documentation says nothing about "WEXITSTATUS(rv)", etc...
if (rv == -1 || WEXITSTATUS(rv) == 127)
throw FileError(_("Incorrect command line:") + L"\n" + utfTo<std::wstring>(command));
//http://linux.die.net/man/3/system "In case /bin/sh could not be executed, the exit status will be that of a command that does exit(127)"
@@ -73,7 +73,7 @@ void shellExecute(const Zstring& command, ExecutionType type) //throw FileError
inline
void openWithDefaultApplication(const Zstring& itemPath) //throw FileError
{
- shellExecute("xdg-open \"" + itemPath + "\"", ExecutionType::ASYNC); //
+ shellExecute("xdg-open \"" + itemPath + '"', ExecutionType::ASYNC); //
}
}
diff --git a/zen/socket.h b/zen/socket.h
index e551a5ba..33ac2e50 100755
--- a/zen/socket.h
+++ b/zen/socket.h
@@ -20,6 +20,12 @@ namespace zen
do { const ErrorCode ecInternal = getLastError(); throw SysError(formatSystemError(functionName, ecInternal)); } while (false)
+//patch up socket portability:
+using SocketType = int;
+const SocketType invalidSocket = -1;
+inline void closeSocket(SocketType s) { ::close(s); }
+
+
//Winsock needs to be initialized before calling any of these functions! (WSAStartup/WSACleanup)
class Socket //throw SysError
@@ -67,18 +73,78 @@ public:
~Socket() { closeSocket(socket_); }
- using SocketType = int;
SocketType get() const { return socket_; }
private:
Socket (const Socket&) = delete;
Socket& operator=(const Socket&) = delete;
- static const SocketType invalidSocket = -1;
- static void closeSocket(SocketType s) { ::close(s); }
-
SocketType socket_ = invalidSocket;
};
+
+
+//more socket helper functions:
+namespace
+{
+size_t tryReadSocket(SocketType socket, void* buffer, size_t bytesToRead) //throw SysError; may return short, only 0 means EOF!
+{
+ if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check!
+ throw std::logic_error("Contract violation! " + std::string(__FILE__) + ":" + numberTo<std::string>(__LINE__));
+
+ int bytesReceived = 0;
+ for (;;)
+ {
+ bytesReceived = ::recv(socket, //_In_ SOCKET s,
+ static_cast<char*>(buffer), //_Out_ char *buf,
+ static_cast<int>(bytesToRead), //_In_ int len,
+ 0); //_In_ int flags
+ if (bytesReceived >= 0 || errno != EINTR)
+ break;
+ }
+ if (bytesReceived < 0)
+ THROW_LAST_SYS_ERROR_WSA(L"recv");
+
+ if (static_cast<size_t>(bytesReceived) > bytesToRead) //better safe than sorry
+ throw SysError(L"HttpInputStream::tryRead: buffer overflow.");
+
+ return bytesReceived; //"zero indicates end of file"
+}
+
+
+size_t tryWriteSocket(SocketType socket, const void* buffer, size_t bytesToWrite) //throw SysError; may return short! CONTRACT: bytesToWrite > 0
+{
+ if (bytesToWrite == 0)
+ throw std::logic_error("Contract violation! " + std::string(__FILE__) + ":" + numberTo<std::string>(__LINE__));
+
+ int bytesWritten = 0;
+ for (;;)
+ {
+ bytesWritten = ::send(socket, //_In_ SOCKET s,
+ static_cast<const char*>(buffer), //_In_ const char *buf,
+ static_cast<int>(bytesToWrite), //_In_ int len,
+ 0); //_In_ int flags
+ if (bytesWritten >= 0 || errno != EINTR)
+ break;
+ }
+ if (bytesWritten < 0)
+ THROW_LAST_SYS_ERROR_WSA(L"send");
+ if (bytesWritten > static_cast<int>(bytesToWrite))
+ throw SysError(L"send: buffer overflow.");
+ if (bytesWritten == 0)
+ throw SysError(L"send: zero bytes processed");
+
+ return bytesWritten;
+}
+}
+
+
+inline
+void shutdownSocketSend(SocketType socket) //throw SysError
+{
+ if (::shutdown(socket, SHUT_WR) != 0)
+ THROW_LAST_SYS_ERROR_WSA(L"shutdown");
+}
+
}
#endif //SOCKET_H_23498325972583947678456437
diff --git a/zen/stl_tools.h b/zen/stl_tools.h
index be9bf710..c3a9bf8f 100755
--- a/zen/stl_tools.h
+++ b/zen/stl_tools.h
@@ -12,6 +12,7 @@
#include <vector>
#include <memory>
#include <algorithm>
+#include <optional>
#include "string_traits.h"
#include "build_info.h"
diff --git a/zen/string_tools.h b/zen/string_tools.h
index 8746722a..657c70d5 100755
--- a/zen/string_tools.h
+++ b/zen/string_tools.h
@@ -26,32 +26,30 @@ template <class Char> bool isWhiteSpace(Char c);
template <class Char> bool isDigit (Char c); //not exactly the same as "std::isdigit" -> we consider '0'-'9' only!
template <class Char> bool isHexDigit (Char c);
template <class Char> bool isAsciiAlpha(Char c);
+template <class Char> bool isAsciiString(const Char* str);
template <class Char> Char asciiToLower(Char c);
template <class Char> Char asciiToUpper(Char c);
-//case-sensitive comparison (compile-time correctness: use different number of arguments as STL comparison predicates!)
-struct CmpBinary { template <class Char> int operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const; };
+//both S and T can be strings or char/wchar_t arrays or single char/wchar_t
+template <class S, class T> bool contains(const S& str, const T& term);
-//basic case-insensitive comparison (considering A-Z only!)
-struct CmpAsciiNoCase { template <class Char> int operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const; };
+template <class S, class T> bool startsWith (const S& str, const T& prefix);
+template <class S, class T> bool startsWithAsciiNoCase(const S& str, const T& prefix);
-struct LessAsciiNoCase
-{
- template <class S> //don't support heterogenous input! => use as container predicate only!
- bool operator()(const S& lhs, const S& rhs) const { return CmpAsciiNoCase()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
-};
+template <class S, class T> bool endsWith (const S& str, const T& postfix);
+template <class S, class T> bool endsWithAsciiNoCase(const S& str, const T& postfix);
-//both S and T can be strings or char/wchar_t arrays or simple char/wchar_t
-template <class S, class T> bool contains(const S& str, const T& term);
+template <class S, class T> bool equalString (const S& lhs, const T& rhs);
+template <class S, class T> bool equalAsciiNoCase(const S& lhs, const T& rhs);
-template <class S, class T> bool startsWith(const S& str, const T& prefix);
-template <class S, class T, class Function> bool startsWith(const S& str, const T& prefix, Function cmpStringFun);
+template <class S, class T> int compareString (const S& lhs, const T& rhs);
+template <class S, class T> int compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!)
-template <class S, class T> bool endsWith (const S& str, const T& postfix);
-template <class S, class T, class Function> bool endsWith (const S& str, const T& postfix, Function cmpStringFun);
+struct LessAsciiNoCase //STL container predicate
+{
+ template <class S> bool operator()(const S& lhs, const S& rhs) const { return compareAsciiNoCase(lhs, rhs) < 0; }
+};
-template <class S, class T> bool strEqual(const S& lhs, const T& rhs);
-template <class S, class T, class Function> bool strEqual(const S& lhs, const T& rhs, Function cmpStringFun);
enum FailureReturnVal
{
@@ -152,6 +150,17 @@ bool isAsciiAlpha(Char c)
template <class Char> inline
+bool isAsciiString(const Char* str)
+{
+ static_assert(std::is_same_v<Char, char> || std::is_same_v<Char, wchar_t>);
+ for (Char c = *str; c != 0; c = *++str)
+ if (zen::makeUnsigned(c) >= 128)
+ return false;
+ return true;
+}
+
+
+template <class Char> inline
Char asciiToLower(Char c)
{
if (static_cast<Char>('A') <= c && c <= static_cast<Char>('Z'))
@@ -169,41 +178,103 @@ Char asciiToUpper(Char c)
}
-template <class S, class T, class Function> inline
-bool startsWith(const S& str, const T& prefix, Function cmpStringFun)
+namespace impl
+{
+inline int strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num); } //support embedded 0, unlike strncmp/wcsncmp!
+inline int strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num); } //
+
+
+template <class Char> inline
+int strcmpAsciiNoCase(const Char* lhs, const Char* rhs, size_t len)
+{
+ while (len-- > 0)
+ {
+ const Char charL = asciiToLower(*lhs++); //ordering: lower-case chars have higher code points than upper-case
+ const Char charR = asciiToLower(*rhs++); //
+ if (charL != charR)
+ return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention!
+ //unsigned underflow is well-defined!
+ }
+ return 0;
+}
+}
+
+
+template <class S, class T> inline
+bool startsWith(const S& str, const T& prefix)
{
const size_t pfLen = strLength(prefix);
- if (strLength(str) < pfLen)
- return false;
+ return strLength(str) >= pfLen && impl::strcmpWithNulls(strBegin(str), strBegin(prefix), pfLen) == 0;
+}
+
- return cmpStringFun(strBegin(str), pfLen,
- strBegin(prefix), pfLen) == 0;
+template <class S, class T> inline
+bool startsWithAsciiNoCase(const S& str, const T& prefix)
+{
+ const size_t pfLen = strLength(prefix);
+ return strLength(str) >= pfLen && impl::strcmpAsciiNoCase(strBegin(str), strBegin(prefix), pfLen) == 0;
}
-template <class S, class T, class Function> inline
-bool endsWith(const S& str, const T& postfix, Function cmpStringFun)
+template <class S, class T> inline
+bool endsWith(const S& str, const T& postfix)
{
const size_t strLen = strLength(str);
const size_t pfLen = strLength(postfix);
- if (strLen < pfLen)
- return false;
+ return strLen >= pfLen && impl::strcmpWithNulls(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == 0;
+}
+
+
+template <class S, class T> inline
+bool endsWithAsciiNoCase(const S& str, const T& postfix)
+{
+ const size_t strLen = strLength(str);
+ const size_t pfLen = strLength(postfix);
+ return strLen >= pfLen && impl::strcmpAsciiNoCase(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == 0;
+}
+
- return cmpStringFun(strBegin(str) + strLen - pfLen, pfLen,
- strBegin(postfix), pfLen) == 0;
+template <class S, class T> inline
+bool equalString(const S& lhs, const T& rhs)
+{
+ const size_t lhsLen = strLength(lhs);
+ return lhsLen == strLength(rhs) && impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), lhsLen) == 0;
}
-template <class S, class T, class Function> inline
-bool strEqual(const S& lhs, const T& rhs, Function cmpStringFun)
+template <class S, class T> inline
+bool equalAsciiNoCase(const S& lhs, const T& rhs)
+{
+ const size_t lhsLen = strLength(lhs);
+ return lhsLen == strLength(rhs) && impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == 0;
+}
+
+
+template <class S, class T> inline
+int compareString(const S& lhs, const T& rhs)
{
- return cmpStringFun(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) == 0;
+ const size_t lhsLen = strLength(lhs);
+ const size_t rhsLen = strLength(rhs);
+
+ //length check *after* strcmpWithNulls(): we do care about natural ordering: e.g. for "compareString(makeUpperCopy(lhs), makeUpperCopy(rhs))"
+ const int rv = impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), std::min(lhsLen, rhsLen));
+ if (rv != 0)
+ return rv;
+ return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
}
-template <class S, class T> inline bool startsWith(const S& str, const T& prefix ) { return startsWith(str, prefix, CmpBinary()); }
-template <class S, class T> inline bool endsWith (const S& str, const T& postfix) { return endsWith (str, postfix, CmpBinary()); }
-template <class S, class T> inline bool strEqual (const S& lhs, const T& rhs ) { return strEqual (lhs, rhs, CmpBinary()); }
+template <class S, class T> inline
+int compareAsciiNoCase(const S& lhs, const T& rhs)
+{
+ const size_t lhsLen = strLength(lhs);
+ const size_t rhsLen = strLength(rhs);
+
+ const int rv = impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), std::min(lhsLen, rhsLen));
+ if (rv != 0)
+ return rv;
+ return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
+}
template <class S, class T> inline
@@ -464,42 +535,12 @@ struct CopyStringToString<T, T> //perf: we don't need a deep copy if string type
template <class S>
T copy(S&& str) const { return std::forward<S>(str); }
};
-
-inline int strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std::memcmp (ptr1, ptr2, num); }
-inline int strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num); }
}
template <class T, class S> inline
T copyStringTo(S&& str) { return impl::CopyStringToString<std::decay_t<S>, T>().copy(std::forward<S>(str)); }
-template <class Char> inline
-int CmpBinary::operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const
-{
- //support embedded 0, unlike strncmp/wcsncmp!
- const int rv = impl::strcmpWithNulls(lhs, rhs, std::min(lhsLen, rhsLen));
- if (rv != 0)
- return rv;
- return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
-template <class Char> inline
-int CmpAsciiNoCase::operator()(const Char* lhs, size_t lhsLen, const Char* rhs, size_t rhsLen) const
-{
- const auto* const lhsLast = lhs + std::min(lhsLen, rhsLen);
- while (lhs != lhsLast)
- {
- const Char charL = asciiToLower(*lhs++); //ordering: lower-case chars have higher code points than uppper-case
- const Char charR = asciiToLower(*rhs++); //
- if (charL != charR)
- return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention!
- //unsigned underflow is well-defined!
- }
- return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
namespace impl
{
template <class Num> inline
diff --git a/zen/thread.cpp b/zen/thread.cpp
index 8016d4a9..08bfaa25 100755
--- a/zen/thread.cpp
+++ b/zen/thread.cpp
@@ -34,10 +34,7 @@ uint64_t getThreadIdNative()
}
-struct InitMainThreadIdOnStartup
-{
- InitMainThreadIdOnStartup() { getMainThreadId(); }
-} startupInitMainThreadId;
+const uint64_t globalMainThreadId = getThreadId(); //avoid code-gen for "magic static"!
}
@@ -50,6 +47,9 @@ uint64_t zen::getThreadId()
uint64_t zen::getMainThreadId()
{
- static const uint64_t mainThreadId = getThreadId();
- return mainThreadId;
+ //don't make this a function-scope static (avoid code-gen for "magic static")
+ if (globalMainThreadId == 0) //might be called during static initialization
+ return getThreadId();
+
+ return globalMainThreadId;
}
diff --git a/zen/time.h b/zen/time.h
index b06d3d15..a32e28e3 100755
--- a/zen/time.h
+++ b/zen/time.h
@@ -327,13 +327,13 @@ TimeComp parseTime(const String& format, const String2& str, UserDefinedFormatTa
const CharType* itStr = strBegin(str);
const CharType* const strLast = itStr + strLength(str);
- auto extractNumber = [&](int& result, size_t digitCount) -> bool
+ auto extractNumber = [&](int& result, size_t digitCount)
{
if (strLast - itStr < makeSigned(digitCount))
return false;
- if (std::any_of(itStr, itStr + digitCount, [](CharType c) { return !isDigit(c); }))
- return false;
+ if (!std::all_of(itStr, itStr + digitCount, isDigit<CharType>))
+ return false;
result = zen::stringTo<int>(StringRef<const CharType>(itStr, itStr + digitCount));
itStr += digitCount;
diff --git a/zen/type_traits.h b/zen/type_traits.h
index 2d4e7a97..8783cb6a 100755
--- a/zen/type_traits.h
+++ b/zen/type_traits.h
@@ -8,7 +8,6 @@
#define TYPE_TRAITS_H_3425628658765467
#include <type_traits>
-#include "legacy_compiler.h"
//http://en.cppreference.com/w/cpp/header/type_traits
diff --git a/zen/utf.h b/zen/utf.h
index da6aaf97..5a095874 100755
--- a/zen/utf.h
+++ b/zen/utf.h
@@ -192,7 +192,7 @@ public:
std::optional<CodePoint> getNext()
{
if (it_ == last_)
- return std::nullopt; //GCC 8.2 bug: -Wmaybe-uninitialized for "return {};"
+ return std::nullopt;
const Char8 ch = *it_++;
CodePoint cp = ch;
@@ -313,7 +313,7 @@ bool isValidUtf(const UtfString& str)
using namespace impl;
UtfDecoder<GetCharTypeT<UtfString>> decoder(strBegin(str), strLength(str));
- while (std::optional<CodePoint> cp = decoder.getNext())
+ while (const std::optional<CodePoint> cp = decoder.getNext())
if (*cp == REPLACEMENT_CHAR)
return false;
@@ -367,7 +367,7 @@ TargetString utfTo(const SourceString& str, std::false_type)
TargetString output;
UtfDecoder<CharSrc> decoder(strBegin(str), strLength(str));
- while (std::optional<CodePoint> cp = decoder.getNext())
+ while (const std::optional<CodePoint> cp = decoder.getNext())
codePointToUtf<CharTrg>(*cp, [&](CharTrg c) { output += c; });
return output;
diff --git a/zen/zstring.cpp b/zen/zstring.cpp
index 8bf77a0b..68609030 100755
--- a/zen/zstring.cpp
+++ b/zen/zstring.cpp
@@ -8,9 +8,102 @@
#include <stdexcept>
#include "utf.h"
+ #include <gtk/gtk.h>
+ #include "sys_error.h"
using namespace zen;
+
+Zstring makeUpperCopy(const Zstring& str)
+{
+ //fast pre-check:
+ if (isAsciiString(str.c_str())) //perf: in the range of 3.5ns
+ {
+ Zstring output = str;
+ for (Zchar& c : output) c = asciiToUpper(c);
+ return output;
+ }
+
+ Zstring strNorm = getUnicodeNormalForm(str);
+ try
+ {
+ static_assert(sizeof(impl::CodePoint) == sizeof(gunichar));
+ Zstring output;
+ output.reserve(strNorm.size());
+
+ impl::UtfDecoder<char> decoder(strNorm.c_str(), strNorm.size());
+ while (const std::optional<impl::CodePoint> cp = decoder.getNext())
+ impl::codePointToUtf<char>(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent!
+
+ return output;
+
+ }
+ catch (const SysError& e)
+ {
+ (void)e;
+ assert(false);
+ return str;
+ }
+}
+
+
+Zstring getUnicodeNormalForm(const Zstring& str)
+{
+ //fast pre-check:
+ if (isAsciiString(str.c_str())) //perf: in the range of 3.5ns
+ return str; //god bless our ref-counting! => save output string memory consumption!
+
+ //Example: const char* decomposed = "\x6f\xcc\x81";
+ // const char* precomposed = "\xc3\xb3";
+ try
+ {
+ gchar* outStr = ::g_utf8_normalize (str.c_str(), str.length(), G_NORMALIZE_DEFAULT_COMPOSE);
+ if (!outStr)
+ throw SysError(L"g_utf8_normalize: conversion failed. (" + utfTo<std::wstring>(str) + L")");
+ ZEN_ON_SCOPE_EXIT(::g_free(outStr));
+ return outStr;
+
+ }
+ catch (const SysError& e)
+ {
+ (void)e;
+ assert(false);
+ return str;
+ }
+}
+
+
+Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm)
+{
+ if (oldTerm.empty())
+ return str;
+
+ Zstring strU = str;
+ Zstring oldU = oldTerm;
+
+ for (Zchar& c : strU) c = asciiToUpper(c); //can't use makeUpperCopy(): input/output sizes may differ!
+ for (Zchar& c : oldU) c = asciiToUpper(c); //
+
+ Zstring output;
+
+ for (size_t pos = 0;;)
+ {
+ const size_t posFound = strU.find(oldU, pos);
+ if (posFound == Zstring::npos)
+ {
+ if (pos == 0) //optimize "oldTerm not found": return ref-counted copy
+ return str;
+ output.append(str.begin() + pos, str.end());
+ return output;
+ }
+
+ output.append(str.begin() + pos, str.begin() + posFound);
+ output += newTerm;
+ pos = posFound + oldTerm.size();
+ }
+}
+
+
/*
MSDN "Handling Sorting in Your Applications": https://msdn.microsoft.com/en-us/library/windows/desktop/dd318144
@@ -33,8 +126,14 @@ OS X (UTF8 char)
________________________
time per call | function
*/
+int compareLocalPath(const Zstring& lhs, const Zstring& rhs)
+{
+ assert(lhs.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls!
+ assert(rhs.find(Zchar('\0')) == Zstring::npos); //
+ return compareString(lhs, rhs);
+}
namespace
@@ -43,7 +142,7 @@ int compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rh
{
//- strncasecmp implements ASCII CI-comparsion only! => signature is broken for UTF8-input; toupper() similarly doesn't support Unicode
//- wcsncasecmp: https://opensource.apple.com/source/Libc/Libc-763.12/string/wcsncasecmp-fbsd.c
- // => re-implement comparison based on towlower() to avoid memory allocations
+ // => re-implement comparison based on g_unichar_toupper() to avoid memory allocations
impl::UtfDecoder<char> decL(lhs, lhsLen);
impl::UtfDecoder<char> decR(rhs, rhsLen);
@@ -54,23 +153,35 @@ int compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rh
if (!cpL || !cpR)
return static_cast<int>(!cpR) - static_cast<int>(!cpL);
- //support unit-testing on Windows: CodePoint is truncated to wchar_t
- static_assert(sizeof(wchar_t) == sizeof(impl::CodePoint));
+ static_assert(sizeof(gunichar) == sizeof(impl::CodePoint));
- const wchar_t charL = ::towlower(static_cast<wchar_t>(*cpL)); //ordering: towlower() converts to higher code points than towupper()
- const wchar_t charR = ::towlower(static_cast<wchar_t>(*cpR)); //uses LC_CTYPE category of current locale
+ const gunichar charL = ::g_unichar_toupper(*cpL); //note: tolower can be ambiguous, so don't use:
+ const gunichar charR = ::g_unichar_toupper(*cpR); //e.g. "Σ" (upper case) can be lower-case "ς" in the end of the word or "σ" in the middle.
if (charL != charR)
+ //ordering: "to lower" converts to higher code points than "to upper"
return static_cast<unsigned int>(charL) - static_cast<unsigned int>(charR); //unsigned char-comparison is the convention!
//unsigned underflow is well-defined!
}
}
+
}
-int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen)
+int compareNatural(const Zstring& lhs, const Zstring& rhs)
{
- const char* const lhsEnd = lhs + lhsLen;
- const char* const rhsEnd = rhs + rhsLen;
+ //Unicode normal forms:
+ // Windows: CompareString() already ignores NFD/NFC differences: nice...
+ // Linux: g_unichar_toupper() can't ignore differences
+ // macOS: CFStringCompare() considers differences
+
+ const Zstring& lhsNorm = getUnicodeNormalForm(lhs);
+ const Zstring& rhsNorm = getUnicodeNormalForm(rhs);
+
+ const char* strL = lhsNorm.c_str();
+ const char* strR = rhsNorm.c_str();
+
+ const char* const strEndL = strL + lhsNorm.size();
+ const char* const strEndR = strR + rhsNorm.size();
/*
- compare strings after conceptually creating blocks of whitespace/numbers/text
- implement strict weak ordering!
@@ -84,43 +195,43 @@ int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, s
*/
for (;;)
{
- if (lhs == lhsEnd || rhs == rhsEnd)
- return static_cast<int>(lhs != lhsEnd) - static_cast<int>(rhs != rhsEnd); //"nothing" before "something"
+ if (strL == strEndL || strR == strEndR)
+ return static_cast<int>(strL != strEndL) - static_cast<int>(strR != strEndR); //"nothing" before "something"
//note: "something" never would have been condensed to "nothing" further below => can finish evaluation here
- const bool wsL = isWhiteSpace(*lhs);
- const bool wsR = isWhiteSpace(*rhs);
+ const bool wsL = isWhiteSpace(*strL);
+ const bool wsR = isWhiteSpace(*strR);
if (wsL != wsR)
return static_cast<int>(!wsL) - static_cast<int>(!wsR); //whitespace before non-ws!
if (wsL)
{
- ++lhs, ++rhs;
- while (lhs != lhsEnd && isWhiteSpace(*lhs)) ++lhs;
- while (rhs != rhsEnd && isWhiteSpace(*rhs)) ++rhs;
+ ++strL, ++strR;
+ while (strL != strEndL && isWhiteSpace(*strL)) ++strL;
+ while (strR != strEndR && isWhiteSpace(*strR)) ++strR;
continue;
}
- const bool digitL = isDigit(*lhs);
- const bool digitR = isDigit(*rhs);
+ const bool digitL = isDigit(*strL);
+ const bool digitR = isDigit(*strR);
if (digitL != digitR)
return static_cast<int>(!digitL) - static_cast<int>(!digitR); //number before chars!
if (digitL)
{
- while (lhs != lhsEnd && *lhs == '0') ++lhs;
- while (rhs != rhsEnd && *rhs == '0') ++rhs;
+ while (strL != strEndL && *strL == '0') ++strL;
+ while (strR != strEndR && *strR == '0') ++strR;
int rv = 0;
- for (;; ++lhs, ++rhs)
+ for (;; ++strL, ++strR)
{
- const bool endL = lhs == lhsEnd || !isDigit(*lhs);
- const bool endR = rhs == rhsEnd || !isDigit(*rhs);
+ const bool endL = strL == strEndL || !isDigit(*strL);
+ const bool endR = strR == strEndR || !isDigit(*strR);
if (endL != endR)
return static_cast<int>(!endL) - static_cast<int>(!endR); //more digits means bigger number
if (endL)
break; //same number of digits
- if (rv == 0 && *lhs != *rhs)
- rv = *lhs - *rhs; //found first digit difference comparing from left
+ if (rv == 0 && *strL != *strR)
+ rv = *strL - *strR; //found first digit difference comparing from left
}
if (rv != 0)
return rv;
@@ -128,28 +239,19 @@ int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, s
}
//compare full junks of text: consider unicode encoding!
- const char* textBeginL = lhs++;
- const char* textBeginR = rhs++; //current char is neither white space nor digit at this point!
- while (lhs != lhsEnd && !isWhiteSpace(*lhs) && !isDigit(*lhs)) ++lhs;
- while (rhs != rhsEnd && !isWhiteSpace(*rhs) && !isDigit(*rhs)) ++rhs;
+ const char* textBeginL = strL++;
+ const char* textBeginR = strR++; //current char is neither white space nor digit at this point!
+ while (strL != strEndL && !isWhiteSpace(*strL) && !isDigit(*strL)) ++strL;
+ while (strR != strEndR && !isWhiteSpace(*strR) && !isDigit(*strR)) ++strR;
- const int rv = compareNoCaseUtf8(textBeginL, lhs - textBeginL, textBeginR, rhs - textBeginR);
+ const int rv = compareNoCaseUtf8(textBeginL, strL - textBeginL, textBeginR, strR - textBeginR);
if (rv != 0)
return rv;
}
-}
-
-namespace
-{
}
-int CmpNaturalSort::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const
-{
- //auto strL = utfTo<std::string>(Zstring(lhs, lhsLen));
- //auto strR = utfTo<std::string>(Zstring(rhs, rhsLen));
- //return cmpStringNaturalLinux(strL.c_str(), strL.size(), strR.c_str(), strR.size());
- return cmpStringNaturalLinux(lhs, lhsLen, rhs, rhsLen);
-
-} \ No newline at end of file
+warn_static("clean up implementation of these two:")
+//template <> inline bool isWhiteSpace(char c)
+//template <> inline bool isWhiteSpace(wchar_t c)
diff --git a/zen/zstring.h b/zen/zstring.h
index 7fa21335..20cf968d 100755
--- a/zen/zstring.h
+++ b/zen/zstring.h
@@ -14,6 +14,7 @@
#define Zstr(x) x
const Zchar FILE_NAME_SEPARATOR = '/';
+
//"The reason for all the fuss above" - Loki/SmartPtr
//a high-performance string for interfacing with native OS APIs in multithreaded contexts
using Zstring = zen::Zbase<Zchar>;
@@ -22,43 +23,71 @@ using Zstring = zen::Zbase<Zchar>;
using Zstringw = zen::Zbase<wchar_t>;
-//Compare filepaths: Windows/OS X does NOT distinguish between upper/lower-case, while Linux DOES
-struct CmpFilePath
-{
- int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const;
-};
+//Caveat: don't expect input/output string sizes to match:
+// - different UTF-8 encoding length of upper-case chars
+// - different number of upper case chars (e.g. "ß" => "SS" on macOS)
+// - output is Unicode-normalized
+Zstring makeUpperCopy(const Zstring& str);
-struct CmpNaturalSort
-{
- int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const;
-};
+//Windows, Linux: precomposed
+//macOS: decomposed
+Zstring getUnicodeNormalForm(const Zstring& str);
+Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm);
-struct LessFilePath
-{
- template <class S> //don't support heterogenous input! => use as container predicate only!
- bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpFilePath()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
-};
+//------------------------------------------------------------------------------------------
+//inline
+//int compareNoCase(const Zstring& lhs, const Zstring& rhs)
+//{
+// return zen::compareString(makeUpperCopy(lhs), makeUpperCopy(rhs));
+// //avoid eager optimization bugs: e.g. "if (isAsciiString()) compareAsciiNoCase()" might model a different order!
+//}
+
+inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return makeUpperCopy(lhs) == makeUpperCopy(rhs); }
-struct LessNaturalSort
+struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find()
{
- template <class S> //don't support heterogenous input! => use as container predicate only!
- bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpNaturalSort()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
+ ZstringNoCase(const Zstring& str) : upperCase(makeUpperCopy(str)) {}
+ Zstring upperCase;
};
+inline bool operator<(const ZstringNoCase& lhs, const ZstringNoCase& rhs) { return lhs.upperCase < rhs.upperCase; }
+
+//struct LessNoCase { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNoCase(lhs, rhs) < 0; } };
+
+//------------------------------------------------------------------------------------------
+
+//Compare *local* file paths:
+// Windows: ignore case
+// Linux: byte-wise comparison
+// macOS: ignore case + Unicode normalization forms
+int compareLocalPath(const Zstring& lhs, const Zstring& rhs);
+
+inline bool equalLocalPath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; }
+struct LessLocalPath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } };
-template <class S>
-S makeUpperCopy(S str);
+//------------------------------------------------------------------------------------------
+int compareNatural(const Zstring& lhs, const Zstring& rhs);
+struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNatural(lhs, rhs) < 0; } }; //"less" predicate based on compareNatural()
+//------------------------------------------------------------------------------------------
+
+warn_static("get rid:")
+inline int compareFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs); }
+
+inline bool equalFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; }
+
+struct LessFilePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } };
+//------------------------------------------------------------------------------------------
-template <class S, class T> inline
-bool equalFilePath(const S& lhs, const T& rhs) { using namespace zen; return strEqual(lhs, rhs, CmpFilePath()); }
inline
Zstring appendSeparator(Zstring path) //support rvalue references!
{
- return zen::endsWith(path, FILE_NAME_SEPARATOR) ? path : (path += FILE_NAME_SEPARATOR); //returning a by-value parameter implicitly converts to r-value!
+ if (!zen::endsWith(path, FILE_NAME_SEPARATOR))
+ path += FILE_NAME_SEPARATOR;
+ return path; //returning a by-value parameter => implicitly moved (copy elision does not apply to function parameters)
}
@@ -82,12 +111,7 @@ Zstring getFileExtension(const Zstring& filePath)
}
-template <class S, class T, class U>
-S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm);
-
-
-
-//common unicode sequences
+//common unicode characters
const wchar_t EM_DASH = L'\u2014';
const wchar_t EN_DASH = L'\u2013';
const wchar_t* const SPACED_DASH = L" \u2013 "; //using 'EN DASH'
@@ -99,89 +123,6 @@ const wchar_t MULT_SIGN = L'\u00D7'; //fancy "x"
-
-
-//################################# inline implementation ########################################
-inline
-void makeUpperInPlace(wchar_t* str, size_t strLen)
-{
- std::for_each(str, str + strLen, [](wchar_t& c) { c = std::towupper(c); }); //locale-dependent!
-}
-
-
-inline
-void makeUpperInPlace(char* str, size_t strLen)
-{
- std::for_each(str, str + strLen, [](char& c) { c = std::toupper(static_cast<unsigned char>(c)); }); //locale-dependent!
- //result of toupper() is an unsigned char mapped to int range: the char representation is in the last 8 bits and we need not care about signedness!
- //this should work for UTF-8, too: all chars >= 128 are mapped upon themselves!
-}
-
-
-template <class S> inline
-S makeUpperCopy(S str)
-{
- const size_t len = str.length(); //we assert S is a string type!
- if (len > 0)
- makeUpperInPlace(&*str.begin(), len);
-
- return str;
-}
-
-
-inline
-int CmpFilePath::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const
-{
- assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls!
- assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); //
-
- const int rv = std::strncmp(lhs, rhs, std::min(lhsLen, rhsLen));
- if (rv != 0)
- return rv;
- return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
-template <class S, class T, class U> inline
-S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm)
-{
- using namespace zen;
- static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>);
- static_assert(std::is_same_v<GetCharTypeT<T>, GetCharTypeT<U>>);
- const size_t oldLen = strLength(oldTerm);
- if (oldLen == 0)
- return str;
-
- const S strU = makeUpperCopy(str); //S required to be a string class
- const S oldU = makeUpperCopy<S>(oldTerm); //[!] T not required to be a string class
- assert(strLength(strU) == strLength(str ));
- assert(strLength(oldU) == strLength(oldTerm));
-
- const auto* const newBegin = strBegin(newTerm);
- const auto* const newEnd = newBegin + strLength(newTerm);
-
- S output;
-
- for (size_t pos = 0;;)
- {
- const auto itFound = std::search(strU.begin() + pos, strU.end(),
- oldU.begin(), oldU.end());
- if (itFound == strU.end() && pos == 0)
- return str; //optimize "oldTerm not found": return ref-counted copy
-
- impl::stringAppend(output, str.begin() + pos, str.begin() + (itFound - strU.begin()));
- if (itFound == strU.end())
- return output;
-
- impl::stringAppend(output, newBegin, newEnd);
- pos = (itFound - strU.begin()) + oldLen;
- }
-}
-
-//expose for unit tests
-int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen);
-inline int cmpStringNaturalLinux(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen) { return cmpStringNaturalLinuxTest(lhs, lhsLen, rhs, rhsLen); }
-
//---------------------------------------------------------------------------
//ZEN macro consistency checks:
bgstack15