diff options
Diffstat (limited to 'zen/zstring.h')
-rwxr-xr-x | zen/zstring.h | 163 |
1 files changed, 52 insertions, 111 deletions
diff --git a/zen/zstring.h b/zen/zstring.h index 7fa21335..20cf968d 100755 --- a/zen/zstring.h +++ b/zen/zstring.h @@ -14,6 +14,7 @@ #define Zstr(x) x const Zchar FILE_NAME_SEPARATOR = '/'; + //"The reason for all the fuss above" - Loki/SmartPtr //a high-performance string for interfacing with native OS APIs in multithreaded contexts using Zstring = zen::Zbase<Zchar>; @@ -22,43 +23,71 @@ using Zstring = zen::Zbase<Zchar>; using Zstringw = zen::Zbase<wchar_t>; -//Compare filepaths: Windows/OS X does NOT distinguish between upper/lower-case, while Linux DOES -struct CmpFilePath -{ - int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const; -}; +//Caveat: don't expect input/output string sizes to match: +// - different UTF-8 encoding length of upper-case chars +// - different number of upper case chars (e.g. "ß" => "SS" on macOS) +// - output is Unicode-normalized +Zstring makeUpperCopy(const Zstring& str); -struct CmpNaturalSort -{ - int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const; -}; +//Windows, Linux: precomposed +//macOS: decomposed +Zstring getUnicodeNormalForm(const Zstring& str); +Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm); -struct LessFilePath -{ - template <class S> //don't support heterogenous input! => use as container predicate only! - bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpFilePath()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; } -}; +//------------------------------------------------------------------------------------------ +//inline +//int compareNoCase(const Zstring& lhs, const Zstring& rhs) +//{ +// return zen::compareString(makeUpperCopy(lhs), makeUpperCopy(rhs)); +// //avoid eager optimization bugs: e.g. "if (isAsciiString()) compareAsciiNoCase()" might model a different order! 
+//} + +inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return makeUpperCopy(lhs) == makeUpperCopy(rhs); } -struct LessNaturalSort +struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find() { - template <class S> //don't support heterogenous input! => use as container predicate only! - bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpNaturalSort()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; } + ZstringNoCase(const Zstring& str) : upperCase(makeUpperCopy(str)) {} + Zstring upperCase; }; +inline bool operator<(const ZstringNoCase& lhs, const ZstringNoCase& rhs) { return lhs.upperCase < rhs.upperCase; } + +//struct LessNoCase { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNoCase(lhs, rhs) < 0; } }; + +//------------------------------------------------------------------------------------------ + +//Compare *local* file paths: +// Windows: ignore case +// Linux: byte-wise comparison +// macOS: ignore case + Unicode normalization forms +int compareLocalPath(const Zstring& lhs, const Zstring& rhs); + +inline bool equalLocalPath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; } +struct LessLocalPath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } }; -template <class S> -S makeUpperCopy(S str); +//------------------------------------------------------------------------------------------ +int compareNatural(const Zstring& lhs, const Zstring& rhs); +struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring rhs) const { return compareNatural(lhs, rhs) < 0; } }; +//------------------------------------------------------------------------------------------ + +warn_static("get rid:") +inline int compareFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs); } + +inline bool 
equalFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; } + +struct LessFilePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } }; +//------------------------------------------------------------------------------------------ -template <class S, class T> inline -bool equalFilePath(const S& lhs, const T& rhs) { using namespace zen; return strEqual(lhs, rhs, CmpFilePath()); } inline Zstring appendSeparator(Zstring path) //support rvalue references! { - return zen::endsWith(path, FILE_NAME_SEPARATOR) ? path : (path += FILE_NAME_SEPARATOR); //returning a by-value parameter implicitly converts to r-value! + if (!zen::endsWith(path, FILE_NAME_SEPARATOR)) + path += FILE_NAME_SEPARATOR; + return path; //returning a by-value parameter => RVO if possible, r-value otherwise! } @@ -82,12 +111,7 @@ Zstring getFileExtension(const Zstring& filePath) } -template <class S, class T, class U> -S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm); - - - -//common unicode sequences +//common unicode characters const wchar_t EM_DASH = L'\u2014'; const wchar_t EN_DASH = L'\u2013'; const wchar_t* const SPACED_DASH = L" \u2013 "; //using 'EN DASH' @@ -99,89 +123,6 @@ const wchar_t MULT_SIGN = L'\u00D7'; //fancy "x" - - -//################################# inline implementation ######################################## -inline -void makeUpperInPlace(wchar_t* str, size_t strLen) -{ - std::for_each(str, str + strLen, [](wchar_t& c) { c = std::towupper(c); }); //locale-dependent! -} - - -inline -void makeUpperInPlace(char* str, size_t strLen) -{ - std::for_each(str, str + strLen, [](char& c) { c = std::toupper(static_cast<unsigned char>(c)); }); //locale-dependent! - //result of toupper() is an unsigned char mapped to int range: the char representation is in the last 8 bits and we need not care about signedness! 
- //this should work for UTF-8, too: all chars >= 128 are mapped upon themselves! -} - - -template <class S> inline -S makeUpperCopy(S str) -{ - const size_t len = str.length(); //we assert S is a string type! - if (len > 0) - makeUpperInPlace(&*str.begin(), len); - - return str; -} - - -inline -int CmpFilePath::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const -{ - assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls! - assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); // - - const int rv = std::strncmp(lhs, rhs, std::min(lhsLen, rhsLen)); - if (rv != 0) - return rv; - return static_cast<int>(lhsLen) - static_cast<int>(rhsLen); -} - - -template <class S, class T, class U> inline -S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm) -{ - using namespace zen; - static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>); - static_assert(std::is_same_v<GetCharTypeT<T>, GetCharTypeT<U>>); - const size_t oldLen = strLength(oldTerm); - if (oldLen == 0) - return str; - - const S strU = makeUpperCopy(str); //S required to be a string class - const S oldU = makeUpperCopy<S>(oldTerm); //[!] 
T not required to be a string class - assert(strLength(strU) == strLength(str )); - assert(strLength(oldU) == strLength(oldTerm)); - - const auto* const newBegin = strBegin(newTerm); - const auto* const newEnd = newBegin + strLength(newTerm); - - S output; - - for (size_t pos = 0;;) - { - const auto itFound = std::search(strU.begin() + pos, strU.end(), - oldU.begin(), oldU.end()); - if (itFound == strU.end() && pos == 0) - return str; //optimize "oldTerm not found": return ref-counted copy - - impl::stringAppend(output, str.begin() + pos, str.begin() + (itFound - strU.begin())); - if (itFound == strU.end()) - return output; - - impl::stringAppend(output, newBegin, newEnd); - pos = (itFound - strU.begin()) + oldLen; - } -} - -//expose for unit tests -int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen); -inline int cmpStringNaturalLinux(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen) { return cmpStringNaturalLinuxTest(lhs, lhsLen, rhs, rhsLen); } - //--------------------------------------------------------------------------- //ZEN macro consistency checks: |