From 2c81be72eef5363736cf1892646c74a3311ee4c1 Mon Sep 17 00:00:00 2001 From: "B. Stack" Date: Sun, 22 May 2022 17:03:17 -0400 Subject: add upstream 11.21 --- zen/string_tools.h | 221 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 165 insertions(+), 56 deletions(-) (limited to 'zen/string_tools.h') diff --git a/zen/string_tools.h b/zen/string_tools.h index ee4e5613..d3f35ce8 100644 --- a/zen/string_tools.h +++ b/zen/string_tools.h @@ -11,9 +11,6 @@ #include //iswspace #include //sprintf #include //swprintf -#include -#include -#include #include "stl_tools.h" #include "string_traits.h" #include "legacy_compiler.h" // but without the compiler crashes :> @@ -33,25 +30,30 @@ template Char asciiToLower(Char c); template Char asciiToUpper(Char c); //both S and T can be strings or char/wchar_t arrays or single char/wchar_t -template >> bool contains(const S& str, const T& term); +template /*Astyle hates tripe >*/ >> bool contains(const S& str, const T& term); - template bool startsWith (const S& str, const T& prefix); - template bool startsWithAsciiNoCase(const S& str, const T& prefix); +template bool startsWith (const S& str, const T& prefix); +template bool startsWithAsciiNoCase(const S& str, const T& prefix); - template bool endsWith (const S& str, const T& postfix); - template bool endsWithAsciiNoCase(const S& str, const T& postfix); +template bool endsWith (const S& str, const T& postfix); +template bool endsWithAsciiNoCase(const S& str, const T& postfix); - template bool equalString (const S& lhs, const T& rhs); - template bool equalAsciiNoCase(const S& lhs, const T& rhs); +template bool equalString (const S& lhs, const T& rhs); +template bool equalAsciiNoCase(const S& lhs, const T& rhs); - // template std::strong_ordering compareString (const S& lhs, const T& rhs); - template std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!) +//template std::strong_ordering compareString(const S& lhs, const T& rhs); +template std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!) - struct LessAsciiNoCase //STL container predicate -{ - template bool operator()(const S& lhs, const S& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); } -}; +//STL container predicates for std::map, std::unordered_set/map +struct StringHash; +struct StringEqual; + +struct LessAsciiNoCase; +struct StringHashAsciiNoCase; +struct StringEqualAsciiNoCase; +template Num hashString(const S& str); +template Num appendHashString(Num hashVal, const S& str); enum class IfNotFoundReturn { @@ -77,8 +79,11 @@ template void trim (S& str, bool fromLeft = true, bo template void trim(S& str, bool fromLeft, bool fromRight, Function trimThisChar); -template [[nodiscard]] S replaceCpy(S str, const T& oldTerm, const U& newTerm, bool replaceAll = true); -template void replace (S& str, const T& oldTerm, const U& newTerm, bool replaceAll = true); +template [[nodiscard]] S replaceCpy(S str, const T& oldTerm, const U& newTerm); +template void replace (S& str, const T& oldTerm, const U& newTerm); + +template [[nodiscard]] S replaceCpyAsciiNoCase(S str, const T& oldTerm, const U& newTerm); +template void replaceAsciiNoCase (S& str, const T& oldTerm, const U& newTerm); //high-performance conversion between numbers and strings template S numberTo(const Num& number); @@ -173,25 +178,28 @@ template inline Char asciiToLower(Char c) { if (static_cast('A') <= c && c <= static_cast('Z')) - return static_cast(c - static_cast('A') + static_cast('a')); - return c; + return static_cast(c - static_cast('A') + static_cast('a')); + return c; } - template inline - Char asciiToUpper(Char c) +template inline +Char asciiToUpper(Char c) { if (static_cast('a') <= c && c <= static_cast('z')) - return static_cast(c - static_cast('a') + static_cast('A')); - return c; + return static_cast(c - static_cast('a') + static_cast('A')); + return c; } - namespace impl +namespace impl { -//support embedded 0, unlike strncmp/wcsncmp: -inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; } -inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; } +template inline +bool equalSubstring(const Char* lhs, const Char* rhs, size_t len) +{ + //support embedded 0, unlike strncmp/wcsncmp: + return std::equal(lhs, lhs + len, rhs); +} template inline @@ -213,13 +221,14 @@ template inline bool startsWith(const S& str, const T& prefix) { const size_t pfLen = strLength(prefix); - return strLength(str) >= pfLen && impl::strcmpWithNulls(strBegin(str), strBegin(prefix), pfLen) == std::strong_ordering::equal; + return strLength(str) >= pfLen && impl::equalSubstring(strBegin(str), strBegin(prefix), pfLen); } template inline bool startsWithAsciiNoCase(const S& str, const T& prefix) { + assert(isAsciiString(str) || isAsciiString(prefix)); const size_t pfLen = strLength(prefix); return strLength(str) >= pfLen && impl::strcmpAsciiNoCase(strBegin(str), strBegin(prefix), pfLen) == std::weak_ordering::equivalent; } @@ -230,7 +239,7 @@ bool endsWith(const S& str, const T& postfix) { const size_t strLen = strLength(str); const size_t pfLen = strLength(postfix); - return strLen >= pfLen && impl::strcmpWithNulls(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == std::strong_ordering::equal; + return strLen >= pfLen && impl::equalSubstring(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen); } @@ -247,19 +256,24 @@ template inline bool equalString(const S& lhs, const T& rhs) { const size_t lhsLen = strLength(lhs); - return lhsLen == strLength(rhs) && impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), lhsLen) == std::strong_ordering::equal; + return lhsLen == strLength(rhs) && impl::equalSubstring(strBegin(lhs), strBegin(rhs), lhsLen); } template inline bool equalAsciiNoCase(const S& lhs, const T& rhs) { + assert(isAsciiString(lhs) || isAsciiString(rhs)); const size_t lhsLen = strLength(lhs); return lhsLen == strLength(rhs) && impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == std::weak_ordering::equivalent; } #if 0 +//support embedded 0, unlike strncmp/wcsncmp: +inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; } +inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; } + template inline std::strong_ordering compareString(const S& lhs, const T& rhs) { @@ -427,29 +441,22 @@ namespace impl ZEN_INIT_DETECT_MEMBER(append) //either call operator+=(S(str, len)) or append(str, len) -template >> inline +template >> inline void stringAppend(S& str, InputIterator first, InputIterator last) { str.append(first, last); } //inefficient append: keep disabled until really needed -//template >> inline +//template >> inline //void stringAppend(S& str, InputIterator first, InputIterator last) { str += S(first, last); } -} - - -template inline -S replaceCpy(S str, const T& oldTerm, const U& newTerm, bool replaceAll) -{ - replace(str, oldTerm, newTerm, replaceAll); - return str; -} -template inline -void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) +template inline +void replace(S& str, const T& oldTerm, const U& newTerm, CharEq charEqual) { static_assert(std::is_same_v, GetCharTypeT>); static_assert(std::is_same_v, GetCharTypeT>); const size_t oldLen = strLength(oldTerm); + const size_t newLen = strLength(newTerm); + //assert(oldLen != 0); -> reasonable check, but challenged by unit-test if (oldLen == 0) return; @@ -457,13 +464,17 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) const auto* const oldEnd = oldBegin + oldLen; const auto* const newBegin = strBegin(newTerm); - const auto* const newEnd = newBegin + strLength(newTerm); + const auto* const newEnd = newBegin + newLen; + + using CharType = GetCharTypeT; + if (oldLen == 1 && newLen == 1) //don't use expensive std::search unless required! + return std::replace_if(str.begin(), str.end(), [charEqual, charOld = *oldBegin](CharType c) { return charEqual(c, charOld); }, *newBegin); - auto it = strBegin(str); //don't use str.begin() or wxString will return this wxUni* nonsense! - const auto* const strEnd = it + strLength(str); + auto* it = strBegin(str); //don't use str.begin() or wxString will return this wxUni* nonsense! + auto* const strEnd = it + strLength(str); auto itFound = std::search(it, strEnd, - oldBegin, oldEnd); + oldBegin, oldEnd, charEqual); if (itFound == strEnd) return; //optimize "oldTerm not found" @@ -472,12 +483,13 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) { impl::stringAppend(output, newBegin, newEnd); it = itFound + oldLen; - +#if 0 if (!replaceAll) itFound = strEnd; else +#endif itFound = std::search(it, strEnd, - oldBegin, oldEnd); + oldBegin, oldEnd, charEqual); impl::stringAppend(output, it, itFound); } @@ -485,6 +497,37 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll) str = std::move(output); } +} + + +template inline +void replace(S& str, const T& oldTerm, const U& newTerm) +{ impl::replace(str, oldTerm, newTerm, std::equal_to()); } + + +template inline +S replaceCpy(S str, const T& oldTerm, const U& newTerm) +{ + replace(str, oldTerm, newTerm); + return str; +} + + +template inline +void replaceAsciiNoCase(S& str, const T& oldTerm, const U& newTerm) +{ + using CharType = GetCharTypeT; + impl::replace(str, oldTerm, newTerm, + [](CharType charL, CharType charR) { return asciiToLower(charL) == asciiToLower(charR); }); +} + + +template inline +S replaceCpyAsciiNoCase(S str, const T& oldTerm, const U& newTerm) +{ + replaceAsciiNoCase(str, oldTerm, newTerm); + return str; +} template inline @@ -813,9 +856,9 @@ template inline S numberTo(const Num& number) { using TypeTag = std::integral_constant ? impl::NumberType::signedInt : - IsUnsignedIntV ? impl::NumberType::unsignedInt : - IsFloatV ? impl::NumberType::floatingPoint : + isSignedInt ? impl::NumberType::signedInt : + isUnsignedInt ? impl::NumberType::unsignedInt : + isFloat ? impl::NumberType::floatingPoint : impl::NumberType::other>; return impl::numberTo(number, TypeTag()); @@ -826,9 +869,9 @@ template inline Num stringTo(const S& str) { using TypeTag = std::integral_constant ? impl::NumberType::signedInt : - IsUnsignedIntV ? impl::NumberType::unsignedInt : - IsFloatV ? impl::NumberType::floatingPoint : + isSignedInt ? impl::NumberType::signedInt : + isUnsignedInt ? impl::NumberType::unsignedInt : + isFloat ? impl::NumberType::floatingPoint : impl::NumberType::other>; return impl::stringTo(str, TypeTag()); @@ -885,6 +928,72 @@ std::string formatAsHexString(const std::string_view& blob) } + + +template inline +Num hashString(const S& str) +{ + using CharType = GetCharTypeT; + const auto* const strFirst = strBegin(str); + + FNV1aHash hash; + std::for_each(strFirst, strFirst + strLength(str), [&hash](CharType c) { hash.add(c); }); + return hash.get(); +} + + +struct StringHash +{ + using is_transparent = int; //allow heterogenous lookup! + + template + size_t operator()(const String& str) const { return hashString(str); } +}; + + +struct StringEqual +{ + using is_transparent = int; //allow heterogenous lookup! + + template + bool operator()(const String1& lhs, const String2& rhs) const { return equalString(lhs, rhs); } +}; + + +struct LessAsciiNoCase +{ + template + bool operator()(const String& lhs, const String& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); } +}; + + +struct StringHashAsciiNoCase +{ + using is_transparent = int; //allow heterogenous lookup! + + template + size_t operator()(const String& str) const + { + using CharType = GetCharTypeT; + const auto* const strFirst = strBegin(str); + + FNV1aHash hash; + std::for_each(strFirst, strFirst + strLength(str), [&hash](CharType c) { hash.add(asciiToLower(c)); }); + return hash.get(); + } +}; + + +struct StringEqualAsciiNoCase +{ + using is_transparent = int; //allow heterogenous lookup! + + template + bool operator()(const String1& lhs, const String2& rhs) const + { + return equalAsciiNoCase(lhs, rhs); + } +}; } #endif //STRING_TOOLS_H_213458973046 -- cgit