summaryrefslogtreecommitdiff
path: root/zen/string_tools.h
diff options
context:
space:
mode:
Diffstat (limited to 'zen/string_tools.h')
-rw-r--r--zen/string_tools.h221
1 files changed, 165 insertions, 56 deletions
diff --git a/zen/string_tools.h b/zen/string_tools.h
index ee4e5613..d3f35ce8 100644
--- a/zen/string_tools.h
+++ b/zen/string_tools.h
@@ -11,9 +11,6 @@
#include <cwctype> //iswspace
#include <cstdio> //sprintf
#include <cwchar> //swprintf
-#include <algorithm>
-#include <cassert>
-#include <vector>
#include "stl_tools.h"
#include "string_traits.h"
#include "legacy_compiler.h" //<charconv> but without the compiler crashes :>
@@ -33,25 +30,30 @@ template <class Char> Char asciiToLower(Char c);
template <class Char> Char asciiToUpper(Char c);
//both S and T can be strings or char/wchar_t arrays or single char/wchar_t
-template <class S, class T, typename = std::enable_if_t<IsStringLikeV<S>>> bool contains(const S& str, const T& term);
+template <class S, class T, typename = std::enable_if_t<isStringLike<S>/*Astyle hates tripe >*/ >> bool contains(const S& str, const T& term);
- template <class S, class T> bool startsWith (const S& str, const T& prefix);
- template <class S, class T> bool startsWithAsciiNoCase(const S& str, const T& prefix);
+template <class S, class T> bool startsWith (const S& str, const T& prefix);
+template <class S, class T> bool startsWithAsciiNoCase(const S& str, const T& prefix);
- template <class S, class T> bool endsWith (const S& str, const T& postfix);
- template <class S, class T> bool endsWithAsciiNoCase(const S& str, const T& postfix);
+template <class S, class T> bool endsWith (const S& str, const T& postfix);
+template <class S, class T> bool endsWithAsciiNoCase(const S& str, const T& postfix);
- template <class S, class T> bool equalString (const S& lhs, const T& rhs);
- template <class S, class T> bool equalAsciiNoCase(const S& lhs, const T& rhs);
+template <class S, class T> bool equalString (const S& lhs, const T& rhs);
+template <class S, class T> bool equalAsciiNoCase(const S& lhs, const T& rhs);
- // template <class S, class T> std::strong_ordering compareString (const S& lhs, const T& rhs);
- template <class S, class T> std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!)
+//template <class S, class T> std::strong_ordering compareString(const S& lhs, const T& rhs);
+template <class S, class T> std::weak_ordering compareAsciiNoCase(const S& lhs, const T& rhs); //basic case-insensitive comparison (considering A-Z only!)
- struct LessAsciiNoCase //STL container predicate
-{
- template <class S> bool operator()(const S& lhs, const S& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); }
-};
+//STL container predicates for std::map, std::unordered_set/map
+struct StringHash;
+struct StringEqual;
+
+struct LessAsciiNoCase;
+struct StringHashAsciiNoCase;
+struct StringEqualAsciiNoCase;
+template <class Num, class S> Num hashString(const S& str);
+template <class Num, class S> Num appendHashString(Num hashVal, const S& str);
enum class IfNotFoundReturn
{
@@ -77,8 +79,11 @@ template <class S> void trim (S& str, bool fromLeft = true, bo
template <class S, class Function> void trim(S& str, bool fromLeft, bool fromRight, Function trimThisChar);
-template <class S, class T, class U> [[nodiscard]] S replaceCpy(S str, const T& oldTerm, const U& newTerm, bool replaceAll = true);
-template <class S, class T, class U> void replace (S& str, const T& oldTerm, const U& newTerm, bool replaceAll = true);
+template <class S, class T, class U> [[nodiscard]] S replaceCpy(S str, const T& oldTerm, const U& newTerm);
+template <class S, class T, class U> void replace (S& str, const T& oldTerm, const U& newTerm);
+
+template <class S, class T, class U> [[nodiscard]] S replaceCpyAsciiNoCase(S str, const T& oldTerm, const U& newTerm);
+template <class S, class T, class U> void replaceAsciiNoCase (S& str, const T& oldTerm, const U& newTerm);
//high-performance conversion between numbers and strings
template <class S, class Num> S numberTo(const Num& number);
@@ -173,25 +178,28 @@ template <class Char> inline
Char asciiToLower(Char c)
{
if (static_cast<Char>('A') <= c && c <= static_cast<Char>('Z'))
- return static_cast<Char>(c - static_cast<Char>('A') + static_cast<Char>('a'));
- return c;
+ return static_cast<Char>(c - static_cast<Char>('A') + static_cast<Char>('a'));
+ return c;
}
- template <class Char> inline
- Char asciiToUpper(Char c)
+template <class Char> inline
+Char asciiToUpper(Char c)
{
if (static_cast<Char>('a') <= c && c <= static_cast<Char>('z'))
- return static_cast<Char>(c - static_cast<Char>('a') + static_cast<Char>('A'));
- return c;
+ return static_cast<Char>(c - static_cast<Char>('a') + static_cast<Char>('A'));
+ return c;
}
- namespace impl
+namespace impl
{
-//support embedded 0, unlike strncmp/wcsncmp:
-inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; }
-inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; }
+template <class Char> inline
+bool equalSubstring(const Char* lhs, const Char* rhs, size_t len)
+{
+ //support embedded 0, unlike strncmp/wcsncmp:
+ return std::equal(lhs, lhs + len, rhs);
+}
template <class Char1, class Char2> inline
@@ -213,13 +221,14 @@ template <class S, class T> inline
bool startsWith(const S& str, const T& prefix)
{
const size_t pfLen = strLength(prefix);
- return strLength(str) >= pfLen && impl::strcmpWithNulls(strBegin(str), strBegin(prefix), pfLen) == std::strong_ordering::equal;
+ return strLength(str) >= pfLen && impl::equalSubstring(strBegin(str), strBegin(prefix), pfLen);
}
template <class S, class T> inline
bool startsWithAsciiNoCase(const S& str, const T& prefix)
{
+ assert(isAsciiString(str) || isAsciiString(prefix));
const size_t pfLen = strLength(prefix);
return strLength(str) >= pfLen && impl::strcmpAsciiNoCase(strBegin(str), strBegin(prefix), pfLen) == std::weak_ordering::equivalent;
}
@@ -230,7 +239,7 @@ bool endsWith(const S& str, const T& postfix)
{
const size_t strLen = strLength(str);
const size_t pfLen = strLength(postfix);
- return strLen >= pfLen && impl::strcmpWithNulls(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen) == std::strong_ordering::equal;
+ return strLen >= pfLen && impl::equalSubstring(strBegin(str) + strLen - pfLen, strBegin(postfix), pfLen);
}
@@ -247,19 +256,24 @@ template <class S, class T> inline
bool equalString(const S& lhs, const T& rhs)
{
const size_t lhsLen = strLength(lhs);
- return lhsLen == strLength(rhs) && impl::strcmpWithNulls(strBegin(lhs), strBegin(rhs), lhsLen) == std::strong_ordering::equal;
+ return lhsLen == strLength(rhs) && impl::equalSubstring(strBegin(lhs), strBegin(rhs), lhsLen);
}
template <class S, class T> inline
bool equalAsciiNoCase(const S& lhs, const T& rhs)
{
+ assert(isAsciiString(lhs) || isAsciiString(rhs));
const size_t lhsLen = strLength(lhs);
return lhsLen == strLength(rhs) && impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == std::weak_ordering::equivalent;
}
#if 0
+//support embedded 0, unlike strncmp/wcsncmp:
+inline std::strong_ordering strcmpWithNulls(const char* ptr1, const char* ptr2, size_t num) { return std:: memcmp(ptr1, ptr2, num) <=> 0; }
+inline std::strong_ordering strcmpWithNulls(const wchar_t* ptr1, const wchar_t* ptr2, size_t num) { return std::wmemcmp(ptr1, ptr2, num) <=> 0; }
+
template <class S, class T> inline
std::strong_ordering compareString(const S& lhs, const T& rhs)
{
@@ -427,29 +441,22 @@ namespace impl
ZEN_INIT_DETECT_MEMBER(append)
//either call operator+=(S(str, len)) or append(str, len)
-template <class S, class InputIterator, typename = std::enable_if_t<HasMemberV_append<S>>> inline
+template <class S, class InputIterator, typename = std::enable_if_t<hasMember_append<S>>> inline
void stringAppend(S& str, InputIterator first, InputIterator last) { str.append(first, last); }
//inefficient append: keep disabled until really needed
-//template <class S, class InputIterator, typename = std::enable_if_t<!HasMemberV_append<S>>> inline
+//template <class S, class InputIterator, typename = std::enable_if_t<!hasMember_append<S>>> inline
//void stringAppend(S& str, InputIterator first, InputIterator last) { str += S(first, last); }
-}
-
-
-template <class S, class T, class U> inline
-S replaceCpy(S str, const T& oldTerm, const U& newTerm, bool replaceAll)
-{
- replace(str, oldTerm, newTerm, replaceAll);
- return str;
-}
-template <class S, class T, class U> inline
-void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll)
+template <class S, class T, class U, class CharEq> inline
+void replace(S& str, const T& oldTerm, const U& newTerm, CharEq charEqual)
{
static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>);
static_assert(std::is_same_v<GetCharTypeT<T>, GetCharTypeT<U>>);
const size_t oldLen = strLength(oldTerm);
+ const size_t newLen = strLength(newTerm);
+ //assert(oldLen != 0); -> reasonable check, but challenged by unit-test
if (oldLen == 0)
return;
@@ -457,13 +464,17 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll)
const auto* const oldEnd = oldBegin + oldLen;
const auto* const newBegin = strBegin(newTerm);
- const auto* const newEnd = newBegin + strLength(newTerm);
+ const auto* const newEnd = newBegin + newLen;
+
+ using CharType = GetCharTypeT<S>;
+ if (oldLen == 1 && newLen == 1) //don't use expensive std::search unless required!
+ return std::replace_if(str.begin(), str.end(), [charEqual, charOld = *oldBegin](CharType c) { return charEqual(c, charOld); }, *newBegin);
- auto it = strBegin(str); //don't use str.begin() or wxString will return this wxUni* nonsense!
- const auto* const strEnd = it + strLength(str);
+ auto* it = strBegin(str); //don't use str.begin() or wxString will return this wxUni* nonsense!
+ auto* const strEnd = it + strLength(str);
auto itFound = std::search(it, strEnd,
- oldBegin, oldEnd);
+ oldBegin, oldEnd, charEqual);
if (itFound == strEnd)
return; //optimize "oldTerm not found"
@@ -472,12 +483,13 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll)
{
impl::stringAppend(output, newBegin, newEnd);
it = itFound + oldLen;
-
+#if 0
if (!replaceAll)
itFound = strEnd;
else
+#endif
itFound = std::search(it, strEnd,
- oldBegin, oldEnd);
+ oldBegin, oldEnd, charEqual);
impl::stringAppend(output, it, itFound);
}
@@ -485,6 +497,37 @@ void replace(S& str, const T& oldTerm, const U& newTerm, bool replaceAll)
str = std::move(output);
}
+}
+
+
+template <class S, class T, class U> inline
+void replace(S& str, const T& oldTerm, const U& newTerm)
+{ impl::replace(str, oldTerm, newTerm, std::equal_to()); }
+
+
+template <class S, class T, class U> inline
+S replaceCpy(S str, const T& oldTerm, const U& newTerm)
+{
+ replace(str, oldTerm, newTerm);
+ return str;
+}
+
+
+template <class S, class T, class U> inline
+void replaceAsciiNoCase(S& str, const T& oldTerm, const U& newTerm)
+{
+ using CharType = GetCharTypeT<S>;
+ impl::replace(str, oldTerm, newTerm,
+ [](CharType charL, CharType charR) { return asciiToLower(charL) == asciiToLower(charR); });
+}
+
+
+template <class S, class T, class U> inline
+S replaceCpyAsciiNoCase(S str, const T& oldTerm, const U& newTerm)
+{
+ replaceAsciiNoCase(str, oldTerm, newTerm);
+ return str;
+}
template <class Char, class Function> inline
@@ -813,9 +856,9 @@ template <class S, class Num> inline
S numberTo(const Num& number)
{
using TypeTag = std::integral_constant<impl::NumberType,
- IsSignedIntV <Num> ? impl::NumberType::signedInt :
- IsUnsignedIntV<Num> ? impl::NumberType::unsignedInt :
- IsFloatV <Num> ? impl::NumberType::floatingPoint :
+ isSignedInt <Num> ? impl::NumberType::signedInt :
+ isUnsignedInt<Num> ? impl::NumberType::unsignedInt :
+ isFloat <Num> ? impl::NumberType::floatingPoint :
impl::NumberType::other>;
return impl::numberTo<S>(number, TypeTag());
@@ -826,9 +869,9 @@ template <class Num, class S> inline
Num stringTo(const S& str)
{
using TypeTag = std::integral_constant<impl::NumberType,
- IsSignedIntV <Num> ? impl::NumberType::signedInt :
- IsUnsignedIntV<Num> ? impl::NumberType::unsignedInt :
- IsFloatV <Num> ? impl::NumberType::floatingPoint :
+ isSignedInt <Num> ? impl::NumberType::signedInt :
+ isUnsignedInt<Num> ? impl::NumberType::unsignedInt :
+ isFloat <Num> ? impl::NumberType::floatingPoint :
impl::NumberType::other>;
return impl::stringTo<Num>(str, TypeTag());
@@ -885,6 +928,72 @@ std::string formatAsHexString(const std::string_view& blob)
}
+
+
+template <class Num, class S> inline
+Num hashString(const S& str)
+{
+ using CharType = GetCharTypeT<S>;
+ const auto* const strFirst = strBegin(str);
+
+ FNV1aHash<Num> hash;
+ std::for_each(strFirst, strFirst + strLength(str), [&hash](CharType c) { hash.add(c); });
+ return hash.get();
+}
+
+
+struct StringHash
+{
+ using is_transparent = int; //allow heterogenous lookup!
+
+ template <class String>
+ size_t operator()(const String& str) const { return hashString<size_t>(str); }
+};
+
+
+struct StringEqual
+{
+ using is_transparent = int; //allow heterogenous lookup!
+
+ template <class String1, class String2>
+ bool operator()(const String1& lhs, const String2& rhs) const { return equalString(lhs, rhs); }
+};
+
+
+struct LessAsciiNoCase
+{
+ template <class String>
+ bool operator()(const String& lhs, const String& rhs) const { return std::is_lt(compareAsciiNoCase(lhs, rhs)); }
+};
+
+
+struct StringHashAsciiNoCase
+{
+ using is_transparent = int; //allow heterogenous lookup!
+
+ template <class String>
+ size_t operator()(const String& str) const
+ {
+ using CharType = GetCharTypeT<String>;
+ const auto* const strFirst = strBegin(str);
+
+ FNV1aHash<size_t> hash;
+ std::for_each(strFirst, strFirst + strLength(str), [&hash](CharType c) { hash.add(asciiToLower(c)); });
+ return hash.get();
+ }
+};
+
+
+struct StringEqualAsciiNoCase
+{
+ using is_transparent = int; //allow heterogenous lookup!
+
+ template <class String1, class String2>
+ bool operator()(const String1& lhs, const String2& rhs) const
+ {
+ return equalAsciiNoCase(lhs, rhs);
+ }
+};
}
#endif //STRING_TOOLS_H_213458973046
bgstack15