diff options
Diffstat (limited to 'zen/zstring.cpp')
-rw-r--r-- | zen/zstring.cpp | 111 |
1 files changed, 24 insertions, 87 deletions
diff --git a/zen/zstring.cpp b/zen/zstring.cpp index 635fb47d..76c0a81f 100644 --- a/zen/zstring.cpp +++ b/zen/zstring.cpp @@ -5,50 +5,12 @@ // ***************************************************************************** #include "zstring.h" -#include <stdexcept> -#include "utf.h" - #include <glib.h> #include "sys_error.h" using namespace zen; -Zstring getUpperCase(const Zstring& str) -{ - assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! - - //fast pre-check: - if (isAsciiString(str)) //perf: in the range of 3.5ns - { - Zstring output = str; - for (Zchar& c : output) - c = asciiToUpper(c); - return output; - } - - Zstring strNorm = getUnicodeNormalForm(str); - try - { - static_assert(sizeof(impl::CodePoint) == sizeof(gunichar)); - Zstring output; - output.reserve(strNorm.size()); - - UtfDecoder<char> decoder(strNorm.c_str(), strNorm.size()); - while (const std::optional<impl::CodePoint> cp = decoder.getNext()) - impl::codePointToUtf<char>(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent! - - return output; - - } - catch (SysError&) - { - assert(false); - return str; - } -} - - Zstring getUnicodeNormalForm(const Zstring& str) { //fast pre-check: @@ -75,63 +37,38 @@ Zstring getUnicodeNormalForm(const Zstring& str) } -Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm) +Zstring getUpperCase(const Zstring& str) { - if (oldTerm.empty()) - return str; - - //assert(isAsciiString(oldTerm)); - Zstring output; + assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! - for (size_t pos = 0;;) + //fast pre-check: + if (isAsciiString(str)) //perf: in the range of 3.5ns { - const size_t posFound = std::search(str.begin() + pos, str.end(), //can't use getUpperCase(): input/output sizes may differ! - oldTerm.begin(), oldTerm.end(), - [](Zchar charL, Zchar charR) { return asciiToUpper(charL) == asciiToUpper(charR); }) - str.begin(); - - if (posFound == str.size()) - { - if (pos == 0) //optimize "oldTerm not found": return ref-counted copy - return str; - output.append(str.begin() + pos, str.end()); - return output; - } - - output.append(str.begin() + pos, str.begin() + posFound); - output += newTerm; - pos = posFound + oldTerm.size(); + Zstring output = str; + for (Zchar& c : output) + c = asciiToUpper(c); + return output; } -} - - -/* https://docs.microsoft.com/de-de/windows/desktop/Intl/handling-sorting-in-your-applications - - Perf test: compare strings 10 mio times; 64 bit build - ----------------------------------------------------- - string a = "Fjk84$%kgfj$%T\\\\Gffg\\gsdgf\\fgsx----------d-" - string b = "fjK84$%kgfj$%T\\\\gfFg\\gsdgf\\fgSy----------dfdf" - Windows (UTF16 wchar_t) - 4 ns | wcscmp - 67 ns | CompareStringOrdinalFunc+ + bIgnoreCase - 314 ns | LCMapString + wmemcmp - - OS X (UTF8 char) - 6 ns | strcmp - 98 ns | strcasecmp - 120 ns | strncasecmp + std::min(sizeLhs, sizeRhs); - 856 ns | CFStringCreateWithCString + CFStringCompare(kCFCompareCaseInsensitive) - 1110 ns | CFStringCreateWithCStringNoCopy + CFStringCompare(kCFCompareCaseInsensitive) - ________________________ - time per call | function */ + Zstring strNorm = getUnicodeNormalForm(str); + try + { + static_assert(sizeof(impl::CodePoint) == sizeof(gunichar)); + Zstring output; + output.reserve(strNorm.size()); -std::weak_ordering compareNativePath(const Zstring& lhs, const Zstring& rhs) -{ - assert(lhs.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! - assert(rhs.find(Zchar('\0')) == Zstring::npos); // + UtfDecoder<char> decoder(strNorm.c_str(), strNorm.size()); + while (const std::optional<impl::CodePoint> cp = decoder.getNext()) + impl::codePointToUtf<char>(::g_unichar_toupper(*cp), [&](char c) { output += c; }); //don't use std::towupper: *incomplete* and locale-dependent! - return lhs <=> rhs; + return output; + } + catch (SysError&) + { + assert(false); + return str; + } } |