summaryrefslogtreecommitdiff
path: root/zen/zstring.h
diff options
context:
space:
mode:
authorB Stack <bgstack15@gmail.com>2018-10-16 17:33:51 -0400
committerB Stack <bgstack15@gmail.com>2018-10-16 17:33:51 -0400
commit878a41d3be13da2a654df74f2a35ea8b295c8a13 (patch)
tree89b2a018482c164bdd8ecac5c76b19a08f420dec /zen/zstring.h
parentMerge branch '10.4' into 'master' (diff)
downloadFreeFileSync-878a41d3be13da2a654df74f2a35ea8b295c8a13.tar.gz
FreeFileSync-878a41d3be13da2a654df74f2a35ea8b295c8a13.tar.bz2
FreeFileSync-878a41d3be13da2a654df74f2a35ea8b295c8a13.zip
10.5
Diffstat (limited to 'zen/zstring.h')
-rwxr-xr-xzen/zstring.h163
1 files changed, 52 insertions, 111 deletions
diff --git a/zen/zstring.h b/zen/zstring.h
index 7fa21335..20cf968d 100755
--- a/zen/zstring.h
+++ b/zen/zstring.h
@@ -14,6 +14,7 @@
#define Zstr(x) x
const Zchar FILE_NAME_SEPARATOR = '/';
+
//"The reason for all the fuss above" - Loki/SmartPtr
//a high-performance string for interfacing with native OS APIs in multithreaded contexts
using Zstring = zen::Zbase<Zchar>;
@@ -22,43 +23,71 @@ using Zstring = zen::Zbase<Zchar>;
using Zstringw = zen::Zbase<wchar_t>;
-//Compare filepaths: Windows/OS X does NOT distinguish between upper/lower-case, while Linux DOES
-struct CmpFilePath
-{
- int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const;
-};
+//Caveat: don't expect input/output string sizes to match:
+// - different UTF-8 encoding length of upper-case chars
+// - different number of upper case chars (e.g. "ß" => "SS" on macOS)
+// - output is Unicode-normalized
+Zstring makeUpperCopy(const Zstring& str);
-struct CmpNaturalSort
-{
- int operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const;
-};
+//Windows, Linux: precomposed
+//macOS: decomposed
+Zstring getUnicodeNormalForm(const Zstring& str);
+Zstring replaceCpyAsciiNoCase(const Zstring& str, const Zstring& oldTerm, const Zstring& newTerm);
-struct LessFilePath
-{
- template <class S> //don't support heterogenous input! => use as container predicate only!
- bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpFilePath()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
-};
+//------------------------------------------------------------------------------------------
+//inline
+//int compareNoCase(const Zstring& lhs, const Zstring& rhs)
+//{
+// return zen::compareString(makeUpperCopy(lhs), makeUpperCopy(rhs));
+// //avoid eager optimization bugs: e.g. "if (isAsciiString()) compareAsciiNoCase()" might model a different order!
+//}
+
+inline bool equalNoCase(const Zstring& lhs, const Zstring& rhs) { return makeUpperCopy(lhs) == makeUpperCopy(rhs); }
-struct LessNaturalSort
+struct ZstringNoCase //use as STL container key: avoid needless upper-case conversions during std::map<>::find()
{
- template <class S> //don't support heterogenous input! => use as container predicate only!
- bool operator()(const S& lhs, const S& rhs) const { using namespace zen; return CmpNaturalSort()(strBegin(lhs), strLength(lhs), strBegin(rhs), strLength(rhs)) < 0; }
+ ZstringNoCase(const Zstring& str) : upperCase(makeUpperCopy(str)) {}
+ Zstring upperCase;
};
+inline bool operator<(const ZstringNoCase& lhs, const ZstringNoCase& rhs) { return lhs.upperCase < rhs.upperCase; }
+
+//struct LessNoCase { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNoCase(lhs, rhs) < 0; } };
+
+//------------------------------------------------------------------------------------------
+
+//Compare *local* file paths:
+// Windows: ignore case
+// Linux: byte-wise comparison
+// macOS: ignore case + Unicode normalization forms
+int compareLocalPath(const Zstring& lhs, const Zstring& rhs);
+
+inline bool equalLocalPath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; }
+struct LessLocalPath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } };
-template <class S>
-S makeUpperCopy(S str);
+//------------------------------------------------------------------------------------------
+int compareNatural(const Zstring& lhs, const Zstring& rhs);
+struct LessNaturalSort { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareNatural(lhs, rhs) < 0; } }; //less-than predicate on top of compareNatural(); both operands taken by const reference (no copy per comparison)
+//------------------------------------------------------------------------------------------
+
+warn_static("get rid:")
+inline int compareFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs); }
+
+inline bool equalFilePath(const Zstring& lhs, const Zstring& rhs) { return compareLocalPath(lhs, rhs) == 0; }
+
+struct LessFilePath { bool operator()(const Zstring& lhs, const Zstring& rhs) const { return compareLocalPath(lhs, rhs) < 0; } };
+//------------------------------------------------------------------------------------------
-template <class S, class T> inline
-bool equalFilePath(const S& lhs, const T& rhs) { using namespace zen; return strEqual(lhs, rhs, CmpFilePath()); }
inline
Zstring appendSeparator(Zstring path) //support rvalue references!
{
- return zen::endsWith(path, FILE_NAME_SEPARATOR) ? path : (path += FILE_NAME_SEPARATOR); //returning a by-value parameter implicitly converts to r-value!
+ if (!zen::endsWith(path, FILE_NAME_SEPARATOR))
+ path += FILE_NAME_SEPARATOR;
+ return path; //returning a by-value parameter => RVO if possible, r-value otherwise!
}
@@ -82,12 +111,7 @@ Zstring getFileExtension(const Zstring& filePath)
}
-template <class S, class T, class U>
-S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm);
-
-
-
-//common unicode sequences
+//common unicode characters
const wchar_t EM_DASH = L'\u2014';
const wchar_t EN_DASH = L'\u2013';
const wchar_t* const SPACED_DASH = L" \u2013 "; //using 'EN DASH'
@@ -99,89 +123,6 @@ const wchar_t MULT_SIGN = L'\u00D7'; //fancy "x"
-
-
-//################################# inline implementation ########################################
-inline
-void makeUpperInPlace(wchar_t* str, size_t strLen)
-{
- std::for_each(str, str + strLen, [](wchar_t& c) { c = std::towupper(c); }); //locale-dependent!
-}
-
-
-inline
-void makeUpperInPlace(char* str, size_t strLen)
-{
- std::for_each(str, str + strLen, [](char& c) { c = std::toupper(static_cast<unsigned char>(c)); }); //locale-dependent!
- //result of toupper() is an unsigned char mapped to int range: the char representation is in the last 8 bits and we need not care about signedness!
- //this should work for UTF-8, too: all chars >= 128 are mapped upon themselves!
-}
-
-
-template <class S> inline
-S makeUpperCopy(S str)
-{
- const size_t len = str.length(); //we assert S is a string type!
- if (len > 0)
- makeUpperInPlace(&*str.begin(), len);
-
- return str;
-}
-
-
-inline
-int CmpFilePath::operator()(const Zchar* lhs, size_t lhsLen, const Zchar* rhs, size_t rhsLen) const
-{
- assert(std::find(lhs, lhs + lhsLen, 0) == lhs + lhsLen); //don't expect embedded nulls!
- assert(std::find(rhs, rhs + rhsLen, 0) == rhs + rhsLen); //
-
- const int rv = std::strncmp(lhs, rhs, std::min(lhsLen, rhsLen));
- if (rv != 0)
- return rv;
- return static_cast<int>(lhsLen) - static_cast<int>(rhsLen);
-}
-
-
-template <class S, class T, class U> inline
-S ciReplaceCpy(const S& str, const T& oldTerm, const U& newTerm)
-{
- using namespace zen;
- static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>);
- static_assert(std::is_same_v<GetCharTypeT<T>, GetCharTypeT<U>>);
- const size_t oldLen = strLength(oldTerm);
- if (oldLen == 0)
- return str;
-
- const S strU = makeUpperCopy(str); //S required to be a string class
- const S oldU = makeUpperCopy<S>(oldTerm); //[!] T not required to be a string class
- assert(strLength(strU) == strLength(str ));
- assert(strLength(oldU) == strLength(oldTerm));
-
- const auto* const newBegin = strBegin(newTerm);
- const auto* const newEnd = newBegin + strLength(newTerm);
-
- S output;
-
- for (size_t pos = 0;;)
- {
- const auto itFound = std::search(strU.begin() + pos, strU.end(),
- oldU.begin(), oldU.end());
- if (itFound == strU.end() && pos == 0)
- return str; //optimize "oldTerm not found": return ref-counted copy
-
- impl::stringAppend(output, str.begin() + pos, str.begin() + (itFound - strU.begin()));
- if (itFound == strU.end())
- return output;
-
- impl::stringAppend(output, newBegin, newEnd);
- pos = (itFound - strU.begin()) + oldLen;
- }
-}
-
-//expose for unit tests
-int cmpStringNaturalLinuxTest(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen);
-inline int cmpStringNaturalLinux(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen) { return cmpStringNaturalLinuxTest(lhs, lhsLen, rhs, rhsLen); }
-
//---------------------------------------------------------------------------
//ZEN macro consistency checks:
bgstack15