// ***************************************************************************** // * This file is part of the FreeFileSync project. It is distributed under * // * GNU General Public License: https://www.gnu.org/licenses/gpl-3.0 * // * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * // ***************************************************************************** #ifndef STRING_BASE_H_083217454562342526 #define STRING_BASE_H_083217454562342526 #include #include #include "string_tools.h" #include "legacy_compiler.h" //constinit2 //Zbase - a policy based string class optimizing performance and flexibility namespace zen { /* Allocator Policy: ----------------- void* allocate(size_t size) //throw std::bad_alloc void deallocate(void* ptr) size_t calcCapacity(size_t length) */ class AllocatorOptimalSpeed //exponential growth + min size { protected: //::operator new/delete show same performance characterisics like malloc()/free()! static void* allocate(size_t size) { return ::operator new (size); } //throw std::bad_alloc static void deallocate(void* ptr) { ::operator delete (ptr); } static size_t calcCapacity(size_t length) { return std::max(16, std::max(length + length / 2, length)); } //- size_t might overflow! => better catch here than return a too small size covering up the real error: a way too large length! //- any growth rate should not exceed golden ratio: 1.618033989 }; class AllocatorOptimalMemory //no wasted memory, but more reallocations required when manipulating string { protected: static void* allocate(size_t size) { return ::operator new (size); } //throw std::bad_alloc static void deallocate(void* ptr) { ::operator delete (ptr); } static size_t calcCapacity(size_t length) { return length; } }; /* Storage Policy: --------------- template //Allocator Policy Char* create(size_t size) Char* create(size_t size, size_t minCapacity) Char* clone(Char* ptr) void destroy(Char* ptr) //must handle "destroy(nullptr)"! bool canWrite(const Char* ptr, size_t minCapacity) //needs to be checked before writing to "ptr" size_t length(const Char* ptr) void setLength(Char* ptr, size_t newLength) */ template //Allocator Policy class StorageDeepCopy : public AP { protected: ~StorageDeepCopy() {} Char* create(size_t size) { return create(size, size); } Char* create(size_t size, size_t minCapacity) { assert(size <= minCapacity); const size_t newCapacity = AP::calcCapacity(minCapacity); assert(newCapacity >= minCapacity); Descriptor* const newDescr = static_cast(this->allocate(sizeof(Descriptor) + (newCapacity + 1) * sizeof(Char))); //throw std::bad_alloc new (newDescr) Descriptor(size, newCapacity); return reinterpret_cast(newDescr + 1); //alignment note: "newDescr + 1" is Descriptor-aligned, which is larger than alignment for Char-array! => no problem! } Char* clone(Char* ptr) { const size_t len = length(ptr); Char* newData = create(len); //throw std::bad_alloc std::copy(ptr, ptr + len + 1, newData); return newData; } void destroy(Char* ptr) { if (!ptr) return; //support "destroy(nullptr)" Descriptor* const d = descr(ptr); d->~Descriptor(); this->deallocate(d); } //this needs to be checked before writing to "ptr" static bool canWrite(const Char* ptr, size_t minCapacity) { return minCapacity <= descr(ptr)->capacity; } static size_t length(const Char* ptr) { return descr(ptr)->length; } static void setLength(Char* ptr, size_t newLength) { assert(canWrite(ptr, newLength)); descr(ptr)->length = newLength; } private: struct Descriptor { Descriptor(size_t len, size_t cap) : length (static_cast(len)), capacity(static_cast(cap)) {} uint32_t length; const uint32_t capacity; //allocated size without null-termination }; static Descriptor* descr( Char* ptr) { return reinterpret_cast< Descriptor*>(ptr) - 1; } static const Descriptor* descr(const Char* ptr) { return reinterpret_cast(ptr) - 1; } }; template //Allocator Policy class StorageRefCountThreadSafe : public AP { protected: ~StorageRefCountThreadSafe() {} Char* create(size_t size) { return create(size, size); } Char* create(size_t size, size_t minCapacity) { assert(size <= minCapacity); if (minCapacity == 0) //perf: avoid memory allocation for empty string { ++globalEmptyString.descr.refCount; return &globalEmptyString.nullTerm; } const size_t newCapacity = AP::calcCapacity(minCapacity); assert(newCapacity >= minCapacity); Descriptor* const newDescr = static_cast(this->allocate(sizeof(Descriptor) + (newCapacity + 1) * sizeof(Char))); //throw std::bad_alloc new (newDescr) Descriptor(size, newCapacity); return reinterpret_cast(newDescr + 1); } static Char* clone(Char* ptr) { ++descr(ptr)->refCount; return ptr; } void destroy(Char* ptr) { assert(ptr != reinterpret_cast(0x1)); //detect double-deletion if (!ptr) //support "destroy(nullptr)" { return; } Descriptor* const d = descr(ptr); if (--(d->refCount) == 0) //operator--() is overloaded to decrement and evaluate in a single atomic operation! { d->~Descriptor(); this->deallocate(d); } } static bool canWrite(const Char* ptr, size_t minCapacity) //needs to be checked before writing to "ptr" { const Descriptor* const d = descr(ptr); assert(d->refCount > 0); return d->refCount == 1 && minCapacity <= d->capacity; } static size_t length(const Char* ptr) { return descr(ptr)->length; } static void setLength(Char* ptr, size_t newLength) { assert(canWrite(ptr, newLength)); descr(ptr)->length = static_cast(newLength); } private: struct Descriptor { constexpr Descriptor(size_t len, size_t cap) : length (static_cast(len)), capacity(static_cast(cap)) { static_assert(decltype(refCount)::is_always_lock_free); } std::atomic refCount{1}; //std:atomic is uninitialized by default! uint32_t length; const uint32_t capacity; //allocated size without null-termination }; static Descriptor* descr( Char* ptr) { return reinterpret_cast< Descriptor*>(ptr) - 1; } static const Descriptor* descr(const Char* ptr) { return reinterpret_cast(ptr) - 1; } struct GlobalEmptyString { Descriptor descr{0 /*length*/, 0 /*capacity*/}; Char nullTerm = 0; }; static_assert(offsetof(GlobalEmptyString, nullTerm) - offsetof(GlobalEmptyString, descr) == sizeof(Descriptor), "no gap!"); static_assert(std::is_trivially_destructible_v, "this memory needs to live forever"); inline static constinit2 GlobalEmptyString globalEmptyString; //constinit: dodge static initialization order fiasco! }; template using DefaultStoragePolicy = StorageRefCountThreadSafe; //################################################################################################################################################################ //perf note: interestingly StorageDeepCopy and StorageRefCountThreadSafe show same performance in FFS comparison template class SP = DefaultStoragePolicy> //Storage Policy class Zbase : public SP { public: Zbase(); Zbase(const Char* str) : Zbase(str, str + strLength(str)) {} //implicit conversion from a C-string! Zbase(const Char* str, size_t len) : Zbase(str, str + len) {} Zbase(size_t count, Char fillChar); Zbase(const Zbase& str); Zbase(Zbase&& tmp) noexcept; template Zbase(InputIterator first, InputIterator last); //explicit Zbase(Char ch); //dangerous if implicit: Char buffer[]; return buffer[0]; ups... forgot &, but not a compiler error! //-> non-standard extension!!! ~Zbase(); //operator const Char* () const; //NO implicit conversion to a C-string!! Many problems... one of them: if we forget to provide operator overloads, it'll just work with a Char*... //STL accessors using iterator = Char*; using const_iterator = const Char*; using reference = Char&; using const_reference = const Char&; using value_type = Char; iterator begin(); iterator end (); const_iterator begin () const { return rawStr_; } const_iterator end () const { return rawStr_ + length(); } const_iterator cbegin() const { return begin(); } const_iterator cend () const { return end (); } //std::string functions size_t length() const; size_t size () const { return length(); } const Char* c_str() const { return rawStr_; } //C-string format with 0-termination const Char& operator[](size_t pos) const; /**/ Char& operator[](size_t pos); bool empty() const { return length() == 0; } void clear(); size_t find (const Zbase& str, size_t pos = 0) const; // size_t find (const Char* str, size_t pos = 0) const; // size_t find (Char ch, size_t pos = 0) const; //returns "npos" if not found size_t rfind(Char ch, size_t pos = npos) const; // size_t rfind(const Char* str, size_t pos = npos) const; // //Zbase& replace(size_t pos1, size_t n1, const Zbase& str); void reserve(size_t minCapacity); Zbase& assign(const Char* str, size_t len) { return assign(str, str + len); } Zbase& append(const Char* str, size_t len) { return append(str, str + len); } template Zbase& assign(InputIterator first, InputIterator last); template Zbase& append(InputIterator first, InputIterator last); void resize(size_t newSize, Char fillChar = 0); void swap(Zbase& str) { std::swap(rawStr_, str.rawStr_); } void push_back(Char val) { operator+=(val); } //STL access void pop_back(); Zbase& operator=(Zbase&& tmp) noexcept; Zbase& operator=(const Zbase& str); Zbase& operator=(const Char* str) { return assign(str, strLength(str)); } Zbase& operator=(Char ch) { return assign(&ch, 1); } Zbase& operator+=(const Zbase& str) { return append(str.c_str(), str.length()); } Zbase& operator+=(const Char* str) { return append(str, strLength(str)); } Zbase& operator+=(Char ch) { return append(&ch, 1); } static const size_t npos = static_cast(-1); private: Zbase (int) = delete; // Zbase(size_t count, int) = delete; // Zbase& operator= (int) = delete; //detect usage errors by creating an intentional ambiguity with "Char" Zbase& operator+= (int) = delete; // void push_back (int) = delete; // Char* rawStr_; }; template class SP> bool operator==(const Zbase& lhs, const Zbase& rhs); template class SP> bool operator==(const Zbase& lhs, const Char* rhs); template class SP> inline bool operator==(const Char* lhs, const Zbase& rhs) { return operator==(rhs, lhs); } template class SP> std::strong_ordering operator<=>(const Zbase& lhs, const Zbase& rhs); template class SP> std::strong_ordering operator<=>(const Zbase& lhs, const Char* rhs); template class SP> std::strong_ordering operator<=>(const Char* lhs, const Zbase& rhs); template class SP> inline Zbase operator+(const Zbase& lhs, const Zbase& rhs) { return Zbase(lhs) += rhs; } template class SP> inline Zbase operator+(const Zbase& lhs, const Char* rhs) { return Zbase(lhs) += rhs; } template class SP> inline Zbase operator+(const Zbase& lhs, Char rhs) { return Zbase(lhs) += rhs; } //don't use unified first argument but save one move-construction in the r-value case instead! template class SP> inline Zbase operator+(Zbase&& lhs, const Zbase& rhs) { return std::move(lhs += rhs); } //the move *is* needed!!! template class SP> inline Zbase operator+(Zbase&& lhs, const Char* rhs) { return std::move(lhs += rhs); } //lhs, is an l-value parameter... template class SP> inline Zbase operator+(Zbase&& lhs, Char rhs) { return std::move(lhs += rhs); } //and not a local variable => no copy elision template class SP> inline Zbase operator+(const Char* lhs, const Zbase& rhs) { return Zbase(lhs ) += rhs; } template class SP> inline Zbase operator+( Char lhs, const Zbase& rhs) { return Zbase(&lhs, 1) += rhs; } template class SP> inline Zbase operator+(const Zbase&, int) = delete; //detect usage errors template class SP> inline Zbase operator+(int, const Zbase&) = delete; // //################################# implementation ######################################## template class SP> inline Zbase::Zbase() { rawStr_ = this->create(0); rawStr_[0] = 0; } template class SP> template inline Zbase::Zbase(InputIterator first, InputIterator last) { rawStr_ = this->create(std::distance(first, last)); *std::copy(first, last, rawStr_) = 0; } template class SP> inline Zbase::Zbase(size_t count, Char fillChar) { rawStr_ = this->create(count); std::fill(rawStr_, rawStr_ + count, fillChar); rawStr_[count] = 0; } template class SP> inline Zbase::Zbase(const Zbase& str) { rawStr_ = this->clone(str.rawStr_); } template class SP> inline Zbase::Zbase(Zbase&& tmp) noexcept { rawStr_ = tmp.rawStr_; tmp.rawStr_ = nullptr; //usually nullptr would violate the class invarants, but it is good enough for the destructor! //caveat: do not increment ref-count of an unshared string! We'd lose optimization opportunity of reusing its memory! } template class SP> inline Zbase::~Zbase() { static_assert(noexcept(this->~Zbase())); //has exception spec of compiler-generated destructor by default this->destroy(rawStr_); //rawStr_ may be nullptr; see move constructor! } template class SP> inline size_t Zbase::find(const Zbase& str, size_t pos) const { assert(pos <= length()); const size_t len = length(); const Char* thisEnd = begin() + len; //respect embedded 0 const Char* it = std::search(begin() + std::min(pos, len), thisEnd, str.begin(), str.end()); return it == thisEnd ? npos : it - begin(); } template class SP> inline size_t Zbase::find(const Char* str, size_t pos) const { assert(pos <= length()); const size_t len = length(); const Char* thisEnd = begin() + len; //respect embedded 0 const Char* it = std::search(begin() + std::min(pos, len), thisEnd, str, str + strLength(str)); return it == thisEnd ? npos : it - begin(); } template class SP> inline size_t Zbase::find(Char ch, size_t pos) const { assert(pos <= length()); const size_t len = length(); const Char* thisEnd = begin() + len; //respect embedded 0 const Char* it = std::find(begin() + std::min(pos, len), thisEnd, ch); return it == thisEnd ? npos : it - begin(); } template class SP> inline size_t Zbase::rfind(Char ch, size_t pos) const { assert(pos == npos || pos <= length()); const size_t len = length(); const Char* currEnd = begin() + (pos == npos ? len : std::min(pos + 1, len)); const Char* it = findLast(begin(), currEnd, ch); return it == currEnd ? npos : it - begin(); } template class SP> inline size_t Zbase::rfind(const Char* str, size_t pos) const { assert(pos == npos || pos <= length()); const size_t strLen = strLength(str); const size_t len = length(); const Char* currEnd = begin() + (pos == npos ? len : std::min(pos + strLen, len)); const Char* it = searchLast(begin(), currEnd, str, str + strLen); return it == currEnd ? npos : it - begin(); } template class SP> inline void Zbase::resize(size_t newSize, Char fillChar) { const size_t oldSize = length(); if (this->canWrite(rawStr_, newSize)) { if (oldSize < newSize) std::fill(rawStr_ + oldSize, rawStr_ + newSize, fillChar); rawStr_[newSize] = 0; this->setLength(rawStr_, newSize); } else { Char* newStr = this->create(newSize); if (oldSize < newSize) { std::copy(rawStr_, rawStr_ + oldSize, newStr); std::fill(newStr + oldSize, newStr + newSize, fillChar); } else std::copy(rawStr_, rawStr_ + newSize, newStr); newStr[newSize] = 0; this->destroy(rawStr_); rawStr_ = newStr; } } template class SP> inline bool operator==(const Zbase& lhs, const Zbase& rhs) { return lhs.length() == rhs.length() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); //respect embedded 0 } template class SP> inline bool operator==(const Zbase& lhs, const Char* rhs) { return lhs.length() == strLength(rhs) && std::equal(lhs.begin(), lhs.end(), rhs); //respect embedded 0 } template class SP> inline std::strong_ordering operator<=>(const Zbase& lhs, const Zbase& rhs) { return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), //respect embedded 0 rhs.begin(), rhs.end()); // } template class SP> inline std::strong_ordering operator<=>(const Zbase& lhs, const Char* rhs) { return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), //respect embedded 0 rhs, rhs + strLength(rhs)); } template class SP> inline std::strong_ordering operator<=>(const Char* lhs, const Zbase& rhs) { return std::lexicographical_compare_three_way(lhs, lhs + strLength(lhs), rhs.begin(), rhs.end()); //respect embedded 0 } template class SP> inline size_t Zbase::length() const { return SP::length(rawStr_); } template class SP> inline const Char& Zbase::operator[](size_t pos) const { assert(pos < length()); //design by contract! no runtime check! return rawStr_[pos]; } template class SP> inline Char& Zbase::operator[](size_t pos) { reserve(length()); //make unshared! assert(pos < length()); //design by contract! no runtime check! return rawStr_[pos]; } template class SP> inline auto Zbase::begin() -> iterator { reserve(length()); //make unshared! return rawStr_; } template class SP> inline auto Zbase::end() -> iterator { return begin() + length(); } template class SP> inline void Zbase::clear() { if (!empty()) { if (this->canWrite(rawStr_, 0)) { rawStr_[0] = 0; //keep allocated memory this->setLength(rawStr_, 0); // } else *this = Zbase(); } } template class SP> inline void Zbase::reserve(size_t minCapacity) //make unshared and check capacity { if (!this->canWrite(rawStr_, minCapacity)) { //allocate a new string const size_t len = length(); Char* newStr = this->create(len, std::max(len, minCapacity)); //reserve() must NEVER shrink the string: logical const! std::copy(rawStr_, rawStr_ + len + 1, newStr); //include 0-termination this->destroy(rawStr_); rawStr_ = newStr; } } template class SP> template inline Zbase& Zbase::assign(InputIterator first, InputIterator last) { const size_t len = std::distance(first, last); if (this->canWrite(rawStr_, len)) { *std::copy(first, last, rawStr_) = 0; this->setLength(rawStr_, len); } else *this = Zbase(first, last); return *this; } template class SP> template inline Zbase& Zbase::append(InputIterator first, InputIterator last) { const size_t len = std::distance(first, last); if (len > 0) //avoid making this string unshared for no reason { const size_t thisLen = length(); reserve(thisLen + len); //make unshared and check capacity *std::copy(first, last, rawStr_ + thisLen) = 0; this->setLength(rawStr_, thisLen + len); } return *this; } //don't use unifying assignment but save one move-construction in the r-value case instead! template class SP> inline Zbase& Zbase::operator=(const Zbase& str) { Zbase(str).swap(*this); return *this; } template class SP> inline Zbase& Zbase::operator=(Zbase&& tmp) noexcept { //don't use swap() but end rawStr_ life time immediately this->destroy(rawStr_); rawStr_ = tmp.rawStr_; tmp.rawStr_ = nullptr; return *this; } template class SP> inline void Zbase::pop_back() { const size_t len = length(); assert(len > 0); if (len > 0) resize(len - 1); } } #endif //STRING_BASE_H_083217454562342526