// ***************************************************************************** // * This file is part of the FreeFileSync project. It is distributed under * // * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * // * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * // ***************************************************************************** #ifndef STRING_BASE_H_083217454562342526 #define STRING_BASE_H_083217454562342526 #include #include #include #include #include "string_tools.h" //Zbase - a policy based string class optimizing performance and flexibility namespace zen { /* Allocator Policy: ----------------- void* allocate(size_t size) //throw std::bad_alloc void deallocate(void* ptr) size_t calcCapacity(size_t length) */ class AllocatorOptimalSpeed //exponential growth + min size { public: //::operator new/ ::operator delete show same performance characterisics like malloc()/free()! static void* allocate(size_t size) { return ::malloc(size); } //throw std::bad_alloc static void deallocate(void* ptr) { ::free(ptr); } static size_t calcCapacity(size_t length) { return std::max(16, std::max(length + length / 2, length)); } //- size_t might overflow! => better catch here than return a too small size covering up the real error: a way too large length! //- any growth rate should not exceed golden ratio: 1.618033989 }; class AllocatorOptimalMemory //no wasted memory, but more reallocations required when manipulating string { public: static void* allocate(size_t size) { return ::malloc(size); } //throw std::bad_alloc static void deallocate(void* ptr) { ::free(ptr); } static size_t calcCapacity(size_t length) { return length; } }; /* Storage Policy: --------------- template //Allocator Policy Char* create(size_t size) Char* create(size_t size, size_t minCapacity) Char* clone(Char* ptr) void destroy(Char* ptr) //must handle "destroy(nullptr)"! bool canWrite(const Char* ptr, size_t minCapacity) //needs to be checked before writing to "ptr" size_t length(const Char* ptr) void setLength(Char* ptr, size_t newLength) */ template //Allocator Policy class StorageDeepCopy : public AP { protected: ~StorageDeepCopy() {} Char* create(size_t size) { return create(size, size); } Char* create(size_t size, size_t minCapacity) { assert(size <= minCapacity); const size_t newCapacity = AP::calcCapacity(minCapacity); assert(newCapacity >= minCapacity); Descriptor* const newDescr = static_cast(this->allocate(sizeof(Descriptor) + (newCapacity + 1) * sizeof(Char))); //throw std::bad_alloc new (newDescr) Descriptor(size, newCapacity); return reinterpret_cast(newDescr + 1); //alignment note: "newDescr + 1" is Descriptor-aligned, which is larger than alignment for Char-array! => no problem! } Char* clone(Char* ptr) { Char* newData = create(length(ptr)); //throw std::bad_alloc std::copy(ptr, ptr + length(ptr) + 1, newData); return newData; } void destroy(Char* ptr) { if (!ptr) return; //support "destroy(nullptr)" Descriptor* const d = descr(ptr); d->~Descriptor(); this->deallocate(d); } //this needs to be checked before writing to "ptr" static bool canWrite(const Char* ptr, size_t minCapacity) { return minCapacity <= descr(ptr)->capacity; } static size_t length(const Char* ptr) { return descr(ptr)->length; } static void setLength(Char* ptr, size_t newLength) { assert(canWrite(ptr, newLength)); descr(ptr)->length = newLength; } private: struct Descriptor { Descriptor(size_t len, size_t cap) : length (static_cast(len)), capacity(static_cast(cap)) {} std::uint32_t length; std::uint32_t capacity; //allocated size without null-termination }; static Descriptor* descr( Char* ptr) { return reinterpret_cast< Descriptor*>(ptr) - 1; } static const Descriptor* descr(const Char* ptr) { return reinterpret_cast(ptr) - 1; } }; template //Allocator Policy class StorageRefCountThreadSafe : public AP { protected: ~StorageRefCountThreadSafe() {} Char* create(size_t size) { return create(size, size); } Char* create(size_t size, size_t minCapacity) { assert(size <= minCapacity); const size_t newCapacity = AP::calcCapacity(minCapacity); assert(newCapacity >= minCapacity); Descriptor* const newDescr = static_cast(this->allocate(sizeof(Descriptor) + (newCapacity + 1) * sizeof(Char))); //throw std::bad_alloc new (newDescr) Descriptor(size, newCapacity); return reinterpret_cast(newDescr + 1); } static Char* clone(Char* ptr) { ++descr(ptr)->refCount; return ptr; } #ifdef NDEBUG void destroy(Char* ptr) #else void destroy(Char*& ptr) #endif { assert(ptr != reinterpret_cast(0x1)); //detect double-deletion if (!ptr) //support "destroy(nullptr)" { #ifndef NDEBUG ptr = reinterpret_cast(0x1); #endif return; } Descriptor* const d = descr(ptr); if (--(d->refCount) == 0) //operator--() is overloaded to decrement and evaluate in a single atomic operation! { d->~Descriptor(); this->deallocate(d); #ifndef NDEBUG ptr = reinterpret_cast(0x1); #endif } } static bool canWrite(const Char* ptr, size_t minCapacity) //needs to be checked before writing to "ptr" { const Descriptor* const d = descr(ptr); assert(d->refCount > 0); return d->refCount == 1 && minCapacity <= d->capacity; } static size_t length(const Char* ptr) { return descr(ptr)->length; } static void setLength(Char* ptr, size_t newLength) { assert(canWrite(ptr, newLength)); descr(ptr)->length = static_cast(newLength); } private: struct Descriptor { Descriptor(size_t len, size_t cap) : length (static_cast(len)), capacity(static_cast(cap)) { static_assert(ATOMIC_INT_LOCK_FREE == 2, ""); } //2: "The atomic type is always lock-free" std::atomic refCount { 1 }; //std:atomic is uninitialized by default! std::uint32_t length; std::uint32_t capacity; //allocated size without null-termination }; static Descriptor* descr( Char* ptr) { return reinterpret_cast< Descriptor*>(ptr) - 1; } static const Descriptor* descr(const Char* ptr) { return reinterpret_cast(ptr) - 1; } }; //################################################################################################################################################################ //perf note: interestingly StorageDeepCopy and StorageRefCountThreadSafe show same performance in FFS comparison template class SP = StorageRefCountThreadSafe, //Storage Policy class AP = AllocatorOptimalSpeed> //Allocator Policy class Zbase : public SP { public: Zbase(); Zbase(const Char* source); //implicit conversion from a C-string Zbase(const Char* source, size_t length); Zbase(const Zbase& source); Zbase(Zbase&& tmp) noexcept; //explicit Zbase(Char source); //dangerous if implicit: Char buffer[]; return buffer[0]; ups... forgot &, but not a compiler error! //-> non-standard extension!!! //allow explicit construction from different string type, prevent ambiguity via SFINAE //template explicit Zbase(const S& other, typename S::value_type = 0); ~Zbase(); //operator const Char* () const; //NO implicit conversion to a C-string!! Many problems... one of them: if we forget to provide operator overloads, it'll just work with a Char*... //STL accessors using iterator = Char*; using const_iterator = const Char*; using reference = Char&; using const_reference = const Char&; using value_type = Char; Zbase(const_iterator first, const_iterator last); Char* begin(); Char* end (); const Char* begin() const; const Char* end () const; const Char* cbegin() const { return begin(); } const Char* cend () const { return end(); } //std::string functions size_t length() const; size_t size () const { return length(); } const Char* c_str() const { return rawStr; } //C-string format with 0-termination const Char* data() const { return rawStr; } //internal representation, 0-termination not guaranteed const Char operator[](size_t pos) const; bool empty() const { return length() == 0; } void clear(); size_t find (const Zbase& str, size_t pos = 0) const; // size_t find (const Char* str, size_t pos = 0) const; // size_t find (Char ch, size_t pos = 0) const; //returns "npos" if not found size_t rfind(Char ch, size_t pos = npos) const; // size_t rfind(const Char* str, size_t pos = npos) const; // //Zbase& replace(size_t pos1, size_t n1, const Zbase& str); void reserve(size_t minCapacity); Zbase& assign(const Char* source, size_t len); Zbase& append(const Char* source, size_t len); void resize(size_t newSize, Char fillChar = 0); void swap(Zbase& other); void push_back(Char val) { operator+=(val); } //STL access void pop_back(); Zbase& operator=(const Zbase& source); Zbase& operator=(Zbase&& tmp) noexcept; Zbase& operator=(const Char* source); Zbase& operator=(Char source); Zbase& operator+=(const Zbase& other); Zbase& operator+=(const Char* other); Zbase& operator+=(Char ch); static const size_t npos = static_cast(-1); private: Zbase (int) = delete; // Zbase& operator= (int) = delete; //detect usage errors by creating an intentional ambiguity with "Char" Zbase& operator+=(int) = delete; // void push_back (int) = delete; // Char* rawStr; }; template class SP, class AP> bool operator==(const Zbase& lhs, const Zbase& rhs); template class SP, class AP> bool operator==(const Zbase& lhs, const Char* rhs); template class SP, class AP> inline bool operator==(const Char* lhs, const Zbase& rhs) { return operator==(rhs, lhs); } template class SP, class AP> inline bool operator!=(const Zbase& lhs, const Zbase& rhs) { return !operator==(lhs, rhs); } template class SP, class AP> inline bool operator!=(const Zbase& lhs, const Char* rhs) { return !operator==(lhs, rhs); } template class SP, class AP> inline bool operator!=(const Char* lhs, const Zbase& rhs) { return !operator==(lhs, rhs); } template class SP, class AP> bool operator<(const Zbase& lhs, const Zbase& rhs); template class SP, class AP> bool operator<(const Zbase& lhs, const Char* rhs); template class SP, class AP> bool operator<(const Char* lhs, const Zbase& rhs); template class SP, class AP> inline Zbase operator+(const Zbase& lhs, const Zbase& rhs) { return Zbase(lhs) += rhs; } template class SP, class AP> inline Zbase operator+(const Zbase& lhs, const Char* rhs) { return Zbase(lhs) += rhs; } template class SP, class AP> inline Zbase operator+(const Zbase& lhs, Char rhs) { return Zbase(lhs) += rhs; } //don't use unified first argument but save one move-construction in the r-value case instead! template class SP, class AP> inline Zbase operator+(Zbase&& lhs, const Zbase& rhs) { return std::move(lhs += rhs); } //the move *is* needed!!! template class SP, class AP> inline Zbase operator+(Zbase&& lhs, const Char* rhs) { return std::move(lhs += rhs); } //lhs, is an l-value parameter... template class SP, class AP> inline Zbase operator+(Zbase&& lhs, Char rhs) { return std::move(lhs += rhs); } //and not a local variable => no copy elision template class SP, class AP> inline Zbase operator+( Char lhs, const Zbase& rhs) { return Zbase(&lhs, 1) += rhs; } template class SP, class AP> inline Zbase operator+(const Char* lhs, const Zbase& rhs) { return Zbase(lhs ) += rhs; } //################################# implementation ######################################## template class SP, class AP> inline Zbase::Zbase() { //resist the temptation to avoid this allocation by referening a static global: NO performance advantage, MT issues! rawStr = this->create(0); rawStr[0] = 0; } template class SP, class AP> inline Zbase::Zbase(const Char* source) { const size_t sourceLen = strLength(source); rawStr = this->create(sourceLen); std::copy(source, source + sourceLen + 1, rawStr); //include null-termination } template class SP, class AP> inline Zbase::Zbase(const Char* source, size_t sourceLen) { rawStr = this->create(sourceLen); std::copy(source, source + sourceLen, rawStr); rawStr[sourceLen] = 0; } template class SP, class AP> Zbase::Zbase(const_iterator first, const_iterator last) { assert(first <= last); const size_t sourceLen = last - first; rawStr = this->create(sourceLen); std::copy(first, last, rawStr); rawStr[sourceLen] = 0; } template class SP, class AP> inline Zbase::Zbase(const Zbase& source) { rawStr = this->clone(source.rawStr); } template class SP, class AP> inline Zbase::Zbase(Zbase&& tmp) noexcept { rawStr = tmp.rawStr; tmp.rawStr = nullptr; //usually nullptr would violate the class invarants, but it is good enough for the destructor! //caveat: do not increment ref-count of an unshared string! We'd lose optimization opportunity of reusing its memory! } /* template class SP, class AP> template inline Zbase::Zbase(const S& other, typename S::value_type) { const size_t sourceLen = other.size(); rawStr = this->create(sourceLen); std::copy(other.c_str(), other.c_str() + sourceLen, rawStr); rawStr[sourceLen] = 0; } */ template class SP, class AP> inline Zbase::~Zbase() { static_assert(noexcept(this->~Zbase()), ""); //has exception spec of compiler-generated destructor by default this->destroy(rawStr); //rawStr may be nullptr; see move constructor! } template class SP, class AP> inline size_t Zbase::find(const Zbase& str, size_t pos) const { assert(pos <= length()); const size_t len = length(); const Char* thisEnd = begin() + len; //respect embedded 0 const Char* it = std::search(begin() + std::min(pos, len), thisEnd, str.begin(), str.end()); return it == thisEnd ? npos : it - begin(); } template class SP, class AP> inline size_t Zbase::find(const Char* str, size_t pos) const { assert(pos <= length()); const size_t len = length(); const Char* thisEnd = begin() + len; //respect embedded 0 const Char* it = std::search(begin() + std::min(pos, len), thisEnd, str, str + strLength(str)); return it == thisEnd ? npos : it - begin(); } template class SP, class AP> inline size_t Zbase::find(Char ch, size_t pos) const { assert(pos <= length()); const size_t len = length(); const Char* thisEnd = begin() + len; //respect embedded 0 const Char* it = std::find(begin() + std::min(pos, len), thisEnd, ch); return it == thisEnd ? npos : it - begin(); } template class SP, class AP> inline size_t Zbase::rfind(Char ch, size_t pos) const { assert(pos == npos || pos <= length()); const size_t len = length(); const Char* currEnd = begin() + (pos == npos ? len : std::min(pos + 1, len)); const Char* it = find_last(begin(), currEnd, ch); return it == currEnd ? npos : it - begin(); } template class SP, class AP> inline size_t Zbase::rfind(const Char* str, size_t pos) const { assert(pos == npos || pos <= length()); const size_t strLen = strLength(str); const size_t len = length(); const Char* currEnd = begin() + (pos == npos ? len : std::min(pos + strLen, len)); const Char* it = search_last(begin(), currEnd, str, str + strLen); return it == currEnd ? npos : it - begin(); } template class SP, class AP> inline void Zbase::resize(size_t newSize, Char fillChar) { const size_t oldSize = length(); if (this->canWrite(rawStr, newSize)) { if (oldSize < newSize) std::fill(rawStr + oldSize, rawStr + newSize, fillChar); rawStr[newSize] = 0; this->setLength(rawStr, newSize); } else { Char* newStr = this->create(newSize); if (oldSize < newSize) { std::copy(rawStr, rawStr + oldSize, newStr); std::fill(newStr + oldSize, newStr + newSize, fillChar); } else std::copy(rawStr, rawStr + newSize, newStr); newStr[newSize] = 0; this->destroy(rawStr); rawStr = newStr; } } template class SP, class AP> inline bool operator==(const Zbase& lhs, const Zbase& rhs) { return lhs.length() == rhs.length() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); //respect embedded 0 } template class SP, class AP> inline bool operator==(const Zbase& lhs, const Char* rhs) { return lhs.length() == strLength(rhs) && std::equal(lhs.begin(), lhs.end(), rhs); //respect embedded 0 } template class SP, class AP> inline bool operator<(const Zbase& lhs, const Zbase& rhs) { return std::lexicographical_compare(lhs.begin(), lhs.end(), //respect embedded 0 rhs.begin(), rhs.end()); } template class SP, class AP> inline bool operator<(const Zbase& lhs, const Char* rhs) { return std::lexicographical_compare(lhs.begin(), lhs.end(), //respect embedded 0 rhs, rhs + strLength(rhs)); } template class SP, class AP> inline bool operator<(const Char* lhs, const Zbase& rhs) { return std::lexicographical_compare(lhs, lhs + strLength(lhs), //respect embedded 0 rhs.begin(), rhs.end()); } template class SP, class AP> inline size_t Zbase::length() const { return SP::length(rawStr); } template class SP, class AP> inline const Char Zbase::operator[](size_t pos) const { assert(pos < length()); //design by contract! no runtime check! return rawStr[pos]; } template class SP, class AP> inline const Char* Zbase::begin() const { return rawStr; } template class SP, class AP> inline const Char* Zbase::end() const { return rawStr + length(); } template class SP, class AP> inline Char* Zbase::begin() { reserve(length()); //make unshared! return rawStr; } template class SP, class AP> inline Char* Zbase::end() { return begin() + length(); } template class SP, class AP> inline void Zbase::clear() { if (!empty()) { if (this->canWrite(rawStr, 0)) { rawStr[0] = 0; //keep allocated memory this->setLength(rawStr, 0); // } else *this = Zbase(); } } template class SP, class AP> inline void Zbase::swap(Zbase& other) { std::swap(rawStr, other.rawStr); } template class SP, class AP> inline void Zbase::reserve(size_t minCapacity) //make unshared and check capacity { if (!this->canWrite(rawStr, minCapacity)) { //allocate a new string const size_t len = length(); Char* newStr = this->create(len, std::max(len, minCapacity)); //reserve() must NEVER shrink the string: logical const! std::copy(rawStr, rawStr + len + 1, newStr); //include 0-termination this->destroy(rawStr); rawStr = newStr; } } template class SP, class AP> inline Zbase& Zbase::assign(const Char* source, size_t len) { if (this->canWrite(rawStr, len)) { std::copy(source, source + len, rawStr); rawStr[len] = 0; //include null-termination this->setLength(rawStr, len); } else *this = Zbase(source, len); return *this; } template class SP, class AP> inline Zbase& Zbase::append(const Char* source, size_t len) { const size_t thisLen = length(); reserve(thisLen + len); //make unshared and check capacity std::copy(source, source + len, rawStr + thisLen); rawStr[thisLen + len] = 0; this->setLength(rawStr, thisLen + len); return *this; } template class SP, class AP> inline Zbase& Zbase::operator=(const Zbase& other) { Zbase(other).swap(*this); return *this; } template class SP, class AP> inline Zbase& Zbase::operator=(Zbase&& tmp) noexcept { swap(tmp); //don't use unifying assignment but save one move-construction in the r-value case instead! return *this; } template class SP, class AP> inline Zbase& Zbase::operator=(const Char* source) { return assign(source, strLength(source)); } template class SP, class AP> inline Zbase& Zbase::operator=(Char ch) { return assign(&ch, 1); } template class SP, class AP> inline Zbase& Zbase::operator+=(const Zbase& other) { return append(other.c_str(), other.length()); } template class SP, class AP> inline Zbase& Zbase::operator+=(const Char* other) { return append(other, strLength(other)); } template class SP, class AP> inline Zbase& Zbase::operator+=(Char ch) { return append(&ch, 1); } template class SP, class AP> inline void Zbase::pop_back() { const size_t len = length(); assert(len > 0); if (len > 0) resize(len - 1); } } #endif //STRING_BASE_H_083217454562342526