From bd6336c629841c6db3a6ca53a936d629d34db53b Mon Sep 17 00:00:00 2001 From: Daniel Wilhelm Date: Fri, 18 Apr 2014 17:15:16 +0200 Subject: 4.1 --- shared/string_tools.h | 687 -------------------------------------------------- 1 file changed, 687 deletions(-) delete mode 100644 shared/string_tools.h (limited to 'shared/string_tools.h') diff --git a/shared/string_tools.h b/shared/string_tools.h deleted file mode 100644 index 8951d942..00000000 --- a/shared/string_tools.h +++ /dev/null @@ -1,687 +0,0 @@ -// ************************************************************************** -// * This file is part of the zenXML project. It is distributed under the * -// * Boost Software License, Version 1.0. See accompanying file * -// * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt. * -// * Copyright (C) 2011 ZenJu (zhnmju123 AT gmx.de) * -// ************************************************************************** - -#ifndef STRING_TOOLS_HEADER_213458973046 -#define STRING_TOOLS_HEADER_213458973046 - -#include //size_t -#include //isspace -#include //iswspace -#include //swprintf -#include //sprintf -#include -#include -#include -#include -#include -#include "loki/TypeManip.h" -#include "loki/EmptyType.h" -#include "loki/TypeTraits.h" -#include "assert_static.h" -#ifdef _MSC_VER -template <> struct Loki::IsCustomUnsignedInt { enum { value = 1 }; }; -template <> struct Loki::IsCustomSignedInt { enum { value = 1 }; }; -#endif - - -//enhance arbitray string class with useful non-member functions: -namespace zen -{ -template size_t cStringLength(const C* str); //strlen() -template bool cStringIsWhiteSpace(C ch); -template bool cStringIsDigit(C ch); - -//uniform access to string-like types: classes and character arrays -/* -strBegin(): - std::wstring str(L"dummy"); - char array[] = "dummy"; - const wchar_t* iter = strBegin(str); //returns str.c_str() - const char* iter2 = strBegin(array); //returns array - -strLength(): - strLength(str); //equals str.size() - strLength(array); //equals cStringLength(array) - -StringTraits<>: - StringTraits::CharType //equals wchar_t - StringTraits ::CharType //equals wchar_t - StringTraits::isStringLike; //equals "true" - StringTraits ::isStringLike; //equals "false" - StringTraits::isStringClass //equals "true" - StringTraits ::isStringClass //equals "false" -*/ - -template bool startsWith(const S& str, const T& prefix); //both S and T can be strings or char/wchar_t arrays or simple char/wchar_t -template bool endsWith (const S& str, const T& postfix); // - -template S afterLast (const S& str, const T& ch); //returns the whole string if ch not found -template S beforeLast (const S& str, const T& ch); //returns empty string if ch not found -template S afterFirst (const S& str, const T& ch); //returns empty string if ch not found -template S beforeFirst(const S& str, const T& ch); //returns the whole string if ch not found - -template std::vector split(const S& str, const T& delimiter); -template void truncate(S& str, size_t newLen); -template void replace(S& str, const T& old, const U& replacement, bool replaceAll = true); -template void trim(S& str, bool fromLeft = true, bool fromRight = true); - -//high-performance conversion from numbers to strings -template S toString(const Num& number); -template Num toNumber(const S& str); - -//string to string conversion: converst string-like type into compatible target string class -template T cvrtString(const S& str); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//---------------------- implementation ---------------------- - -template inline -size_t cStringLength(const C* str) //strlen() -{ - assert_static((Loki::IsSameType::value || Loki::IsSameType::value)); - size_t len = 0; - while (*str++ != 0) - ++len; - return len; -} - - -template <> inline -bool cStringIsWhiteSpace(char ch) -{ - //caveat 1: std::isspace() takes an int, but expects an unsigned char - //caveat 2: some parts of UTF-8 chars are erroneously seen as whitespace, e.g. the a0 from "\xec\x8b\xa0" (MSVC) - return static_cast(ch) < 128 && - std::isspace(static_cast(ch)) != 0; -} - -template <> inline bool cStringIsWhiteSpace(unsigned char ch) { return cStringIsWhiteSpace(ch); } -template <> inline bool cStringIsWhiteSpace(signed char ch) { return cStringIsWhiteSpace(ch); } -template <> inline bool cStringIsWhiteSpace(wchar_t ch) { return std::iswspace(ch) != 0; } - -template <> inline -bool cStringIsDigit(char ch) -{ - return std::isdigit(static_cast(ch)) != 0; //caveat: takes an int, but expects an unsigned char -} - - -template <> -inline -bool cStringIsDigit(wchar_t ch) -{ - return std::iswdigit(ch) != 0; -} - -namespace implementation -{ -template -struct UnArray { typedef T NonArrayType; }; - -template -struct UnArray { typedef T NonArrayType; }; - -template -struct UnPointer { typedef T NonPtrType; }; - -template -struct UnPointer { typedef T NonPtrType; }; - -template -struct UnReference { typedef T NonRefType; }; - -template -struct UnReference { typedef T NonRefType; }; - - -template -class HasValueType -{ - typedef char Yes[1]; - typedef char No [2]; - - template class HelperTp {}; - - //detect presence of a member type called value_type - template static Yes& hasMemberValueType(HelperTp*); - template static No& hasMemberValueType(...); - -public: - enum { Result = sizeof(hasMemberValueType(NULL)) == sizeof(Yes) - }; -}; - - -template -class HasStringMembers -{ -public: - enum { Result = false }; -}; - -template -class HasStringMembers -{ - typedef char Yes[1]; - typedef char No [2]; - - //detect presence of member functions (without specific restriction on return type, within T or one of it's base classes) - template class HelperFn {}; - - struct Fallback - { - int c_str; - int length; - }; - - template - struct Helper2 : public U, public Fallback {}; //U must be a class-type! - - //we don't know the exact declaration of the member attribute (may be in base class), but we know what NOT to expect: - template static No& hasMemberCstr(HelperFn::c_str>*); - template static Yes& hasMemberCstr(...); - - template static No& hasMemberLength(HelperFn::length>*); - template static Yes& hasMemberLength(...); -public: - enum { Result = sizeof(hasMemberCstr (NULL)) == sizeof(Yes) && - sizeof(hasMemberLength(NULL)) == sizeof(Yes) - }; -}; - -template struct StringTraits2 { typedef Loki::EmptyType Result; }; //"StringTraits2": fix some VS bug with namespace and partial template specialization - -template struct StringTraits2 { typedef typename S::value_type Result; }; -template <> struct StringTraits2 { typedef char Result; }; -template <> struct StringTraits2 { typedef wchar_t Result; }; -} - -template -struct StringTraits -{ -private: - typedef typename implementation::UnReference::NonRefType NonRefType; - typedef typename Loki::TypeTraits::NonConstType UndecoratedType; - - typedef typename implementation::UnArray::NonArrayType NonArrayType; - typedef typename implementation::UnPointer::NonPtrType NonPtrType; - typedef typename Loki::TypeTraits::NonConstType NonConstValType; //handle "const char* const" -public: - enum - { - isStringClass = implementation::HasStringMembers::Result>::Result - }; - - typedef typename implementation::StringTraits2::Result CharType; - - enum - { - isStringLike = Loki::IsSameType::value || Loki::IsSameType::value - }; -}; - - -template inline -const typename StringTraits::CharType* strBegin(const S& str, typename S::value_type dummy = 0) { return str.c_str(); } //SFINAE: T must be a "string" - -template -inline const typename StringTraits::CharType* strBegin(const Char* str) { return str; } -inline const char* strBegin(const char& ch) { return &ch; } -inline const wchar_t* strBegin(const wchar_t& ch) { return &ch; } - - -template inline -size_t strLength(const S& str, typename S::value_type dummy = 0) { return str.length(); } //SFINAE: T must be a "string" - -template -inline size_t strLength(const Char* str) { return cStringLength(str); } -inline size_t strLength(char) { return 1; } -inline size_t strLength(wchar_t) { return 1; } - - -template inline -bool startsWith(const S& str, const T& prefix) -{ - assert_static(StringTraits::isStringLike); - assert_static(StringTraits::isStringLike); - - const size_t pfLength = strLength(prefix); - if (strLength(str) < pfLength) - return false; - - return std::equal(strBegin(str), strBegin(str) + pfLength, - strBegin(prefix)); -} - - -template inline -bool endsWith(const S& str, const T& postfix) -{ - assert_static(StringTraits::isStringLike); - assert_static(StringTraits::isStringLike); - - size_t strLen = strLength(str); - size_t pfLen = strLength(postfix); - if (strLen < pfLen) - return false; - - typedef typename StringTraits::CharType CharType; - - const CharType* cmpBegin = strBegin(str) + strLen - pfLen; - return std::equal(cmpBegin, cmpBegin + pfLen, - strBegin(postfix)); -} - - -//returns the whole string if ch not found -template inline -S afterLast(const S& str, const T& ch) -{ - assert_static(StringTraits::isStringLike); - - const size_t pos = str.rfind(ch); - if (pos != S::npos) - { - size_t chLen = strLength(ch); - return S(str.c_str() + pos + chLen, str.length() - pos - chLen); - } - else - return str; -} - - -//returns empty string if ch not found -template inline -S beforeLast(const S& str, const T& ch) -{ - assert_static(StringTraits::isStringLike); - - const size_t pos = str.rfind(ch); - if (pos != S::npos) - return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! - else - return S(); -} - - -//returns empty string if ch not found -template inline -S afterFirst(const S& str, const T& ch) -{ - assert_static(StringTraits::isStringLike); - - const size_t pos = str.find(ch); - if (pos != S::npos) - { - size_t chLen = strLength(ch); - return S(str.c_str() + pos + chLen, str.length() - pos - chLen); - } - else - return S(); - -} - - -//returns the whole string if ch not found -template inline -S beforeFirst(const S& str, const T& ch) -{ - assert_static(StringTraits::isStringLike); - - const size_t pos = str.find(ch); - if (pos != S::npos) - return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! - else - return str; -} - - -template inline -std::vector split(const S& str, const T& delimiter) -{ - assert_static(StringTraits::isStringLike); - - std::vector output; - size_t bockStart = 0; - size_t delimLen = strLength(delimiter); - if (delimLen != 0) - { - for (size_t blockEnd = str.find(delimiter, bockStart); - blockEnd != S::npos; - bockStart = blockEnd + delimLen, blockEnd = str.find(delimiter, bockStart)) - { - output.push_back(S(str.c_str() + bockStart, blockEnd - bockStart)); - } - } - output.push_back(S(str.c_str() + bockStart, str.length() - bockStart)); - return output; -} - - -template inline -void truncate(S& str, size_t newLen) -{ - if (newLen < str.length()) - str.resize(newLen); -} - - -template inline -void replace(S& str, const T& old, const U& replacement, bool replaceAll) -{ - assert_static(StringTraits::isStringLike); - assert_static(StringTraits::isStringLike); - - size_t pos = 0; - size_t oldLen = strLength(old); - size_t repLen = strLength(replacement); - while ((pos = str.find(old, pos)) != S::npos) - { - str.replace(pos, oldLen, replacement); - pos += repLen; //move past the string that was replaced - - if (!replaceAll) - break; - } -} - - -template inline -void trim(S& str, bool fromLeft, bool fromRight) -{ - assert(fromLeft || fromRight); - - typedef typename S::value_type CharType; - - const CharType* newBegin = str.c_str(); - const CharType* newEnd = str.c_str() + str.length(); - - if (fromRight) - while (newBegin != newEnd && cStringIsWhiteSpace(newEnd[-1])) - --newEnd; - - if (fromLeft) - while (newBegin != newEnd && cStringIsWhiteSpace(*newBegin)) - ++newBegin; - - const size_t newLength = newEnd - newBegin; - if (newLength != str.length()) - { - if (newBegin != str.c_str()) - str = S(newBegin, newLength); //minor inefficiency: in case "str" is not shared, we could save an allocation and do a memory move only - else - str.resize(newLength); - } -} - - -namespace implementation -{ -template -struct CnvtStringToString -{ - T convert(const S& src) const { return T(strBegin(src), strLength(src)); } -}; - -template -struct CnvtStringToString //perf: we don't need a deep copy if string types match -{ - const S& convert(const S& src) const { return src; } -}; -} - -template inline -T cvrtString(const S& str) { return implementation::CnvtStringToString().convert(str); } - - -namespace implementation -{ -enum NumberType -{ - NUM_TYPE_SIGNED_INT, - NUM_TYPE_UNSIGNED_INT, - NUM_TYPE_FLOATING_POINT, - NUM_TYPE_OTHER, -}; - - -template -struct CvrtNumberToString -{ - S convert(const Num& number) const //default number to string conversion using streams: convenient, but SLOW, SLOW, SLOW!!!! (~ factor of 20) - { - typedef typename StringTraits::CharType CharType; - - std::basic_ostringstream ss; - ss << number; - return cvrtString(ss.str()); - } -}; - - -template -struct CvrtNumberToString -{ - S convert(const Num& number) const { return convertFloat(number, typename StringTraits::CharType()); } - -private: - S convertFloat(const Num& number, char) const - { - char buffer[50]; - int charsWritten = std::sprintf(buffer, "%f", static_cast(number)); - return charsWritten > 0 ? S(buffer, charsWritten) : S(); - } - S convertFloat(const Num& number, wchar_t) const - { - wchar_t buffer[50]; -#ifdef __MINGW32__ - int charsWritten = ::swprintf(buffer, L"%f", static_cast(number)); //MinGW does not comply to the C standard here -#else - int charsWritten = std::swprintf(buffer, 50, L"%f", static_cast(number)); -#endif - return charsWritten > 0 ? S(buffer, charsWritten) : S(); - } -}; - -/* -perf: integer to string: (executed 10 mio. times) - std::stringstream - 14796 ms - std::sprintf - 3086 ms - hand coded - 778 ms -*/ - -template inline -S formatInteger(Num n, bool hasMinus) -{ - assert(n >= 0); - S output; - do - { - output += '0' + n % 10; - n /= 10; - } - while (n != 0); - if (hasMinus) - output += '-'; - - std::reverse(output.begin(), output.end()); - return output; -} - -template -struct CvrtNumberToString -{ - S convert(const Num& number) const { return formatInteger(number < 0 ? -number : number, number < 0); } -}; - -template -struct CvrtNumberToString -{ - S convert(const Num& number) const { return formatInteger(number, false); } -}; - -//-------------------------------------------------------------------------------- - -template -struct CvrtStringToNumber -{ - Num convert(const S& str) const //default string to number conversion using streams: convenient, but SLOW - { - typedef typename StringTraits::CharType CharType; - Num number = 0; - std::basic_istringstream(cvrtString >(str)) >> number; - return number; - } -}; - - -template -struct CvrtStringToNumber -{ - Num convert(const S& str) const { return convertFloat(strBegin(str)); } - -private: - Num convertFloat(const char* str) const { return std::strtod(str, NULL); } - Num convertFloat(const wchar_t* str) const { return std::wcstod(str, NULL); } -}; - -template -Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic -{ - typedef typename StringTraits::CharType CharType; - - const CharType* first = strBegin(str); - const CharType* last = first + strLength(str); - - while (first != last && cStringIsWhiteSpace(*first)) //skip leading whitespace - ++first; - - hasMinusSign = false; //handle minus sign - if (first != last) - { - if (*first == '-') - { - hasMinusSign = true; - ++first; - } - else if (*first == '+') - ++first; - } - - Num number = 0; - for (const CharType* iter = first; iter != last; ++iter) - { - const CharType c = *iter; - if ('0' <= c && c <= '9') - { - number *= 10; - number += c - '0'; - } - else - { - assert(std::find_if(iter, last, std::not1(std::ptr_fun(&cStringIsWhiteSpace))) == last); //rest of string should contain whitespace only - break; - } - } - return number; -} - - -template -struct CvrtStringToNumber -{ - Num convert(const S& str) const - { - bool hasMinusSign = false; //handle minus sign - const Num number = extractInteger(str, hasMinusSign); - return hasMinusSign ? -number : number; - } -}; - - -template -struct CvrtStringToNumber -{ - Num convert(const S& str) const //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic - { - bool hasMinusSign = false; //handle minus sign - const Num number = extractInteger(str, hasMinusSign); - if (hasMinusSign) - { - assert(false); - return 0U; - } - return number; - } -}; -} - - -template -inline -S toString(const Num& number) //convert number to string the C++ way -{ - using namespace implementation; - return CvrtNumberToString < S, Num, - Loki::TypeTraits::isSignedInt ? NUM_TYPE_SIGNED_INT : - Loki::TypeTraits::isUnsignedInt ? NUM_TYPE_UNSIGNED_INT : - Loki::TypeTraits::isFloat ? NUM_TYPE_FLOATING_POINT : - NUM_TYPE_OTHER - > ().convert(number); -} - - -template -inline -Num toNumber(const S& str) //convert string to number the C++ way -{ - using namespace implementation; - return CvrtStringToNumber < S, Num, - Loki::TypeTraits::isSignedInt ? NUM_TYPE_SIGNED_INT : - Loki::TypeTraits::isUnsignedInt ? NUM_TYPE_UNSIGNED_INT : - Loki::TypeTraits::isFloat ? NUM_TYPE_FLOATING_POINT : - NUM_TYPE_OTHER - > ().convert(str); -} - -} - -#endif //STRING_TOOLS_HEADER_213458973046 -- cgit