diff options
Diffstat (limited to 'zen/string_tools.h')
-rw-r--r-- | zen/string_tools.h | 399 |
1 files changed, 216 insertions, 183 deletions
diff --git a/zen/string_tools.h b/zen/string_tools.h index 8cafad07..5f20a2de 100644 --- a/zen/string_tools.h +++ b/zen/string_tools.h @@ -14,17 +14,18 @@ #include <cwchar> //swprintf #include <algorithm> #include <cassert> -#include <sstream> #include <vector> +#include <sstream> +#include "stl_tools.h" #include "string_traits.h" -#include "type_traits.h" //enhance arbitray string class with useful non-member functions: namespace zen { -template <class C> bool cStringIsWhiteSpace(C ch); -template <class C> bool cStringIsDigit(C ch); +template <class Char> bool cStringIsWhiteSpace(Char ch); +template <class Char> bool cStringIsDigit (Char ch); //not exactly the same as "std::isdigit" -> we consider '0'-'9' only! + template <class S, class T> bool startsWith(const S& str, const T& prefix); //both S and T can be strings or char/wchar_t arrays or simple char/wchar_t template <class S, class T> bool endsWith (const S& str, const T& postfix); // @@ -40,14 +41,14 @@ template <class S> void trim(S& str, bool fromLeft = true, bool fromRight = true template <class S, class T, class U> void replace ( S& str, const T& oldOne, const U& newOne, bool replaceAll = true); template <class S, class T, class U> S replaceCpy(const S& str, const T& oldOne, const U& newOne, bool replaceAll = true); -//high-performance conversion from numbers to strings -template <class S, class T, class Num> S printNumber(const T& format, const Num& number); //format a single number using ::sprintf +//high-performance conversion between numbers and strings +template <class S, class T, class Num> S printNumber(const T& format, const Num& number); //format a single number using std::snprintf() template <class S, class Num> S toString(const Num& number); template <class Num, class S > Num toNumber(const S& str); -//string to string conversion: converst string-like type into char-compatible target string class -template <class T, class S> T cvrtString(const S& str); +//string to string conversion: converts string-like type into char-compatible target string class +template <class T, class S> T copyStringTo(const S& str); @@ -90,35 +91,30 @@ bool cStringIsWhiteSpace(char ch) std::isspace(static_cast<unsigned char>(ch)) != 0; } -//template <> inline bool cStringIsWhiteSpace(unsigned char ch) { return cStringIsWhiteSpace<char>(ch); } -> not character types! -//template <> inline bool cStringIsWhiteSpace(signed char ch) { return cStringIsWhiteSpace<char>(ch); } -template <> inline bool cStringIsWhiteSpace(wchar_t ch) { return std::iswspace(ch) != 0; } - -template <> inline -bool cStringIsDigit(char ch) -{ - return std::isdigit(static_cast<unsigned char>(ch)) != 0; //caveat: takes an int, but expects an unsigned char -} +template <> inline bool cStringIsWhiteSpace(wchar_t ch) { return std::iswspace(ch) != 0; } -template <> inline -bool cStringIsDigit(wchar_t ch) +template <class Char> inline +bool cStringIsDigit(Char ch) //similar to implmenetation of std::::isdigit()! { - return std::iswdigit(ch) != 0; + assert_static((IsSameType<Char, char>::result || IsSameType<Char, wchar_t>::result)); + return static_cast<Char>('0') <= ch && ch <= static_cast<Char>('9'); } template <class S, class T> inline bool startsWith(const S& str, const T& prefix) { - assert_static(StringTraits<S>::isStringLike); - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<S>::result); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; const size_t pfLength = strLength(prefix); if (strLength(str) < pfLength) return false; - return std::equal(strBegin(str), strBegin(str) + pfLength, + const CharType* const strFirst = strBegin(str); + return std::equal(strFirst, strFirst + pfLength, strBegin(prefix)); } @@ -126,18 +122,17 @@ bool startsWith(const S& str, const T& prefix) template <class S, class T> inline bool endsWith(const S& str, const T& postfix) { - assert_static(StringTraits<S>::isStringLike); - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<S>::result); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; - size_t strLen = strLength(str); - size_t pfLen = strLength(postfix); + const size_t strLen = strLength(str); + const size_t pfLen = strLength(postfix); if (strLen < pfLen) return false; - typedef typename StringTraits<S>::CharType CharType; - - const CharType* cmpBegin = strBegin(str) + strLen - pfLen; - return std::equal(cmpBegin, cmpBegin + pfLen, + const CharType* const cmpFirst = strBegin(str) + strLen - pfLen; + return std::equal(cmpFirst, cmpFirst + pfLen, strBegin(postfix)); } @@ -146,16 +141,22 @@ bool endsWith(const S& str, const T& postfix) template <class S, class T> inline S afterLast(const S& str, const T& ch) { - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; - const size_t pos = str.rfind(ch); - if (pos != S::npos) - { - size_t chLen = strLength(ch); - return S(str.c_str() + pos + chLen, str.length() - pos - chLen); - } - else + const size_t chLen = strLength(ch); + + const CharType* const strFirst = strBegin(str); + const CharType* const strLast = strFirst + strLength(str); + const CharType* const chFirst = strBegin(ch); + + const CharType* iter = search_last(strFirst, strLast, + chFirst, chFirst + chLen); + if (iter == strLast) return str; + + iter += chLen; + return S(iter, strLast - iter); } @@ -163,13 +164,19 @@ S afterLast(const S& str, const T& ch) template <class S, class T> inline S beforeLast(const S& str, const T& ch) { - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; - const size_t pos = str.rfind(ch); - if (pos != S::npos) - return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! - else + const CharType* const strFirst = strBegin(str); + const CharType* const strLast = strFirst + strLength(str); + const CharType* const chFirst = strBegin(ch); + + const CharType* iter = search_last(strFirst, strLast, + chFirst, chFirst + strLength(ch)); + if (iter == strLast) return S(); + + return S(strFirst, iter - strFirst); } @@ -177,17 +184,21 @@ S beforeLast(const S& str, const T& ch) template <class S, class T> inline S afterFirst(const S& str, const T& ch) { - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; - const size_t pos = str.find(ch); - if (pos != S::npos) - { - size_t chLen = strLength(ch); - return S(str.c_str() + pos + chLen, str.length() - pos - chLen); - } - else + const size_t chLen = strLength(ch); + const CharType* const strFirst = strBegin(str); + const CharType* const strLast = strFirst + strLength(str); + const CharType* const chFirst = strBegin(ch); + + const CharType* iter = std::search(strFirst, strLast, + chFirst, chFirst + chLen); + if (iter == strLast) return S(); + iter += chLen; + return S(iter, strLast - iter); } @@ -195,34 +206,48 @@ S afterFirst(const S& str, const T& ch) template <class S, class T> inline S beforeFirst(const S& str, const T& ch) { - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; - const size_t pos = str.find(ch); - if (pos != S::npos) - return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! - else - return str; + const CharType* const strFirst = strBegin(str); + const CharType* const chFirst = strBegin(ch); + + return S(strFirst, std::search(strFirst, strFirst + strLength(str), + chFirst, chFirst + strLength(ch)) - strFirst); } template <class S, class T> inline std::vector<S> split(const S& str, const T& delimiter) { - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<T>::result); + typedef typename GetCharType<S>::Result CharType; std::vector<S> output; - size_t bockStart = 0; - size_t delimLen = strLength(delimiter); - if (delimLen != 0) + + const size_t delimLen = strLength(delimiter); + + if (delimLen == 0) + output.push_back(str); + else { - for (size_t blockEnd = str.find(delimiter, bockStart); - blockEnd != S::npos; - bockStart = blockEnd + delimLen, blockEnd = str.find(delimiter, bockStart)) + const CharType* const delimFirst = strBegin(delimiter); + const CharType* const delimLast = delimFirst + delimLen; + + const CharType* blockStart = strBegin(str); + const CharType* const strLast = blockStart + strLength(str); + + for (;;) { - output.push_back(S(str.c_str() + bockStart, blockEnd - bockStart)); + const CharType* const blockEnd = std::search(blockStart, strLast, + delimFirst, delimLast); + + output.push_back(S(blockStart, blockEnd - blockStart)); + if (blockEnd == strLast) + break; + blockStart = blockEnd + delimLen; } } - output.push_back(S(str.c_str() + bockStart, str.length() - bockStart)); return output; } @@ -235,38 +260,54 @@ void truncate(S& str, size_t newLen) } +namespace implementation +{ +ZEN_INIT_DETECT_MEMBER(append); + +//either call operator+=(S(str, len)) or append(str, len) +template <class S, class Char> inline +typename EnableIf<HasMember_append<S>::result>::Result stringAppend(S& str, const Char* other, size_t len) { str.append(other, len); } + +template <class S, class Char> inline +typename EnableIf<!HasMember_append<S>::result>::Result stringAppend(S& str, const Char* other, size_t len) { str += S(other, len); } +} + + template <class S, class T, class U> inline S replaceCpy(const S& str, const T& oldOne, const U& newOne, bool replaceAll) { - assert_static(StringTraits<T>::isStringLike); - assert_static(StringTraits<U>::isStringLike); + assert_static(IsStringLike<T>::result); + assert_static(IsStringLike<U>::result); - typedef typename StringTraits<S>::CharType CharType; + typedef typename GetCharType<S>::Result CharType; const size_t oldLen = strLength(oldOne); const size_t newLen = strLength(newOne); S output; - const CharType* strPos = strBegin(str); - const CharType* strEnd = strPos + strLength(str); + const CharType* strPos = strBegin(str); + const CharType* const strEnd = strPos + strLength(str); + + const CharType* const oldBegin = strBegin(oldOne); + const CharType* const newBegin = strBegin(newOne); for (;;) { const CharType* ptr = std::search(strPos, strEnd, - strBegin(oldOne), strBegin(oldOne) + oldLen); + oldBegin, oldBegin + oldLen); if (ptr == strEnd) break; - output += S(strPos, ptr - strPos); - output += S(strBegin(newOne), newLen); + implementation::stringAppend(output, strPos, ptr - strPos); + implementation::stringAppend(output, newBegin, newLen); strPos = ptr + oldLen; if (!replaceAll) break; } - output += S(strPos, strEnd - strPos); + implementation::stringAppend(output, strPos, strEnd - strPos); return output; } @@ -284,10 +325,11 @@ void trim(S& str, bool fromLeft, bool fromRight) { assert(fromLeft || fromRight); - typedef typename S::value_type CharType; + typedef typename GetCharType<S>::Result CharType; //don't use value_type! (wxString, Glib::ustring) - const CharType* newBegin = str.c_str(); - const CharType* newEnd = str.c_str() + str.length(); + const CharType* const oldBegin = str.c_str(); + const CharType* newBegin = oldBegin; + const CharType* newEnd = oldBegin + str.length(); if (fromRight) while (newBegin != newEnd && cStringIsWhiteSpace(newEnd[-1])) @@ -300,7 +342,7 @@ void trim(S& str, bool fromLeft, bool fromRight) const size_t newLength = newEnd - newBegin; if (newLength != str.length()) { - if (newBegin != str.c_str()) + if (newBegin != oldBegin) str = S(newBegin, newLength); //minor inefficiency: in case "str" is not shared, we could save an allocation and do a memory move only else str.resize(newLength); @@ -311,20 +353,20 @@ void trim(S& str, bool fromLeft, bool fromRight) namespace implementation { template <class S, class T> -struct CnvtStringToString +struct CopyStringToString { - T convert(const S& src) const { return T(strBegin(src), strLength(src)); } + T copy(const S& src) const { return T(strBegin(src), strLength(src)); } }; template <class S> -struct CnvtStringToString<S, S> //perf: we don't need a deep copy if string types match +struct CopyStringToString<S, S> //perf: we don't need a deep copy if string types match { - const S& convert(const S& src) const { return src; } + const S& copy(const S& src) const { return src; } }; } template <class T, class S> inline -T cvrtString(const S& str) { return implementation::CnvtStringToString<S, T>().convert(str); } +T copyStringTo(const S& str) { return implementation::CopyStringToString<S, T>().copy(str); } namespace implementation @@ -333,7 +375,7 @@ template <class Num> inline int saferPrintf(char* buffer, size_t bufferSize, const char* format, const Num& number) //there is no such thing as a "safe" printf ;) { #ifdef _MSC_VER - return ::_snprintf(buffer, bufferSize, format, number); //VS2010 doesn't respect ISO C + return ::_snprintf(buffer, bufferSize, format, number); //VS2010 doesn't respect ISO C #else return std::snprintf(buffer, bufferSize, format, number); //C99 #endif @@ -342,8 +384,8 @@ int saferPrintf(char* buffer, size_t bufferSize, const char* format, const Num& template <class Num> inline int saferPrintf(wchar_t* buffer, size_t bufferSize, const wchar_t* format, const Num& number) { -#ifdef __MINGW32__ //MinGW doesn't respect ISO C - return ::snwprintf(buffer, bufferSize, format, number); +#ifdef __MINGW32__ + return ::snwprintf(buffer, bufferSize, format, number); //MinGW doesn't respect ISO C #else return std::swprintf(buffer, bufferSize, format, number); //C99 #endif @@ -353,12 +395,12 @@ int saferPrintf(wchar_t* buffer, size_t bufferSize, const wchar_t* format, const template <class S, class T, class Num> inline S printNumber(const T& format, const Num& number) //format a single number using ::sprintf { - assert_static(StringTraits<T>::isStringLike); + assert_static(IsStringLike<T>::result); assert_static((IsSameType< - typename StringTraits<S>::CharType, - typename StringTraits<T>::CharType>::result)); + typename GetCharType<S>::Result, + typename GetCharType<T>::Result>::result)); - typedef typename StringTraits<S>::CharType CharType; + typedef typename GetCharType<S>::Result CharType; const int BUFFER_SIZE = 128; CharType buffer[BUFFER_SIZE]; @@ -379,29 +421,26 @@ enum NumberType }; -template <class S, class Num, NumberType> -struct CvrtNumberToString +template <class S, class Num> inline +S toString(const Num& number, Int2Type<NUM_TYPE_OTHER>) //default number to string conversion using streams: convenient, but SLOW, SLOW, SLOW!!!! (~ factor of 20) { - S convert(const Num& number) const //default number to string conversion using streams: convenient, but SLOW, SLOW, SLOW!!!! (~ factor of 20) - { - typedef typename StringTraits<S>::CharType CharType; + typedef typename GetCharType<S>::Result CharType; + + std::basic_ostringstream<CharType> ss; + ss << number; + return copyStringTo<S>(ss.str()); +} - std::basic_ostringstream<CharType> ss; - ss << number; - return cvrtString<S>(ss.str()); - } -}; +template <class S, class Num> inline S floatToString(const Num& number, char ) { return printNumber<S>( "%g", static_cast<double>(number)); } +template <class S, class Num> inline S floatToString(const Num& number, wchar_t) { return printNumber<S>(L"%g", static_cast<double>(number)); } -template <class S, class Num> -struct CvrtNumberToString<S, Num, NUM_TYPE_FLOATING_POINT> +template <class S, class Num> inline +S toString(const Num& number, Int2Type<NUM_TYPE_FLOATING_POINT>) { - S convert(const Num& number) const { return convertFloat(number, typename StringTraits<S>::CharType()); } + return floatToString<S>(number, typename GetCharType<S>::Result()); +} -private: - S convertFloat(const Num& number, char ) const { return printNumber<S>( "%g", static_cast<double>(number)); } - S convertFloat(const Num& number, wchar_t) const { return printNumber<S>(L"%g", static_cast<double>(number)); } -}; /* perf: integer to string: (executed 10 mio. times) @@ -413,62 +452,62 @@ perf: integer to string: (executed 10 mio. times) template <class S, class Num> inline S formatInteger(Num n, bool hasMinus) { + typedef typename GetCharType<S>::Result CharType; + assert(n >= 0); S output; do { - output += '0' + n % 10; + output += static_cast<CharType>('0' + n % 10); n /= 10; } while (n != 0); if (hasMinus) - output += '-'; + output += static_cast<CharType>('-'); std::reverse(output.begin(), output.end()); return output; } -template <class S, class Num> -struct CvrtNumberToString<S, Num, NUM_TYPE_SIGNED_INT> +template <class S, class Num> inline +S toString(const Num& number, Int2Type<NUM_TYPE_SIGNED_INT>) { - S convert(const Num& number) const { return formatInteger<S>(number < 0 ? -number : number, number < 0); } -}; + return formatInteger<S>(number < 0 ? -number : number, number < 0); +} + -template <class S, class Num> -struct CvrtNumberToString<S, Num, NUM_TYPE_UNSIGNED_INT> +template <class S, class Num> inline +S toString(const Num& number, Int2Type<NUM_TYPE_UNSIGNED_INT>) { - S convert(const Num& number) const { return formatInteger<S>(number, false); } -}; + return formatInteger<S>(number, false); +} //-------------------------------------------------------------------------------- -template <class S, class Num, NumberType> -struct CvrtStringToNumber + +template <class Num, class S> inline +Num toNumber(const S& str, Int2Type<NUM_TYPE_OTHER>) //default string to number conversion using streams: convenient, but SLOW { - Num convert(const S& str) const //default string to number conversion using streams: convenient, but SLOW - { - typedef typename StringTraits<S>::CharType CharType; - Num number = 0; - std::basic_istringstream<CharType>(cvrtString<std::basic_string<CharType> >(str)) >> number; - return number; - } -}; + typedef typename GetCharType<S>::Result CharType; + Num number = 0; + std::basic_istringstream<CharType>(copyStringTo<std::basic_string<CharType> >(str)) >> number; + return number; +} -template <class S, class Num> -struct CvrtStringToNumber<S, Num, NUM_TYPE_FLOATING_POINT> -{ - Num convert(const S& str) const { return convertFloat(strBegin(str)); } +template <class Num> inline Num stringToFloat(const char* str) { return std::strtod(str, NULL); } +template <class Num> inline Num stringToFloat(const wchar_t* str) { return std::wcstod(str, NULL); } -private: - Num convertFloat(const char* str) const { return std::strtod(str, NULL); } - Num convertFloat(const wchar_t* str) const { return std::wcstod(str, NULL); } -}; +template <class Num, class S> inline +Num toNumber(const S& str, Int2Type<NUM_TYPE_FLOATING_POINT>) +{ + return stringToFloat<Num>(strBegin(str)); +} template <class Num, class S> Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic { - typedef typename StringTraits<S>::CharType CharType; + typedef typename GetCharType<S>::Result CharType; const CharType* first = strBegin(str); const CharType* last = first + strLength(str); @@ -476,15 +515,16 @@ Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to i while (first != last && cStringIsWhiteSpace(*first)) //skip leading whitespace ++first; - hasMinusSign = false; //handle minus sign + //handle minus sign + hasMinusSign = false; if (first != last) { - if (*first == '-') + if (*first == static_cast<CharType>('-')) { hasMinusSign = true; ++first; } - else if (*first == '+') + else if (*first == static_cast<CharType>('+')) ++first; } @@ -492,14 +532,15 @@ Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to i for (const CharType* iter = first; iter != last; ++iter) { const CharType c = *iter; - if ('0' <= c && c <= '9') + if (static_cast<CharType>('0') <= c && c <= static_cast<CharType>('9')) { number *= 10; - number += c - '0'; + number += c - static_cast<CharType>('0'); } else { - assert(std::find_if(iter, last, std::not1(std::ptr_fun(&cStringIsWhiteSpace<CharType>))) == last); //rest of string should contain whitespace only + //rest of string should contain whitespace only + //assert(std::find_if(iter, last, std::not1(std::ptr_fun(&cStringIsWhiteSpace<CharType>))) == last); -> this is NO assert situation break; } } @@ -507,61 +548,53 @@ Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to i } -template <class S, class Num> -struct CvrtStringToNumber<S, Num, NUM_TYPE_SIGNED_INT> +template <class Num, class S> inline +Num toNumber(const S& str, Int2Type<NUM_TYPE_SIGNED_INT>) { - Num convert(const S& str) const - { - bool hasMinusSign = false; //handle minus sign - const Num number = extractInteger<Num>(str, hasMinusSign); - return hasMinusSign ? -number : number; - } -}; + bool hasMinusSign = false; //handle minus sign + const Num number = extractInteger<Num>(str, hasMinusSign); + return hasMinusSign ? -number : number; +} -template <class S, class Num> -struct CvrtStringToNumber<S, Num, NUM_TYPE_UNSIGNED_INT> +template <class Num, class S> inline +Num toNumber(const S& str, Int2Type<NUM_TYPE_UNSIGNED_INT>) //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic { - Num convert(const S& str) const //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic + bool hasMinusSign = false; //handle minus sign + const Num number = extractInteger<Num>(str, hasMinusSign); + if (hasMinusSign) { - bool hasMinusSign = false; //handle minus sign - const Num number = extractInteger<Num>(str, hasMinusSign); - if (hasMinusSign) - { - assert(false); - return 0U; - } - return number; + assert(false); + return 0U; } -}; + return number; +} } -template <class S, class Num> -inline +template <class S, class Num> inline S toString(const Num& number) //convert number to string the C++ way { - using namespace implementation; - return CvrtNumberToString<S, Num, - IsSignedInt <Num>::result ? NUM_TYPE_SIGNED_INT : - IsUnsignedInt<Num>::result ? NUM_TYPE_UNSIGNED_INT : - IsFloat <Num>::result ? NUM_TYPE_FLOATING_POINT : - NUM_TYPE_OTHER - >().convert(number); + typedef Int2Type< + IsSignedInt <Num>::result ? implementation::NUM_TYPE_SIGNED_INT : + IsUnsignedInt<Num>::result ? implementation::NUM_TYPE_UNSIGNED_INT : + IsFloat <Num>::result ? implementation::NUM_TYPE_FLOATING_POINT : + implementation::NUM_TYPE_OTHER> TypeTag; + + return implementation::toString<S>(number, TypeTag()); } -template <class Num, class S> -inline +template <class Num, class S> inline Num toNumber(const S& str) //convert string to number the C++ way { - using namespace implementation; - return CvrtStringToNumber<S, Num, - IsSignedInt <Num>::result ? NUM_TYPE_SIGNED_INT : - IsUnsignedInt<Num>::result ? NUM_TYPE_UNSIGNED_INT : - IsFloat <Num>::result ? NUM_TYPE_FLOATING_POINT : - NUM_TYPE_OTHER - >().convert(str); + typedef Int2Type< + IsSignedInt <Num>::result ? implementation::NUM_TYPE_SIGNED_INT : + IsUnsignedInt<Num>::result ? implementation::NUM_TYPE_UNSIGNED_INT : + IsFloat <Num>::result ? implementation::NUM_TYPE_FLOATING_POINT : + implementation::NUM_TYPE_OTHER> TypeTag; + + return implementation::toNumber<Num>(str, TypeTag()); } } |