// ************************************************************************** // * This file is part of the zenXML project. It is distributed under the * // * Boost Software License, Version 1.0. See accompanying file * // * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt. * // * Copyright (C) 2011 ZenJu (zhnmju123 AT gmx.de) * // ************************************************************************** #ifndef STRING_TOOLS_HEADER_213458973046 #define STRING_TOOLS_HEADER_213458973046 #include //size_t #include //isspace #include //iswspace #include //swprintf #include //sprintf #include #include #include #include #include #include "loki/TypeManip.h" #include "loki/EmptyType.h" #include "loki/TypeTraits.h" #include "assert_static.h" #ifdef _MSC_VER template <> struct Loki::IsCustomUnsignedInt { enum { value = 1 }; }; template <> struct Loki::IsCustomSignedInt { enum { value = 1 }; }; #endif //enhance arbitray string class with useful non-member functions: namespace zen { template size_t cStringLength(const C* str); //strlen() template bool cStringIsWhiteSpace(C ch); template bool cStringIsDigit(C ch); //uniform access to string-like types: classes and character arrays /* strBegin(): std::wstring str(L"dummy"); char array[] = "dummy"; const wchar_t* iter = strBegin(str); //returns str.c_str() const char* iter2 = strBegin(array); //returns array strLength(): strLength(str); //equals str.size() strLength(array); //equals cStringLength(array) StringTraits<>: StringTraits::CharType //equals wchar_t StringTraits ::CharType //equals wchar_t StringTraits::isStringLike; //equals "true" StringTraits ::isStringLike; //equals "false" StringTraits::isStringClass //equals "true" StringTraits ::isStringClass //equals "false" */ template bool startsWith(const S& str, const T& prefix); //both S and T can be strings or char/wchar_t arrays or simple char/wchar_t template bool endsWith (const S& str, const T& postfix); // template S afterLast (const S& str, const T& ch); //returns the whole string if ch not found template S beforeLast (const S& str, const T& ch); //returns empty string if ch not found template S afterFirst (const S& str, const T& ch); //returns empty string if ch not found template S beforeFirst(const S& str, const T& ch); //returns the whole string if ch not found template std::vector split(const S& str, const T& delimiter); template void truncate(S& str, size_t newLen); template void replace(S& str, const T& old, const U& replacement, bool replaceAll = true); template void trim(S& str, bool fromLeft = true, bool fromRight = true); //high-performance conversion from numbers to strings template S toString(const Num& number); template Num toNumber(const S& str); //string to string conversion: converst string-like type into compatible target string class template T cvrtString(const S& str); //---------------------- implementation ---------------------- template inline size_t cStringLength(const C* str) //strlen() { assert_static((Loki::IsSameType::value || Loki::IsSameType::value)); size_t len = 0; while (*str++ != 0) ++len; return len; } template <> inline bool cStringIsWhiteSpace(char ch) { //caveat 1: std::isspace() takes an int, but expects an unsigned char //caveat 2: some parts of UTF-8 chars are erroneously seen as whitespace, e.g. the a0 from "\xec\x8b\xa0" (MSVC) return static_cast(ch) < 128 && std::isspace(static_cast(ch)) != 0; } template <> inline bool cStringIsWhiteSpace(unsigned char ch) { return cStringIsWhiteSpace(ch); } template <> inline bool cStringIsWhiteSpace(signed char ch) { return cStringIsWhiteSpace(ch); } template <> inline bool cStringIsWhiteSpace(wchar_t ch) { return std::iswspace(ch) != 0; } template <> inline bool cStringIsDigit(char ch) { return std::isdigit(static_cast(ch)) != 0; //caveat: takes an int, but expects an unsigned char } template <> inline bool cStringIsDigit(wchar_t ch) { return std::iswdigit(ch) != 0; } namespace implementation { template struct UnArray { typedef T NonArrayType; }; template struct UnArray { typedef T NonArrayType; }; template struct UnPointer { typedef T NonPtrType; }; template struct UnPointer { typedef T NonPtrType; }; template struct UnReference { typedef T NonRefType; }; template struct UnReference { typedef T NonRefType; }; template class HasValueType { typedef char Yes[1]; typedef char No [2]; template class HelperTp {}; //detect presence of a member type called value_type template static Yes& hasMemberValueType(HelperTp*); template static No& hasMemberValueType(...); public: enum { Result = sizeof(hasMemberValueType(NULL)) == sizeof(Yes) }; }; template class HasStringMembers { public: enum { Result = false }; }; template class HasStringMembers { typedef char Yes[1]; typedef char No [2]; //detect presence of member functions (without specific restriction on return type, within T or one of it's base classes) template class HelperFn {}; struct Fallback { int c_str; int length; }; template struct Helper2 : public U, public Fallback {}; //U must be a class-type! //we don't know the exact declaration of the member attribute (may be in base class), but we know what NOT to expect: template static No& hasMemberCstr(HelperFn::c_str>*); template static Yes& hasMemberCstr(...); template static No& hasMemberLength(HelperFn::length>*); template static Yes& hasMemberLength(...); public: enum { Result = sizeof(hasMemberCstr (NULL)) == sizeof(Yes) && sizeof(hasMemberLength(NULL)) == sizeof(Yes) }; }; template struct StringTraits2 { typedef Loki::EmptyType Result; }; //"StringTraits2": fix some VS bug with namespace and partial template specialization template struct StringTraits2 { typedef typename S::value_type Result; }; template <> struct StringTraits2 { typedef char Result; }; template <> struct StringTraits2 { typedef wchar_t Result; }; } template struct StringTraits { private: typedef typename implementation::UnReference::NonRefType NonRefType; typedef typename Loki::TypeTraits::NonConstType UndecoratedType; typedef typename implementation::UnArray::NonArrayType NonArrayType; typedef typename implementation::UnPointer::NonPtrType NonPtrType; typedef typename Loki::TypeTraits::NonConstType NonConstValType; //handle "const char* const" public: enum { isStringClass = implementation::HasStringMembers::Result>::Result }; typedef typename implementation::StringTraits2::Result CharType; enum { isStringLike = Loki::IsSameType::value || Loki::IsSameType::value }; }; template inline const typename StringTraits::CharType* strBegin(const S& str, typename S::value_type dummy = 0) { return str.c_str(); } //SFINAE: T must be a "string" template inline const typename StringTraits::CharType* strBegin(const Char* str) { return str; } inline const char* strBegin(const char& ch) { return &ch; } inline const wchar_t* strBegin(const wchar_t& ch) { return &ch; } template inline size_t strLength(const S& str, typename S::value_type dummy = 0) { return str.length(); } //SFINAE: T must be a "string" template inline size_t strLength(const Char* str) { return cStringLength(str); } inline size_t strLength(char) { return 1; } inline size_t strLength(wchar_t) { return 1; } template inline bool startsWith(const S& str, const T& prefix) { assert_static(StringTraits::isStringLike); assert_static(StringTraits::isStringLike); const size_t pfLength = strLength(prefix); if (strLength(str) < pfLength) return false; return std::equal(strBegin(str), strBegin(str) + pfLength, strBegin(prefix)); } template inline bool endsWith(const S& str, const T& postfix) { assert_static(StringTraits::isStringLike); assert_static(StringTraits::isStringLike); size_t strLen = strLength(str); size_t pfLen = strLength(postfix); if (strLen < pfLen) return false; typedef typename StringTraits::CharType CharType; const CharType* cmpBegin = strBegin(str) + strLen - pfLen; return std::equal(cmpBegin, cmpBegin + pfLen, strBegin(postfix)); } // get all characters after the last occurence of ch // (returns the whole string if ch not found) template inline S afterLast(const S& str, const T& ch) { assert_static(StringTraits::isStringLike); const size_t pos = str.rfind(ch); if (pos != S::npos) { size_t chLen = strLength(ch); return S(str.c_str() + pos + chLen, str.length() - pos - chLen); } else return str; } // get all characters before the last occurence of ch // (returns empty string if ch not found) template inline S beforeLast(const S& str, const T& ch) { assert_static(StringTraits::isStringLike); const size_t pos = str.rfind(ch); if (pos != S::npos) return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! else return S(); } //returns empty string if ch not found template inline S afterFirst(const S& str, const T& ch) { assert_static(StringTraits::isStringLike); const size_t pos = str.find(ch); if (pos != S::npos) { size_t chLen = strLength(ch); return S(str.c_str() + pos + chLen, str.length() - pos - chLen); } else return S(); } //returns the whole string if ch not found template inline S beforeFirst(const S& str, const T& ch) { assert_static(StringTraits::isStringLike); const size_t pos = str.find(ch); if (pos != S::npos) return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! else return str; } template inline std::vector split(const S& str, const T& delimiter) { assert_static(StringTraits::isStringLike); std::vector output; size_t bockStart = 0; size_t delimLen = strLength(delimiter); if (delimLen != 0) { for (size_t blockEnd = str.find(delimiter, bockStart); blockEnd != S::npos; bockStart = blockEnd + delimLen, blockEnd = str.find(delimiter, bockStart)) { output.push_back(S(str.c_str() + bockStart, blockEnd - bockStart)); } } output.push_back(S(str.c_str() + bockStart, str.length() - bockStart)); return output; } template inline void truncate(S& str, size_t newLen) { if (newLen < str.length()) str.resize(newLen); } template inline void replace(S& str, const T& old, const U& replacement, bool replaceAll) { assert_static(StringTraits::isStringLike); assert_static(StringTraits::isStringLike); size_t pos = 0; size_t oldLen = strLength(old); size_t repLen = strLength(replacement); while ((pos = str.find(old, pos)) != S::npos) { str.replace(pos, oldLen, replacement); pos += repLen; //move past the string that was replaced if (!replaceAll) break; } } template inline void trim(S& str, bool fromLeft, bool fromRight) { assert(fromLeft || fromRight); typedef typename S::value_type CharType; const CharType* newBegin = str.c_str(); const CharType* newEnd = str.c_str() + str.length(); if (fromRight) while (newBegin != newEnd && cStringIsWhiteSpace(newEnd[-1])) --newEnd; if (fromLeft) while (newBegin != newEnd && cStringIsWhiteSpace(*newBegin)) ++newBegin; const size_t newLength = newEnd - newBegin; if (newLength != str.length()) { if (newBegin != str.c_str()) str = S(newBegin, newLength); //minor inefficiency: in case "str" is not shared, we could save an allocation and do a memory move only else str.resize(newLength); } } namespace implementation { template struct CnvtStringToString { T convert(const S& src) const { return T(strBegin(src), strLength(src)); } }; template struct CnvtStringToString //perf: we don't need a deep copy if string types match { const S& convert(const S& src) const { return src; } }; } template inline T cvrtString(const S& str) { return implementation::CnvtStringToString().convert(str); } namespace implementation { enum NumberType { NUM_TYPE_SIGNED_INT, NUM_TYPE_UNSIGNED_INT, NUM_TYPE_FLOATING_POINT, NUM_TYPE_OTHER, }; template struct CvrtNumberToString { S convert(const Num& number) const //default number to string conversion using streams: convenient, but SLOW, SLOW, SLOW!!!! (~ factor of 20) { typedef typename StringTraits::CharType CharType; std::basic_ostringstream ss; ss << number; return cvrtString(ss.str()); } }; template struct CvrtNumberToString { S convert(const Num& number) const { return convertFloat(number, typename StringTraits::CharType()); } private: S convertFloat(const Num& number, char) const { char buffer[50]; int charsWritten = std::sprintf(buffer, "%f", static_cast(number)); return charsWritten > 0 ? S(buffer, charsWritten) : S(); } S convertFloat(const Num& number, wchar_t) const { wchar_t buffer[50]; #ifdef __MINGW32__ int charsWritten = ::swprintf(buffer, L"%f", static_cast(number)); //MinGW does not comply to the C standard! #else int charsWritten = std::swprintf(buffer, 50, L"%f", static_cast(number)); #endif return charsWritten > 0 ? S(buffer, charsWritten) : S(); } }; /* perf: integer to string: (executed 10 mio. times) std::stringstream - 14796 ms std::sprintf - 3086 ms hand coded - 778 ms */ template inline S formatInteger(Num n, bool hasMinus) { assert(n >= 0); S output; do { output += '0' + n % 10; n /= 10; } while (n != 0); if (hasMinus) output += '-'; std::reverse(output.begin(), output.end()); return output; } template struct CvrtNumberToString { S convert(const Num& number) const { return formatInteger(number < 0 ? -number : number, number < 0); } }; template struct CvrtNumberToString { S convert(const Num& number) const { return formatInteger(number, false); } }; //-------------------------------------------------------------------------------- template struct CvrtStringToNumber { Num convert(const S& str) const //default string to number conversion using streams: convenient, but SLOW { typedef typename StringTraits::CharType CharType; Num number = 0; std::basic_istringstream(cvrtString >(str)) >> number; return number; } }; template struct CvrtStringToNumber { Num convert(const S& str) const { return convertFloat(strBegin(str)); } private: Num convertFloat(const char* str) const { return std::strtod(str, NULL); } Num convertFloat(const wchar_t* str) const { return std::wcstod(str, NULL); } }; template Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic { typedef typename StringTraits::CharType CharType; const CharType* first = strBegin(str); const CharType* last = first + strLength(str); while (first != last && cStringIsWhiteSpace(*first)) //skip leading whitespace ++first; hasMinusSign = false; //handle minus sign if (first != last) { if (*first == '-') { hasMinusSign = true; ++first; } else if (*first == '+') ++first; } Num number = 0; for (const CharType* iter = first; iter != last; ++iter) { const CharType c = *iter; if ('0' <= c && c <= '9') { number *= 10; number += c - '0'; } else { assert(std::find_if(iter, last, std::not1(std::ptr_fun(&cStringIsWhiteSpace))) == last); //rest of string should contain whitespace only break; } } return number; } template struct CvrtStringToNumber { Num convert(const S& str) const { bool hasMinusSign = false; //handle minus sign const Num number = extractInteger(str, hasMinusSign); return hasMinusSign ? -number : number; } }; template struct CvrtStringToNumber { Num convert(const S& str) const //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic { bool hasMinusSign = false; //handle minus sign const Num number = extractInteger(str, hasMinusSign); if (hasMinusSign) { assert(false); return 0U; } return number; } }; } template inline S toString(const Num& number) //convert number to string the C++ way { using namespace implementation; return CvrtNumberToString::isSignedInt ? NUM_TYPE_SIGNED_INT : Loki::TypeTraits::isUnsignedInt ? NUM_TYPE_UNSIGNED_INT : Loki::TypeTraits::isFloat ? NUM_TYPE_FLOATING_POINT : NUM_TYPE_OTHER >().convert(number); } template inline Num toNumber(const S& str) //convert string to number the C++ way { using namespace implementation; return CvrtStringToNumber::isSignedInt ? NUM_TYPE_SIGNED_INT : Loki::TypeTraits::isUnsignedInt ? NUM_TYPE_UNSIGNED_INT : Loki::TypeTraits::isFloat ? NUM_TYPE_FLOATING_POINT : NUM_TYPE_OTHER >().convert(str); } } #endif //STRING_TOOLS_HEADER_213458973046