diff options
author | Daniel Wilhelm <shieldwed@outlook.com> | 2017-04-20 16:55:28 -0600 |
---|---|---|
committer | Daniel Wilhelm <shieldwed@outlook.com> | 2017-04-20 16:59:56 -0600 |
commit | 823740e1ffa2b3bd39f8dea8062f5c5a0d9c741b (patch) | |
tree | 63222010af3b90a36f29c1c9c360116973cdff38 /zen/utf.h | |
parent | add .gitattributes for less line ending hassles (diff) | |
download | FreeFileSync-823740e1ffa2b3bd39f8dea8062f5c5a0d9c741b.tar.gz FreeFileSync-823740e1ffa2b3bd39f8dea8062f5c5a0d9c741b.tar.bz2 FreeFileSync-823740e1ffa2b3bd39f8dea8062f5c5a0d9c741b.zip |
normalize most lineendings
Diffstat (limited to 'zen/utf.h')
-rwxr-xr-x | zen/utf.h | 774 |
1 files changed, 387 insertions, 387 deletions
@@ -1,387 +1,387 @@ -// *****************************************************************************
-// * This file is part of the FreeFileSync project. It is distributed under *
-// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 *
-// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved *
-// *****************************************************************************
-
-#ifndef UTF_H_01832479146991573473545
-#define UTF_H_01832479146991573473545
-
-#include <cstdint>
-#include <iterator>
-#include "string_tools.h" //copyStringTo
-#include "optional.h"
-
-namespace zen
-{
-//convert all(!) char- and wchar_t-based "string-like" objects applying a UTF8 conversions (but only if necessary!)
-template <class TargetString, class SourceString>
-TargetString utfTo(const SourceString& str);
-
-const char BYTE_ORDER_MARK_UTF8[] = "\xEF\xBB\xBF";
-
-template <class UtfString>
-bool isValidUtf(const UtfString& str); //check for UTF-8 encoding errors
-
-//access unicode characters in UTF-encoded string (char- or wchar_t-based)
-template <class UtfString>
-size_t unicodeLength(const UtfString& str); //return number of code points for UTF-encoded string
-
-template <class UtfString>
-UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t uniPosLast);
-
-
-
-
-
-
-
-
-
-//----------------------- implementation ----------------------------------
-namespace implementation
-{
-using CodePoint = uint32_t;
-using Char16 = uint16_t;
-using Char8 = uint8_t;
-
-const CodePoint LEAD_SURROGATE = 0xd800;
-const CodePoint TRAIL_SURROGATE = 0xdc00; //== LEAD_SURROGATE_MAX + 1
-const CodePoint TRAIL_SURROGATE_MAX = 0xdfff;
-
-const CodePoint REPLACEMENT_CHAR = 0xfffd;
-const CodePoint CODE_POINT_MAX = 0x10ffff;
-
-
-template <class Function> inline
-void codePointToUtf16(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char16
-{
- //http://en.wikipedia.org/wiki/UTF-16
- if (cp < LEAD_SURROGATE)
- writeOutput(static_cast<Char16>(cp));
- else if (cp <= TRAIL_SURROGATE_MAX) //invalid code point
- codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
- else if (cp < 0x10000)
- writeOutput(static_cast<Char16>(cp));
- else if (cp <= CODE_POINT_MAX)
- {
- cp -= 0x10000;
- writeOutput(static_cast<Char16>( LEAD_SURROGATE + (cp >> 10)));
- writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0x3ff)));
- }
- else //invalid code point
- codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16
-}
-
-
-inline
-size_t getUtf16Len(Char16 ch) //ch must be first code unit! returns 0 on error!
-{
- if (ch < LEAD_SURROGATE)
- return 1;
- else if (ch < TRAIL_SURROGATE)
- return 2;
- else if (ch <= TRAIL_SURROGATE_MAX)
- return 0; //unexpected trail surrogate!
- else
- return 1;
-}
-
-
-class Utf16Decoder
-{
-public:
- Utf16Decoder(const Char16* str, size_t len) : it_(str), last_(str + len) {}
-
- Opt<CodePoint> getNext()
- {
- if (it_ == last_)
- return NoValue();
-
- const Char16 ch = *it_++;
- CodePoint cp = ch;
- switch (getUtf16Len(ch))
- {
- case 0: //invalid utf16 character
- cp = REPLACEMENT_CHAR;
- break;
- case 1:
- break;
- case 2:
- decodeTrail(cp);
- break;
- }
- return cp;
- }
-
-private:
- void decodeTrail(CodePoint& cp)
- {
- if (it_ != last_) //trail surrogate expected!
- {
- const Char16 ch = *it_;
- if (TRAIL_SURROGATE <= ch && ch <= TRAIL_SURROGATE_MAX) //trail surrogate expected!
- {
- cp = ((cp - LEAD_SURROGATE) << 10) + (ch - TRAIL_SURROGATE) + 0x10000;
- ++it_;
- return;
- }
- }
- cp = REPLACEMENT_CHAR;
- }
-
- const Char16* it_;
- const Char16* const last_;
-};
-
-//----------------------------------------------------------------------------------------------------------------
-
-template <class Function> inline
-void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char8
-{
- //http://en.wikipedia.org/wiki/UTF-8
- //assert(cp < LEAD_SURROGATE || TRAIL_SURROGATE_MAX < cp); //code points [0xd800, 0xdfff] are reserved for UTF-16 and *should* not be encoded in UTF-8
-
- if (cp < 0x80)
- writeOutput(static_cast<Char8>(cp));
- else if (cp < 0x800)
- {
- writeOutput(static_cast<Char8>((cp >> 6 ) | 0xc0));
- writeOutput(static_cast<Char8>((cp & 0x3f) | 0x80));
- }
- else if (cp < 0x10000)
- {
- writeOutput(static_cast<Char8>( (cp >> 12 ) | 0xe0));
- writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80));
- writeOutput(static_cast<Char8>( (cp & 0x3f) | 0x80));
- }
- else if (cp <= CODE_POINT_MAX)
- {
- writeOutput(static_cast<Char8>( (cp >> 18 ) | 0xf0));
- writeOutput(static_cast<Char8>(((cp >> 12) & 0x3f) | 0x80));
- writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80));
- writeOutput(static_cast<Char8>( (cp & 0x3f) | 0x80));
- }
- else //invalid code point
- codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte utf8
-}
-
-
-inline
-size_t getUtf8Len(Char8 ch) //ch must be first code unit! returns 0 on error!
-{
- if (ch < 0x80)
- return 1;
- if (ch >> 5 == 0x6)
- return 2;
- if (ch >> 4 == 0xe)
- return 3;
- if (ch >> 3 == 0x1e)
- return 4;
- return 0; //innvalid begin of UTF8 encoding
-}
-
-
-class Utf8Decoder
-{
-public:
- Utf8Decoder(const Char8* str, size_t len) : it_(str), last_(str + len) {}
-
- Opt<CodePoint> getNext()
- {
- if (it_ == last_)
- return NoValue();
-
- const Char8 ch = *it_++;
- CodePoint cp = ch;
- switch (getUtf8Len(ch))
- {
- case 0: //invalid utf8 character
- cp = REPLACEMENT_CHAR;
- break;
- case 1:
- break;
- case 2:
- cp &= 0x1f;
- decodeTrail(cp);
- break;
- case 3:
- cp &= 0xf;
- if (decodeTrail(cp))
- decodeTrail(cp);
- break;
- case 4:
- cp &= 0x7;
- if (decodeTrail(cp))
- if (decodeTrail(cp))
- decodeTrail(cp);
- if (cp > CODE_POINT_MAX) cp = REPLACEMENT_CHAR;
- break;
- }
- return cp;
- }
-
-private:
- bool decodeTrail(CodePoint& cp)
- {
- if (it_ != last_) //trail surrogate expected!
- {
- const Char8 ch = *it_;
- if (ch >> 6 == 0x2) //trail surrogate expected!
- {
- cp = (cp << 6) + (ch & 0x3f);
- ++it_;
- return true;
- }
- }
- cp = REPLACEMENT_CHAR;
- return false;
- }
-
- const Char8* it_;
- const Char8* const last_;
-};
-
-//----------------------------------------------------------------------------------------------------------------
-
-template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<1>) { codePointToUtf8 (cp, writeOutput); } //UTF8-char
-template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<2>) { codePointToUtf16(cp, writeOutput); } //Windows: UTF16-wchar_t
-template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<4>) { writeOutput(cp); } //other OS: UTF32-wchar_t
-
-template <class CharType, class Function> inline
-void codePointToUtf(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a CharType
-{
- return codePointToUtf(cp, writeOutput, Int2Type<sizeof(CharType)>());
-}
-
-//----------------------------------------------------------------------------------------------------------------
-
-template <class CharType, int charSize>
-class UtfDecoderImpl;
-
-
-template <class CharType>
-class UtfDecoderImpl<CharType, 1> //UTF8-char
-{
-public:
- UtfDecoderImpl(const CharType* str, size_t len) : decoder_(reinterpret_cast<const Char8*>(str), len) {}
- Opt<CodePoint> getNext() { return decoder_.getNext(); }
-private:
- Utf8Decoder decoder_;
-};
-
-
-template <class CharType>
-class UtfDecoderImpl<CharType, 2> //Windows: UTF16-wchar_t
-{
-public:
- UtfDecoderImpl(const CharType* str, size_t len) : decoder_(reinterpret_cast<const Char16*>(str), len) {}
- Opt<CodePoint> getNext() { return decoder_.getNext(); }
-private:
- Utf16Decoder decoder_;
-};
-
-
-template <class CharType>
-class UtfDecoderImpl<CharType, 4> //other OS: UTF32-wchar_t
-{
-public:
- UtfDecoderImpl(const CharType* str, size_t len) : it_(reinterpret_cast<const CodePoint*>(str)), last_(it_ + len) {}
- Opt<CodePoint> getNext()
- {
- if (it_ == last_)
- return NoValue();
- return *it_++;
- }
-private:
- const CodePoint* it_;
- const CodePoint* last_;
-};
-
-
-template <class CharType>
-using UtfDecoder = UtfDecoderImpl<CharType, sizeof(CharType)>;
-}
-
-//-------------------------------------------------------------------------------------------
-
-template <class UtfString> inline
-bool isValidUtf(const UtfString& str)
-{
- using namespace implementation;
-
- UtfDecoder<typename GetCharType<UtfString>::Type> decoder(strBegin(str), strLength(str));
- while (Opt<CodePoint> cp = decoder.getNext())
- if (*cp == REPLACEMENT_CHAR)
- return false;
-
- return true;
-}
-
-
-template <class UtfString> inline
-size_t unicodeLength(const UtfString& str) //return number of code points (+ correctly handle broken UTF encoding)
-{
- size_t uniLen = 0;
- implementation::UtfDecoder<typename GetCharType<UtfString>::Type> decoder(strBegin(str), strLength(str));
- while (decoder.getNext())
- ++uniLen;
- return uniLen;
-}
-
-
-template <class UtfString> inline
-UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t uniPosLast) //return position of unicode char in UTF-encoded string
-{
- assert(uniPosFirst <= uniPosLast && uniPosLast <= unicodeLength(str));
- using namespace implementation;
- using CharType = typename GetCharType<UtfString>::Type;
- UtfString output;
- if (uniPosFirst >= uniPosLast) //optimize for empty range
- return output;
-
- UtfDecoder<CharType> decoder(strBegin(str), strLength(str));
- for (size_t uniPos = 0; Opt<CodePoint> cp = decoder.getNext(); ++uniPos) //[!] declaration in condition part of the for-loop
- if (uniPosFirst <= uniPos)
- {
- if (uniPos >= uniPosLast)
- break;
- codePointToUtf<CharType>(*cp, [&](CharType c) { output += c; });
- }
- return output;
-}
-
-//-------------------------------------------------------------------------------------------
-
-namespace implementation
-{
-template <class TargetString, class SourceString> inline
-TargetString utfTo(const SourceString& str, FalseType)
-{
- using CharSrc = typename GetCharType<SourceString>::Type;
- using CharTrg = typename GetCharType<TargetString>::Type;
- static_assert(sizeof(CharSrc) != sizeof(CharTrg), "no UTF-conversion needed");
-
- TargetString output;
-
- UtfDecoder<CharSrc> decoder(strBegin(str), strLength(str));
- while (Opt<CodePoint> cp = decoder.getNext())
- codePointToUtf<CharTrg>(*cp, [&](CharTrg c) { output += c; });
-
- return output;
-}
-
-
-template <class TargetString, class SourceString> inline
-TargetString utfTo(const SourceString& str, TrueType) { return copyStringTo<TargetString>(str); }
-}
-
-
-template <class TargetString, class SourceString> inline
-TargetString utfTo(const SourceString& str)
-{
- return implementation::utfTo<TargetString>(str, StaticBool<sizeof(typename GetCharType<SourceString>::Type) == sizeof(typename GetCharType<TargetString>::Type)>());
-}
-}
-
-#endif //UTF_H_01832479146991573473545
+// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef UTF_H_01832479146991573473545 +#define UTF_H_01832479146991573473545 + +#include <cstdint> +#include <iterator> +#include "string_tools.h" //copyStringTo +#include "optional.h" + +namespace zen +{ +//convert all(!) char- and wchar_t-based "string-like" objects applying a UTF8 conversions (but only if necessary!) +template <class TargetString, class SourceString> +TargetString utfTo(const SourceString& str); + +const char BYTE_ORDER_MARK_UTF8[] = "\xEF\xBB\xBF"; + +template <class UtfString> +bool isValidUtf(const UtfString& str); //check for UTF-8 encoding errors + +//access unicode characters in UTF-encoded string (char- or wchar_t-based) +template <class UtfString> +size_t unicodeLength(const UtfString& str); //return number of code points for UTF-encoded string + +template <class UtfString> +UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t uniPosLast); + + + + + + + + + +//----------------------- implementation ---------------------------------- +namespace implementation +{ +using CodePoint = uint32_t; +using Char16 = uint16_t; +using Char8 = uint8_t; + +const CodePoint LEAD_SURROGATE = 0xd800; +const CodePoint TRAIL_SURROGATE = 0xdc00; //== LEAD_SURROGATE_MAX + 1 +const CodePoint TRAIL_SURROGATE_MAX = 0xdfff; + +const CodePoint REPLACEMENT_CHAR = 0xfffd; +const CodePoint CODE_POINT_MAX = 0x10ffff; + + +template <class Function> inline +void codePointToUtf16(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char16 +{ + //http://en.wikipedia.org/wiki/UTF-16 + if (cp < LEAD_SURROGATE) + writeOutput(static_cast<Char16>(cp)); + else if (cp <= TRAIL_SURROGATE_MAX) //invalid code point + codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16 + else if (cp < 0x10000) + writeOutput(static_cast<Char16>(cp)); + else if (cp <= CODE_POINT_MAX) + { + cp -= 0x10000; + writeOutput(static_cast<Char16>( LEAD_SURROGATE + (cp >> 10))); + writeOutput(static_cast<Char16>(TRAIL_SURROGATE + (cp & 0x3ff))); + } + else //invalid code point + codePointToUtf16(REPLACEMENT_CHAR, writeOutput); //resolves to 1-character utf16 +} + + +inline +size_t getUtf16Len(Char16 ch) //ch must be first code unit! returns 0 on error! +{ + if (ch < LEAD_SURROGATE) + return 1; + else if (ch < TRAIL_SURROGATE) + return 2; + else if (ch <= TRAIL_SURROGATE_MAX) + return 0; //unexpected trail surrogate! + else + return 1; +} + + +class Utf16Decoder +{ +public: + Utf16Decoder(const Char16* str, size_t len) : it_(str), last_(str + len) {} + + Opt<CodePoint> getNext() + { + if (it_ == last_) + return NoValue(); + + const Char16 ch = *it_++; + CodePoint cp = ch; + switch (getUtf16Len(ch)) + { + case 0: //invalid utf16 character + cp = REPLACEMENT_CHAR; + break; + case 1: + break; + case 2: + decodeTrail(cp); + break; + } + return cp; + } + +private: + void decodeTrail(CodePoint& cp) + { + if (it_ != last_) //trail surrogate expected! + { + const Char16 ch = *it_; + if (TRAIL_SURROGATE <= ch && ch <= TRAIL_SURROGATE_MAX) //trail surrogate expected! + { + cp = ((cp - LEAD_SURROGATE) << 10) + (ch - TRAIL_SURROGATE) + 0x10000; + ++it_; + return; + } + } + cp = REPLACEMENT_CHAR; + } + + const Char16* it_; + const Char16* const last_; +}; + +//---------------------------------------------------------------------------------------------------------------- + +template <class Function> inline +void codePointToUtf8(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a Char8 +{ + //http://en.wikipedia.org/wiki/UTF-8 + //assert(cp < LEAD_SURROGATE || TRAIL_SURROGATE_MAX < cp); //code points [0xd800, 0xdfff] are reserved for UTF-16 and *should* not be encoded in UTF-8 + + if (cp < 0x80) + writeOutput(static_cast<Char8>(cp)); + else if (cp < 0x800) + { + writeOutput(static_cast<Char8>((cp >> 6 ) | 0xc0)); + writeOutput(static_cast<Char8>((cp & 0x3f) | 0x80)); + } + else if (cp < 0x10000) + { + writeOutput(static_cast<Char8>( (cp >> 12 ) | 0xe0)); + writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80)); + writeOutput(static_cast<Char8>( (cp & 0x3f) | 0x80)); + } + else if (cp <= CODE_POINT_MAX) + { + writeOutput(static_cast<Char8>( (cp >> 18 ) | 0xf0)); + writeOutput(static_cast<Char8>(((cp >> 12) & 0x3f) | 0x80)); + writeOutput(static_cast<Char8>(((cp >> 6) & 0x3f) | 0x80)); + writeOutput(static_cast<Char8>( (cp & 0x3f) | 0x80)); + } + else //invalid code point + codePointToUtf8(REPLACEMENT_CHAR, writeOutput); //resolves to 3-byte utf8 +} + + +inline +size_t getUtf8Len(Char8 ch) //ch must be first code unit! returns 0 on error! +{ + if (ch < 0x80) + return 1; + if (ch >> 5 == 0x6) + return 2; + if (ch >> 4 == 0xe) + return 3; + if (ch >> 3 == 0x1e) + return 4; + return 0; //innvalid begin of UTF8 encoding +} + + +class Utf8Decoder +{ +public: + Utf8Decoder(const Char8* str, size_t len) : it_(str), last_(str + len) {} + + Opt<CodePoint> getNext() + { + if (it_ == last_) + return NoValue(); + + const Char8 ch = *it_++; + CodePoint cp = ch; + switch (getUtf8Len(ch)) + { + case 0: //invalid utf8 character + cp = REPLACEMENT_CHAR; + break; + case 1: + break; + case 2: + cp &= 0x1f; + decodeTrail(cp); + break; + case 3: + cp &= 0xf; + if (decodeTrail(cp)) + decodeTrail(cp); + break; + case 4: + cp &= 0x7; + if (decodeTrail(cp)) + if (decodeTrail(cp)) + decodeTrail(cp); + if (cp > CODE_POINT_MAX) cp = REPLACEMENT_CHAR; + break; + } + return cp; + } + +private: + bool decodeTrail(CodePoint& cp) + { + if (it_ != last_) //trail surrogate expected! + { + const Char8 ch = *it_; + if (ch >> 6 == 0x2) //trail surrogate expected! + { + cp = (cp << 6) + (ch & 0x3f); + ++it_; + return true; + } + } + cp = REPLACEMENT_CHAR; + return false; + } + + const Char8* it_; + const Char8* const last_; +}; + +//---------------------------------------------------------------------------------------------------------------- + +template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<1>) { codePointToUtf8 (cp, writeOutput); } //UTF8-char +template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<2>) { codePointToUtf16(cp, writeOutput); } //Windows: UTF16-wchar_t +template <class Function> inline void codePointToUtf(CodePoint cp, Function writeOutput, Int2Type<4>) { writeOutput(cp); } //other OS: UTF32-wchar_t + +template <class CharType, class Function> inline +void codePointToUtf(CodePoint cp, Function writeOutput) //"writeOutput" is a unary function taking a CharType +{ + return codePointToUtf(cp, writeOutput, Int2Type<sizeof(CharType)>()); +} + +//---------------------------------------------------------------------------------------------------------------- + +template <class CharType, int charSize> +class UtfDecoderImpl; + + +template <class CharType> +class UtfDecoderImpl<CharType, 1> //UTF8-char +{ +public: + UtfDecoderImpl(const CharType* str, size_t len) : decoder_(reinterpret_cast<const Char8*>(str), len) {} + Opt<CodePoint> getNext() { return decoder_.getNext(); } +private: + Utf8Decoder decoder_; +}; + + +template <class CharType> +class UtfDecoderImpl<CharType, 2> //Windows: UTF16-wchar_t +{ +public: + UtfDecoderImpl(const CharType* str, size_t len) : decoder_(reinterpret_cast<const Char16*>(str), len) {} + Opt<CodePoint> getNext() { return decoder_.getNext(); } +private: + Utf16Decoder decoder_; +}; + + +template <class CharType> +class UtfDecoderImpl<CharType, 4> //other OS: UTF32-wchar_t +{ +public: + UtfDecoderImpl(const CharType* str, size_t len) : it_(reinterpret_cast<const CodePoint*>(str)), last_(it_ + len) {} + Opt<CodePoint> getNext() + { + if (it_ == last_) + return NoValue(); + return *it_++; + } +private: + const CodePoint* it_; + const CodePoint* last_; +}; + + +template <class CharType> +using UtfDecoder = UtfDecoderImpl<CharType, sizeof(CharType)>; +} + +//------------------------------------------------------------------------------------------- + +template <class UtfString> inline +bool isValidUtf(const UtfString& str) +{ + using namespace implementation; + + UtfDecoder<typename GetCharType<UtfString>::Type> decoder(strBegin(str), strLength(str)); + while (Opt<CodePoint> cp = decoder.getNext()) + if (*cp == REPLACEMENT_CHAR) + return false; + + return true; +} + + +template <class UtfString> inline +size_t unicodeLength(const UtfString& str) //return number of code points (+ correctly handle broken UTF encoding) +{ + size_t uniLen = 0; + implementation::UtfDecoder<typename GetCharType<UtfString>::Type> decoder(strBegin(str), strLength(str)); + while (decoder.getNext()) + ++uniLen; + return uniLen; +} + + +template <class UtfString> inline +UtfString getUnicodeSubstring(const UtfString& str, size_t uniPosFirst, size_t uniPosLast) //return position of unicode char in UTF-encoded string +{ + assert(uniPosFirst <= uniPosLast && uniPosLast <= unicodeLength(str)); + using namespace implementation; + using CharType = typename GetCharType<UtfString>::Type; + UtfString output; + if (uniPosFirst >= uniPosLast) //optimize for empty range + return output; + + UtfDecoder<CharType> decoder(strBegin(str), strLength(str)); + for (size_t uniPos = 0; Opt<CodePoint> cp = decoder.getNext(); ++uniPos) //[!] declaration in condition part of the for-loop + if (uniPosFirst <= uniPos) + { + if (uniPos >= uniPosLast) + break; + codePointToUtf<CharType>(*cp, [&](CharType c) { output += c; }); + } + return output; +} + +//------------------------------------------------------------------------------------------- + +namespace implementation +{ +template <class TargetString, class SourceString> inline +TargetString utfTo(const SourceString& str, FalseType) +{ + using CharSrc = typename GetCharType<SourceString>::Type; + using CharTrg = typename GetCharType<TargetString>::Type; + static_assert(sizeof(CharSrc) != sizeof(CharTrg), "no UTF-conversion needed"); + + TargetString output; + + UtfDecoder<CharSrc> decoder(strBegin(str), strLength(str)); + while (Opt<CodePoint> cp = decoder.getNext()) + codePointToUtf<CharTrg>(*cp, [&](CharTrg c) { output += c; }); + + return output; +} + + +template <class TargetString, class SourceString> inline +TargetString utfTo(const SourceString& str, TrueType) { return copyStringTo<TargetString>(str); } +} + + +template <class TargetString, class SourceString> inline +TargetString utfTo(const SourceString& str) +{ + return implementation::utfTo<TargetString>(str, StaticBool<sizeof(typename GetCharType<SourceString>::Type) == sizeof(typename GetCharType<TargetString>::Type)>()); +} +} + +#endif //UTF_H_01832479146991573473545 |