diff options
author | Daniel Wilhelm <shieldwed@outlook.com> | 2016-10-29 11:34:19 +0200 |
---|---|---|
committer | Daniel Wilhelm <shieldwed@outlook.com> | 2016-10-29 11:34:19 +0200 |
commit | 8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b (patch) | |
tree | 48e94273d3ee6b9d755e0081c46d29a387871611 /zen/utf.h | |
parent | 8.3 (diff) | |
download | FreeFileSync-8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b.tar.gz FreeFileSync-8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b.tar.bz2 FreeFileSync-8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b.zip |
8.4
Diffstat (limited to 'zen/utf.h')
-rw-r--r-- | zen/utf.h | 34 |
1 files changed, 26 insertions, 8 deletions
@@ -1,8 +1,8 @@ -// ************************************************************************** -// * This file is part of the FreeFileSync project. It is distributed under * -// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * -// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved * -// ************************************************************************** +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** #ifndef UTF_H_01832479146991573473545 #define UTF_H_01832479146991573473545 @@ -19,6 +19,9 @@ TargetString utfCvrtTo(const SourceString& str); const char BYTE_ORDER_MARK_UTF8[] = "\xEF\xBB\xBF"; +template <class CharString> +bool isValidUtf8(const CharString& str); //check for UTF-8 encoding errors + //---- explicit conversion: wide <-> utf8 ---- template <class CharString, class WideString> CharString wideToUtf8(const WideString& str); //example: std::string tmp = wideToUtf8<std::string>(L"abc"); @@ -53,9 +56,9 @@ size_t findUnicodePos(const UtfString& str, size_t unicodePos); //return positio //----------------------- implementation ---------------------------------- namespace implementation { -typedef std::uint32_t CodePoint; -typedef std::uint16_t Char16; -typedef unsigned char Char8; +using CodePoint = std::uint32_t; +using Char16 = std::uint16_t; +using Char8 = unsigned char; const CodePoint LEAD_SURROGATE = 0xd800; const CodePoint TRAIL_SURROGATE = 0xdc00; //== LEAD_SURROGATE_MAX + 1 @@ -413,6 +416,21 @@ CharString wideToUtf8(const WideString& str, Int2Type<4>) //other OS: convert ut } +template <class CharString> inline +bool isValidUtf8(const CharString& str) +{ + using namespace implementation; + bool valid = true; + utf8ToCodePoint(strBegin(str), strBegin(str) + strLength(str), + [&](CodePoint cp) + { + if (cp == REPLACEMENT_CHAR) + valid = false; //perf: should we use an (expensive) exception for iteration break? + }); + return valid; +} + + template <class WideString, class CharString> inline WideString utf8ToWide(const CharString& str) { |