From 8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b Mon Sep 17 00:00:00 2001 From: Daniel Wilhelm Date: Sat, 29 Oct 2016 11:34:19 +0200 Subject: 8.4 --- zen/utf.h | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'zen/utf.h') diff --git a/zen/utf.h b/zen/utf.h index c0b2b4af..16136349 100644 --- a/zen/utf.h +++ b/zen/utf.h @@ -1,8 +1,8 @@ -// ************************************************************************** -// * This file is part of the FreeFileSync project. It is distributed under * -// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * -// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved * -// ************************************************************************** +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** #ifndef UTF_H_01832479146991573473545 #define UTF_H_01832479146991573473545 @@ -19,6 +19,9 @@ TargetString utfCvrtTo(const SourceString& str); const char BYTE_ORDER_MARK_UTF8[] = "\xEF\xBB\xBF"; +template +bool isValidUtf8(const CharString& str); //check for UTF-8 encoding errors + //---- explicit conversion: wide <-> utf8 ---- template CharString wideToUtf8(const WideString& str); //example: std::string tmp = wideToUtf8(L"abc"); @@ -53,9 +56,9 @@ size_t findUnicodePos(const UtfString& str, size_t unicodePos); //return positio //----------------------- implementation ---------------------------------- namespace implementation { -typedef std::uint32_t CodePoint; -typedef std::uint16_t Char16; -typedef unsigned char Char8; +using CodePoint = std::uint32_t; +using Char16 = std::uint16_t; +using Char8 = unsigned char; const CodePoint LEAD_SURROGATE = 0xd800; const CodePoint TRAIL_SURROGATE = 0xdc00; //== LEAD_SURROGATE_MAX + 1 @@ -413,6 +416,21 @@ CharString wideToUtf8(const WideString& str, Int2Type<4>) //other OS: convert ut } +template inline +bool isValidUtf8(const CharString& str) +{ + using namespace implementation; + bool valid = true; + utf8ToCodePoint(strBegin(str), strBegin(str) + strLength(str), + [&](CodePoint cp) + { + if (cp == REPLACEMENT_CHAR) + valid = false; //perf: should we use an (expensive) exception for iteration break? + }); + return valid; +} + + template inline WideString utf8ToWide(const CharString& str) { -- cgit