summary | refs | log | tree | commit | diff
path: root/zen/utf.h
diff options
context:
space:
mode:
author: Daniel Wilhelm <shieldwed@outlook.com>, 2016-10-29 11:34:19 +0200
committer: Daniel Wilhelm <shieldwed@outlook.com>, 2016-10-29 11:34:19 +0200
commit: 8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b (patch)
tree: 48e94273d3ee6b9d755e0081c46d29a387871611 /zen/utf.h
parent: 8.3 (diff)
download: FreeFileSync-8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b.tar.gz
FreeFileSync-8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b.tar.bz2
FreeFileSync-8d66e8a2b8cfe4eef4b946a1ab64354dfd7da00b.zip
8.4
Diffstat (limited to 'zen/utf.h')
-rw-r--r-- zen/utf.h 34
1 files changed, 26 insertions, 8 deletions
diff --git a/zen/utf.h b/zen/utf.h
index c0b2b4af..16136349 100644
--- a/zen/utf.h
+++ b/zen/utf.h
@@ -1,8 +1,8 @@
-// **************************************************************************
-// * This file is part of the FreeFileSync project. It is distributed under *
-// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 *
-// * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved *
-// **************************************************************************
+// *****************************************************************************
+// * This file is part of the FreeFileSync project. It is distributed under *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 *
+// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved *
+// *****************************************************************************
#ifndef UTF_H_01832479146991573473545
#define UTF_H_01832479146991573473545
@@ -19,6 +19,9 @@ TargetString utfCvrtTo(const SourceString& str);
const char BYTE_ORDER_MARK_UTF8[] = "\xEF\xBB\xBF";
+template <class CharString>
+bool isValidUtf8(const CharString& str); //check for UTF-8 encoding errors
+
//---- explicit conversion: wide <-> utf8 ----
template <class CharString, class WideString>
CharString wideToUtf8(const WideString& str); //example: std::string tmp = wideToUtf8<std::string>(L"abc");
@@ -53,9 +56,9 @@ size_t findUnicodePos(const UtfString& str, size_t unicodePos); //return positio
//----------------------- implementation ----------------------------------
namespace implementation
{
-typedef std::uint32_t CodePoint;
-typedef std::uint16_t Char16;
-typedef unsigned char Char8;
+using CodePoint = std::uint32_t;
+using Char16 = std::uint16_t;
+using Char8 = unsigned char;
const CodePoint LEAD_SURROGATE = 0xd800;
const CodePoint TRAIL_SURROGATE = 0xdc00; //== LEAD_SURROGATE_MAX + 1
@@ -413,6 +416,21 @@ CharString wideToUtf8(const WideString& str, Int2Type<4>) //other OS: convert ut
}
+template <class CharString> inline
+bool isValidUtf8(const CharString& str) //returns true unless some code point reported by utf8ToCodePoint() equals REPLACEMENT_CHAR
+{
+ using namespace implementation; //brings CodePoint into scope (presumably also utf8ToCodePoint and REPLACEMENT_CHAR - confirm)
+ bool valid = true; //only ever flipped to false by the callback below, never reset to true
+ utf8ToCodePoint(strBegin(str), strBegin(str) + strLength(str), //passes begin and begin + length of str
+ [&](CodePoint cp) //captures "valid" by reference; presumably invoked once per decoded code point - confirm against utf8ToCodePoint
+ {
+ if (cp == REPLACEMENT_CHAR) //NOTE(review): presumably the decoder's marker for malformed input; a genuinely encoded U+FFFD may also match - confirm
+ valid = false; //perf: should we use an (expensive) exception for iteration break?
+ });
+ return valid; //the callback has no way to stop the iteration early (see perf note above)
+}
+
+
template <class WideString, class CharString> inline
WideString utf8ToWide(const CharString& str)
{
bgstack15