7 #ifndef ZEN_XML_PARSER_HEADER_81248670213764583021432
8 #define ZEN_XML_PARSER_HEADER_81248670213764583021432
12 #include <zen/string_traits.h>
31 const std::string& lineBreak =
"\r\n",
32 const std::string& indent =
" ");
75 namespace implementation
78 std::pair<char, char> hexify(
unsigned char c)
80 auto hexifyDigit = [](
int num) ->
char
82 assert(0 <= num&& num <= 15);
83 return static_cast<char>(num <= 9 ?
87 return std::make_pair(hexifyDigit(c / 16), hexifyDigit(c % 16));
92 char unhexify(
char high,
char low)
94 auto unhexifyDigit = [](
char hex) ->
int
96 if (
'0' <= hex && hex <=
'9')
98 else if (
'A' <= hex && hex <=
'F')
99 return (hex -
'A') + 10;
100 else if (
'a' <= hex && hex <=
'f')
101 return (hex -
'a') + 10;
105 return static_cast<unsigned char>(16 * unhexifyDigit(high) + unhexifyDigit(low));
109 template <
class Predicate>
inline
110 std::string normalize(
const std::string& str, Predicate pred)
113 std::for_each(str.begin(), str.end(),
131 const auto hexDigits = hexify(c);
132 output += hexDigits.first;
133 output += hexDigits.second;
144 std::string normalizeName(
const std::string& str)
146 return normalize(str, [](
char c) {
return isWhiteSpace(c) || c ==
'=' || c ==
'/' || c ==
'\'' || c ==
'\"'; });
150 std::string normalizeElementValue(
const std::string& str)
152 return normalize(str, [](
char c) {
return static_cast<unsigned char>(c) < 32; });
156 std::string normalizeAttribValue(
const std::string& str)
158 return normalize(str, [](
char c) {
return static_cast<unsigned char>(c) < 32 || c ==
'\'' || c ==
'\"'; });
162 template <
class CharIterator,
size_t N>
inline
163 bool checkEntity(CharIterator& first, CharIterator last,
const char (&placeholder)[N])
165 assert(placeholder[N - 1] == 0);
166 const ptrdiff_t strLen = N - 1;
167 if (last - first >= strLen && std::equal(first, first + strLen, placeholder))
178 std::string denormalize(
const std::string& str)
181 for (
auto it = str.begin(); it != str.end(); ++it)
187 if (checkEntity(it, str.end(),
"&"))
189 else if (checkEntity(it, str.end(),
"<"))
191 else if (checkEntity(it, str.end(),
">"))
193 else if (checkEntity(it, str.end(),
"'"))
195 else if (checkEntity(it, str.end(),
"""))
197 else if (str.end() - it >= 6 &&
202 output += unhexify(it[3], it[4]);
210 auto itNext = it + 1;
211 if (itNext != str.end() && *itNext ==
'\n')
222 void serialize(
const XmlElement& element, std::string& stream,
223 const std::string& lineBreak,
224 const std::string& indent,
227 const std::string& nameFmt = normalizeName(element.getNameAs<std::string>());
229 for (
size_t i = 0; i < indentLevel; ++i)
232 stream +=
'<' + nameFmt;
234 auto attr = element.getAttributes();
235 for (
auto it = attr.first; it != attr.second; ++it)
236 stream +=
' ' + normalizeName(it->first) +
"=\"" + normalizeAttribValue(it->second) +
'\"';
239 auto iterPair = element.getChildren();
240 if (iterPair.first != iterPair.second)
242 stream +=
'>' + lineBreak;
244 std::for_each(iterPair.first, iterPair.second,
245 [&](
const XmlElement & el) {
serialize(el, stream, lineBreak, indent, indentLevel + 1); });
247 for (
size_t i = 0; i < indentLevel; ++i)
249 stream +=
"</" + nameFmt +
'>' + lineBreak;
254 element.getValue(value);
257 stream +=
'>' + normalizeElementValue(value) +
"</" + nameFmt +
'>' + lineBreak;
259 stream +=
"/>" + lineBreak;
264 const std::string& lineBreak,
265 const std::string& indent)
267 std::string version = doc.getVersionAs<std::string>();
268 if (!version.empty())
269 version =
" version=\"" + normalizeAttribValue(version) +
'\"';
271 std::string encoding = doc.getEncodingAs<std::string>();
272 if (!encoding.empty())
273 encoding =
" encoding=\"" + normalizeAttribValue(encoding) +
'\"';
275 std::string standalone = doc.getStandaloneAs<std::string>();
276 if (!standalone.empty())
277 standalone =
" standalone=\"" + normalizeAttribValue(standalone) +
'\"';
279 std::string output =
"<?xml" + version + encoding + standalone +
"?>" + lineBreak;
280 serialize(doc.root(), output, lineBreak, indent, 0);
288 const std::string& lineBreak,
289 const std::string& indent) {
return implementation::serialize(doc, lineBreak, indent); }
315 namespace implementation
333 Token(Type t) : type(t) {}
334 Token(
const std::string& txt) : type(TK_NAME), name(txt) {}
343 Scanner(
const std::string& stream) :
344 xmlCommentBegin(
"<!--"),
345 xmlCommentEnd (
"-->"),
349 if (zen::startsWith(stream_, BYTE_ORDER_MARK_UTF8))
350 pos += strLength(BYTE_ORDER_MARK_UTF8);
352 tokens.push_back(std::make_pair(
"<?xml", Token::TK_DECL_BEGIN));
353 tokens.push_back(std::make_pair(
"?>", Token::TK_DECL_END));
354 tokens.push_back(std::make_pair(
"</", Token::TK_LESS_SLASH));
355 tokens.push_back(std::make_pair(
"/>", Token::TK_SLASH_GREATER));
356 tokens.push_back(std::make_pair(
"<" , Token::TK_LESS));
357 tokens.push_back(std::make_pair(
">" , Token::TK_GREATER));
358 tokens.push_back(std::make_pair(
"=" , Token::TK_EQUAL));
359 tokens.push_back(std::make_pair(
"\"", Token::TK_QUOTE));
360 tokens.push_back(std::make_pair(
"\'", Token::TK_QUOTE));
366 pos = std::find_if(pos, stream_.end(), [](
char c) {
return !zen::isWhiteSpace(c); });
368 if (pos == stream_.end())
369 return Token::TK_END;
372 if (startsWith(xmlCommentBegin))
374 auto it = std::search(pos + xmlCommentBegin.size(), stream_.end(), xmlCommentEnd.begin(), xmlCommentEnd.end());
375 if (it != stream_.end())
377 pos = it + xmlCommentEnd.size();
382 for (
auto it = tokens.begin(); it != tokens.end(); ++it)
383 if (startsWith(it->first))
385 pos += it->first.size();
389 auto nameEnd = std::find_if(pos, stream_.end(), [](
char c)
397 zen::isWhiteSpace(c);
402 std::string name(&*pos, nameEnd - pos);
404 return implementation::denormalize(name);
408 throw XmlParsingError(posRow(), posCol());
411 std::string extractElementValue()
413 auto it = std::find_if(pos, stream_.end(), [](
char c)
418 std::string output(pos, it);
420 return implementation::denormalize(output);
423 std::string extractAttributeValue()
425 auto it = std::find_if(pos, stream_.end(), [](
char c)
432 std::string output(pos, it);
434 return implementation::denormalize(output);
437 size_t posRow() const
439 const size_t crSum = std::count(stream_.begin(), pos,
'\r');
440 const size_t nlSum = std::count(stream_.begin(), pos,
'\n');
441 assert(crSum == 0 || nlSum == 0 || crSum == nlSum);
442 return std::max(crSum, nlSum);
445 size_t posCol() const
448 for (
auto it = pos; it != stream_.begin(); )
451 if (*it ==
'\r' || *it ==
'\n')
454 return pos - stream_.begin();
458 Scanner(
const Scanner&);
459 Scanner& operator=(
const Scanner&);
461 bool startsWith(
const std::string& prefix)
const
463 if (stream_.end() - pos < static_cast<ptrdiff_t>(prefix.size()))
465 return std::equal(prefix.begin(), prefix.end(), pos);
468 typedef std::vector<std::pair<std::string, Token::Type> > TokenList;
471 const std::string xmlCommentBegin;
472 const std::string xmlCommentEnd;
474 const std::string stream_;
475 std::string::const_iterator pos;
482 XmlParser(
const std::string& stream) :
484 tk(scn.nextToken()) {}
491 if (token().type == Token::TK_DECL_BEGIN)
495 while (token().type == Token::TK_NAME)
497 std::string attribName = token().name;
500 consumeToken(Token::TK_EQUAL);
501 expectToken(Token::TK_QUOTE);
502 std::string attribValue = scn.extractAttributeValue();
505 consumeToken(Token::TK_QUOTE);
507 if (attribName ==
"version")
508 doc.setVersion(attribValue);
509 else if (attribName ==
"encoding")
510 doc.setEncoding(attribValue);
511 else if (attribName ==
"standalone")
512 doc.setStandalone(attribValue);
514 consumeToken(Token::TK_DECL_END);
518 XmlElement& elemTmp = dummy.root();
519 parseChildElements(elemTmp);
521 auto iterPair = elemTmp.getChildren();
522 if (iterPair.first != iterPair.second)
523 doc.root().swap(*iterPair.first);
525 expectToken(Token::TK_END);
530 XmlParser(
const XmlParser&);
531 XmlParser& operator=(
const XmlParser&);
533 void parseChildElements(XmlElement& parent)
535 while (token().type == Token::TK_LESS)
539 expectToken(Token::TK_NAME);
540 std::string elementName = token().name;
543 XmlElement& newElement = parent.addChild(elementName);
545 parseAttributes(newElement);
547 if (token().type == Token::TK_SLASH_GREATER)
553 expectToken(Token::TK_GREATER);
554 std::string elementValue = scn.extractElementValue();
558 if (token().type == Token::TK_LESS)
559 parseChildElements(newElement);
561 newElement.setValue(elementValue);
563 consumeToken(Token::TK_LESS_SLASH);
565 if (token().type != Token::TK_NAME ||
566 elementName != token().name)
567 throw XmlParsingError(scn.posRow(), scn.posCol());
570 consumeToken(Token::TK_GREATER);
574 void parseAttributes(XmlElement& element)
576 while (token().type == Token::TK_NAME)
578 std::string attribName = token().name;
581 consumeToken(Token::TK_EQUAL);
582 expectToken(Token::TK_QUOTE);
583 std::string attribValue = scn.extractAttributeValue();
586 consumeToken(Token::TK_QUOTE);
587 element.setAttribute(attribName, attribValue);
591 const Token& token()
const {
return tk; }
592 void nextToken() { tk = scn.nextToken(); }
594 void consumeToken(Token::Type t)
600 void expectToken(Token::Type t)
602 if (token().type != t)
603 throw XmlParsingError(scn.posRow(), scn.posCol());
614 return implementation::XmlParser(stream).parse();
618 #endif //ZEN_XML_PARSER_HEADER_81248670213764583021432
XmlDoc parse(const std::string &stream)
Load XML document from a byte stream.
Definition: parser.h:612
std::string serialize(const XmlDoc &doc, const std::string &lineBreak="\r\n", const std::string &indent=" ")
Save XML document as a byte stream.
Definition: parser.h:287
size_t row
Input file row where the parsing error occurred (zero-based)
Definition: parser.h:39
The complete XML document.
Definition: dom.h:249
size_t col
Input file column where the parsing error occurred (zero-based)
Definition: parser.h:41
Exception thrown due to an XML parsing error.
Definition: parser.h:35
Exception base class for zen::Xml.
Definition: error.h:13