1 files changed, 582 insertions, 0 deletions
diff --git a/zenXml/zenxml/parser.h b/zenXml/zenxml/parser.h
new file mode 100755
index 00000000..5a529f9a
--- /dev/null
+++ b/zenXml/zenxml/parser.h
@@ -0,0 +1,582 @@
+// *****************************************************************************
+// * This file is part of the FreeFileSync project. It is distributed under    *
+// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0           *
+// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved *
+// *****************************************************************************
+
+#ifndef PARSER_H_81248670213764583021432
+#define PARSER_H_81248670213764583021432
+
+#include <cstdio>
+#include <cstddef> //ptrdiff_t; req. on Linux
+#include <zen/string_tools.h>
+#include "dom.h"
+#include "error.h"
+
+
+namespace zen
+{
+/**
+\file
+\brief Convert an XML document object model (class XmlDoc) to and from a byte stream representation.
+*/
+
+///Save XML document as a byte stream
+/**
+\param doc Input XML document
+\param lineBreak Line break, default: carriage return + new line
+\param indent Indentation, default: four space characters
+\return Output byte stream
+*/
+std::string serialize(const XmlDoc& doc,
+                      const std::string& lineBreak = "\r\n",
+                      const std::string& indent = "    "); //throw ()
+
+///Exception thrown due to an XML parsing error
+struct XmlParsingError : public XmlError
+{
+    XmlParsingError(size_t rowNo, size_t colNo) : row(rowNo), col(colNo) {}
+    ///Input file row where the parsing error occured (zero-based)
+    const size_t row; //beginning with 0
+    ///Input file column where the parsing error occured (zero-based)
+    const size_t col; //
+};
+
+
+///Load XML document from a byte stream
+/**
+\param stream Input byte stream
+\returns Output XML document
+\throw XmlParsingError
+*/
+XmlDoc parse(const std::string& stream); //throw XmlParsingError
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//---------------------------- implementation ----------------------------
+//see: http://www.w3.org/TR/xml/
+
+namespace implementation
+{
+template <class Predicate> inline
+std::string normalize(const std::string& str, Predicate pred) //pred: unary function taking a char, return true if value shall be encoded as hex
+{
+    std::string output;
+    for (const char c : str)
+    {
+        if (c == '&')      //
+            output += "&amp;";
+        else if (c == '<') //normalization mandatory: http://www.w3.org/TR/xml/#syntax
+            output += "&lt;";
+        else if (c == '>') //
+            output += "&gt;";
+        else if (pred(c))
+        {
+            if (c == '\'')
+                output += "&apos;";
+            else if (c == '\"')
+                output += "&quot;";
+            else
+            {
+                output += "&#x";
+                const auto hexDigits = hexify(c);
+                output += hexDigits.first;
+                output += hexDigits.second;
+                output += ';';
+            }
+        }
+        else
+            output += c;
+    }
+    return output;
+}
+
+inline
+std::string normalizeName(const std::string& str)
+{
+    return normalize(str, [](char c) { return isWhiteSpace(c) || c == '=' || c == '/' || c == '\'' || c == '\"'; });
+}
+
+inline
+std::string normalizeElementValue(const std::string& str)
+{
+    return normalize(str, [](char c) { return static_cast<unsigned char>(c) < 32; });
+}
+
+inline
+std::string normalizeAttribValue(const std::string& str)
+{
+    return normalize(str, [](char c) { return static_cast<unsigned char>(c) < 32 || c == '\'' || c == '\"'; });
+}
+
+
+template <class CharIterator, size_t N> inline
+bool checkEntity(CharIterator& first, CharIterator last, const char (&placeholder)[N])
+{
+    assert(placeholder[N - 1] == 0);
+    const ptrdiff_t strLen = N - 1; //don't count null-terminator
+    if (last - first >= strLen && std::equal(first, first + strLen, placeholder))
+    {
+        first += strLen - 1;
+        return true;
+    }
+    return false;
+}
+
+
+namespace
+{
+std::string denormalize(const std::string& str)
+{
+    std::string output;
+    for (auto it = str.begin(); it != str.end(); ++it)
+    {
+        const char c = *it;
+
+        if (c == '&')
+        {
+            if (checkEntity(it, str.end(), "&amp;"))
+                output += '&';
+            else if (checkEntity(it, str.end(), "&lt;"))
+                output += '<';
+            else if (checkEntity(it, str.end(), "&gt;"))
+                output += '>';
+            else if (checkEntity(it, str.end(), "&apos;"))
+                output += '\'';
+            else if (checkEntity(it, str.end(), "&quot;"))
+                output += '\"';
+            else if (str.end() - it >= 6 &&
+                     it[1] == '#' &&
+                     it[2] == 'x' &&
+                     it[5] == ';')
+            {
+                output += unhexify(it[3], it[4]);
+                it += 5;
+            }
+            else
+                output += c; //unexpected char!
+        }
+        else if (c == '\r') //map all end-of-line characters to \n http://www.w3.org/TR/xml/#sec-line-ends
+        {
+            auto itNext = it + 1;
+            if (itNext != str.end() && *itNext == '\n')
+                ++it;
+            output += '\n';
+        }
+        else
+            output += c;
+    }
+    return output;
+}
+
+
+void serialize(const XmlElement& element, std::string& stream,
+               const std::string& lineBreak,
+               const std::string& indent,
+               size_t indentLevel)
+{
+    const std::string& nameFmt = normalizeName(element.getNameAs<std::string>());
+
+    for (size_t i = 0; i < indentLevel; ++i)
+        stream += indent;
+
+    stream += '<' + nameFmt;
+
+    auto attr = element.getAttributes();
+    for (auto it = attr.first; it != attr.second; ++it)
+        stream += ' ' + normalizeName(it->first) + "=\"" + normalizeAttribValue(it->second) + '\"';
+
+    //no support for mixed-mode content
+    auto iterPair = element.getChildren();
+    if (iterPair.first != iterPair.second) //structured element
+    {
+        stream += '>' + lineBreak;
+
+        std::for_each(iterPair.first, iterPair.second,
+        [&](const XmlElement& el) { serialize(el, stream, lineBreak, indent, indentLevel + 1); });
+
+        for (size_t i = 0; i < indentLevel; ++i)
+            stream += indent;
+        stream += "</" + nameFmt + '>' + lineBreak;
+    }
+    else
+    {
+        std::string value;
+        element.getValue(value);
+
+        if (!value.empty()) //value element
+            stream += '>' + normalizeElementValue(value) + "</" + nameFmt + '>' + lineBreak;
+        else //empty element
+            stream += "/>" + lineBreak;
+    }
+}
+
+std::string serialize(const XmlDoc& doc,
+                      const std::string& lineBreak,
+                      const std::string& indent)
+{
+    std::string version = doc.getVersionAs<std::string>();
+    if (!version.empty())
+        version = " version=\"" + normalizeAttribValue(version) + '\"';
+
+    std::string encoding = doc.getEncodingAs<std::string>();
+    if (!encoding.empty())
+        encoding = " encoding=\"" + normalizeAttribValue(encoding) + '\"';
+
+    std::string standalone = doc.getStandaloneAs<std::string>();
+    if (!standalone.empty())
+        standalone = " standalone=\"" + normalizeAttribValue(standalone) + '\"';
+
+    std::string output = "<?xml" + version + encoding + standalone + "?>" + lineBreak;
+    serialize(doc.root(), output, lineBreak, indent, 0);
+    return output;
+}
+}
+}
+
+inline
+std::string serialize(const XmlDoc& doc,
+                      const std::string& lineBreak,
+                      const std::string& indent) { return implementation::serialize(doc, lineBreak, indent); }
+
+/*
+Grammar for XML parser
+-------------------------------
+document-expression:
+    <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+    element-expression:
+
+element-expression:
+    <string attributes-expression/>
+    <string attributes-expression> pm-expression </string>
+
+element-list-expression:
+    <empty>
+    element-expression element-list-expression
+
+attributes-expression:
+    <empty>
+    string="string" attributes-expression
+
+pm-expression:
+    string
+    element-list-expression
+*/
+
+namespace implementation
+{
+struct Token
+{
+    enum Type
+    {
+        TK_LESS,
+        TK_GREATER,
+        TK_LESS_SLASH,
+        TK_SLASH_GREATER,
+        TK_EQUAL,
+        TK_QUOTE,
+        TK_DECL_BEGIN,
+        TK_DECL_END,
+        TK_NAME,
+        TK_END
+    };
+
+    Token(Type t) : type(t) {}
+    Token(const std::string& txt) : type(TK_NAME), name(txt) {}
+
+    Type type;
+    std::string name; //filled if type == TK_NAME
+};
+
+class Scanner
+{
+public:
+    Scanner(const std::string& stream) : stream_(stream), pos(stream_.begin())
+    {
+        if (zen::startsWith(stream_, BYTE_ORDER_MARK_UTF8))
+            pos += strLength(BYTE_ORDER_MARK_UTF8);
+    }
+
+    Token nextToken() //throw XmlParsingError
+    {
+        //skip whitespace
+        pos = std::find_if(pos, stream_.end(), [](char c) { return !zen::isWhiteSpace(c); });
+
+        if (pos == stream_.end())
+            return Token::TK_END;
+
+        //skip XML comments
+        if (startsWith(xmlCommentBegin))
+        {
+            auto it = std::search(pos + xmlCommentBegin.size(), stream_.end(), xmlCommentEnd.begin(), xmlCommentEnd.end());
+            if (it != stream_.end())
+            {
+                pos = it + xmlCommentEnd.size();
+                return nextToken();
+            }
+        }
+
+        for (auto it = tokens.begin(); it != tokens.end(); ++it)
+            if (startsWith(it->first))
+            {
+                pos += it->first.size();
+                return it->second;
+            }
+
+        auto nameEnd = std::find_if(pos, stream_.end(), [](char c)
+        {
+            return c == '<'  ||
+                   c == '>'  ||
+                   c == '='  ||
+                   c == '/'  ||
+                   c == '\'' ||
+                   c == '\"' ||
+                   zen::isWhiteSpace(c);
+        });
+
+        if (nameEnd != pos)
+        {
+            std::string name(&*pos, nameEnd - pos);
+            pos = nameEnd;
+            return implementation::denormalize(name);
+        }
+
+        //unknown token
+        throw XmlParsingError(posRow(), posCol());
+    }
+
+    std::string extractElementValue()
+    {
+        auto it = std::find_if(pos, stream_.end(), [](char c)
+        {
+            return c == '<'  ||
+                   c == '>';
+        });
+        std::string output(pos, it);
+        pos = it;
+        return implementation::denormalize(output);
+    }
+
+    std::string extractAttributeValue()
+    {
+        auto it = std::find_if(pos, stream_.end(), [](char c)
+        {
+            return c == '<'  ||
+                   c == '>'  ||
+                   c == '\'' ||
+                   c == '\"';
+        });
+        std::string output(pos, it);
+        pos = it;
+        return implementation::denormalize(output);
+    }
+
+    size_t posRow() const //current row beginning with 0
+    {
+        const size_t crSum = std::count(stream_.begin(), pos, '\r'); //carriage returns
+        const size_t nlSum = std::count(stream_.begin(), pos, '\n'); //new lines
+        assert(crSum == 0 || nlSum == 0 || crSum == nlSum);
+        return std::max(crSum, nlSum); //be compatible with Linux/Mac/Win
+    }
+
+    size_t posCol() const //current col beginning with 0
+    {
+        //seek beginning of line
+        for (auto it = pos; it != stream_.begin(); )
+        {
+            --it;
+            if (*it == '\r' || *it == '\n')
+                return pos - it - 1;
+        }
+        return pos - stream_.begin();
+    }
+
+private:
+    Scanner           (const Scanner&) = delete;
+    Scanner& operator=(const Scanner&) = delete;
+
+    bool startsWith(const std::string& prefix) const
+    {
+        if (stream_.end() - pos < static_cast<ptrdiff_t>(prefix.size()))
+            return false;
+        return std::equal(prefix.begin(), prefix.end(), pos);
+    }
+
+    using TokenList = std::vector<std::pair<std::string, Token::Type>>;
+    const TokenList tokens
+    {
+        { "<?xml", Token::TK_DECL_BEGIN    },
+        { "?>",    Token::TK_DECL_END      },
+        { "</",    Token::TK_LESS_SLASH    },
+        { "/>",    Token::TK_SLASH_GREATER },
+        { "<",     Token::TK_LESS          },  //evaluate after TK_DECL_BEGIN!
+        { ">",     Token::TK_GREATER       },
+        { "=",     Token::TK_EQUAL         },
+        { "\"",    Token::TK_QUOTE         },
+        { "\'",    Token::TK_QUOTE         },
+    };
+
+    const std::string xmlCommentBegin = "<!--";
+    const std::string xmlCommentEnd   = "-->";
+
+    const std::string stream_;
+    std::string::const_iterator pos;
+};
+
+
+class XmlParser
+{
+public:
+    XmlParser(const std::string& stream) :
+        scn_(stream),
+        tk_(scn_.nextToken()) {}
+
+    XmlDoc parse() //throw XmlParsingError
+    {
+        XmlDoc doc;
+
+        //declaration (optional)
+        if (token().type == Token::TK_DECL_BEGIN)
+        {
+            nextToken();
+
+            while (token().type == Token::TK_NAME)
+            {
+                std::string attribName = token().name;
+                nextToken();
+
+                consumeToken(Token::TK_EQUAL);
+                expectToken(Token::TK_QUOTE);
+                std::string attribValue = scn_.extractAttributeValue();
+                nextToken();
+
+                consumeToken(Token::TK_QUOTE);
+
+                if (attribName == "version")
+                    doc.setVersion(attribValue);
+                else if (attribName == "encoding")
+                    doc.setEncoding(attribValue);
+                else if (attribName == "standalone")
+                    doc.setStandalone(attribValue);
+            }
+            consumeToken(Token::TK_DECL_END);
+        }
+
+        XmlElement dummy;
+        parseChildElements(dummy);
+
+        auto itPair = dummy.getChildren();
+        if (itPair.first != itPair.second)
+            doc.root().swapSubtree(*itPair.first);
+
+        expectToken(Token::TK_END);
+        return doc;
+    }
+
+private:
+    XmlParser           (const XmlParser&) = delete;
+    XmlParser& operator=(const XmlParser&) = delete;
+
+    void parseChildElements(XmlElement& parent)
+    {
+        while (token().type == Token::TK_LESS)
+        {
+            nextToken();
+
+            expectToken(Token::TK_NAME);
+            std::string elementName = token().name;
+            nextToken();
+
+            XmlElement& newElement = parent.addChild(elementName);
+
+            parseAttributes(newElement);
+
+            if (token().type == Token::TK_SLASH_GREATER) //empty element
+            {
+                nextToken();
+                continue;
+            }
+
+            expectToken(Token::TK_GREATER);
+            std::string elementValue = scn_.extractElementValue();
+            nextToken();
+
+            //no support for mixed-mode content
+            if (token().type == Token::TK_LESS) //structured element
+                parseChildElements(newElement);
+            else //value element
+                newElement.setValue(elementValue);
+
+            consumeToken(Token::TK_LESS_SLASH);
+
+            if (token().type != Token::TK_NAME ||
+                elementName != token().name)
+                throw XmlParsingError(scn_.posRow(), scn_.posCol());
+            nextToken();
+
+            consumeToken(Token::TK_GREATER);
+        }
+    }
+
+    void parseAttributes(XmlElement& element)
+    {
+        while (token().type == Token::TK_NAME)
+        {
+            std::string attribName = token().name;
+            nextToken();
+
+            consumeToken(Token::TK_EQUAL);
+            expectToken(Token::TK_QUOTE);
+            std::string attribValue = scn_.extractAttributeValue();
+            nextToken();
+
+            consumeToken(Token::TK_QUOTE);
+            element.setAttribute(attribName, attribValue);
+        }
+    }
+
+    const Token& token() const { return tk_; }
+    void nextToken() { tk_ = scn_.nextToken(); }
+
+    void consumeToken(Token::Type t) //throw XmlParsingError
+    {
+        expectToken(t); //throw XmlParsingError
+        nextToken();
+    }
+
+    void expectToken(Token::Type t) //throw XmlParsingError
+    {
+        if (token().type != t)
+            throw XmlParsingError(scn_.posRow(), scn_.posCol());
+    }
+
+    Scanner scn_;
+    Token tk_;
+};
+}
+
+inline
+XmlDoc parse(const std::string& stream) //throw XmlParsingError
+{
+    return implementation::XmlParser(stream).parse();  //throw XmlParsingError
+}
+}
+
+#endif //PARSER_H_81248670213764583021432