diff options
author | Daniel Wilhelm <shieldwed@outlook.com> | 2017-03-12 22:00:35 -0600 |
---|---|---|
committer | Daniel Wilhelm <shieldwed@outlook.com> | 2017-03-12 22:00:35 -0600 |
commit | 3ba62ef1de77153e5a8c7bad4451b96f6a1678b0 (patch) | |
tree | e6e69717e394a528a2e2aca3af036d4befaa9658 /zenXml/zenxml | |
parent | 8.9 (diff) | |
download | FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.tar.gz FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.tar.bz2 FreeFileSync-3ba62ef1de77153e5a8c7bad4451b96f6a1678b0.zip |
8.10
Diffstat (limited to 'zenXml/zenxml')
-rwxr-xr-x | zenXml/zenxml/bind.h | 389 | ||||
-rwxr-xr-x | zenXml/zenxml/cvrt_struc.h | 210 | ||||
-rwxr-xr-x | zenXml/zenxml/cvrt_text.h | 220 | ||||
-rwxr-xr-x | zenXml/zenxml/dom.h | 332 | ||||
-rwxr-xr-x | zenXml/zenxml/error.h | 19 | ||||
-rwxr-xr-x | zenXml/zenxml/io.h | 97 | ||||
-rwxr-xr-x | zenXml/zenxml/parser.h | 582 | ||||
-rwxr-xr-x | zenXml/zenxml/xml.h | 15 |
8 files changed, 1864 insertions, 0 deletions
diff --git a/zenXml/zenxml/bind.h b/zenXml/zenxml/bind.h new file mode 100755 index 00000000..6de10a44 --- /dev/null +++ b/zenXml/zenxml/bind.h @@ -0,0 +1,389 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef BIND_H_9081740816593478258435 +#define BIND_H_9081740816593478258435 + +#include <set> +#include "cvrt_struc.h" +#include "parser.h" +#include "io.h" + +namespace zen +{ +/** +\file +\brief Map user data types to XML +*/ + +///Load XML document from a file +/** +Convenience function that does nothing more than calling loadStream() and parse(). + +\tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... +\param filename Input file name +\returns The loaded XML document +\throw XmlFileError +\throw XmlParsingError +*/ +template <class String> inline +XmlDoc load(const String& filename) //throw XmlFileError, XmlParsingError +{ + std::string stream = loadStream(filename); //throw XmlFileError + return parse(stream); //throw XmlParsingError +} + + +///Save XML document to a file +/** +Convenience function that does nothing more than calling serialize() and saveStream(). + +\tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... +\param doc The XML document to save +\param filename Output file name +\param lineBreak Line break, default: carriage return + new line +\param indent Indentation, default: four space characters +\throw XmlFileError +*/ +template <class String> inline +void save(const XmlDoc& doc, + const String& filename, + const std::string& lineBreak = "\r\n", + const std::string& indent = " ") //throw XmlFileError +{ + std::string stream = serialize(doc, lineBreak, indent); //throw () + saveStream(stream, filename); //throw XmlFileError +} + + +///Proxy class to conveniently convert user data into XML structure +class XmlOut +{ +public: + ///Construct an output proxy for an XML document + /** + \code + zen::XmlDoc doc; + + zen::XmlOut out(doc); + out["elem1"]( 1); // + out["elem2"]( 2); //write data into XML elements + out["elem3"](-3); // + + save(doc, "out.xml"); //throw XmlFileError + \endcode + Output: + \verbatim + <?xml version="1.0" encoding="UTF-8"?> + <Root> + <elem1>1</elem1> + <elem2>2</elem2> + <elem3>-3</elem3> + </Root> + \endverbatim + */ + XmlOut(XmlDoc& doc) : ref_(&doc.root()) {} + ///Construct an output proxy for a single XML element + /** + \sa XmlOut(XmlDoc& doc) + */ + XmlOut(XmlElement& element) : ref_(&element) {} + + ///Retrieve a handle to an XML child element for writing + /** + The child element will be created if it is not yet existing. + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \param name The name of the child element + */ + template <class String> + XmlOut operator[](const String& name) const + { + const std::string utf8name = utfTo<std::string>(name); + XmlElement* child = ref_->getChild(utf8name); + return child ? *child : ref_->addChild(utf8name); + } + + ///Write user data to the underlying XML element + /** + This conversion requires a specialization of zen::writeText() or zen::writeStruc() for type T. + \tparam T User type that is converted into an XML element value. + */ + template <class T> + void operator()(const T& value) { writeStruc(value, *ref_); } + + ///Write user data to an XML attribute + /** + This conversion requires a specialization of zen::writeText() for type T. + \code + zen::XmlDoc doc; + + zen::XmlOut out(doc); + out["elem"].attribute("attr1", 1); // + out["elem"].attribute("attr2", 2); //write data into XML attributes + out["elem"].attribute("attr3", -3); // + + save(doc, "out.xml"); //throw XmlFileError + \endcode + Output: + \verbatim + <?xml version="1.0" encoding="UTF-8"?> + <Root> + <elem attr1="1" attr2="2" attr3="-3"/> + </Root> + \endverbatim + + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \tparam T String-convertible user data type: e.g. any string-like type, all built-in arithmetic numbers + \sa XmlElement::setAttribute() + */ + template <class String, class T> + void attribute(const String& name, const T& value) { ref_->setAttribute(name, value); } + + ///Return a reference to the underlying Xml element + XmlElement& ref() { return *ref_; } + +private: + XmlElement* ref_; //always bound! +}; + + +///Proxy class to conveniently convert XML structure to user data +class XmlIn +{ + class ErrorLog; + +public: + ///Construct an input proxy for an XML document + /** + \code + zen::XmlDoc doc; + ... //load document + zen::XmlIn in(doc); + in["elem1"](value1); // + in["elem2"](value2); //read data from XML elements into variables "value1", "value2", "value3" + in["elem3"](value3); // + \endcode + */ + XmlIn(const XmlDoc& doc) : log(std::make_shared<ErrorLog>()) { refList.push_back(&doc.root()); } + ///Construct an input proxy for a single XML element, may be nullptr + /** + \sa XmlIn(const XmlDoc& doc) + */ + XmlIn(const XmlElement* element) : log(std::make_shared<ErrorLog>()) { refList.push_back(element); } + ///Construct an input proxy for a single XML element + /** + \sa XmlIn(const XmlDoc& doc) + */ + XmlIn(const XmlElement& element) : log(std::make_shared<ErrorLog>()) { refList.push_back(&element); } + + ///Retrieve a handle to an XML child element for reading + /** + It is \b not an error if the child element does not exist, but only later if a conversion to user data is attempted. + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \param name The name of the child element + */ + template <class String> + XmlIn operator[](const String& name) const + { + std::vector<const XmlElement*> childList; + + if (refIndex < refList.size()) + { + auto iterPair = refList[refIndex]->getChildren(name); + std::for_each(iterPair.first, iterPair.second, + [&](const XmlElement& child) { childList.push_back(&child); }); + } + + return XmlIn(childList, childList.empty() ? getChildNameFormatted(name) : std::string(), log); + } + + ///Refer to next sibling element with the same name + /** + <b>Example:</b> Loop over all XML child elements named "Item" + \verbatim + <?xml version="1.0" encoding="UTF-8"?> + <Root> + <Item>1</Item> + <Item>3</Item> + <Item>5</Item> + </Root> + \endverbatim + + \code + zen::XmlIn in(doc); + ... + for (zen::XmlIn child = in["Item"]; child; child.next()) + { + ... + } + \endcode + */ + void next() { ++refIndex; } + + ///Read user data from the underlying XML element + /** + This conversion requires a specialization of zen::readText() or zen::readStruc() for type T. + \tparam T User type that receives the data + \return "true" if data was read successfully + */ + template <class T> + bool operator()(T& value) const + { + if (refIndex < refList.size()) + { + bool success = readStruc(*refList[refIndex], value); + if (!success) + log->notifyConversionError(getNameFormatted()); + return success; + } + else + { + log->notifyMissingElement(getNameFormatted()); + return false; + } + } + + ///Read user data from an XML attribute + /** + This conversion requires a specialization of zen::readText() for type T. + + \code + zen::XmlDoc doc; + ... //load document + zen::XmlIn in(doc); + in["elem"].attribute("attr1", value1); // + in["elem"].attribute("attr2", value2); //read data from XML attributes into variables "value1", "value2", "value3" + in["elem"].attribute("attr3", value3); // + \endcode + + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \tparam T String-convertible user data type: e.g. any string-like type, all built-in arithmetic numbers + \returns "true" if the attribute was found and the conversion to the output value was successful. + \sa XmlElement::getAttribute() + */ + template <class String, class T> + bool attribute(const String& name, T& value) const + { + if (refIndex < refList.size()) + { + bool success = refList[refIndex]->getAttribute(name, value); + if (!success) + log->notifyMissingAttribute(getNameFormatted(), utfTo<std::string>(name)); + return success; + } + else + { + log->notifyMissingElement(getNameFormatted()); + return false; + } + } + + ///Return a pointer to the underlying Xml element, may be nullptr + const XmlElement* get() const { return refIndex < refList.size() ? refList[refIndex] : nullptr; } + + ///Test whether the underlying XML element exists + /** + \code + XmlIn in(doc); + XmlIn child = in["elem1"]; + if (child) + ... + \endcode + Use member pointer as implicit conversion to bool (C++ Templates - Vandevoorde/Josuttis; chapter 20) + */ + explicit operator bool() const { return get() != nullptr; } + + ///Notifies errors while mapping the XML to user data + /** + Error logging is shared by each hiearchy of XmlIn proxy instances that are created from each other. Consequently it doesn't matter which instance you query for errors: + \code + XmlIn in(doc); + XmlIn inItem = in["item1"]; + + int value = 0; + inItem(value); //let's assume this conversion failed + + assert(in.errorsOccured() == inItem.errorsOccured()); + assert(in.getErrorsAs<std::string>() == inItem.getErrorsAs<std::string>()); + \endcode + + Note that error logging is \b NOT global, but owned by all instances of a hierarchy of XmlIn proxies. + Therefore it's safe to use unrelated XmlIn proxies in multiple threads. + \n\n + However be aware that the chain of connected proxy instances will be broken once you call XmlIn::get() to retrieve the underlying pointer. + Errors that occur when working with this pointer are not logged by the original set of related instances. + */ + bool errorsOccured() const { return !log->elementList().empty(); } + + ///Get a list of XML element and attribute names which failed to convert to user data. + /** + \tparam String Arbitrary string class: e.g. std::string, std::wstring, wxString, MyStringClass, ... + \returns A list of XML element and attribute names, empty list if no errors occured. + */ + template <class String> + std::vector<String> getErrorsAs() const + { + std::vector<String> output; + const auto& elements = log->elementList(); + std::transform(elements.begin(), elements.end(), std::back_inserter(output), [](const std::string& str) { return utfTo<String>(str); }); + return output; + } + +private: + XmlIn(const std::vector<const XmlElement*>& siblingList, const std::string& elementNameFmt, const std::shared_ptr<ErrorLog>& sharedlog) : + refList(siblingList), formattedName(elementNameFmt), log(sharedlog) + { assert((!siblingList.empty() && elementNameFmt.empty()) || (siblingList.empty() && !elementNameFmt.empty())); } + + static std::string getNameFormatted(const XmlElement& elem) //"<Root> <Level1> <Level2>" + { + return (elem.parent() ? getNameFormatted(*elem.parent()) + " " : std::string()) + "<" + elem.getNameAs<std::string>() + ">"; + } + + std::string getNameFormatted() const + { + if (refIndex < refList.size()) + { + assert(formattedName.empty()); + return getNameFormatted(*refList[refIndex]); + } + else + return formattedName; + } + + std::string getChildNameFormatted(const std::string& childName) const + { + std::string parentName = getNameFormatted(); + return (parentName.empty() ? std::string() : (parentName + " ")) + "<" + childName + ">"; + } + + class ErrorLog + { + public: + void notifyConversionError (const std::string& displayName) { insert(displayName); } + void notifyMissingElement (const std::string& displayName) { insert(displayName); } + void notifyMissingAttribute(const std::string& displayName, const std::string& attribName) { insert(displayName + " @" + attribName); } + + const std::vector<std::string>& elementList() const { return failedElements; } + + private: + void insert(const std::string& newVal) + { + if (usedElements.insert(newVal).second) + failedElements.push_back(newVal); + } + + std::vector<std::string> failedElements; //unique list of failed elements + std::set<std::string> usedElements; + }; + + std::vector<const XmlElement*> refList; //all sibling elements with same name (all pointers bound!) + size_t refIndex = 0; //this sibling's index in refList + std::string formattedName; //contains full and formatted element name if (and only if) refList is empty + std::shared_ptr<ErrorLog> log; //always bound +}; +} + +#endif //BIND_H_9081740816593478258435 diff --git a/zenXml/zenxml/cvrt_struc.h b/zenXml/zenxml/cvrt_struc.h new file mode 100755 index 00000000..7797c0fe --- /dev/null +++ b/zenXml/zenxml/cvrt_struc.h @@ -0,0 +1,210 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef CVRT_STRUC_H_018727409908342709743 +#define CVRT_STRUC_H_018727409908342709743 + +#include "dom.h" + +namespace zen +{ +/** +\file +\brief Handle conversion of arbitrary types to and from XML elements. +See comments in cvrt_text.h +*/ + +///Convert XML element to structured user data +/** + \param input The input XML element. + \param value Conversion target value. + \return "true" if value was read successfully. +*/ +template <class T> bool readStruc(const XmlElement& input, T& value); +///Convert structured user data into an XML element +/** + \param value The value to be converted. + \param output The output XML element. +*/ +template <class T> void writeStruc(const T& value, XmlElement& output); + + + + + + + + + + + + + + + + + +//------------------------------ implementation ------------------------------------- +namespace impl_2384343 +{ +ZEN_INIT_DETECT_MEMBER_TYPE(value_type); +ZEN_INIT_DETECT_MEMBER_TYPE(iterator); +ZEN_INIT_DETECT_MEMBER_TYPE(const_iterator); + +ZEN_INIT_DETECT_MEMBER(begin) // +ZEN_INIT_DETECT_MEMBER(end) //we don't know the exact declaration of the member attribute: may be in a base class! +ZEN_INIT_DETECT_MEMBER(insert) // +} + +template <typename T> +struct IsStlContainer : + StaticBool< + impl_2384343::HasMemberType_value_type <T>::value&& + impl_2384343::HasMemberType_iterator <T>::value&& + impl_2384343::HasMemberType_const_iterator<T>::value&& + impl_2384343::HasMember_begin <T>::value&& + impl_2384343::HasMember_end <T>::value&& + impl_2384343::HasMember_insert <T>::value> {}; + + +namespace impl_2384343 +{ +ZEN_INIT_DETECT_MEMBER_TYPE(first_type); +ZEN_INIT_DETECT_MEMBER_TYPE(second_type); + +ZEN_INIT_DETECT_MEMBER(first) //we don't know the exact declaration of the member attribute: may be in a base class! +ZEN_INIT_DETECT_MEMBER(second) // +} + +template <typename T> +struct IsStlPair : + StaticBool< + impl_2384343::HasMemberType_first_type <T>::value&& + impl_2384343::HasMemberType_second_type<T>::value&& + impl_2384343::HasMember_first <T>::value&& + impl_2384343::HasMember_second <T>::value> {}; + +//###################################################################################### + +//Conversion from arbitrary types to an XML element +enum ValueType +{ + VALUE_TYPE_STL_CONTAINER, + VALUE_TYPE_STL_PAIR, + VALUE_TYPE_OTHER, +}; + +template <class T> +struct GetValueType : StaticEnum<ValueType, + GetTextType<T>::value != TEXT_TYPE_OTHER ? VALUE_TYPE_OTHER : //some string classes are also STL containers, so check this first + IsStlContainer<T>::value ? VALUE_TYPE_STL_CONTAINER : + IsStlPair<T>::value ? VALUE_TYPE_STL_PAIR : + VALUE_TYPE_OTHER> {}; + + +template <class T, ValueType type> +struct ConvertElement; +/* -> expected interface +{ + void writeStruc(const T& value, XmlElement& output) const; + bool readStruc(const XmlElement& input, T& value) const; +}; +*/ + + +//partial specialization: handle conversion for all STL-container types! +template <class T> +struct ConvertElement<T, VALUE_TYPE_STL_CONTAINER> +{ + void writeStruc(const T& value, XmlElement& output) const + { + for (const typename T::value_type& childVal : value) + { + XmlElement& newChild = output.addChild("Item"); + zen::writeStruc(childVal, newChild); + } + } + bool readStruc(const XmlElement& input, T& value) const + { + bool success = true; + value.clear(); + + auto itPair = input.getChildren("Item"); + for (auto it = itPair.first; it != itPair.second; ++it) + { + typename T::value_type childVal; //MSVC 2010 bug: cannot put this into a lambda body + if (zen::readStruc(*it, childVal)) + value.insert(value.end(), childVal); + else + success = false; + } + return success; + } +}; + + +//partial specialization: handle conversion for std::pair +template <class T> +struct ConvertElement<T, VALUE_TYPE_STL_PAIR> +{ + void writeStruc(const T& value, XmlElement& output) const + { + XmlElement& child1 = output.addChild("one"); //don't use "1st/2nd", this will confuse a few pedantic XML parsers + zen::writeStruc(value.first, child1); + + XmlElement& child2 = output.addChild("two"); + zen::writeStruc(value.second, child2); + } + bool readStruc(const XmlElement& input, T& value) const + { + bool success = true; + const XmlElement* child1 = input.getChild("one"); + if (!child1 || !zen::readStruc(*child1, value.first)) + success = false; + + const XmlElement* child2 = input.getChild("two"); + if (!child2 || !zen::readStruc(*child2, value.second)) + success = false; + + return success; + } +}; + + +//partial specialization: not a pure structured type, try text conversion (thereby respect user specializations of writeText()/readText()) +template <class T> +struct ConvertElement<T, VALUE_TYPE_OTHER> +{ + void writeStruc(const T& value, XmlElement& output) const + { + std::string tmp; + writeText(value, tmp); + output.setValue(tmp); + } + bool readStruc(const XmlElement& input, T& value) const + { + std::string rawStr; + input.getValue(rawStr); + return readText(rawStr, value); + } +}; + + +template <class T> inline +void writeStruc(const T& value, XmlElement& output) +{ + ConvertElement<T, GetValueType<T>::value>().writeStruc(value, output); +} + + +template <class T> inline +bool readStruc(const XmlElement& input, T& value) +{ + return ConvertElement<T, GetValueType<T>::value>().readStruc(input, value); +} +} + +#endif //CVRT_STRUC_H_018727409908342709743 diff --git a/zenXml/zenxml/cvrt_text.h b/zenXml/zenxml/cvrt_text.h new file mode 100755 index 00000000..529ead57 --- /dev/null +++ b/zenXml/zenxml/cvrt_text.h @@ -0,0 +1,220 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef CVRT_TEXT_H_018727339083427097434 +#define CVRT_TEXT_H_018727339083427097434 + +#include <zen/utf.h> +#include <zen/string_tools.h> + +namespace zen +{ +/** +\file +\brief Handle conversion of string-convertible types to and from std::string. + +It is \b not required to call these functions directly. They are implicitly used by zen::XmlElement::getValue(), +zen::XmlElement::setValue(), zen::XmlElement::getAttribute() and zen::XmlElement::setAttribute(). +\n\n +Conversions for the following user types are supported by default: + - strings - std::string, std::wstring, char*, wchar_t*, char, wchar_t, ect..., all STL-compatible-string-classes + - numbers - int, double, float, bool, long, ect..., all built-in numbers + - STL containers - std::map, std::set, std::vector, std::list, ect..., all STL-compatible-containers + - std::pair + +You can add support for additional types via template specialization. \n\n +Specialize zen::readStruc() and zen::writeStruc() to enable conversion from structured user types to XML elements. +Specialize zen::readText() and zen::writeText() to enable conversion from string-convertible user types to std::string. +Prefer latter if possible since it does not only enable conversions from XML elements to user data, but also from and to XML attributes. +\n\n +<b> Example: </b> type "bool" +\code +namespace zen +{ +template <> inline +void writeText(const bool& value, std::string& output) +{ + output = value ? "true" : "false"; +} + +template <> inline +bool readText(const std::string& input, bool& value) +{ + const std::string tmp = trimCpy(input); + if (tmp == "true") + value = true; + else if (tmp == "false") + value = false; + else + return false; + return true; +} +} +\endcode +*/ + + +///Convert text to user data - used by XML elements and attributes +/** + \param input Input text. + \param value Conversion target value. + \return "true" if value was read successfully. +*/ +template <class T> bool readText(const std::string& input, T& value); +///Convert user data into text - used by XML elements and attributes +/** + \param value The value to be converted. + \param output Output text. +*/ +template <class T> void writeText(const T& value, std::string& output); + + +/* Different classes of data types: + +----------------------------- +| structured | readStruc/writeStruc - e.g. string-convertible types, STL containers, std::pair, structured user types +| ------------------------- | +| | to-string-convertible | | readText/writeText - e.g. string-like types, all built-in arithmetic numbers, bool +| | --------------- | | +| | | string-like | | | utfTo - e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... +| | --------------- | | +| ------------------------- | +----------------------------- +*/ + + + + + + + + + + + + + + + +//------------------------------ implementation ------------------------------------- + +//Conversion from arbitrary types to text (for use with XML elements and attributes) +enum TextType +{ + TEXT_TYPE_BOOL, + TEXT_TYPE_NUMBER, + TEXT_TYPE_STRING, + TEXT_TYPE_OTHER, +}; + +template <class T> +struct GetTextType : StaticEnum<TextType, + IsSameType<T, bool>::value ? TEXT_TYPE_BOOL : + IsStringLike<T>::value ? TEXT_TYPE_STRING : //string before number to correctly handle char/wchar_t -> this was an issue with Loki only! + IsArithmetic<T>::value ? TEXT_TYPE_NUMBER : // + TEXT_TYPE_OTHER> {}; + +//###################################################################################### + +template <class T, TextType type> +struct ConvertText; +/* -> expected interface +{ + void writeText(const T& value, std::string& output) const; + bool readText(const std::string& input, T& value) const; +}; +*/ + +//partial specialization: type bool +template <class T> +struct ConvertText<T, TEXT_TYPE_BOOL> +{ + void writeText(bool value, std::string& output) const + { + output = value ? "true" : "false"; + } + bool readText(const std::string& input, bool& value) const + { + const std::string tmp = trimCpy(input); + if (tmp == "true") + value = true; + else if (tmp == "false") + value = false; + else + return false; + return true; + } +}; + +//partial specialization: handle conversion for all built-in arithmetic types! +template <class T> +struct ConvertText<T, TEXT_TYPE_NUMBER> +{ + void writeText(const T& value, std::string& output) const + { + output = numberTo<std::string>(value); + } + bool readText(const std::string& input, T& value) const + { + value = stringTo<T>(input); + return true; + } +}; + +//partial specialization: handle conversion for all string-like types! +template <class T> +struct ConvertText<T, TEXT_TYPE_STRING> +{ + void writeText(const T& value, std::string& output) const + { + output = utfTo<std::string>(value); + } + bool readText(const std::string& input, T& value) const + { + value = utfTo<T>(input); + return true; + } +}; + + +//partial specialization: unknown type +template <class T> +struct ConvertText<T, TEXT_TYPE_OTHER> +{ + //########################################################################################################################################### + static_assert(sizeof(T) == -1, ""); + /* + ATTENTION: The data type T is yet unknown to the zen::Xml framework! + + Please provide a specialization for T of the following two functions in order to handle conversions to XML elements and attributes + + template <> void zen::writeText(const T& value, std::string& output) + template <> bool zen::readText(const std::string& input, T& value) + + If T is structured and cannot be converted to a text representation specialize these two functions to allow at least for conversions to XML elements: + + template <> void zen::writeStruc(const T& value, XmlElement& output) + template <> bool zen::readStruc(const XmlElement& input, T& value) + */ + //########################################################################################################################################### +}; + + +template <class T> inline +void writeText(const T& value, std::string& output) +{ + ConvertText<T, GetTextType<T>::value>().writeText(value, output); +} + + +template <class T> inline +bool readText(const std::string& input, T& value) +{ + return ConvertText<T, GetTextType<T>::value>().readText(input, value); +} +} + +#endif //CVRT_TEXT_H_018727339083427097434 diff --git a/zenXml/zenxml/dom.h b/zenXml/zenxml/dom.h new file mode 100755 index 00000000..1b1267d8 --- /dev/null +++ b/zenXml/zenxml/dom.h @@ -0,0 +1,332 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef DOM_H_82085720723894567204564256 +#define DOM_H_82085720723894567204564256 + +#include <string> +#include <map> +#include <zen/fixed_list.h> +#include "cvrt_text.h" //"readText/writeText" + +namespace zen +{ +class XmlDoc; + +/// An XML element +class XmlElement +{ +public: + XmlElement() : parent_(nullptr) {} + + //Construct an empty XML element + template <class String> + explicit XmlElement(const String& name, XmlElement* parentElement = nullptr) : name_(utfTo<std::string>(name)), parent_(parentElement) {} + + ///Retrieve the name of this XML element. + /** + \tparam String Arbitrary string class: e.g. std::string, std::wstring, wxString, MyStringClass, ... + \returns Name of the XML element. + */ + template <class String> + String getNameAs() const { return utfTo<String>(name_); } + + ///Get the value of this element as a user type. + /** + \tparam T Arbitrary user data type: e.g. any string class, all built-in arithmetic numbers, STL container, ... + \returns "true" if Xml element was successfully converted to value, cannot fail for string-like types + */ + template <class T> + bool getValue(T& value) const { return readStruc(*this, value); } + + ///Set the value of this element. + /** + \tparam T Arbitrary user data type: e.g. any string-like type, all built-in arithmetic numbers, STL container, ... + */ + template <class T> + void setValue(const T& value) { writeStruc(value, *this); } + + ///Retrieve an attribute by name. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \tparam T String-convertible user data type: e.g. any string class, all built-in arithmetic numbers + \param name The name of the attribute to retrieve. + \param value The value of the attribute converted to T. + \return "true" if value was retrieved successfully. + */ + template <class String, class T> + bool getAttribute(const String& name, T& value) const + { + auto it = attributes.find(utfTo<std::string>(name)); + return it == attributes.end() ? false : readText(it->second, value); + } + + ///Create or update an XML attribute. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \tparam T String-convertible user data type: e.g. any string-like type, all built-in arithmetic numbers + \param name The name of the attribute to create or update. + \param value The value to set. + */ + template <class String, class T> + void setAttribute(const String& name, const T& value) + { + std::string attrValue; + writeText(value, attrValue); + attributes[utfTo<std::string>(name)] = attrValue; + } + + ///Remove the attribute with the given name. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + */ + template <class String> + void removeAttribute(const String& name) { attributes.erase(utfTo<std::string>(name)); } + + ///Create a new child element and return a reference to it. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \param name The name of the child element to be created. + */ + template <class String> + XmlElement& addChild(const String& name) + { + std::string utf8Name = utfTo<std::string>(name); + childElements.emplace_back(utf8Name, this); + XmlElement& newElement = childElements.back(); + childElementsSorted.emplace(utf8Name, &newElement); + return newElement; + } + + ///Retrieve a child element with the given name. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \param name The name of the child element to be retrieved. + \return A pointer to the child element or nullptr if none was found. + */ + template <class String> + const XmlElement* getChild(const String& name) const + { + auto it = childElementsSorted.find(utfTo<std::string>(name)); + return it == childElementsSorted.end() ? nullptr : it->second; + } + + ///\sa getChild + template <class String> + XmlElement* getChild(const String& name) + { + return const_cast<XmlElement*>(static_cast<const XmlElement*>(this)->getChild(name)); + } + + template < class IterTy, //underlying iterator type + class T, //target object type + class AccessPolicy > //access policy: see AccessPtrMap + class PtrIter : public std::iterator<std::input_iterator_tag, T>, private AccessPolicy //get rid of shared_ptr indirection + { + public: + PtrIter(IterTy it) : it_(it) {} + PtrIter(const PtrIter& other) : it_(other.it_) {} + PtrIter& operator++() { ++it_; return *this; } + PtrIter operator++(int) { PtrIter tmp(*this); operator++(); return tmp; } + inline friend bool operator==(const PtrIter& lhs, const PtrIter& rhs) { return lhs.it_ == rhs.it_; } + inline friend bool operator!=(const PtrIter& lhs, const PtrIter& rhs) { return !(lhs == rhs); } + T& operator* () const { return AccessPolicy::template objectRef<T>(it_); } + T* operator->() const { return &AccessPolicy::template objectRef<T>(it_); } + private: + IterTy it_; + }; + + struct AccessMapElement + { + template <class T, class IterTy> + T& objectRef(const IterTy& it) const { return *(it->second); } + }; + + using ChildIter2 = PtrIter<std::multimap<std::string, XmlElement*>::iterator, XmlElement, AccessMapElement>; + using ChildIterConst2 = PtrIter<std::multimap<std::string, XmlElement*>::const_iterator, const XmlElement, AccessMapElement>; + + ///Access all child elements with the given name via STL iterators. + /** + \code + auto iterPair = elem.getChildren("Item"); + std::for_each(iterPair.first, iterPair.second, + [](const XmlElement& child) { ... }); + \endcode + \param name The name of the child elements to be retrieved. + \return A pair of STL begin/end iterators to access the child elements sequentially. + */ + template <class String> + std::pair<ChildIterConst2, ChildIterConst2> getChildren(const String& name) const { return childElementsSorted.equal_range(utfTo<std::string>(name)); } + + ///\sa getChildren + template <class String> + std::pair<ChildIter2, ChildIter2> getChildren(const String& name) { return childElementsSorted.equal_range(utfTo<std::string>(name)); } + + struct AccessListElement + { + template <class T, class IterTy> + T& objectRef(const IterTy& it) const { return *it; } + }; + + using ChildIter = PtrIter<FixedList<XmlElement>::iterator, XmlElement, AccessListElement>; + using ChildIterConst = PtrIter<FixedList<XmlElement>::const_iterator, const XmlElement, AccessListElement>; + + ///Access all child elements sequentially via STL iterators. + /** + \code + auto iterPair = elem.getChildren(); + std::for_each(iterPair.first, iterPair.second, + [](const XmlElement& child) { ... }); + \endcode + \return A pair of STL begin/end iterators to access all child elements sequentially. + */ + std::pair<ChildIterConst, ChildIterConst> getChildren() const { return std::make_pair(childElements.begin(), childElements.end()); } + + ///\sa getChildren + std::pair<ChildIter, ChildIter> getChildren() { return std::make_pair(childElements.begin(), childElements.end()); } + + ///Get parent XML element, may be nullptr for root element + XmlElement* parent() { return parent_; } + ///Get parent XML element, may be nullptr for root element + const XmlElement* parent() const { return parent_; } + + using AttrIter = std::map<std::string, std::string>::const_iterator; + + /* -> disabled documentation extraction + \brief Get all attributes associated with the element. + \code + auto iterPair = elem.getAttributes(); + for (auto it = iterPair.first; it != iterPair.second; ++it) + std::cout << "name: " << it->first << " value: " << it->second << "\n"; + \endcode + \return A pair of STL begin/end iterators to access all attributes sequentially as a list of name/value pairs of std::string. + */ + std::pair<AttrIter, AttrIter> getAttributes() const { return std::make_pair(attributes.begin(), attributes.end()); } + + //swap two elements while keeping references to parent. -> disabled documentation extraction + void swapSubtree(XmlElement& other) + { + name_ .swap(other.name_); + value_ .swap(other.value_); + attributes.swap(other.attributes); + childElements.swap(other.childElements); + childElementsSorted.swap(other.childElementsSorted); + //std::swap(parent_, other.parent_); -> parent is physical location; update children's parent reference instead: + for (XmlElement& child : childElements) + child.parent_ = this; + for (XmlElement& child : other.childElements) + child.parent_ = &other; + } + +private: + XmlElement (const XmlElement&) = delete; + XmlElement& operator=(const XmlElement&) = delete; + + std::string name_; + std::string value_; + std::map<std::string, std::string> attributes; + FixedList<XmlElement> childElements; //all child elements in order of creation + std::multimap<std::string, XmlElement*> childElementsSorted; //alternate key: sorted by element name + XmlElement* parent_; +}; + + +//XmlElement::setValue<T>() calls zen::writeStruc() which calls XmlElement::setValue() ... => these two specializations end the circle +template <> inline +void XmlElement::setValue(const std::string& value) { value_ = value; } + +template <> inline +bool XmlElement::getValue(std::string& value) const { value = value_; return true; } + + +///The complete XML document +class XmlDoc +{ +public: + ///Default constructor setting up an empty XML document with a standard declaration: <?xml version="1.0" encoding="UTF-8" ?> + XmlDoc() : version_("1.0"), encoding_("UTF-8"), rootElement("Root") {} + + XmlDoc(XmlDoc&& tmp) { swap(tmp); } + XmlDoc& operator=(XmlDoc&& tmp) { swap(tmp); return *this; } + + //Setup an empty XML document + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + \param rootName The name of the XML document's root element. + */ + template <class String> + XmlDoc(String rootName) : version_("1.0"), encoding_("UTF-8"), rootElement(rootName) {} + + ///Get a const reference to the document's root element. + const XmlElement& root() const { return rootElement; } + ///Get a reference to the document's root element. + XmlElement& root() { return rootElement; } + + ///Get the version used in the XML declaration. + /** + \tparam String Arbitrary string class: e.g. std::string, std::wstring, wxString, MyStringClass, ... + */ + template <class String> + String getVersionAs() const { return utfTo<String>(version_); } + + ///Set the version used in the XML declaration. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + */ + template <class String> + void setVersion(const String& version) { version_ = utfTo<std::string>(version); } + + ///Get the encoding used in the XML declaration. + /** + \tparam String Arbitrary string class: e.g. std::string, std::wstring, wxString, MyStringClass, ... + */ + template <class String> + String getEncodingAs() const { return utfTo<String>(encoding_); } + + ///Set the encoding used in the XML declaration. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + */ + template <class String> + void setEncoding(const String& encoding) { encoding_ = utfTo<std::string>(encoding); } + + ///Get the standalone string used in the XML declaration. + /** + \tparam String Arbitrary string class: e.g. std::string, std::wstring, wxString, MyStringClass, ... + */ + template <class String> + String getStandaloneAs() const { return utfTo<String>(standalone_); } + + ///Set the standalone string used in the XML declaration. + /** + \tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... + */ + template <class String> + void setStandalone(const String& standalone) { standalone_ = utfTo<std::string>(standalone); } + + //Transactionally swap two elements. -> disabled documentation extraction + void swap(XmlDoc& other) + { + version_ .swap(other.version_); + encoding_ .swap(other.encoding_); + standalone_.swap(other.standalone_); + rootElement.swapSubtree(other.rootElement); + } + +private: + XmlDoc (const XmlDoc&) = delete; //not implemented, thanks to XmlElement::parent_ + XmlDoc& operator=(const XmlDoc&) = delete; + + std::string version_; + std::string encoding_; + std::string standalone_; + + XmlElement rootElement; +}; + +} + +#endif //DOM_H_82085720723894567204564256 diff --git a/zenXml/zenxml/error.h b/zenXml/zenxml/error.h new file mode 100755 index 00000000..b50da44a --- /dev/null +++ b/zenXml/zenxml/error.h @@ -0,0 +1,19 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef ERROR_H_01873461843302148947321 +#define ERROR_H_01873461843302148947321 + +namespace zen +{ +///Exception base class for zen::Xml +struct XmlError +{ + virtual ~XmlError() {} +}; +} + +#endif //ERROR_H_01873461843302148947321 diff --git a/zenXml/zenxml/io.h b/zenXml/zenxml/io.h new file mode 100755 index 00000000..7bb1a01d --- /dev/null +++ b/zenXml/zenxml/io.h @@ -0,0 +1,97 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef IO_H_8917640501480763248343343 +#define IO_H_8917640501480763248343343 + +#include <cstdio> +#include <cerrno> +#include <zen/scope_guard.h> +#include <zen/utf.h> +#include "error.h" + +namespace zen +{ +/** +\file +\brief Save and load byte streams from files +*/ + + + +///Exception thrown due to failed file I/O +struct XmlFileError : public XmlError +{ + using ErrorCode = int; + + explicit XmlFileError(ErrorCode ec) : lastError(ec) {} + ///Native error code: errno + ErrorCode lastError; +}; + + + + +///Save byte stream to a file +/** +\tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... +\param stream Input byte stream +\param filename Output file name +\throw XmlFileError +*/ +template <class String> +void saveStream(const std::string& stream, const String& filename) //throw XmlFileError +{ + FILE* handle = ::fopen(utfTo<std::string>(filename).c_str(), "w"); + if (!handle) + throw XmlFileError(errno); + ZEN_ON_SCOPE_EXIT(::fclose(handle)); + + const size_t bytesWritten = ::fwrite(stream.c_str(), 1, stream.size(), handle); + if (::ferror(handle) != 0) + throw XmlFileError(errno); + + (void)bytesWritten; + assert(bytesWritten == stream.size()); +} + + +///Load byte stream from a file +/** +\tparam String Arbitrary string-like type: e.g. std::string, wchar_t*, char[], wchar_t, wxString, MyStringClass, ... +\param filename Input file name +\return Output byte stream +\throw XmlFileError +*/ +template <class String> +std::string loadStream(const String& filename) //throw XmlFileError +{ + FILE* handle = ::fopen(utfTo<std::string>(filename).c_str(), "r"); + if (!handle) + throw XmlFileError(errno); + ZEN_ON_SCOPE_EXIT(::fclose(handle)); + + std::string stream; + const size_t blockSize = 64 * 1024; + do + { + stream.resize(stream.size() + blockSize); //let's pray std::string implements exponential growth! + + const size_t bytesRead = ::fread(&*(stream.end() - blockSize), 1, blockSize, handle); + if (::ferror(handle)) + throw XmlFileError(errno); + if (bytesRead > blockSize) + throw XmlFileError(0); + if (bytesRead < blockSize) + stream.resize(stream.size() - blockSize + bytesRead); //caveat: unsigned arithmetics + } + while (!::feof(handle)); + + return stream; +} +} + +#endif //IO_H_8917640501480763248343343 diff --git a/zenXml/zenxml/parser.h b/zenXml/zenxml/parser.h new file mode 100755 index 00000000..5a529f9a --- /dev/null +++ b/zenXml/zenxml/parser.h @@ -0,0 +1,582 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef PARSER_H_81248670213764583021432 +#define PARSER_H_81248670213764583021432 + +#include <cstdio> +#include <cstddef> //ptrdiff_t; req. on Linux +#include <zen/string_tools.h> +#include "dom.h" +#include "error.h" + + +namespace zen +{ +/** +\file +\brief Convert an XML document object model (class XmlDoc) to and from a byte stream representation. +*/ + +///Save XML document as a byte stream +/** +\param doc Input XML document +\param lineBreak Line break, default: carriage return + new line +\param indent Indentation, default: four space characters +\return Output byte stream +*/ +std::string serialize(const XmlDoc& doc, + const std::string& lineBreak = "\r\n", + const std::string& indent = " "); //throw () + +///Exception thrown due to an XML parsing error +struct XmlParsingError : public XmlError +{ + XmlParsingError(size_t rowNo, size_t colNo) : row(rowNo), col(colNo) {} + ///Input file row where the parsing error occured (zero-based) + const size_t row; //beginning with 0 + ///Input file column where the parsing error occured (zero-based) + const size_t col; // +}; + + +///Load XML document from a byte stream +/** +\param stream Input byte stream +\returns Output XML document +\throw XmlParsingError +*/ +XmlDoc parse(const std::string& stream); //throw XmlParsingError + + + + + + + + + + + + + + + + + + + + +//---------------------------- implementation ---------------------------- +//see: http://www.w3.org/TR/xml/ + +namespace implementation +{ +template <class Predicate> inline +std::string normalize(const std::string& str, Predicate pred) //pred: unary function taking a char, return true if value shall be encoded as hex +{ + std::string output; + for (const char c : str) + { + if (c == '&') // + output += "&"; + else if (c == '<') //normalization mandatory: http://www.w3.org/TR/xml/#syntax + output += "<"; + else if (c == '>') // + output += ">"; + else if (pred(c)) + { + if (c == '\'') + output += "'"; + else if (c == '\"') + output += """; + else + { + output += "&#x"; + const auto hexDigits = hexify(c); + output += hexDigits.first; + output += hexDigits.second; + output += ';'; + } + } + else + output += c; + } + return output; +} + +inline +std::string normalizeName(const std::string& str) +{ + return normalize(str, [](char c) { return isWhiteSpace(c) || c == '=' || c == '/' || c == '\'' || c == '\"'; }); +} + +inline +std::string normalizeElementValue(const std::string& str) +{ + return normalize(str, [](char c) { return static_cast<unsigned char>(c) < 32; }); +} + +inline +std::string normalizeAttribValue(const std::string& str) +{ + return normalize(str, [](char c) { return static_cast<unsigned char>(c) < 32 || c == '\'' || c == '\"'; }); +} + + +template <class CharIterator, size_t N> inline +bool checkEntity(CharIterator& first, CharIterator last, const char (&placeholder)[N]) +{ + assert(placeholder[N - 1] == 0); + const ptrdiff_t strLen = N - 1; //don't count null-terminator + if (last - first >= strLen && std::equal(first, first + strLen, placeholder)) + { + first += strLen - 1; + return true; + } + return false; +} + + +namespace +{ +std::string denormalize(const std::string& str) +{ + std::string output; + for (auto it = str.begin(); it != str.end(); ++it) + { + const char c = *it; + + if (c == '&') + { + if (checkEntity(it, str.end(), "&")) + output += '&'; + else if (checkEntity(it, str.end(), "<")) + output += '<'; + else if (checkEntity(it, str.end(), ">")) + output += '>'; + else if (checkEntity(it, str.end(), "'")) + output += '\''; + else if (checkEntity(it, str.end(), """)) + output += '\"'; + else if (str.end() - it >= 6 && + it[1] == '#' && + it[2] == 'x' && + it[5] == ';') + { + output += unhexify(it[3], it[4]); + it += 5; + } + else + output += c; //unexpected char! + } + else if (c == '\r') //map all end-of-line characters to \n http://www.w3.org/TR/xml/#sec-line-ends + { + auto itNext = it + 1; + if (itNext != str.end() && *itNext == '\n') + ++it; + output += '\n'; + } + else + output += c; + } + return output; +} + + +void serialize(const XmlElement& element, std::string& stream, + const std::string& lineBreak, + const std::string& indent, + size_t indentLevel) +{ + const std::string& nameFmt = normalizeName(element.getNameAs<std::string>()); + + for (size_t i = 0; i < indentLevel; ++i) + stream += indent; + + stream += '<' + nameFmt; + + auto attr = element.getAttributes(); + for (auto it = attr.first; it != attr.second; ++it) + stream += ' ' + normalizeName(it->first) + "=\"" + normalizeAttribValue(it->second) + '\"'; + + //no support for mixed-mode content + auto iterPair = element.getChildren(); + if (iterPair.first != iterPair.second) //structured element + { + stream += '>' + lineBreak; + + std::for_each(iterPair.first, iterPair.second, + [&](const XmlElement& el) { serialize(el, stream, lineBreak, indent, indentLevel + 1); }); + + for (size_t i = 0; i < indentLevel; ++i) + stream += indent; + stream += "</" + nameFmt + '>' + lineBreak; + } + else + { + std::string value; + element.getValue(value); + + if (!value.empty()) //value element + stream += '>' + normalizeElementValue(value) + "</" + nameFmt + '>' + lineBreak; + else //empty element + stream += "/>" + lineBreak; + } +} + +std::string serialize(const XmlDoc& doc, + const std::string& lineBreak, + const std::string& indent) +{ + std::string version = doc.getVersionAs<std::string>(); + if (!version.empty()) + version = " version=\"" + normalizeAttribValue(version) + '\"'; + + std::string encoding = doc.getEncodingAs<std::string>(); + if (!encoding.empty()) + encoding = " encoding=\"" + normalizeAttribValue(encoding) + '\"'; + + std::string standalone = doc.getStandaloneAs<std::string>(); + if (!standalone.empty()) + standalone = " standalone=\"" + normalizeAttribValue(standalone) + '\"'; + + std::string output = "<?xml" + version + encoding + standalone + "?>" + lineBreak; + serialize(doc.root(), output, lineBreak, indent, 0); + return output; +} +} +} + +inline +std::string serialize(const XmlDoc& doc, + const std::string& lineBreak, + const std::string& indent) { return implementation::serialize(doc, lineBreak, indent); } + +/* +Grammar for XML parser +------------------------------- +document-expression: + <?xml version="1.0" encoding="UTF-8" standalone="yes"?> + element-expression: + +element-expression: + <string attributes-expression/> + <string attributes-expression> pm-expression </string> + +element-list-expression: + <empty> + element-expression element-list-expression + +attributes-expression: + <empty> + string="string" attributes-expression + +pm-expression: + string + element-list-expression +*/ + +namespace implementation +{ +struct Token +{ + enum Type + { + TK_LESS, + TK_GREATER, + TK_LESS_SLASH, + TK_SLASH_GREATER, + TK_EQUAL, + TK_QUOTE, + TK_DECL_BEGIN, + TK_DECL_END, + TK_NAME, + TK_END + }; + + Token(Type t) : type(t) {} + Token(const std::string& txt) : type(TK_NAME), name(txt) {} + + Type type; + std::string name; //filled if type == TK_NAME +}; + +class Scanner +{ +public: + Scanner(const std::string& stream) : stream_(stream), pos(stream_.begin()) + { + if (zen::startsWith(stream_, BYTE_ORDER_MARK_UTF8)) + pos += strLength(BYTE_ORDER_MARK_UTF8); + } + + Token nextToken() //throw XmlParsingError + { + //skip whitespace + pos = std::find_if(pos, stream_.end(), [](char c) { return !zen::isWhiteSpace(c); }); + + if (pos == stream_.end()) + return Token::TK_END; + + //skip XML comments + if (startsWith(xmlCommentBegin)) + { + auto it = std::search(pos + xmlCommentBegin.size(), stream_.end(), xmlCommentEnd.begin(), xmlCommentEnd.end()); + if (it != stream_.end()) + { + pos = it + xmlCommentEnd.size(); + return nextToken(); + } + } + + for (auto it = tokens.begin(); it != tokens.end(); ++it) + if (startsWith(it->first)) + { + pos += it->first.size(); + return it->second; + } + + auto nameEnd = std::find_if(pos, stream_.end(), [](char c) + { + return c == '<' || + c == '>' || + c == '=' || + c == '/' || + c == '\'' || + c == '\"' || + zen::isWhiteSpace(c); + }); + + if (nameEnd != pos) + { + std::string name(&*pos, nameEnd - pos); + pos = nameEnd; + return implementation::denormalize(name); + } + + //unknown token + throw XmlParsingError(posRow(), posCol()); + } + + std::string extractElementValue() + { + auto it = std::find_if(pos, stream_.end(), [](char c) + { + return c == '<' || + c == '>'; + }); + std::string output(pos, it); + pos = it; + return implementation::denormalize(output); + } + + std::string extractAttributeValue() + { + auto it = std::find_if(pos, stream_.end(), [](char c) + { + return c == '<' || + c == '>' || + c == '\'' || + c == '\"'; + }); + std::string output(pos, it); + pos = it; + return implementation::denormalize(output); + } + + size_t posRow() const //current row beginning with 0 + { + const size_t crSum = std::count(stream_.begin(), pos, '\r'); //carriage returns + const size_t nlSum = std::count(stream_.begin(), pos, '\n'); //new lines + assert(crSum == 0 || nlSum == 0 || crSum == nlSum); + return std::max(crSum, nlSum); //be compatible with Linux/Mac/Win + } + + size_t posCol() const //current col beginning with 0 + { + //seek beginning of line + for (auto it = pos; it != stream_.begin(); ) + { + --it; + if (*it == '\r' || *it == '\n') + return pos - it - 1; + } + return pos - stream_.begin(); + } + +private: + Scanner (const Scanner&) = delete; + Scanner& operator=(const Scanner&) = delete; + + bool startsWith(const std::string& prefix) const + { + if (stream_.end() - pos < static_cast<ptrdiff_t>(prefix.size())) + return false; + return std::equal(prefix.begin(), prefix.end(), pos); + } + + using TokenList = std::vector<std::pair<std::string, Token::Type>>; + const TokenList tokens + { + { "<?xml", Token::TK_DECL_BEGIN }, + { "?>", Token::TK_DECL_END }, + { "</", Token::TK_LESS_SLASH }, + { "/>", Token::TK_SLASH_GREATER }, + { "<", Token::TK_LESS }, //evaluate after TK_DECL_BEGIN! + { ">", Token::TK_GREATER }, + { "=", Token::TK_EQUAL }, + { "\"", Token::TK_QUOTE }, + { "\'", Token::TK_QUOTE }, + }; + + const std::string xmlCommentBegin = "<!--"; + const std::string xmlCommentEnd = "-->"; + + const std::string stream_; + std::string::const_iterator pos; +}; + + +class XmlParser +{ +public: + XmlParser(const std::string& stream) : + scn_(stream), + tk_(scn_.nextToken()) {} + + XmlDoc parse() //throw XmlParsingError + { + XmlDoc doc; + + //declaration (optional) + if (token().type == Token::TK_DECL_BEGIN) + { + nextToken(); + + while (token().type == Token::TK_NAME) + { + std::string attribName = token().name; + nextToken(); + + consumeToken(Token::TK_EQUAL); + expectToken(Token::TK_QUOTE); + std::string attribValue = scn_.extractAttributeValue(); + nextToken(); + + consumeToken(Token::TK_QUOTE); + + if (attribName == "version") + doc.setVersion(attribValue); + else if (attribName == "encoding") + doc.setEncoding(attribValue); + else if (attribName == "standalone") + doc.setStandalone(attribValue); + } + consumeToken(Token::TK_DECL_END); + } + + XmlElement dummy; + parseChildElements(dummy); + + auto itPair = dummy.getChildren(); + if (itPair.first != itPair.second) + doc.root().swapSubtree(*itPair.first); + + expectToken(Token::TK_END); + return doc; + } + +private: + XmlParser (const XmlParser&) = delete; + XmlParser& operator=(const XmlParser&) = delete; + + void parseChildElements(XmlElement& parent) + { + while (token().type == Token::TK_LESS) + { + nextToken(); + + expectToken(Token::TK_NAME); + std::string elementName = token().name; + nextToken(); + + XmlElement& newElement = parent.addChild(elementName); + + parseAttributes(newElement); + + if (token().type == Token::TK_SLASH_GREATER) //empty element + { + nextToken(); + continue; + } + + expectToken(Token::TK_GREATER); + std::string elementValue = scn_.extractElementValue(); + nextToken(); + + //no support for mixed-mode content + if (token().type == Token::TK_LESS) //structured element + parseChildElements(newElement); + else //value element + newElement.setValue(elementValue); + + consumeToken(Token::TK_LESS_SLASH); + + if (token().type != Token::TK_NAME || + elementName != token().name) + throw XmlParsingError(scn_.posRow(), scn_.posCol()); + nextToken(); + + consumeToken(Token::TK_GREATER); + } + } + + void parseAttributes(XmlElement& element) + { + while (token().type == Token::TK_NAME) + { + std::string attribName = token().name; + nextToken(); + + consumeToken(Token::TK_EQUAL); + expectToken(Token::TK_QUOTE); + std::string attribValue = scn_.extractAttributeValue(); + nextToken(); + + consumeToken(Token::TK_QUOTE); + element.setAttribute(attribName, attribValue); + } + } + + const Token& token() const { return tk_; } + void nextToken() { tk_ = scn_.nextToken(); } + + void consumeToken(Token::Type t) //throw XmlParsingError + { + expectToken(t); //throw XmlParsingError + nextToken(); + } + + void expectToken(Token::Type t) //throw XmlParsingError + { + if (token().type != t) + throw XmlParsingError(scn_.posRow(), scn_.posCol()); + } + + Scanner scn_; + Token tk_; +}; +} + +inline +XmlDoc parse(const std::string& stream) //throw XmlParsingError +{ + return implementation::XmlParser(stream).parse(); //throw XmlParsingError +} +} + +#endif //PARSER_H_81248670213764583021432 diff --git a/zenXml/zenxml/xml.h b/zenXml/zenxml/xml.h new file mode 100755 index 00000000..d3ce2814 --- /dev/null +++ b/zenXml/zenxml/xml.h @@ -0,0 +1,15 @@ +// ***************************************************************************** +// * This file is part of the FreeFileSync project. It is distributed under * +// * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * +// * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * +// ***************************************************************************** + +#ifndef XML_H_349578228034572457454554 +#define XML_H_349578228034572457454554 + +#include "bind.h" + +/// The zen::Xml namespace +namespace zen {} + +#endif //XML_H_349578228034572457454554 |