summaryrefslogtreecommitdiff
path: root/shared/parse_lng.h
diff options
context:
space:
mode:
authorDaniel Wilhelm <daniel@wili.li>2014-04-18 17:15:16 +0200
committerDaniel Wilhelm <daniel@wili.li>2014-04-18 17:15:16 +0200
commitbd6336c629841c6db3a6ca53a936d629d34db53b (patch)
tree3721ef997864108df175ce677a8a7d4342a6f1d2 /shared/parse_lng.h
parent4.0 (diff)
downloadFreeFileSync-bd6336c629841c6db3a6ca53a936d629d34db53b.tar.gz
FreeFileSync-bd6336c629841c6db3a6ca53a936d629d34db53b.tar.bz2
FreeFileSync-bd6336c629841c6db3a6ca53a936d629d34db53b.zip
4.1
Diffstat (limited to 'shared/parse_lng.h')
-rw-r--r--shared/parse_lng.h618
1 files changed, 0 insertions, 618 deletions
diff --git a/shared/parse_lng.h b/shared/parse_lng.h
deleted file mode 100644
index 034deb68..00000000
--- a/shared/parse_lng.h
+++ /dev/null
@@ -1,618 +0,0 @@
-// **************************************************************************
-// * This file is part of the FreeFileSync project. It is distributed under *
-// * GNU General Public License: http://www.gnu.org/licenses/gpl.html *
-// * Copyright (C) 2008-2011 ZenJu (zhnmju123 AT gmx.de) *
-// **************************************************************************
-
-#ifndef PARSE_LNG_HEADER_INCLUDED
-#define PARSE_LNG_HEADER_INCLUDED
-
-#include <string>
-#include <sstream>
-#include <map>
-#include <set>
-#include <vector>
-#include <algorithm>
-#include <functional>
-#include <cctype>
-#include <list>
-#include <stdexcept>
-
-
-namespace lngfile
-{
-//singular forms
-typedef std::map <std::string, std::string>
-TranslationMap; //orig |-> translation
-
-//plural forms
-typedef std::pair<std::string, std::string>
-SingularPluralPair; //1 house| n houses
-
-typedef std::vector<std::string>
-PluralForms; //1 dom | 2 domy | 5 domów
-
-typedef std::map <SingularPluralPair, PluralForms>
-TranslationPluralMap; //(sing/plu) |-> pluralforms
-
-struct TransHeader
-{
- std::string languageName; //display name: "English (UK)"
- std::string translatorName; //"ZenJu"
- std::string localeName; //ISO 639 language code + ISO 3166 country code, e.g. "en_GB", or "en_US"
- std::string flagFile; //"england.png"
- int pluralCount;
- std::string pluralDefinition; //"nplurals=2; plural= n == 1 ? 0 : 1"
-};
-
-
-struct ParsingError
-{
- ParsingError(size_t rowNo, size_t colNo) : row(rowNo), col(colNo) {}
- size_t row;
- size_t col;
-};
-void parseLng(const std::string& fileStream, TransHeader& header, TranslationMap& out, TranslationPluralMap& pluralOut); //throw ParsingError
-void parseHeader(const std::string& fileStream, TransHeader& header); //throw ParsingError
-
-class TranslationList; //unordered list of unique translation items
-void generateLng(const TranslationList& in, const TransHeader& header, std::string& fileStream);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-//--------------------------- implementation ---------------------------
-class TranslationList //unordered list of unique translation items
-{
-public:
- void addItem(const std::string& orig, const std::string& trans)
- {
- if (!transUnique.insert(orig).second) return;
-
- dump.push_back(RegularItem(std::make_pair(orig, trans)));
- sequence.push_back(&dump.back());
- }
- void addPluralItem(const SingularPluralPair& orig, const PluralForms& trans)
- {
- if (!pluralUnique.insert(orig).second) return;
-
- dumpPlural.push_back(PluralItem(std::make_pair(orig, trans)));
- sequence.push_back(&dumpPlural.back());
- }
-
- bool untranslatedTextExists() const
- {
- for (std::list<RegularItem>::const_iterator i = dump.begin(); i != dump.end(); ++i)
- if (i->value.second.empty())
- return true;
- for (std::list<PluralItem>::const_iterator i = dumpPlural.begin(); i != dumpPlural.end(); ++i)
- if (i->value.second.empty())
- return true;
- return false;
- }
-
-private:
- friend void generateLng(const TranslationList& in, const TransHeader& header, std::string& fileStream);
-
- struct Item {virtual ~Item() {} };
- struct RegularItem : public Item { RegularItem(const TranslationMap ::value_type& val) : value(val) {} TranslationMap ::value_type value; };
- struct PluralItem : public Item { PluralItem (const TranslationPluralMap::value_type& val) : value(val) {} TranslationPluralMap::value_type value; };
-
- std::vector<Item*> sequence; //dynamic list of translation elements
- std::list<RegularItem> dump; //manage memory
- std::list<PluralItem> dumpPlural; //manage memory
-
- std::set<TranslationMap ::key_type> transUnique; //check uniqueness
- std::set<TranslationPluralMap::key_type> pluralUnique; //
-};
-
-
-struct Token
-{
- enum Type
- {
- //header information
- TK_HEADER_BEGIN,
- TK_HEADER_END,
- TK_LANG_NAME_BEGIN,
- TK_LANG_NAME_END,
- TK_TRANS_NAME_BEGIN,
- TK_TRANS_NAME_END,
- TK_LOCALE_NAME_BEGIN,
- TK_LOCALE_NAME_END,
- TK_FLAG_FILE_BEGIN,
- TK_FLAG_FILE_END,
- TK_PLURAL_COUNT_BEGIN,
- TK_PLURAL_COUNT_END,
- TK_PLURAL_DEF_BEGIN,
- TK_PLURAL_DEF_END,
-
- //item level
- TK_SRC_BEGIN,
- TK_SRC_END,
- TK_TRG_BEGIN,
- TK_TRG_END,
- TK_TEXT,
- TK_PLURAL_BEGIN,
- TK_PLURAL_END,
- TK_END
- };
-
- Token(Type t) : type(t) {}
- Type type;
-
- std::string text;
-};
-
-
-class KnownTokens
-{
-public:
- typedef std::map<Token::Type, std::string> TokenMap;
-
- static const TokenMap& asList()
- {
- static KnownTokens inst;
- return inst.tokens;
- }
-
- static std::string text(Token::Type t)
- {
- TokenMap::const_iterator iter = asList().find(t);
- return iter != asList().end() ? iter->second : std::string();
- }
-
-private:
- KnownTokens()
- {
- //header information
- tokens.insert(std::make_pair(Token::TK_HEADER_BEGIN, "<header>"));
- tokens.insert(std::make_pair(Token::TK_HEADER_END, "</header>"));
- tokens.insert(std::make_pair(Token::TK_LANG_NAME_BEGIN, "<language name>"));
- tokens.insert(std::make_pair(Token::TK_LANG_NAME_END, "</language name>"));
- tokens.insert(std::make_pair(Token::TK_TRANS_NAME_BEGIN, "<translator>"));
- tokens.insert(std::make_pair(Token::TK_TRANS_NAME_END, "</translator>"));
- tokens.insert(std::make_pair(Token::TK_LOCALE_NAME_BEGIN, "<locale>"));
- tokens.insert(std::make_pair(Token::TK_LOCALE_NAME_END, "</locale>"));
- tokens.insert(std::make_pair(Token::TK_FLAG_FILE_BEGIN, "<flag file>"));
- tokens.insert(std::make_pair(Token::TK_FLAG_FILE_END, "</flag file>"));
- tokens.insert(std::make_pair(Token::TK_PLURAL_COUNT_BEGIN, "<plural forms>"));
- tokens.insert(std::make_pair(Token::TK_PLURAL_COUNT_END, "</plural forms>"));
- tokens.insert(std::make_pair(Token::TK_PLURAL_DEF_BEGIN, "<plural definition>"));
- tokens.insert(std::make_pair(Token::TK_PLURAL_DEF_END, "</plural definition>"));
-
- //item level
- tokens.insert(std::make_pair(Token::TK_SRC_BEGIN, "<source>"));
- tokens.insert(std::make_pair(Token::TK_SRC_END, "</source>"));
- tokens.insert(std::make_pair(Token::TK_TRG_BEGIN, "<target>"));
- tokens.insert(std::make_pair(Token::TK_TRG_END, "</target>"));
- tokens.insert(std::make_pair(Token::TK_PLURAL_BEGIN, "<pluralform>"));
- tokens.insert(std::make_pair(Token::TK_PLURAL_END, "</pluralform>"));
- }
- TokenMap tokens;
-};
-
-struct IsWhiteSpace : public std::unary_function<char, bool>
-{
- bool operator()(char c) const
- {
- const unsigned char usc = c; //caveat 1: std::isspace() takes an int, but expects an unsigned char
- return usc < 128 && //caveat 2: some parts of UTF-8 chars are erroneously seen as whitespace, e.g. the a0 from "\xec\x8b\a0" (MSVC)
- std::isspace(usc) != 0; //[!]
- }
-};
-
-class Scanner
-{
-public:
- Scanner(const std::string& fileStream) : stream(fileStream), pos(stream.begin()) {}
-
- Token nextToken()
- {
- //skip whitespace
- pos = std::find_if(pos, stream.end(), std::not1(IsWhiteSpace()));
-
- if (pos == stream.end())
- return Token(Token::TK_END);
-
- for (KnownTokens::TokenMap::const_iterator i = KnownTokens::asList().begin(); i != KnownTokens::asList().end(); ++i)
- if (startsWith(i->second))
- {
- pos += i->second.size();
- return Token(i->first);
- }
-
- //rest must be "text"
- std::string::const_iterator textBegin = pos;
- while (pos != stream.end() && !startsWithKnownTag())
- pos = std::find(pos + 1, stream.end(), '<');
-
- std::string text(textBegin, pos);
-
- normalize(text); //remove whitespace from end ect.
-
- if (text.empty() && pos == stream.end())
- return Token(Token::TK_END);
-
- Token out(Token::TK_TEXT);
- out.text = text;
- return out;
- }
-
- std::pair<size_t, size_t> position() const //current (row/col) beginning with 1
- {
- //seek last line break
- std::string::const_iterator iter = pos;
- while (iter != stream.begin() && *iter != '\n')
- --iter;
-
- return std::make_pair(std::count(stream.begin(), pos, '\n') + 1, pos - iter);
- }
-
-private:
- bool startsWithKnownTag() const
- {
- for (KnownTokens::TokenMap::const_iterator i = KnownTokens::asList().begin(); i != KnownTokens::asList().end(); ++i)
- if (startsWith(i->second))
- return true;
- return false;
- }
-
- bool startsWith(const std::string& prefix) const
- {
- if (stream.end() - pos < static_cast<int>(prefix.size()))
- return false;
- return std::equal(prefix.begin(), prefix.end(), pos);
- }
-
- static void normalize(std::string& text)
- {
- //remmove whitespace from end
- while (!text.empty() && IsWhiteSpace()(*text.rbegin()))
- text.resize(text.size() - 1);
-
- //ensure c-style line breaks
-
- //Delimiter:
- //----------
- //Linux: 0xA \n
- //Mac: 0xD \r
- //Win: 0xD 0xA \r\n <- language files are in Windows format
- if (text.find('\r') != std::string::npos)
- {
- std::string tmp;
- for (std::string::const_iterator i = text.begin(); i != text.end(); ++i)
- if (*i == '\r')
- {
- std::string::const_iterator next = i + 1;
- if (next != text.end() && *next == '\n')
- ++i;
- tmp += '\n';
- }
- else
- tmp += *i;
- text = tmp;
- }
- }
-
- const std::string stream;
- std::string::const_iterator pos;
-};
-
-template <class C, class T>
-inline
-std::basic_string<C> numberToString(const T& number) //convert number to string the C++ way
-{
- std::basic_ostringstream<C> ss;
- ss << number;
- return ss.str();
-}
-
-template <class T, class C>
-inline
-T stringToNumber(const std::basic_string<C>& str) //convert string to number the C++ way
-{
- T number = 0;
- std::basic_istringstream<C>(str) >> number;
- return number;
-}
-
-
-class LngParser
-{
-public:
- LngParser(const std::string& fileStream) : scn(fileStream), tk(scn.nextToken()) {}
-
- void parse(TranslationMap& out, TranslationPluralMap& pluralOut, TransHeader& header)
- {
- //header
- parseHeader(header);
-
- //items
- while (token().type != Token::TK_END)
- parseRegular(out, pluralOut, header.pluralCount);
- }
-
- void parseHeader(TransHeader& header)
- {
- consumeToken(Token::TK_HEADER_BEGIN);
-
- consumeToken(Token::TK_LANG_NAME_BEGIN);
- header.languageName = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_LANG_NAME_END);
-
- consumeToken(Token::TK_TRANS_NAME_BEGIN);
- header.translatorName = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_TRANS_NAME_END);
-
- consumeToken(Token::TK_LOCALE_NAME_BEGIN);
- header.localeName = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_LOCALE_NAME_END);
-
- consumeToken(Token::TK_FLAG_FILE_BEGIN);
- header.flagFile = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_FLAG_FILE_END);
-
- consumeToken(Token::TK_PLURAL_COUNT_BEGIN);
- header.pluralCount = stringToNumber<int>(tk.text);
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_PLURAL_COUNT_END);
-
- consumeToken(Token::TK_PLURAL_DEF_BEGIN);
- header.pluralDefinition = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_PLURAL_DEF_END);
-
- consumeToken(Token::TK_HEADER_END);
- }
-
-private:
- void parseRegular(TranslationMap& out, TranslationPluralMap& pluralOut, int formCount)
- {
- consumeToken(Token::TK_SRC_BEGIN);
-
- if (token().type == Token::TK_PLURAL_BEGIN)
- return parsePlural(pluralOut, formCount);
-
- std::string original = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_SRC_END);
-
- consumeToken(Token::TK_TRG_BEGIN);
- std::string translation;
- if (token().type == Token::TK_TEXT)
- {
- translation = token().text;
- nextToken();
- }
- consumeToken(Token::TK_TRG_END);
-
- if (!translation.empty()) //only add if translation is existing
- out.insert(std::make_pair(original, translation));
- }
-
- void parsePlural(TranslationPluralMap& pluralOut, int formCount)
- {
- //Token::TK_SRC_BEGIN already consumed
-
- consumeToken(Token::TK_PLURAL_BEGIN);
- std::string engSingular = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_PLURAL_END);
-
- consumeToken(Token::TK_PLURAL_BEGIN);
- std::string engPlural = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_PLURAL_END);
-
- consumeToken(Token::TK_SRC_END);
-
- consumeToken(Token::TK_TRG_BEGIN);
-
- PluralForms pluralList;
- while (token().type == Token::TK_PLURAL_BEGIN)
- {
- consumeToken(Token::TK_PLURAL_BEGIN);
- std::string pluralForm = tk.text;
- consumeToken(Token::TK_TEXT);
- consumeToken(Token::TK_PLURAL_END);
- pluralList.push_back(pluralForm);
-
- }
-
- if (!pluralList.empty() && static_cast<int>(pluralList.size()) != formCount) //invalid number of plural forms
- throw ParsingError(scn.position().first, scn.position().second);
-
- consumeToken(Token::TK_TRG_END);
-
- if (!pluralList.empty()) //only add if translation is existing
- pluralOut.insert(std::make_pair(SingularPluralPair(engSingular, engPlural), pluralList));
- }
-
-
- void nextToken() { tk = scn.nextToken(); }
- const Token& token() const { return tk; }
-
- void consumeToken(Token::Type t)
- {
- if (token().type != t)
- throw ParsingError(scn.position().first, scn.position().second);
- nextToken();
- }
-
- Scanner scn;
- Token tk;
-};
-
-
-inline
-void parseLng(const std::string& fileStream, TransHeader& header, TranslationMap& out, TranslationPluralMap& pluralOut) //throw ParsingError
-{
- out.clear();
- pluralOut.clear();
-
- //skip UTF-8 Byte Ordering Mark
- const bool hasBOM = fileStream.size() >= 3 && fileStream.substr(0, 3) == "\xef\xbb\xbf";
-
- LngParser prs(hasBOM ? fileStream.substr(3) : fileStream);
- prs.parse(out, pluralOut, header);
-}
-
-
-inline
-void parseHeader(const std::string& fileStream, TransHeader& header) //throw ParsingError
-{
- //skip UTF-8 Byte Ordering Mark
- const bool hasBOM = fileStream.size() >= 3 && fileStream.substr(0, 3) == "\xef\xbb\xbf";
-
- LngParser prs(hasBOM ? fileStream.substr(3) : fileStream);
- prs.parseHeader(header);
-}
-
-
-inline
-void formatMultiLineText(std::string& text)
-{
- if (text.find('\n') != std::string::npos) //multiple lines
- {
- if (*text.begin() != '\n')
- text = '\n' + text;
- if (*text.rbegin() != '\n')
- text += '\n';
- }
-}
-
-
-const std::string LB = "\n";
-const std::string TAB = "\t";
-
-
-void generateLng(const TranslationList& in, const TransHeader& header, std::string& fileStream)
-{
- //header
- fileStream += KnownTokens::text(Token::TK_HEADER_BEGIN) + LB;
-
- fileStream += TAB + KnownTokens::text(Token::TK_LANG_NAME_BEGIN);
- fileStream += header.languageName;
- fileStream += KnownTokens::text(Token::TK_LANG_NAME_END) + LB;
-
- fileStream += TAB + KnownTokens::text(Token::TK_TRANS_NAME_BEGIN);
- fileStream += header.translatorName;
- fileStream += KnownTokens::text(Token::TK_TRANS_NAME_END) + LB;
-
- fileStream += TAB + KnownTokens::text(Token::TK_LOCALE_NAME_BEGIN);
- fileStream += header.localeName;
- fileStream += KnownTokens::text(Token::TK_LOCALE_NAME_END) + LB;
-
- fileStream += TAB + KnownTokens::text(Token::TK_FLAG_FILE_BEGIN);
- fileStream += header.flagFile;
- fileStream += KnownTokens::text(Token::TK_FLAG_FILE_END) + LB;
-
- fileStream += TAB + KnownTokens::text(Token::TK_PLURAL_COUNT_BEGIN);
- fileStream += numberToString<char>(header.pluralCount);
- fileStream += KnownTokens::text(Token::TK_PLURAL_COUNT_END) + LB;
-
- fileStream += TAB + KnownTokens::text(Token::TK_PLURAL_DEF_BEGIN);
- fileStream += header.pluralDefinition;
- fileStream += KnownTokens::text(Token::TK_PLURAL_DEF_END) + LB;
-
- fileStream += KnownTokens::text(Token::TK_HEADER_END) + LB;
-
- fileStream += LB;
-
-
- //items
- for (std::vector<TranslationList::Item*>::const_iterator i = in.sequence.begin(); i != in.sequence.end(); ++i)
- {
- const TranslationList::RegularItem* regular = dynamic_cast<const TranslationList::RegularItem*>(*i);
- const TranslationList::PluralItem* plural = dynamic_cast<const TranslationList::PluralItem*>(*i);
-
- if (regular)
- {
- std::string original = regular->value.first;
- std::string translation = regular->value.second;
-
- formatMultiLineText(original);
- formatMultiLineText(translation);
-
- fileStream += KnownTokens::text(Token::TK_SRC_BEGIN);
- fileStream += original;
- fileStream += KnownTokens::text(Token::TK_SRC_END) + LB;
-
- fileStream += KnownTokens::text(Token::TK_TRG_BEGIN);
- fileStream += translation;
- fileStream += KnownTokens::text(Token::TK_TRG_END) + LB + LB;
-
- }
- else if (plural)
- {
- std::string engSingular = plural->value.first.first;
- std::string engPlural = plural->value.first.second;
- const PluralForms& forms = plural->value.second;
-
- formatMultiLineText(engSingular);
- formatMultiLineText(engPlural);
-
- fileStream += KnownTokens::text(Token::TK_SRC_BEGIN) + LB;
- fileStream += KnownTokens::text(Token::TK_PLURAL_BEGIN);
- fileStream += engSingular;
- fileStream += KnownTokens::text(Token::TK_PLURAL_END) + LB;
- fileStream += KnownTokens::text(Token::TK_PLURAL_BEGIN);
- fileStream += engPlural;
- fileStream += KnownTokens::text(Token::TK_PLURAL_END) + LB;
- fileStream += KnownTokens::text(Token::TK_SRC_END) + LB;
-
- fileStream += KnownTokens::text(Token::TK_TRG_BEGIN);
- if (!forms.empty()) fileStream += LB;
-
- for (PluralForms::const_iterator j = forms.begin(); j != forms.end(); ++j)
- {
- std::string plForm = *j;
- formatMultiLineText(plForm);
-
- fileStream += KnownTokens::text(Token::TK_PLURAL_BEGIN);
- fileStream += plForm;
- fileStream += KnownTokens::text(Token::TK_PLURAL_END) + LB;
- }
- fileStream += KnownTokens::text(Token::TK_TRG_END) + LB + LB;
- }
- else
- {
- throw std::logic_error("that's what you get for brittle design ;)");
- }
- }
-}
-}
-
-#endif //PARSE_LNG_HEADER_INCLUDED
bgstack15