summaryrefslogtreecommitdiff
path: root/lib/parse_lng.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/parse_lng.h')
-rw-r--r--lib/parse_lng.h191
1 files changed, 79 insertions, 112 deletions
diff --git a/lib/parse_lng.h b/lib/parse_lng.h
index 92564a1e..b5afe50c 100644
--- a/lib/parse_lng.h
+++ b/lib/parse_lng.h
@@ -51,19 +51,7 @@ void parseLng(const std::string& fileStream, TransHeader& header, TranslationMap
void parseHeader(const std::string& fileStream, TransHeader& header); //throw ParsingError
class TranslationList; //unordered list of unique translation items
-void generateLng(const TranslationList& in, const TransHeader& header, std::string& fileStream);
-
-
-
-
-
-
-
-
-
-
-
-
+std::string generateLng(const TranslationList& in, const TransHeader& header);
@@ -114,7 +102,7 @@ public:
}
private:
- friend void generateLng(const TranslationList& in, const TransHeader& header, std::string& fileStream);
+ friend std::string generateLng(const TranslationList& in, const TransHeader& header);
struct Item {virtual ~Item() {} };
struct RegularItem : public Item { RegularItem(const TranslationMap ::value_type& val) : value(val) {} TranslationMap ::value_type value; };
@@ -218,7 +206,11 @@ private:
class Scanner
{
public:
- Scanner(const std::string& fileStream) : stream(fileStream), pos(stream.begin()) {}
+ Scanner(const std::string& fileStream) : stream(fileStream), pos(stream.begin())
+ {
+ if (zen::startsWith(stream, zen::BYTE_ORDER_MARK_UTF8))
+ pos += zen::strLength(zen::BYTE_ORDER_MARK_UTF8);
+ }
Token nextToken()
{
@@ -228,19 +220,19 @@ public:
if (pos == stream.end())
return Token(Token::TK_END);
- for (KnownTokens::TokenMap::const_iterator i = KnownTokens::asList().begin(); i != KnownTokens::asList().end(); ++i)
- if (startsWith(i->second))
+ for (auto it = KnownTokens::asList().begin(); it != KnownTokens::asList().end(); ++it)
+ if (startsWith(it->second))
{
- pos += i->second.size();
- return Token(i->first);
+ pos += it->second.size();
+ return Token(it->first);
}
//rest must be "text"
- std::string::const_iterator textBegin = pos;
+ std::string::const_iterator itBegin = pos;
while (pos != stream.end() && !startsWithKnownTag())
pos = std::find(pos + 1, stream.end(), '<');
- std::string text(textBegin, pos);
+ std::string text(itBegin, pos);
normalize(text); //remove whitespace from end etc.
@@ -255,13 +247,8 @@ public:
size_t posRow() const //current row beginning with 0
{
//count line endings
- size_t crSum = 0; //carriage returns
- size_t nlSum = 0; //new lines
- for (auto it = stream.begin(); it != pos; ++it)
- if (*it == '\r')
- ++crSum;
- else if (*it == '\n')
- ++nlSum;
+ const size_t crSum = std::count(stream.begin(), pos, '\r'); //carriage returns
+ const size_t nlSum = std::count(stream.begin(), pos, '\n'); //new lines
assert(crSum == 0 || nlSum == 0 || crSum == nlSum);
return std::max(crSum, nlSum); //be compatible with Linux/Mac/Win
}
@@ -294,32 +281,15 @@ private:
static void normalize(std::string& text)
{
- //remmove whitespace from end
- while (!text.empty() && zen::isWhiteSpace(*text.rbegin()))
- text.resize(text.size() - 1);
-
- //ensure c-style line breaks
+ zen::trim(text); //remove whitespace from end
//Delimiter:
//----------
//Linux: 0xA \n
//Mac: 0xD \r
//Win: 0xD 0xA \r\n <- language files are in Windows format
- if (text.find('\r') != std::string::npos)
- {
- std::string tmp;
- for (std::string::const_iterator i = text.begin(); i != text.end(); ++i)
- if (*i == '\r')
- {
- std::string::const_iterator next = i + 1;
- if (next != text.end() && *next == '\n')
- ++i;
- tmp += '\n';
- }
- else
- tmp += *i;
- text = tmp;
- }
+ zen::replace(text, "\r\n", '\n'); //
+ zen::replace(text, "\r", '\n'); //ensure c-style line breaks
}
const std::string stream;
@@ -334,7 +304,6 @@ public:
void parse(TranslationMap& out, TranslationPluralMap& pluralOut, TransHeader& header)
{
- //header
parseHeader(header);
//items
@@ -399,9 +368,7 @@ private:
nextToken();
}
consumeToken(Token::TK_TRG_END);
-
- if (!translation.empty()) //only add if translation is existing
- out.insert(std::make_pair(original, translation));
+ out.insert(std::make_pair(original, translation));
}
void parsePlural(TranslationPluralMap& pluralOut, int formCount)
@@ -437,20 +404,23 @@ private:
throw ParsingError(scn.posRow(), scn.posCol());
consumeToken(Token::TK_TRG_END);
-
- if (!pluralList.empty()) //only add if translation is existing
- pluralOut.insert(std::make_pair(SingularPluralPair(engSingular, engPlural), pluralList));
+ pluralOut.insert(std::make_pair(SingularPluralPair(engSingular, engPlural), pluralList));
}
void nextToken() { tk = scn.nextToken(); }
const Token& token() const { return tk; }
- void consumeToken(Token::Type t)
+ void consumeToken(Token::Type t) //throw ParsingError
+ {
+ expectToken(t); //throw ParsingError
+ nextToken();
+ }
+
+ void expectToken(Token::Type t) //throw ParsingError
{
if (token().type != t)
throw ParsingError(scn.posRow(), scn.posCol());
- nextToken();
}
Scanner scn;
@@ -464,24 +434,22 @@ void parseLng(const std::string& fileStream, TransHeader& header, TranslationMap
out.clear();
pluralOut.clear();
- //skip UTF-8 Byte Ordering Mark
- LngParser prs(zen::startsWith(fileStream, zen::BYTE_ORDER_MARK_UTF8) ? fileStream.substr(3) : fileStream);
- prs.parse(out, pluralOut, header);
+ LngParser(fileStream).parse(out, pluralOut, header);
}
inline
void parseHeader(const std::string& fileStream, TransHeader& header) //throw ParsingError
{
- //skip UTF-8 Byte Ordering Mark
- LngParser prs(zen::startsWith(fileStream, zen::BYTE_ORDER_MARK_UTF8) ? fileStream.substr(3) : fileStream);
- prs.parseHeader(header);
+ LngParser(fileStream).parseHeader(header);
}
inline
void formatMultiLineText(std::string& text)
{
+ assert(!zen::contains(text, "\r\n"));
+
if (text.find('\n') != std::string::npos) //multiple lines
{
if (*text.begin() != '\n')
@@ -492,49 +460,46 @@ void formatMultiLineText(std::string& text)
}
-const std::string LB = "\n";
-const std::string TAB = "\t";
-
-
-void generateLng(const TranslationList& in, const TransHeader& header, std::string& fileStream)
+std::string generateLng(const TranslationList& in, const TransHeader& header)
{
+ std::string out;
//header
- fileStream += KnownTokens::text(Token::TK_HEADER_BEGIN) + LB;
+ out += KnownTokens::text(Token::TK_HEADER_BEGIN) + '\n';
- fileStream += TAB + KnownTokens::text(Token::TK_LANG_NAME_BEGIN);
- fileStream += header.languageName;
- fileStream += KnownTokens::text(Token::TK_LANG_NAME_END) + LB;
+ out += '\t' + KnownTokens::text(Token::TK_LANG_NAME_BEGIN);
+ out += header.languageName;
+ out += KnownTokens::text(Token::TK_LANG_NAME_END) + '\n';
- fileStream += TAB + KnownTokens::text(Token::TK_TRANS_NAME_BEGIN);
- fileStream += header.translatorName;
- fileStream += KnownTokens::text(Token::TK_TRANS_NAME_END) + LB;
+ out += '\t' + KnownTokens::text(Token::TK_TRANS_NAME_BEGIN);
+ out += header.translatorName;
+ out += KnownTokens::text(Token::TK_TRANS_NAME_END) + '\n';
- fileStream += TAB + KnownTokens::text(Token::TK_LOCALE_NAME_BEGIN);
- fileStream += header.localeName;
- fileStream += KnownTokens::text(Token::TK_LOCALE_NAME_END) + LB;
+ out += '\t' + KnownTokens::text(Token::TK_LOCALE_NAME_BEGIN);
+ out += header.localeName;
+ out += KnownTokens::text(Token::TK_LOCALE_NAME_END) + '\n';
- fileStream += TAB + KnownTokens::text(Token::TK_FLAG_FILE_BEGIN);
- fileStream += header.flagFile;
- fileStream += KnownTokens::text(Token::TK_FLAG_FILE_END) + LB;
+ out += '\t' + KnownTokens::text(Token::TK_FLAG_FILE_BEGIN);
+ out += header.flagFile;
+ out += KnownTokens::text(Token::TK_FLAG_FILE_END) + '\n';
- fileStream += TAB + KnownTokens::text(Token::TK_PLURAL_COUNT_BEGIN);
- fileStream += zen::numberTo<std::string>(header.pluralCount);
- fileStream += KnownTokens::text(Token::TK_PLURAL_COUNT_END) + LB;
+ out += '\t' + KnownTokens::text(Token::TK_PLURAL_COUNT_BEGIN);
+ out += zen::numberTo<std::string>(header.pluralCount);
+ out += KnownTokens::text(Token::TK_PLURAL_COUNT_END) + '\n';
- fileStream += TAB + KnownTokens::text(Token::TK_PLURAL_DEF_BEGIN);
- fileStream += header.pluralDefinition;
- fileStream += KnownTokens::text(Token::TK_PLURAL_DEF_END) + LB;
+ out += '\t' + KnownTokens::text(Token::TK_PLURAL_DEF_BEGIN);
+ out += header.pluralDefinition;
+ out += KnownTokens::text(Token::TK_PLURAL_DEF_END) + '\n';
- fileStream += KnownTokens::text(Token::TK_HEADER_END) + LB;
+ out += KnownTokens::text(Token::TK_HEADER_END) + '\n';
- fileStream += LB;
+ out += '\n';
//items
- for (std::vector<TranslationList::Item*>::const_iterator i = in.sequence.begin(); i != in.sequence.end(); ++i)
+ for (auto it = in.sequence.begin(); it != in.sequence.end(); ++it)
{
- const TranslationList::RegularItem* regular = dynamic_cast<const TranslationList::RegularItem*>(*i);
- const TranslationList::PluralItem* plural = dynamic_cast<const TranslationList::PluralItem*>(*i);
+ const TranslationList::RegularItem* regular = dynamic_cast<const TranslationList::RegularItem*>(*it);
+ const TranslationList::PluralItem* plural = dynamic_cast<const TranslationList::PluralItem* >(*it);
if (regular)
{
@@ -544,13 +509,13 @@ void generateLng(const TranslationList& in, const TransHeader& header, std::stri
formatMultiLineText(original);
formatMultiLineText(translation);
- fileStream += KnownTokens::text(Token::TK_SRC_BEGIN);
- fileStream += original;
- fileStream += KnownTokens::text(Token::TK_SRC_END) + LB;
+ out += KnownTokens::text(Token::TK_SRC_BEGIN);
+ out += original;
+ out += KnownTokens::text(Token::TK_SRC_END) + '\n';
- fileStream += KnownTokens::text(Token::TK_TRG_BEGIN);
- fileStream += translation;
- fileStream += KnownTokens::text(Token::TK_TRG_END) + LB + LB;
+ out += KnownTokens::text(Token::TK_TRG_BEGIN);
+ out += translation;
+ out += KnownTokens::text(Token::TK_TRG_END) + '\n' + '\n';
}
else if (plural)
@@ -562,34 +527,36 @@ void generateLng(const TranslationList& in, const TransHeader& header, std::stri
formatMultiLineText(engSingular);
formatMultiLineText(engPlural);
- fileStream += KnownTokens::text(Token::TK_SRC_BEGIN) + LB;
- fileStream += KnownTokens::text(Token::TK_PLURAL_BEGIN);
- fileStream += engSingular;
- fileStream += KnownTokens::text(Token::TK_PLURAL_END) + LB;
- fileStream += KnownTokens::text(Token::TK_PLURAL_BEGIN);
- fileStream += engPlural;
- fileStream += KnownTokens::text(Token::TK_PLURAL_END) + LB;
- fileStream += KnownTokens::text(Token::TK_SRC_END) + LB;
+ out += KnownTokens::text(Token::TK_SRC_BEGIN) + '\n';
+ out += KnownTokens::text(Token::TK_PLURAL_BEGIN);
+ out += engSingular;
+ out += KnownTokens::text(Token::TK_PLURAL_END) + '\n';
+ out += KnownTokens::text(Token::TK_PLURAL_BEGIN);
+ out += engPlural;
+ out += KnownTokens::text(Token::TK_PLURAL_END) + '\n';
+ out += KnownTokens::text(Token::TK_SRC_END) + '\n';
- fileStream += KnownTokens::text(Token::TK_TRG_BEGIN);
- if (!forms.empty()) fileStream += LB;
+ out += KnownTokens::text(Token::TK_TRG_BEGIN);
+ if (!forms.empty()) out += '\n';
for (PluralForms::const_iterator j = forms.begin(); j != forms.end(); ++j)
{
std::string plForm = *j;
formatMultiLineText(plForm);
- fileStream += KnownTokens::text(Token::TK_PLURAL_BEGIN);
- fileStream += plForm;
- fileStream += KnownTokens::text(Token::TK_PLURAL_END) + LB;
+ out += KnownTokens::text(Token::TK_PLURAL_BEGIN);
+ out += plForm;
+ out += KnownTokens::text(Token::TK_PLURAL_END) + '\n';
}
- fileStream += KnownTokens::text(Token::TK_TRG_END) + LB + LB;
+ out += KnownTokens::text(Token::TK_TRG_END) + '\n' + '\n';
}
else
{
throw std::logic_error("that's what you get for brittle design ;)");
}
}
+ assert(!zen::contains(out, "\r\n") && !zen::contains(out, "\r"));
+ return zen::replaceCpy(out, '\n', "\r\n"); //back to win line endings
}
}
bgstack15