From b654dbfa5f3e4a4d02f72023f7c5895635aa6396 Mon Sep 17 00:00:00 2001 From: Daniel Wilhelm Date: Fri, 18 Apr 2014 17:12:17 +0200 Subject: 3.18 --- shared/tinyxml/tinyxmlparser.cpp | 1649 -------------------------------------- 1 file changed, 1649 deletions(-) delete mode 100644 shared/tinyxml/tinyxmlparser.cpp (limited to 'shared/tinyxml/tinyxmlparser.cpp') diff --git a/shared/tinyxml/tinyxmlparser.cpp b/shared/tinyxml/tinyxmlparser.cpp deleted file mode 100644 index 6b21a2b5..00000000 --- a/shared/tinyxml/tinyxmlparser.cpp +++ /dev/null @@ -1,1649 +0,0 @@ -/* -www.sourceforge.net/projects/tinyxml -Original code by Lee Thomason (www.grinninglizard.com) - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any -damages arising from the use of this software. - -Permission is granted to anyone to use this software for any -purpose, including commercial applications, and to alter it and -redistribute it freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must -not claim that you wrote the original software. If you use this -software in a product, an acknowledgment in the product documentation -would be appreciated but is not required. - -2. Altered source versions must be plainly marked as such, and -must not be misrepresented as being the original software. - -3. This notice may not be removed or altered from any source -distribution. -*/ - -#include -#include - -#include "tinyxml.h" - -//#define DEBUG_PARSER -#if defined( DEBUG_PARSER ) -# if defined( DEBUG ) && defined( _MSC_VER ) -# include -# define TIXML_LOG OutputDebugString -# else -# define TIXML_LOG printf -# endif -#endif - -// Note tha "PutString" hardcodes the same list. This -// is less flexible than it appears. Changing the entries -// or order will break putstring. -TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] = -{ - { "&", 5, '&' }, - { "<", 4, '<' }, - { ">", 4, '>' }, - { """, 6, '\"' }, - { "'", 6, '\'' } -}; - -// Bunch of unicode info at: -// http://www.unicode.org/faq/utf_bom.html -// Including the basic of this table, which determines the #bytes in the -// sequence from the lead byte. 1 placed for invalid sequences -- -// although the result will be junk, pass it through as much as possible. -// Beware of the non-characters in UTF-8: -// ef bb bf (Microsoft "lead bytes") -// ef bf be -// ef bf bf - -const unsigned char TIXML_UTF_LEAD_0 = 0xefU; -const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; -const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; - -const int TiXmlBase::utf8ByteTable[256] = -{ - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte - 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid -}; - - -void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) -{ - const unsigned long BYTE_MASK = 0xBF; - const unsigned long BYTE_MARK = 0x80; - const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - - if (input < 0x80) - *length = 1; - else if ( input < 0x800 ) - *length = 2; - else if ( input < 0x10000 ) - *length = 3; - else if ( input < 0x200000 ) - *length = 4; - else - { *length = 0; return; } // This code won't covert this correctly anyway. - - output += *length; - - // Scary scary fall throughs. - switch (*length) - { - case 4: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 3: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 2: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 1: - --output; - *output = (char)(input | FIRST_BYTE_MARK[*length]); - } -} - - -/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) -{ - // This will only work for low-ascii, everything else is assumed to be a valid - // letter. I'm not sure this is the best approach, but it is quite tricky trying - // to figure out alhabetical vs. not across encoding. So take a very - // conservative approach. - - // if ( encoding == TIXML_ENCODING_UTF8 ) - // { - if ( anyByte < 127 ) - return isalpha( anyByte ); - else - return 1; // What else to do? The unicode set is huge...get the english ones right. - // } - // else - // { - // return isalpha( anyByte ); - // } -} - - -/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) -{ - // This will only work for low-ascii, everything else is assumed to be a valid - // letter. I'm not sure this is the best approach, but it is quite tricky trying - // to figure out alhabetical vs. not across encoding. So take a very - // conservative approach. - - // if ( encoding == TIXML_ENCODING_UTF8 ) - // { - if ( anyByte < 127 ) - return isalnum( anyByte ); - else - return 1; // What else to do? The unicode set is huge...get the english ones right. - // } - // else - // { - // return isalnum( anyByte ); - // } -} - - -class TiXmlParsingData -{ - friend class TiXmlDocument; -public: - void Stamp( const char* now, TiXmlEncoding encoding ); - - const TiXmlCursor& Cursor() const { return cursor; } - -private: - // Only used by the document! - TiXmlParsingData( const char* start, int _tabsize, int row, int col ) - { - assert( start ); - stamp = start; - tabsize = _tabsize; - cursor.row = row; - cursor.col = col; - } - - TiXmlCursor cursor; - const char* stamp; - int tabsize; -}; - - -void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) -{ - assert( now ); - - // Do nothing if the tabsize is 0. - if ( tabsize < 1 ) - { - return; - } - - // Get the current row, column. - int row = cursor.row; - int col = cursor.col; - const char* p = stamp; - assert( p ); - - while ( p < now ) - { - // Treat p as unsigned, so we have a happy compiler. - const unsigned char* pU = (const unsigned char*)p; - - // Code contributed by Fletcher Dunn: (modified by lee) - switch (*pU) - { - case 0: - // We *should* never get here, but in case we do, don't - // advance past the terminating null character, ever - return; - - case '\r': - // bump down to the next line - ++row; - col = 0; - // Eat the character - ++p; - - // Check for \r\n sequence, and treat this as a single character - if (*p == '\n') - { - ++p; - } - break; - - case '\n': - // bump down to the next line - ++row; - col = 0; - - // Eat the character - ++p; - - // Check for \n\r sequence, and treat this as a single - // character. (Yes, this bizarre thing does occur still - // on some arcane platforms...) - if (*p == '\r') - { - ++p; - } - break; - - case '\t': - // Eat the character - ++p; - - // Skip to next tab stop - col = (col / tabsize + 1) * tabsize; - break; - - case TIXML_UTF_LEAD_0: - if ( encoding == TIXML_ENCODING_UTF8 ) - { - if ( *(p+1) && *(p+2) ) - { - // In these cases, don't advance the column. These are - // 0-width spaces. - if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 ) - p += 3; - else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU ) - p += 3; - else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU ) - p += 3; - else - { p +=3; ++col; } // A normal character. - } - } - else - { - ++p; - ++col; - } - break; - - default: - if ( encoding == TIXML_ENCODING_UTF8 ) - { - // Eat the 1 to 4 byte utf8 character. - int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)]; - if ( step == 0 ) - step = 1; // Error case from bad encoding, but handle gracefully. - p += step; - - // Just advance one column, of course. - ++col; - } - else - { - ++p; - ++col; - } - break; - } - } - cursor.row = row; - cursor.col = col; - assert( cursor.row >= -1 ); - assert( cursor.col >= -1 ); - stamp = p; - assert( stamp ); -} - - -const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) -{ - if ( !p || !*p ) - { - return 0; - } - if ( encoding == TIXML_ENCODING_UTF8 ) - { - while ( *p ) - { - const unsigned char* pU = (const unsigned char*)p; - - // Skip the stupid Microsoft UTF-8 Byte order marks - if ( *(pU+0)==TIXML_UTF_LEAD_0 - && *(pU+1)==TIXML_UTF_LEAD_1 - && *(pU+2)==TIXML_UTF_LEAD_2 ) - { - p += 3; - continue; - } - else if(*(pU+0)==TIXML_UTF_LEAD_0 - && *(pU+1)==0xbfU - && *(pU+2)==0xbeU ) - { - p += 3; - continue; - } - else if(*(pU+0)==TIXML_UTF_LEAD_0 - && *(pU+1)==0xbfU - && *(pU+2)==0xbfU ) - { - p += 3; - continue; - } - - if ( IsWhiteSpace( *p ) ) // Still using old rules for white space. - ++p; - else - break; - } - } - else - { - while ( *p && IsWhiteSpace( *p ) ) - ++p; - } - - return p; -} - -#ifdef TIXML_USE_STL -/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream* in, TIXML_STRING* tag ) -{ - for( ;; ) - { - if ( !in->good() ) return false; - - int c = in->peek(); - // At this scope, we can't get to a document. So fail silently. - if ( !IsWhiteSpace( c ) || c <= 0 ) - return true; - - *tag += (char) in->get(); - } -} - -/*static*/ bool TiXmlBase::StreamTo( std::istream* in, int character, TIXML_STRING* tag ) -{ - //assert( character > 0 && character < 128 ); // else it won't work in utf-8 - while ( in->good() ) - { - int c = in->peek(); - if ( c == character ) - return true; - if ( c <= 0 ) // Silent failure: can't get document at this scope - return false; - - in->get(); - *tag += (char) c; - } - return false; -} -#endif - -// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The -// "assign" optimization removes over 10% of the execution time. -// -const char* TiXmlBase::ReadName( const char* p, TIXML_STRING* name, TiXmlEncoding encoding ) -{ - // Oddly, not supported on some comilers, - //name->clear(); - // So use this: - *name = ""; - assert( p ); - - // Names start with letters or underscores. - // Of course, in unicode, tinyxml has no idea what a letter *is*. The - // algorithm is generous. - // - // After that, they can be letters, underscores, numbers, - // hyphens, or colons. (Colons are valid ony for namespaces, - // but tinyxml can't tell namespaces from names.) - if ( p && *p - && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) - { - const char* start = p; - while( p && *p - && ( IsAlphaNum( (unsigned char ) *p, encoding ) - || *p == '_' - || *p == '-' - || *p == '.' - || *p == ':' ) ) - { - //(*name) += *p; // expensive - ++p; - } - if ( p-start > 0 ) - { - name->assign( start, p-start ); - } - return p; - } - return 0; -} - -const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) -{ - // Presume an entity, and pull it out. - TIXML_STRING ent; - int i; - *length = 0; - - if ( *(p+1) && *(p+1) == '#' && *(p+2) ) - { - unsigned long ucs = 0; - ptrdiff_t delta = 0; - unsigned mult = 1; - - if ( *(p+2) == 'x' ) - { - // Hexadecimal. - if ( !*(p+3) ) return 0; - - const char* q = p+3; - q = strchr( q, ';' ); - - if ( !q || !*q ) return 0; - - delta = q-p; - --q; - - while ( *q != 'x' ) - { - if ( *q >= '0' && *q <= '9' ) - ucs += mult * (*q - '0'); - else if ( *q >= 'a' && *q <= 'f' ) - ucs += mult * (*q - 'a' + 10); - else if ( *q >= 'A' && *q <= 'F' ) - ucs += mult * (*q - 'A' + 10 ); - else - return 0; - mult *= 16; - --q; - } - } - else - { - // Decimal. - if ( !*(p+2) ) return 0; - - const char* q = p+2; - q = strchr( q, ';' ); - - if ( !q || !*q ) return 0; - - delta = q-p; - --q; - - while ( *q != '#' ) - { - if ( *q >= '0' && *q <= '9' ) - ucs += mult * (*q - '0'); - else - return 0; - mult *= 10; - --q; - } - } - if ( encoding == TIXML_ENCODING_UTF8 ) - { - // convert the UCS to UTF-8 - ConvertUTF32ToUTF8( ucs, value, length ); - } - else - { - *value = (char)ucs; - *length = 1; - } - return p + delta + 1; - } - - // Now try to match it. - for( i=0; iappend( cArr, len ); - } - } - else - { - bool whitespace = false; - - // Remove leading white space: - p = SkipWhiteSpace( p, encoding ); - while ( p && *p - && !StringEqual( p, endTag, caseInsensitive, encoding ) ) - { - if ( *p == '\r' || *p == '\n' ) - { - whitespace = true; - ++p; - } - else if ( IsWhiteSpace( *p ) ) - { - whitespace = true; - ++p; - } - else - { - // If we've found whitespace, add it before the - // new character. Any whitespace just becomes a space. - if ( whitespace ) - { - (*text) += ' '; - whitespace = false; - } - int len; - char cArr[4] = { 0, 0, 0, 0 }; - p = GetChar( p, cArr, &len, encoding ); - if ( len == 1 ) - (*text) += cArr[0]; // more efficient - else - text->append( cArr, len ); - } - } - } - if ( p && *p ) - p += strlen( endTag ); - return ( p && *p ) ? p : 0; -} - -#ifdef TIXML_USE_STL - -void TiXmlDocument::StreamIn( std::istream* in, TIXML_STRING* tag ) -{ - // The basic issue with a document is that we don't know what we're - // streaming. Read something presumed to be a tag (and hope), then - // identify it, and call the appropriate stream method on the tag. - // - // This "pre-streaming" will never read the closing ">" so the - // sub-tag can orient itself. - - if ( !StreamTo( in, '<', tag ) ) - { - SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - - while ( in->good() ) - { - int tagIndex = (int) tag->length(); - while ( in->good() && in->peek() != '>' ) - { - int c = in->get(); - if ( c <= 0 ) - { - SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - break; - } - (*tag) += (char) c; - } - - if ( in->good() ) - { - // We now have something we presume to be a node of - // some sort. Identify it, and call the node to - // continue streaming. - TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); - - if ( node ) - { - node->StreamIn( in, tag ); - bool isElement = node->ToElement() != 0; - delete node; - node = 0; - - // If this is the root element, we're done. Parsing will be - // done by the >> operator. - if ( isElement ) - { - return; - } - } - else - { - SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - } - } - // We should have returned sooner. - SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); -} - -#endif - -const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) -{ - ClearError(); - - // Parse away, at the document level. Since a document - // contains nothing but other tags, most of what happens - // here is skipping white space. - if ( !p || !*p ) - { - SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); - return 0; - } - - // Note that, for a document, this needs to come - // before the while space skip, so that parsing - // starts from the pointer we are given. - location.Clear(); - if ( prevData ) - { - location.row = prevData->cursor.row; - location.col = prevData->cursor.col; - } - else - { - location.row = 0; - location.col = 0; - } - TiXmlParsingData data( p, TabSize(), location.row, location.col ); - location = data.Cursor(); - - if ( encoding == TIXML_ENCODING_UNKNOWN ) - { - // Check for the Microsoft UTF-8 lead bytes. - const unsigned char* pU = (const unsigned char*)p; - if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0 - && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1 - && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 ) - { - encoding = TIXML_ENCODING_UTF8; - useMicrosoftBOM = true; - } - } - - p = SkipWhiteSpace( p, encoding ); - if ( !p ) - { - SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); - return 0; - } - - while ( p && *p ) - { - TiXmlNode* node = Identify( p, encoding ); - if ( node ) - { - p = node->Parse( p, &data, encoding ); - LinkEndChild( node ); - } - else - { - break; - } - - // Did we get encoding info? - if ( encoding == TIXML_ENCODING_UNKNOWN - && node->ToDeclaration() ) - { - TiXmlDeclaration* dec = node->ToDeclaration(); - const char* enc = dec->Encoding(); - assert( enc ); - - if ( *enc == 0 ) - encoding = TIXML_ENCODING_UTF8; - else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) - encoding = TIXML_ENCODING_UTF8; - else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) - encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice - else - encoding = TIXML_ENCODING_LEGACY; - } - - p = SkipWhiteSpace( p, encoding ); - } - - // Was this empty? - if ( !firstChild ) - { - SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding ); - return 0; - } - - // All is well. - return p; -} - -void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) -{ - // The first error in a chain is more accurate - don't set again! - if ( error ) - return; - - assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); - error = true; - errorId = err; - errorDesc = errorString[ errorId ]; - - errorLocation.Clear(); - if ( pError && data ) - { - data->Stamp( pError, encoding ); - errorLocation = data->Cursor(); - } -} - - -TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) -{ - TiXmlNode* returnNode = 0; - - p = SkipWhiteSpace( p, encoding ); - if( !p || !*p || *p != '<' ) - { - return 0; - } - - p = SkipWhiteSpace( p, encoding ); - - if ( !p || !*p ) - { - return 0; - } - - // What is this thing? - // - Elements start with a letter or underscore, but xml is reserved. - // - Comments: "; - - if ( !StringEqual( p, startTag, false, encoding ) ) - { - if ( document ) - document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); - return 0; - } - p += strlen( startTag ); - - // [ 1475201 ] TinyXML parses entities in comments - // Oops - ReadText doesn't work, because we don't want to parse the entities. - // p = ReadText( p, &value, false, endTag, false, encoding ); - // - // from the XML spec: - /* - [Definition: Comments may appear anywhere in a document outside other markup; in addition, - they may appear within the document type declaration at places allowed by the grammar. - They are not part of the document's character data; an XML processor MAY, but need not, - make it possible for an application to retrieve the text of comments. For compatibility, - the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity - references MUST NOT be recognized within comments. - - An example of a comment: - - - */ - - value = ""; - // Keep all the white space. - while ( p && *p && !StringEqual( p, endTag, false, encoding ) ) - { - value.append( p, 1 ); - ++p; - } - if ( p && *p ) - p += strlen( endTag ); - - return p; -} - - -const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) -{ - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p ) return 0; - - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - // Read the name, the '=' and the value. - const char* pErr = p; - p = ReadName( p, &name, encoding ); - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); - return 0; - } - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p || *p != '=' ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); - return 0; - } - - ++p; // skip '=' - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); - return 0; - } - - const char* end; - const char SINGLE_QUOTE = '\''; - const char DOUBLE_QUOTE = '\"'; - - if ( *p == SINGLE_QUOTE ) - { - ++p; - end = "\'"; // single quote in string - p = ReadText( p, &value, false, end, false, encoding ); - } - else if ( *p == DOUBLE_QUOTE ) - { - ++p; - end = "\""; // double quote in string - p = ReadText( p, &value, false, end, false, encoding ); - } - else - { - // All attribute values should be in single or double quotes. - // But this is such a common error that the parser will try - // its best, even without them. - value = ""; - while ( p && *p // existence - && !IsWhiteSpace( *p ) // whitespace - && *p != '/' && *p != '>' ) // tag end - { - if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) - { - // [ 1451649 ] Attribute values with trailing quotes not handled correctly - // We did not have an opening quote but seem to have a - // closing one. Give up and throw an error. - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); - return 0; - } - value += *p; - ++p; - } - } - return p; -} - -#ifdef TIXML_USE_STL -void TiXmlText::StreamIn( std::istream* in, TIXML_STRING* tag ) -{ - while ( in->good() ) - { - int c = in->peek(); - if ( !cdata && (c == '<' ) ) - { - return; - } - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - - (*tag) += (char) c; - in->get(); // "commits" the peek made above - - if ( cdata && c == '>' && tag->size() >= 3 ) - { - size_t len = tag->size(); - if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) - { - // terminator of cdata. - return; - } - } - } -} -#endif - -const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) -{ - value = ""; - TiXmlDocument* document = GetDocument(); - - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - - const char* const startTag = ""; - - if ( cdata || StringEqual( p, startTag, false, encoding ) ) - { - cdata = true; - - if ( !StringEqual( p, startTag, false, encoding ) ) - { - if ( document ) - document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding ); - return 0; - } - p += strlen( startTag ); - - // Keep all the white space, ignore the encoding, etc. - while ( p && *p - && !StringEqual( p, endTag, false, encoding ) - ) - { - value += *p; - ++p; - } - - TIXML_STRING dummy; - p = ReadText( p, &dummy, false, endTag, false, encoding ); - return p; - } - else - { - bool ignoreWhite = true; - - const char* end = "<"; - p = ReadText( p, &value, ignoreWhite, end, false, encoding ); - if ( p && *p ) - return p-1; // don't truncate the '<' - return 0; - } -} - -#ifdef TIXML_USE_STL -void TiXmlDeclaration::StreamIn( std::istream* in, TIXML_STRING* tag ) -{ - while ( in->good() ) - { - int c = in->get(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - (*tag) += (char) c; - - if ( c == '>' ) - { - // All is well. - return; - } - } -} -#endif - -const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) -{ - p = SkipWhiteSpace( p, _encoding ); - // Find the beginning, find the end, and look for - // the stuff in-between. - TiXmlDocument* document = GetDocument(); - if ( !p || !*p || !StringEqual( p, "SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); - return 0; - } - if ( data ) - { - data->Stamp( p, _encoding ); - location = data->Cursor(); - } - p += 5; - - version = ""; - encoding = ""; - standalone = ""; - - while ( p && *p ) - { - if ( *p == '>' ) - { - ++p; - return p; - } - - p = SkipWhiteSpace( p, _encoding ); - if ( StringEqual( p, "version", true, _encoding ) ) - { - TiXmlAttribute attrib; - p = attrib.Parse( p, data, _encoding ); - version = attrib.Value(); - } - else if ( StringEqual( p, "encoding", true, _encoding ) ) - { - TiXmlAttribute attrib; - p = attrib.Parse( p, data, _encoding ); - encoding = attrib.Value(); - } - else if ( StringEqual( p, "standalone", true, _encoding ) ) - { - TiXmlAttribute attrib; - p = attrib.Parse( p, data, _encoding ); - standalone = attrib.Value(); - } - else - { - // Read over whatever it is. - while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) - ++p; - } - } - return 0; -} - -bool TiXmlText::Blank() const -{ - for ( unsigned i=0; i