diff options
| author | Paul Buetow <paul@buetow.org> | 2013-04-06 13:14:44 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2013-04-06 13:14:44 +0200 |
| commit | c8b2ef7b899766d04562f7e04a84251cea8fa701 (patch) | |
| tree | 52816b17c17e2db0cf89e68537ad1a52392f1510 /src/contrib/xml/tinyxmlparser.cpp | |
| parent | ca28c0e618890330d429c0dc12429255b20f0c90 (diff) | |
tagging ychat-0.8.0ychat-0.8.0
Diffstat (limited to 'src/contrib/xml/tinyxmlparser.cpp')
| -rw-r--r-- | src/contrib/xml/tinyxmlparser.cpp | 2684 |
1 files changed, 1372 insertions, 1312 deletions
diff --git a/src/contrib/xml/tinyxmlparser.cpp b/src/contrib/xml/tinyxmlparser.cpp index f06d1cd..9c696b6 100644 --- a/src/contrib/xml/tinyxmlparser.cpp +++ b/src/contrib/xml/tinyxmlparser.cpp @@ -1,23 +1,23 @@ /* www.sourceforge.net/projects/tinyxml Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) - + This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. - + Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: - + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. - + 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. - + 3. This notice may not be removed or altered from any source distribution. */ @@ -29,1465 +29,1525 @@ distribution. // Note tha "PutString" hardcodes the same list. This // is less flexible than it appears. Changing the entries -// or order will break putstring. -TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = -{ - { "&", 5, '&' }, - { "<", 4, '<' }, - { ">", 4, '>' }, - { """, 6, '\"' }, - { "'", 6, '\'' } -}; +// or order will break putstring. +TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = + { + { "&", 5, '&' }, + { "<", 4, '<' }, + { ">", 4, '>' }, + { """, 6, '\"' }, + { "'", 6, '\'' } + }; // Bunch of unicode info at: // http://www.unicode.org/faq/utf_bom.html // Including the basic of this table, which determines the #bytes in the // sequence from the lead byte. 1 placed for invalid sequences -- // although the result will be junk, pass it through as much as possible. -// Beware of the non-characters in UTF-8: +// Beware of the non-characters in UTF-8: // ef bb bf (Microsoft "lead bytes") // ef bf be -// ef bf bf - - - -const int TiXmlBase::utf8ByteTable[256] = -{ - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte - 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid -}; +// ef bf bf + + + +const int TiXmlBase::utf8ByteTable[256] = + { + // 0 1 2 3 4 5 6 7 8 9 a b c d e f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte + 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid + }; void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) { - const unsigned long BYTE_MASK = 0xBF; - const unsigned long BYTE_MARK = 0x80; - const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - - if (input < 0x80) - *length = 1; - else if ( input < 0x800 ) - *length = 2; - else if ( input < 0x10000 ) - *length = 3; - else if ( input < 0x200000 ) - *length = 4; - else - { *length = 0; return; } // This code won't covert this correctly anyway. - - output += *length; - - // Scary scary fall throughs. - switch (*length) - { - case 4: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 3: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 2: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 1: - --output; - *output = (char)(input | FIRST_BYTE_MARK[*length]); - } + const unsigned long BYTE_MASK = 0xBF; + const unsigned long BYTE_MARK = 0x80; + const unsigned long FIRST_BYTE_MARK[7] = + { + 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC + }; + + if (input < 0x80) + *length = 1; + else if ( input < 0x800 ) + *length = 2; + else if ( input < 0x10000 ) + *length = 3; + else if ( input < 0x200000 ) + *length = 4; + else + { + *length = 0; + return; + } // This code won't covert this correctly anyway. + + output += *length; + + // Scary scary fall throughs. + switch (*length) + { + case 4: + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + case 3: + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + case 2: + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + case 1: + --output; + *output = (char)(input | FIRST_BYTE_MARK[*length]); + } } /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding encoding ) { - // This will only work for low-ascii, everything else is assumed to be a valid - // letter. I'm not sure this is the best approach, but it is quite tricky trying - // to figure out alhabetical vs. not across encoding. So take a very - // conservative approach. - -// if ( encoding == TIXML_ENCODING_UTF8 ) -// { - if ( anyByte < 127 ) - return isalpha( anyByte ); - else - return 1; // What else to do? The unicode set is huge...get the english ones right. -// } -// else -// { -// return isalpha( anyByte ); -// } + // This will only work for low-ascii, everything else is assumed to be a valid + // letter. I'm not sure this is the best approach, but it is quite tricky trying + // to figure out alhabetical vs. not across encoding. So take a very + // conservative approach. + + // if ( encoding == TIXML_ENCODING_UTF8 ) + // { + if ( anyByte < 127 ) + return isalpha( anyByte ); + else + return 1; // What else to do? The unicode set is huge...get the english ones right. + // } + // else + // { + // return isalpha( anyByte ); + // } } /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding encoding ) { - // This will only work for low-ascii, everything else is assumed to be a valid - // letter. I'm not sure this is the best approach, but it is quite tricky trying - // to figure out alhabetical vs. not across encoding. So take a very - // conservative approach. - -// if ( encoding == TIXML_ENCODING_UTF8 ) -// { - if ( anyByte < 127 ) - return isalnum( anyByte ); - else - return 1; // What else to do? The unicode set is huge...get the english ones right. -// } -// else -// { -// return isalnum( anyByte ); -// } + // This will only work for low-ascii, everything else is assumed to be a valid + // letter. I'm not sure this is the best approach, but it is quite tricky trying + // to figure out alhabetical vs. not across encoding. So take a very + // conservative approach. + + // if ( encoding == TIXML_ENCODING_UTF8 ) + // { + if ( anyByte < 127 ) + return isalnum( anyByte ); + else + return 1; // What else to do? The unicode set is huge...get the english ones right. + // } + // else + // { + // return isalnum( anyByte ); + // } } class TiXmlParsingData { - friend class TiXmlDocument; - public: - void Stamp( const char* now, TiXmlEncoding encoding ); - - const TiXmlCursor& Cursor() { return cursor; } - - private: - // Only used by the document! - TiXmlParsingData( const char* start, int _tabsize, int row, int col ) - { - assert( start ); - stamp = start; - tabsize = _tabsize; - cursor.row = row; - cursor.col = col; - } - - TiXmlCursor cursor; - const char* stamp; - int tabsize; + friend class TiXmlDocument; +public: + void Stamp( const char* now, TiXmlEncoding encoding ); + + const TiXmlCursor& Cursor() + { + return cursor; + } + +private: + // Only used by the document! + TiXmlParsingData( const char* start, int _tabsize, int row, int col ) + { + assert( start ); + stamp = start; + tabsize = _tabsize; + cursor.row = row; + cursor.col = col; + } + + TiXmlCursor cursor; + const char* stamp; + int tabsize; }; void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) { - assert( now ); - - // Do nothing if the tabsize is 0. - if ( tabsize < 1 ) - { - return; - } - - // Get the current row, column. - int row = cursor.row; - int col = cursor.col; - const char* p = stamp; - assert( p ); - - while ( p < now ) - { - // Code contributed by Fletcher Dunn: (modified by lee) - switch (*p) { - case 0: - // We *should* never get here, but in case we do, don't - // advance past the terminating null character, ever - return; - - case '\r': - // bump down to the next line - ++row; - col = 0; - // Eat the character - ++p; - - // Check for \r\n sequence, and treat this as a single character - if (*p == '\n') { - ++p; - } - break; - - case '\n': - // bump down to the next line - ++row; - col = 0; - - // Eat the character - ++p; - - // Check for \n\r sequence, and treat this as a single - // character. (Yes, this bizarre thing does occur still - // on some arcane platforms...) - if (*p == '\r') { - ++p; - } - break; - - case '\t': - // Eat the character - ++p; - - // Skip to next tab stop - col = (col / tabsize + 1) * tabsize; - break; - - case (char)(0xef): - if ( encoding == TIXML_ENCODING_UTF8 ) - { - if ( *(p+1) && *(p+2) ) - { - // In these cases, don't advance the column. These are - // 0-width spaces. - if ( *(p+1)==(char)(0xbb) && *(p+2)==(char)(0xbf) ) - p += 3; - else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbe) ) - p += 3; - else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbf) ) - p += 3; - else - { p +=3; ++col; } // A normal character. - } - } - else - { - ++p; - ++col; - } - break; - - default: - if ( encoding == TIXML_ENCODING_UTF8 ) - { - // Eat the 1 to 4 byte utf8 character. - int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)]; - if ( step == 0 ) - step = 1; // Error case from bad encoding, but handle gracefully. - p += step; - - // Just advance one column, of course. - ++col; - } - else - { - ++p; - ++col; - } - break; - } - } - cursor.row = row; - cursor.col = col; - assert( cursor.row >= -1 ); - assert( cursor.col >= -1 ); - stamp = p; - assert( stamp ); + assert( now ); + + // Do nothing if the tabsize is 0. + if ( tabsize < 1 ) + { + return; + } + + // Get the current row, column. + int row = cursor.row; + int col = cursor.col; + const char* p = stamp; + assert( p ); + + while ( p < now ) + { + // Code contributed by Fletcher Dunn: (modified by lee) + switch (*p) + { + case 0: + // We *should* never get here, but in case we do, don't + // advance past the terminating null character, ever + return; + + case '\r': + // bump down to the next line + ++row; + col = 0; + // Eat the character + ++p; + + // Check for \r\n sequence, and treat this as a single character + if (*p == '\n') + { + ++p; + } + break; + + case '\n': + // bump down to the next line + ++row; + col = 0; + + // Eat the character + ++p; + + // Check for \n\r sequence, and treat this as a single + // character. (Yes, this bizarre thing does occur still + // on some arcane platforms...) + if (*p == '\r') + { + ++p; + } + break; + + case '\t': + // Eat the character + ++p; + + // Skip to next tab stop + col = (col / tabsize + 1) * tabsize; + break; + + case (char)(0xef): + if ( encoding == TIXML_ENCODING_UTF8 ) + { + if ( *(p+1) && *(p+2) ) + { + // In these cases, don't advance the column. These are + // 0-width spaces. + if ( *(p+1)==(char)(0xbb) && *(p+2)==(char)(0xbf) ) + p += 3; + else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbe) ) + p += 3; + else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbf) ) + p += 3; + else + { + p +=3; + ++col; + } // A normal character. + } + } + else + { + ++p; + ++col; + } + break; + + default: + if ( encoding == TIXML_ENCODING_UTF8 ) + { + // Eat the 1 to 4 byte utf8 character. + int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)]; + if ( step == 0 ) + step = 1; // Error case from bad encoding, but handle gracefully. + p += step; + + // Just advance one column, of course. + ++col; + } + else + { + ++p; + ++col; + } + break; + } + } + cursor.row = row; + cursor.col = col; + assert( cursor.row >= -1 ); + assert( cursor.col >= -1 ); + stamp = p; + assert( stamp ); } const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) { - if ( !p || !*p ) - { - return 0; - } - if ( encoding == TIXML_ENCODING_UTF8 ) - { - while ( *p ) - { - // Skip the stupid Microsoft UTF-8 Byte order marks - if ( *(p+0)==(char) 0xef - && *(p+1)==(char) 0xbb - && *(p+2)==(char) 0xbf ) - { - p += 3; - continue; - } - else if(*(p+0)==(char) 0xef - && *(p+1)==(char) 0xbf - && *(p+2)==(char) 0xbe ) - { - p += 3; - continue; - } - else if(*(p+0)==(char) 0xef - && *(p+1)==(char) 0xbf - && *(p+2)==(char) 0xbf ) - { - p += 3; - continue; - } - - if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space. - ++p; - else - break; - } - } - else - { - while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) - ++p; - } - - return p; + if ( !p || !*p ) + { + return 0; + } + if ( encoding == TIXML_ENCODING_UTF8 ) + { + while ( *p ) + { + // Skip the stupid Microsoft UTF-8 Byte order marks + if ( *(p+0)==(char) 0xef + && *(p+1)==(char) 0xbb + && *(p+2)==(char) 0xbf ) + { + p += 3; + continue; + } + else if(*(p+0)==(char) 0xef + && *(p+1)==(char) 0xbf + && *(p+2)==(char) 0xbe ) + { + p += 3; + continue; + } + else if(*(p+0)==(char) 0xef + && *(p+1)==(char) 0xbf + && *(p+2)==(char) 0xbf ) + { + p += 3; + continue; + } + + if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space. + ++p; + else + break; + } + } + else + { + while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) + ++p; + } + + return p; } #ifdef TIXML_USE_STL /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag ) { - for( ;; ) - { - if ( !in->good() ) return false; - - int c = in->peek(); - // At this scope, we can't get to a document. So fail silently. - if ( !IsWhiteSpace( c ) || c <= 0 ) - return true; - - *tag += (char) in->get(); - } + for( ;; ) + { + if ( !in->good() ) + return false; + + int c = in->peek(); + // At this scope, we can't get to a document. So fail silently. + if ( !IsWhiteSpace( c ) || c <= 0 ) + return true; + + *tag += (char) in->get + (); + } } /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag ) { - //assert( character > 0 && character < 128 ); // else it won't work in utf-8 - while ( in->good() ) - { - int c = in->peek(); - if ( c == character ) - return true; - if ( c <= 0 ) // Silent failure: can't get document at this scope - return false; - - in->get(); - *tag += (char) c; - } - return false; + //assert( character > 0 && character < 128 ); // else it won't work in utf-8 + while ( in->good() ) + { + int c = in->peek(); + if ( c == character ) + return true; + if ( c <= 0 ) // Silent failure: can't get document at this scope + return false; + + in->get + (); + *tag += (char) c; + } + return false; } #endif const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding ) { - *name = ""; - assert( p ); - - // Names start with letters or underscores. - // Of course, in unicode, tinyxml has no idea what a letter *is*. The - // algorithm is generous. - // - // After that, they can be letters, underscores, numbers, - // hyphens, or colons. (Colons are valid ony for namespaces, - // but tinyxml can't tell namespaces from names.) - if ( p && *p - && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) - { - while( p && *p - && ( IsAlphaNum( (unsigned char ) *p, encoding ) - || *p == '_' - || *p == '-' - || *p == '.' - || *p == ':' ) ) - { - (*name) += *p; - ++p; - } - return p; - } - return 0; + *name = ""; + assert( p ); + + // Names start with letters or underscores. + // Of course, in unicode, tinyxml has no idea what a letter *is*. The + // algorithm is generous. + // + // After that, they can be letters, underscores, numbers, + // hyphens, or colons. (Colons are valid ony for namespaces, + // but tinyxml can't tell namespaces from names.) + if ( p && *p + && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) + { + while( p && *p + && ( IsAlphaNum( (unsigned char ) *p, encoding ) + || *p == '_' + || *p == '-' + || *p == '.' + || *p == ':' ) ) + { + (*name) += *p; + ++p; + } + return p; + } + return 0; } const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) { - // Presume an entity, and pull it out. - TIXML_STRING ent; - int i; - *length = 0; - - if ( *(p+1) && *(p+1) == '#' && *(p+2) ) - { - unsigned long ucs = 0; - unsigned delta = 0; - unsigned mult = 1; - - if ( *(p+2) == 'x' ) - { - // Hexadecimal. - if ( !*(p+3) ) return 0; - - const char* q = p+3; - q = strchr( q, ';' ); - - if ( !q || !*q ) return 0; - - delta = q-p; - --q; - - while ( *q != 'x' ) - { - if ( *q >= '0' && *q <= '9' ) - ucs += mult * (*q - '0'); - else if ( *q >= 'a' && *q <= 'f' ) - ucs += mult * (*q - 'a' + 10); - else if ( *q >= 'A' && *q <= 'F' ) - ucs += mult * (*q - 'A' + 10 ); - else - return 0; - mult *= 16; - --q; - } - } - else - { - // Decimal. - if ( !*(p+2) ) return 0; - - const char* q = p+2; - q = strchr( q, ';' ); - - if ( !q || !*q ) return 0; - - delta = q-p; - --q; - - while ( *q != '#' ) - { - if ( *q >= '0' && *q <= '9' ) - ucs += mult * (*q - '0'); - else - return 0; - mult *= 10; - --q; - } - } - if ( encoding == TIXML_ENCODING_UTF8 ) - { - // convert the UCS to UTF-8 - ConvertUTF32ToUTF8( ucs, value, length ); - } - else - { - *value = (char)ucs; - *length = 1; - } - return p + delta + 1; - } - - // Now try to match it. - for( i=0; i<NUM_ENTITY; ++i ) - { - if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) - { - assert( strlen( entity[i].str ) == entity[i].strLength ); - *value = entity[i].chr; - *length = 1; - return ( p + entity[i].strLength ); - } - } - - // So it wasn't an entity, its unrecognized, or something like that. - *value = *p; // Don't put back the last one, since we return it! - return p+1; + // Presume an entity, and pull it out. + TIXML_STRING ent; + int i; + *length = 0; + + if ( *(p+1) && *(p+1) == '#' && *(p+2) ) + { + unsigned long ucs = 0; + unsigned delta = 0; + unsigned mult = 1; + + if ( *(p+2) == 'x' ) + { + // Hexadecimal. + if ( !*(p+3) ) + return 0; + + const char* q = p+3; + q = strchr( q, ';' ); + + if ( !q || !*q ) + return 0; + + delta = q-p; + --q; + + while ( *q != 'x' ) + { + if ( *q >= '0' && *q <= '9' ) + ucs += mult * (*q - '0'); + else if ( *q >= 'a' && *q <= 'f' ) + ucs += mult * (*q - 'a' + 10); + else if ( *q >= 'A' && *q <= 'F' ) + ucs += mult * (*q - 'A' + 10 ); + else + return 0; + mult *= 16; + --q; + } + } + else + { + // Decimal. + if ( !*(p+2) ) + return 0; + + const char* q = p+2; + q = strchr( q, ';' ); + + if ( !q || !*q ) + return 0; + + delta = q-p; + --q; + + while ( *q != '#' ) + { + if ( *q >= '0' && *q <= '9' ) + ucs += mult * (*q - '0'); + else + return 0; + mult *= 10; + --q; + } + } + if ( encoding == TIXML_ENCODING_UTF8 ) + { + // convert the UCS to UTF-8 + ConvertUTF32ToUTF8( ucs, value, length ); + } + else + { + *value = (char)ucs; + *length = 1; + } + return p + delta + 1; + } + + // Now try to match it. + for( i=0; i<NUM_ENTITY; ++i ) + { + if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) + { + assert( strlen( entity[i].str ) == entity[i].strLength ); + *value = entity[i].chr; + *length = 1; + return ( p + entity[i].strLength ); + } + } + + // So it wasn't an entity, its unrecognized, or something like that. + *value = *p; // Don't put back the last one, since we return it! + return p+1; } bool TiXmlBase::StringEqual( const char* p, - const char* tag, - bool ignoreCase, - TiXmlEncoding encoding ) + const char* tag, + bool ignoreCase, + TiXmlEncoding encoding ) { - assert( p ); - assert( tag ); - if ( !p || !*p ) - { - assert( 0 ); - return false; - } - - const char* q = p; - - if ( ignoreCase ) - { - while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) - { - ++q; - ++tag; - } - - if ( *tag == 0 ) - return true; - } - else - { - while ( *q && *tag && *q == *tag ) - { - ++q; - ++tag; - } - - if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? - return true; - } - return false; + assert( p ); + assert( tag ); + if ( !p || !*p ) + { + assert( 0 ); + return false; + } + + const char* q = p; + + if ( ignoreCase ) + { + while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) + { + ++q; + ++tag; + } + + if ( *tag == 0 ) + return true; + } + else + { + while ( *q && *tag && *q == *tag ) + { + ++q; + ++tag; + } + + if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? + return true; + } + return false; } -const char* TiXmlBase::ReadText( const char* p, - TIXML_STRING * text, - bool trimWhiteSpace, - const char* endTag, - bool caseInsensitive, - TiXmlEncoding encoding ) +const char* TiXmlBase::ReadText( const char* p, + TIXML_STRING * text, + bool trimWhiteSpace, + const char* endTag, + bool caseInsensitive, + TiXmlEncoding encoding ) { - *text = ""; - if ( !trimWhiteSpace // certain tags always keep whitespace - || !condenseWhiteSpace ) // if true, whitespace is always kept - { - // Keep all the white space. - while ( p && *p - && !StringEqual( p, endTag, caseInsensitive, encoding ) - ) - { - int len; - char cArr[4] = { 0, 0, 0, 0 }; - p = GetChar( p, cArr, &len, encoding ); - text->append( cArr, len ); - } - } - else - { - bool whitespace = false; - - // Remove leading white space: - p = SkipWhiteSpace( p, encoding ); - while ( p && *p - && !StringEqual( p, endTag, caseInsensitive, encoding ) ) - { - if ( *p == '\r' || *p == '\n' ) - { - whitespace = true; - ++p; - } - else if ( IsWhiteSpace( *p ) ) - { - whitespace = true; - ++p; - } - else - { - // If we've found whitespace, add it before the - // new character. Any whitespace just becomes a space. - if ( whitespace ) - { - (*text) += ' '; - whitespace = false; - } - int len; - char cArr[4] = { 0, 0, 0, 0 }; - p = GetChar( p, cArr, &len, encoding ); - if ( len == 1 ) - (*text) += cArr[0]; // more efficient - else - text->append( cArr, len ); - } - } - } - return p + strlen( endTag ); + *text = ""; + if ( !trimWhiteSpace // certain tags always keep whitespace + || !condenseWhiteSpace ) // if true, whitespace is always kept + { + // Keep all the white space. + while ( p && *p + && !StringEqual( p, endTag, caseInsensitive, encoding ) + ) + { + int len; + char cArr[4] = { 0, 0, 0, 0 }; + p = GetChar( p, cArr, &len, encoding ); + text->append( cArr, len ); + } + } + else + { + bool whitespace = false; + + // Remove leading white space: + p = SkipWhiteSpace( p, encoding ); + while ( p && *p + && !StringEqual( p, endTag, caseInsensitive, encoding ) ) + { + if ( *p == '\r' || *p == '\n' ) + { + whitespace = true; + ++p; + } + else if ( IsWhiteSpace( *p ) ) + { + whitespace = true; + ++p; + } + else + { + // If we've found whitespace, add it before the + // new character. Any whitespace just becomes a space. + if ( whitespace ) + { + (*text) += ' '; + whitespace = false; + } + int len; + char cArr[4] = { 0, 0, 0, 0 }; + p = GetChar( p, cArr, &len, encoding ); + if ( len == 1 ) + (*text) += cArr[0]; // more efficient + else + text->append( cArr, len ); + } + } + } + return p + strlen( endTag ); } #ifdef TIXML_USE_STL void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) { - // The basic issue with a document is that we don't know what we're - // streaming. Read something presumed to be a tag (and hope), then - // identify it, and call the appropriate stream method on the tag. - // - // This "pre-streaming" will never read the closing ">" so the - // sub-tag can orient itself. - - if ( !StreamTo( in, '<', tag ) ) - { - SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - - while ( in->good() ) - { - int tagIndex = (int) tag->length(); - while ( in->good() && in->peek() != '>' ) - { - int c = in->get(); - if ( c <= 0 ) - { - SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - break; - } - (*tag) += (char) c; - } - - if ( in->good() ) - { - // We now have something we presume to be a node of - // some sort. Identify it, and call the node to - // continue streaming. - TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); - - if ( node ) - { - node->StreamIn( in, tag ); - bool isElement = node->ToElement() != 0; - delete node; - node = 0; - - // If this is the root element, we're done. Parsing will be - // done by the >> operator. - if ( isElement ) - { - return; - } - } - else - { - SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - } - } - // We should have returned sooner. - SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); + // The basic issue with a document is that we don't know what we're + // streaming. Read something presumed to be a tag (and hope), then + // identify it, and call the appropriate stream method on the tag. + // + // This "pre-streaming" will never read the closing ">" so the + // sub-tag can orient itself. + + if ( !StreamTo( in, '<', tag ) ) + { + SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + + while ( in->good() ) + { + int tagIndex = (int) tag->length(); + while ( in->good() && in->peek() != '>' ) + { + int c = in->get + (); + if ( c <= 0 ) + { + SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + break; + } + (*tag) += (char) c; + } + + if ( in->good() ) + { + // We now have something we presume to be a node of + // some sort. Identify it, and call the node to + // continue streaming. + TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); + + if ( node ) + { + node->StreamIn( in, tag ); + bool isElement = node->ToElement() != 0; + delete node; + node = 0; + + // If this is the root element, we're done. Parsing will be + // done by the >> operator. + if ( isElement ) + { + return; + } + } + else + { + SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + } + } + // We should have returned sooner. + SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); } #endif const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) { - ClearError(); - - // Parse away, at the document level. Since a document - // contains nothing but other tags, most of what happens - // here is skipping white space. - if ( !p || !*p ) - { - SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); - return 0; - } - - // Note that, for a document, this needs to come - // before the while space skip, so that parsing - // starts from the pointer we are given. - location.Clear(); - if ( prevData ) - { - location.row = prevData->cursor.row; - location.col = prevData->cursor.col; - } - else - { - location.row = 0; - location.col = 0; - } - TiXmlParsingData data( p, TabSize(), location.row, location.col ); - location = data.Cursor(); - - if ( encoding == TIXML_ENCODING_UNKNOWN ) - { - // Check for the Microsoft UTF-8 lead bytes. - if ( *(p+0) && *(p+0) == (char)(0xef) - && *(p+1) && *(p+1) == (char)(0xbb) - && *(p+2) && *(p+2) == (char)(0xbf) ) - { - encoding = TIXML_ENCODING_UTF8; - } - } + ClearError(); + + // Parse away, at the document level. Since a document + // contains nothing but other tags, most of what happens + // here is skipping white space. + if ( !p || !*p ) + { + SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); + return 0; + } + + // Note that, for a document, this needs to come + // before the while space skip, so that parsing + // starts from the pointer we are given. + location.Clear(); + if ( prevData ) + { + location.row = prevData->cursor.row; + location.col = prevData->cursor.col; + } + else + { + location.row = 0; + location.col = 0; + } + TiXmlParsingData data( p, TabSize(), location.row, location.col ); + location = data.Cursor(); + + if ( encoding == TIXML_ENCODING_UNKNOWN ) + { + // Check for the Microsoft UTF-8 lead bytes. + if ( *(p+0) && *(p+0) == (char)(0xef) + && *(p+1) && *(p+1) == (char)(0xbb) + && *(p+2) && *(p+2) == (char)(0xbf) ) + { + encoding = TIXML_ENCODING_UTF8; + } + } + + p = SkipWhiteSpace( p, encoding ); + if ( !p ) + { + SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); + return 0; + } + + while ( p && *p ) + { + TiXmlNode* node = Identify( p, encoding ); + if ( node ) + { + p = node->Parse( p, &data, encoding ); + LinkEndChild( node ); + } + else + { + break; + } + + // Did we get encoding info? + if ( encoding == TIXML_ENCODING_UNKNOWN + && node->ToDeclaration() ) + { + TiXmlDeclaration* dec = node->ToDeclaration(); + const char* enc = dec->Encoding(); + assert( enc ); + + if ( *enc == 0 ) + encoding = TIXML_ENCODING_UTF8; + else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) + encoding = TIXML_ENCODING_UTF8; + else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) + encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice + else + encoding = TIXML_ENCODING_LEGACY; + } p = SkipWhiteSpace( p, encoding ); - if ( !p ) - { - SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); - return 0; - } - - while ( p && *p ) - { - TiXmlNode* node = Identify( p, encoding ); - if ( node ) - { - p = node->Parse( p, &data, encoding ); - LinkEndChild( node ); - } - else - { - break; - } - - // Did we get encoding info? - if ( encoding == TIXML_ENCODING_UNKNOWN - && node->ToDeclaration() ) - { - TiXmlDeclaration* dec = node->ToDeclaration(); - const char* enc = dec->Encoding(); - assert( enc ); - - if ( *enc == 0 ) - encoding = TIXML_ENCODING_UTF8; - else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) - encoding = TIXML_ENCODING_UTF8; - else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) - encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice - else - encoding = TIXML_ENCODING_LEGACY; - } - - p = SkipWhiteSpace( p, encoding ); - } - - // All is well. - return p; + } + + // All is well. + return p; } void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) -{ - // The first error in a chain is more accurate - don't set again! - if ( error ) - return; - - assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); - error = true; - errorId = err; - errorDesc = errorString[ errorId ]; - - errorLocation.Clear(); - if ( pError && data ) - { - //TiXmlParsingData data( pError, prevData ); - data->Stamp( pError, encoding ); - errorLocation = data->Cursor(); - } +{ + // The first error in a chain is more accurate - don't set again! + if ( error ) + return; + + assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); + error = true; + errorId = err; + errorDesc = errorString[ errorId ]; + + errorLocation.Clear(); + if ( pError && data ) + { + //TiXmlParsingData data( pError, prevData ); + data->Stamp( pError, encoding ); + errorLocation = data->Cursor(); + } } TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) { - TiXmlNode* returnNode = 0; - - p = SkipWhiteSpace( p, encoding ); - if( !p || !*p || *p != '<' ) - { - return 0; - } - - TiXmlDocument* doc = GetDocument(); - p = SkipWhiteSpace( p, encoding ); - - if ( !p || !*p ) - { - return 0; - } - - // What is this thing? - // - Elements start with a letter or underscore, but xml is reserved. - // - Comments: <!-- - // - Decleration: <?xml - // - Everthing else is unknown to tinyxml. - // - - const char* xmlHeader = { "<?xml" }; - const char* commentHeader = { "<!--" }; - const char* dtdHeader = { "<!" }; - - if ( StringEqual( p, xmlHeader, true, encoding ) ) - { - #ifdef DEBUG_PARSER - TIXML_LOG( "XML parsing Declaration\n" ); - #endif - returnNode = new TiXmlDeclaration(); - } - else if ( StringEqual( p, commentHeader, false, encoding ) ) - { - #ifdef DEBUG_PARSER - TIXML_LOG( "XML parsing Comment\n" ); - #endif - returnNode = new TiXmlComment(); - } - else if ( StringEqual( p, dtdHeader, false, encoding ) ) - { - #ifdef DEBUG_PARSER - TIXML_LOG( "XML parsing Unknown(1)\n" ); - #endif - returnNode = new TiXmlUnknown(); - } - else if ( IsAlpha( *(p+1), encoding ) - || *(p+1) == '_' ) - { - #ifdef DEBUG_PARSER - TIXML_LOG( "XML parsing Element\n" ); - #endif - returnNode = new TiXmlElement( "" ); - } - else - { - #ifdef DEBUG_PARSER - TIXML_LOG( "XML parsing Unknown(2)\n" ); - #endif - returnNode = new TiXmlUnknown(); - } - - if ( returnNode ) - { - // Set the parent, so it can report errors - returnNode->parent = this; - } - else - { - if ( doc ) - doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN ); - } - return returnNode; + TiXmlNode* returnNode = 0; + + p = SkipWhiteSpace( p, encoding ); + if( !p || !*p || *p != '<' ) + { + return 0; + } + + TiXmlDocument* doc = GetDocument(); + p = SkipWhiteSpace( p, encoding ); + + if ( !p || !*p ) + { + return 0; + } + + // What is this thing? + // - Elements start with a letter or underscore, but xml is reserved. + // - Comments: <!-- + // - Decleration: <?xml + // - Everthing else is unknown to tinyxml. + // + + const char* xmlHeader = + { "<?xml" + }; + const char* commentHeader = + { "<!--" + }; + const char* dtdHeader = + { "<!" + }; + + if ( StringEqual( p, xmlHeader, true, encoding ) ) + { +#ifdef DEBUG_PARSER + TIXML_LOG( "XML parsing Declaration\n" ); +#endif + + returnNode = new TiXmlDeclaration(); + } + else if ( StringEqual( p, commentHeader, false, encoding ) ) + { +#ifdef DEBUG_PARSER + TIXML_LOG( "XML parsing Comment\n" ); +#endif + + returnNode = new TiXmlComment(); + } + else if ( StringEqual( p, dtdHeader, false, encoding ) ) + { +#ifdef DEBUG_PARSER + TIXML_LOG( "XML parsing Unknown(1)\n" ); +#endif + + returnNode = new TiXmlUnknown(); + } + else if ( IsAlpha( *(p+1), encoding ) + || *(p+1) == '_' ) + { +#ifdef DEBUG_PARSER + TIXML_LOG( "XML parsing Element\n" ); +#endif + + returnNode = new TiXmlElement( "" ); + } + else + { +#ifdef DEBUG_PARSER + TIXML_LOG( "XML parsing Unknown(2)\n" ); +#endif + + returnNode = new TiXmlUnknown(); + } + + if ( returnNode ) + { + // Set the parent, so it can report errors + returnNode->parent = this; + } + else + { + if ( doc ) + doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN ); + } + return returnNode; } #ifdef TIXML_USE_STL void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag) { - // We're called with some amount of pre-parsing. That is, some of "this" - // element is in "tag". Go ahead and stream to the closing ">" - while( in->good() ) - { - int c = in->get(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - (*tag) += (char) c ; - - if ( c == '>' ) - break; - } - - if ( tag->length() < 3 ) return; - - // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. - // If not, identify and stream. - - if ( tag->at( tag->length() - 1 ) == '>' - && tag->at( tag->length() - 2 ) == '/' ) - { - // All good! - return; - } - else if ( tag->at( tag->length() - 1 ) == '>' ) - { - // There is more. Could be: - // text - // closing tag - // another node. - for ( ;; ) - { - StreamWhiteSpace( in, tag ); - - // Do we have text? - if ( in->good() && in->peek() != '<' ) - { - // Yep, text. - TiXmlText text( "" ); - text.StreamIn( in, tag ); - - // What follows text is a closing tag or another node. - // Go around again and figure it out. - continue; - } - - // We now have either a closing tag...or another node. - // We should be at a "<", regardless. - if ( !in->good() ) return; - assert( in->peek() == '<' ); - int tagIndex = tag->length(); - - bool closingTag = false; - bool firstCharFound = false; - - for( ;; ) - { - if ( !in->good() ) - return; - - int c = in->peek(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - - if ( c == '>' ) - break; - - *tag += (char) c; - in->get(); - - if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) - { - firstCharFound = true; - if ( c == '/' ) - closingTag = true; - } - } - // If it was a closing tag, then read in the closing '>' to clean up the input stream. - // If it was not, the streaming will be done by the tag. - if ( closingTag ) - { - if ( !in->good() ) - return; - - int c = in->get(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - assert( c == '>' ); - *tag += (char) c; - - // We are done, once we've found our closing tag. - return; - } - else - { - // If not a closing tag, id it, and stream. - const char* tagloc = tag->c_str() + tagIndex; - TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); - if ( !node ) - return; - node->StreamIn( in, tag ); - delete node; - node = 0; - - // No return: go around from the beginning: text, closing tag, or node. - } - } - } + // We're called with some amount of pre-parsing. That is, some of "this" + // element is in "tag". Go ahead and stream to the closing ">" + while( in->good() ) + { + int c = in->get + (); + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + (*tag) += (char) c ; + + if ( c == '>' ) + break; + } + + if ( tag->length() < 3 ) + return; + + // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. + // If not, identify and stream. + + if ( tag->at( tag->length() - 1 ) == '>' + && tag->at( tag->length() - 2 ) == '/' ) + { + // All good! + return; + } + else if ( tag->at( tag->length() - 1 ) == '>' ) + { + // There is more. Could be: + // text + // closing tag + // another node. + for ( ;; ) + { + StreamWhiteSpace( in, tag ); + + // Do we have text? + if ( in->good() && in->peek() != '<' ) + { + // Yep, text. + TiXmlText text( "" ); + text.StreamIn( in, tag ); + + // What follows text is a closing tag or another node. + // Go around again and figure it out. + continue; + } + + // We now have either a closing tag...or another node. + // We should be at a "<", regardless. + if ( !in->good() ) + return; + assert( in->peek() == '<' ); + int tagIndex = tag->length(); + + bool closingTag = false; + bool firstCharFound = false; + + for( ;; ) + { + if ( !in->good() ) + return; + + int c = in->peek(); + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + + if ( c == '>' ) + break; + + *tag += (char) c; + in->get + (); + + if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) + { + firstCharFound = true; + if ( c == '/' ) + closingTag = true; + } + } + // If it was a closing tag, then read in the closing '>' to clean up the input stream. + // If it was not, the streaming will be done by the tag. + if ( closingTag ) + { + if ( !in->good() ) + return; + + int c = in->get + (); + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + assert( c == '>' ); + *tag += (char) c; + + // We are done, once we've found our closing tag. + return; + } + else + { + // If not a closing tag, id it, and stream. + const char* tagloc = tag->c_str() + tagIndex; + TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); + if ( !node ) + return; + node->StreamIn( in, tag ); + delete node; + node = 0; + + // No return: go around from the beginning: text, closing tag, or node. + } + } + } } #endif const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { - p = SkipWhiteSpace( p, encoding ); - TiXmlDocument* document = GetDocument(); - - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); - return 0; - } - -// TiXmlParsingData data( p, prevData ); - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - - if ( *p != '<' ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); - return 0; - } - - p = SkipWhiteSpace( p+1, encoding ); - - // Read the name. - const char* pErr = p; - - p = ReadName( p, &value, encoding ); - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); - return 0; - } - - TIXML_STRING endTag ("</"); - endTag += value; - endTag += ">"; - - // Check for and read attributes. Also look for an empty - // tag or an end tag. - while ( p && *p ) - { - pErr = p; - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); - return 0; - } - if ( *p == '/' ) - { - ++p; - // Empty tag. - if ( *p != '>' ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); - return 0; - } - return (p+1); - } - else if ( *p == '>' ) - { - // Done with attributes (if there were any.) - // Read the value -- which can include other - // elements -- read the end tag, and return. - ++p; - p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. - if ( !p || !*p ) - return 0; - - // We should find the end tag now - if ( StringEqual( p, endTag.c_str(), false, encoding ) ) - { - p += endTag.length(); - return p; - } - else - { - if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); - return 0; - } - } - else - { - // Try to read an attribute: - TiXmlAttribute* attrib = new TiXmlAttribute(); - if ( !attrib ) - { - if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding ); - return 0; - } - - attrib->SetDocument( document ); - const char* pErr = p; - p = attrib->Parse( p, data, encoding ); - - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); - delete attrib; - return 0; - } - - // Handle the strange case of double attributes: - TiXmlAttribute* node = attributeSet.Find( attrib->Name() ); - if ( node ) - { - node->SetValue( attrib->Value() ); - delete attrib; - return 0; - } - - attributeSet.Add( attrib ); - } - } - return p; + p = SkipWhiteSpace( p, encoding ); + TiXmlDocument* document = GetDocument(); + + if ( !p || !*p ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); + return 0; + } + + // TiXmlParsingData data( p, prevData ); + if ( data ) + { + data->Stamp( p, encoding ); + location = data->Cursor(); + } + + if ( *p != '<' ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); + return 0; + } + + p = SkipWhiteSpace( p+1, encoding ); + + // Read the name. + const char* pErr = p; + + p = ReadName( p, &value, encoding ); + if ( !p || !*p ) + { + if ( document ) + document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); + return 0; + } + + TIXML_STRING endTag ("</"); + endTag += value; + endTag += ">"; + + // Check for and read attributes. Also look for an empty + // tag or an end tag. + while ( p && *p ) + { + pErr = p; + p = SkipWhiteSpace( p, encoding ); + if ( !p || !*p ) + { + if ( document ) + document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); + return 0; + } + if ( *p == '/' ) + { + ++p; + // Empty tag. + if ( *p != '>' ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); + return 0; + } + return (p+1); + } + else if ( *p == '>' ) + { + // Done with attributes (if there were any.) + // Read the value -- which can include other + // elements -- read the end tag, and return. + ++p; + p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. + if ( !p || !*p ) + return 0; + + // We should find the end tag now + if ( StringEqual( p, endTag.c_str(), false, encoding ) ) + { + p += endTag.length(); + return p; + } + else + { + if ( document ) + document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); + return 0; + } + } + else + { + // Try to read an attribute: + TiXmlAttribute* attrib = new TiXmlAttribute(); + if ( !attrib ) + { + if ( document ) + document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding ); + return 0; + } + + attrib->SetDocument( document ); + const char* pErr = p; + p = attrib->Parse( p, data, encoding ); + + if ( !p || !*p ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); + delete attrib; + return 0; + } + + // Handle the strange case of double attributes: + TiXmlAttribute* node = attributeSet.Find( attrib->Name() ); + if ( node ) + { + node->SetValue( attrib->Value() ); + delete attrib; + return 0; + } + + attributeSet.Add( attrib ); + } + } + return p; } const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { - TiXmlDocument* document = GetDocument(); - - const char* pWithWhiteSpace = p; - // Read in text and elements in any order. - p = SkipWhiteSpace( p, encoding ); - while ( p && *p ) - { - if ( *p != '<' ) - { - // Take what we have, make a text element. - TiXmlText* textNode = new TiXmlText( "" ); - - if ( !textNode ) - { - if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding ); - return 0; - } - - if ( TiXmlBase::IsWhiteSpaceCondensed() ) - { - p = textNode->Parse( p, data, encoding ); - } - else - { - // Special case: we want to keep the white space - // so that leading spaces aren't removed. - p = textNode->Parse( pWithWhiteSpace, data, encoding ); - } - - if ( !textNode->Blank() ) - LinkEndChild( textNode ); - else - delete textNode; - } - else - { - // We hit a '<' - // Have we hit a new element or an end tag? - if ( StringEqual( p, "</", false, encoding ) ) - { - return p; - } - else - { - TiXmlNode* node = Identify( p, encoding ); - if ( node ) - { - p = node->Parse( p, data, encoding ); - LinkEndChild( node ); - } - else - { - return 0; - } - } - } - p = SkipWhiteSpace( p, encoding ); - } - - if ( !p ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); - } - return p; + TiXmlDocument* document = GetDocument(); + + const char* pWithWhiteSpace = p; + // Read in text and elements in any order. + p = SkipWhiteSpace( p, encoding ); + while ( p && *p ) + { + if ( *p != '<' ) + { + // Take what we have, make a text element. + TiXmlText* textNode = new TiXmlText( "" ); + + if ( !textNode ) + { + if ( document ) + document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding ); + return 0; + } + + if ( TiXmlBase::IsWhiteSpaceCondensed() ) + { + p = textNode->Parse( p, data, encoding ); + } + else + { + // Special case: we want to keep the white space + // so that leading spaces aren't removed. + p = textNode->Parse( pWithWhiteSpace, data, encoding ); + } + + if ( !textNode->Blank() ) + LinkEndChild( textNode ); + else + delete textNode; + } + else + { + // We hit a '<' + // Have we hit a new element or an end tag? + if ( StringEqual( p, "</", false, encoding ) ) + { + return p; + } + else + { + TiXmlNode* node = Identify( p, encoding ); + if ( node ) + { + p = node->Parse( p, data, encoding ); + LinkEndChild( node ); + } + else + { + return 0; + } + } + } + p = SkipWhiteSpace( p, encoding ); + } + + if ( !p ) + { + if ( document ) + document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); + } + return p; } #ifdef TIXML_USE_STL void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) { - while ( in->good() ) - { - int c = in->get(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - (*tag) += (char) c; - - if ( c == '>' ) - { - // All is well. - return; - } - } + while ( in->good() ) + { + int c = in->get + (); + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + (*tag) += (char) c; + + if ( c == '>' ) + { + // All is well. + return; + } + } } #endif const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { - TiXmlDocument* document = GetDocument(); - p = SkipWhiteSpace( p, encoding ); - -// TiXmlParsingData data( p, prevData ); - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - if ( !p || !*p || *p != '<' ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding ); - return 0; - } - ++p; - value = ""; - - while ( p && *p && *p != '>' ) - { - value += *p; - ++p; - } - - if ( !p ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding ); - } - if ( *p == '>' ) - return p+1; - return p; + TiXmlDocument* document = GetDocument(); + p = SkipWhiteSpace( p, encoding ); + + // TiXmlParsingData data( p, prevData ); + if ( data ) + { + data->Stamp( p, encoding ); + location = data->Cursor(); + } + if ( !p || !*p || *p != '<' ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding ); + return 0; + } + ++p; + value = ""; + + while ( p && *p && *p != '>' ) + { + value += *p; + ++p; + } + + if ( !p ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding ); + } + if ( *p == '>' ) + return p+1; + return p; } #ifdef TIXML_USE_STL void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) { - while ( in->good() ) - { - int c = in->get(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - - (*tag) += (char) c; - - if ( c == '>' - && tag->at( tag->length() - 2 ) == '-' - && tag->at( tag->length() - 3 ) == '-' ) - { - // All is well. - return; - } - } + while ( in->good() ) + { + int c = in->get + (); + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + + (*tag) += (char) c; + + if ( c == '>' + && tag->at( tag->length() - 2 ) == '-' + && tag->at( tag->length() - 3 ) == '-' ) + { + // All is well. + return; + } + } } #endif const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { - TiXmlDocument* document = GetDocument(); - value = ""; - - p = SkipWhiteSpace( p, encoding ); - -// TiXmlParsingData data( p, prevData ); - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - const char* startTag = "<!--"; - const char* endTag = "-->"; - - if ( !StringEqual( p, startTag, false, encoding ) ) - { - document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); - return 0; - } - p += strlen( startTag ); - p = ReadText( p, &value, false, endTag, false, encoding ); - return p; + TiXmlDocument* document = GetDocument(); + value = ""; + + p = SkipWhiteSpace( p, encoding ); + + // TiXmlParsingData data( p, prevData ); + if ( data ) + { + data->Stamp( p, encoding ); + location = data->Cursor(); + } + const char* startTag = "<!--"; + const char* endTag = "-->"; + + if ( !StringEqual( p, startTag, false, encoding ) ) + { + document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); + return 0; + } + p += strlen( startTag ); + p = ReadText( p, &value, false, endTag, false, encoding ); + return p; } const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p ) return 0; - - int tabsize = 4; - if ( document ) - tabsize = document->TabSize(); - -// TiXmlParsingData data( p, prevData ); - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - // Read the name, the '=' and the value. - const char* pErr = p; - p = ReadName( p, &name, encoding ); - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); - return 0; - } - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p || *p != '=' ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); - return 0; - } - - ++p; // skip '=' - p = SkipWhiteSpace( p, encoding ); - if ( !p || !*p ) - { - if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); - return 0; - } - - const char* end; - - if ( *p == '\'' ) - { - ++p; - end = "\'"; - p = ReadText( p, &value, false, end, false, encoding ); - } - else if ( *p == '"' ) - { - ++p; - end = "\""; - p = ReadText( p, &value, false, end, false, encoding ); - } - else - { - // All attribute values should be in single or double quotes. - // But this is such a common error that the parser will try - // its best, even without them. - value = ""; - while ( p && *p // existence - && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace - && *p != '/' && *p != '>' ) // tag end - { - value += *p; - ++p; - } - } - return p; + p = SkipWhiteSpace( p, encoding ); + if ( !p || !*p ) + return 0; + + int tabsize = 4; + if ( document ) + tabsize = document->TabSize(); + + // TiXmlParsingData data( p, prevData ); + if ( data ) + { + data->Stamp( p, encoding ); + location = data->Cursor(); + } + // Read the name, the '=' and the value. + const char* pErr = p; + p = ReadName( p, &name, encoding ); + if ( !p || !*p ) + { + if ( document ) + document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); + return 0; + } + p = SkipWhiteSpace( p, encoding ); + if ( !p || !*p || *p != '=' ) + { + if ( document ) + document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); + return 0; + } + + ++p; // skip '=' + p = SkipWhiteSpace( p, encoding ); + if ( !p || !*p ) + { + if ( document ) + document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); + return 0; + } + + const char* end; + + if ( *p == '\'' ) + { + ++p; + end = "\'"; + p = ReadText( p, &value, false, end, false, encoding ); + } + else if ( *p == '"' ) + { + ++p; + end = "\""; + p = ReadText( p, &value, false, end, false, encoding ); + } + else + { + // All attribute values should be in single or double quotes. + // But this is such a common error that the parser will try + // its best, even without them. + value = ""; + while ( p && *p // existence + && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace + && *p != '/' && *p != '>' ) // tag end + { + value += *p; + ++p; + } + } + return p; } #ifdef TIXML_USE_STL void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) { - while ( in->good() ) - { - int c = in->peek(); - if ( c == '<' ) - return; - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - - (*tag) += (char) c; - in->get(); - } + while ( in->good() ) + { + int c = in->peek(); + if ( c == '<' ) + return; + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + + (*tag) += (char) c; + in->get + (); + } } #endif const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) { - value = ""; -// TiXmlParsingData data( p, prevData ); - if ( data ) - { - data->Stamp( p, encoding ); - location = data->Cursor(); - } - bool ignoreWhite = true; - - const char* end = "<"; - p = ReadText( p, &value, ignoreWhite, end, false, encoding ); - if ( p ) - return p-1; // don't truncate the '<' - return 0; + value = ""; + // TiXmlParsingData data( p, prevData ); + if ( data ) + { + data->Stamp( p, encoding ); + location = data->Cursor(); + } + bool ignoreWhite = true; + + const char* end = "<"; + p = ReadText( p, &value, ignoreWhite, end, false, encoding ); + if ( p ) + return p-1; // don't truncate the '<' + return 0; } #ifdef TIXML_USE_STL void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) { - while ( in->good() ) - { - int c = in->get(); - if ( c <= 0 ) - { - TiXmlDocument* document = GetDocument(); - if ( document ) - document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); - return; - } - (*tag) += (char) c; - - if ( c == '>' ) - { - // All is well. - return; - } - } + while ( in->good() ) + { + int c = in->get + (); + if ( c <= 0 ) + { + TiXmlDocument* document = GetDocument(); + if ( document ) + document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); + return; + } + (*tag) += (char) c; + + if ( c == '>' ) + { + // All is well. + return; + } + } } #endif const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) { - p = SkipWhiteSpace( p, _encoding ); - // Find the beginning, find the end, and look for - // the stuff in-between. - TiXmlDocument* document = GetDocument(); - if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) - { - if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); - return 0; - } -// TiXmlParsingData data( p, prevData ); - if ( data ) - { - data->Stamp( p, _encoding ); - location = data->Cursor(); - } - p += 5; - - version = ""; - encoding = ""; - standalone = ""; - - while ( p && *p ) - { - if ( *p == '>' ) - { - ++p; - return p; - } - - p = SkipWhiteSpace( p, _encoding ); - if ( StringEqual( p, "version", true, _encoding ) ) - { - TiXmlAttribute attrib; - p = attrib.Parse( p, data, _encoding ); - version = attrib.Value(); - } - else if ( StringEqual( p, "encoding", true, _encoding ) ) - { - TiXmlAttribute attrib; - p = attrib.Parse( p, data, _encoding ); - encoding = attrib.Value(); - } - else if ( StringEqual( p, "standalone", true, _encoding ) ) - { - TiXmlAttribute attrib; - p = attrib.Parse( p, data, _encoding ); - standalone = attrib.Value(); - } - else - { - // Read over whatever it is. - while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) - ++p; - } - } - return 0; + p = SkipWhiteSpace( p, _encoding ); + // Find the beginning, find the end, and look for + // the stuff in-between. + TiXmlDocument* document = GetDocument(); + if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) + { + if ( document ) + document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); + return 0; + } + // TiXmlParsingData data( p, prevData ); + if ( data ) + { + data->Stamp( p, _encoding ); + location = data->Cursor(); + } + p += 5; + + version = ""; + encoding = ""; + standalone = ""; + + while ( p && *p ) + { + if ( *p == '>' ) + { + ++p; + return p; + } + + p = SkipWhiteSpace( p, _encoding ); + if ( StringEqual( p, "version", true, _encoding ) ) + { + TiXmlAttribute attrib; + p = attrib.Parse( p, data, _encoding ); + version = attrib.Value(); + } + else if ( StringEqual( p, "encoding", true, _encoding ) ) + { + TiXmlAttribute attrib; + p = attrib.Parse( p, data, _encoding ); + encoding = attrib.Value(); + } + else if ( StringEqual( p, "standalone", true, _encoding ) ) + { + TiXmlAttribute attrib; + p = attrib.Parse( p, data, _encoding ); + standalone = attrib.Value(); + } + else + { + // Read over whatever it is. + while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) + ++p; + } + } + return 0; } bool TiXmlText::Blank() const { - for ( unsigned i=0; i<value.length(); i++ ) - if ( !IsWhiteSpace( value[i] ) ) - return false; - return true; + for ( unsigned i=0; i<value.length(); i++ ) + if ( !IsWhiteSpace( value[i] ) ) + return false; + return true; } |
