Root/
Source at commit 1349 created 12 years 10 months ago. By meklort, Rename tar.gz to tgz | |
---|---|
1 | /*␊ |
2 | www.sourceforge.net/projects/tinyxml␊ |
3 | Original code by Lee Thomason (www.grinninglizard.com)␊ |
4 | ␊ |
5 | This software is provided 'as-is', without any express or implied ␊ |
6 | warranty. In no event will the authors be held liable for any ␊ |
7 | damages arising from the use of this software.␊ |
8 | ␊ |
9 | Permission is granted to anyone to use this software for any ␊ |
10 | purpose, including commercial applications, and to alter it and ␊ |
11 | redistribute it freely, subject to the following restrictions:␊ |
12 | ␊ |
13 | 1. The origin of this software must not be misrepresented; you must ␊ |
14 | not claim that you wrote the original software. If you use this␊ |
15 | software in a product, an acknowledgment in the product documentation␊ |
16 | would be appreciated but is not required.␊ |
17 | ␊ |
18 | 2. Altered source versions must be plainly marked as such, and ␊ |
19 | must not be misrepresented as being the original software.␊ |
20 | ␊ |
21 | 3. This notice may not be removed or altered from any source ␊ |
22 | distribution.␊ |
23 | */␊ |
24 | ␊ |
25 | #include <ctype.h>␊ |
26 | #include <stddef.h>␊ |
27 | ␊ |
28 | #include "tinyxml.h"␊ |
29 | ␊ |
30 | //#define DEBUG_PARSER␊ |
31 | #if defined( DEBUG_PARSER )␊ |
32 | #␉if defined( DEBUG ) && defined( _MSC_VER )␊ |
33 | #␉␉include <windows.h>␊ |
34 | #␉␉define TIXML_LOG OutputDebugString␊ |
35 | #␉else␊ |
36 | #␉␉define TIXML_LOG printf␊ |
37 | #␉endif␊ |
38 | #endif␊ |
39 | ␊ |
40 | // Note tha "PutString" hardcodes the same list. This␊ |
41 | // is less flexible than it appears. Changing the entries␊ |
42 | // or order will break putstring.␉␊ |
43 | TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] = ␊ |
44 | {␊ |
45 | ␉{ "&", 5, '&' },␊ |
46 | ␉{ "<", 4, '<' },␊ |
47 | ␉{ ">", 4, '>' },␊ |
48 | ␉{ """, 6, '\"' },␊ |
49 | ␉{ "'", 6, '\'' }␊ |
50 | };␊ |
51 | ␊ |
52 | // Bunch of unicode info at:␊ |
53 | //␉␉http://www.unicode.org/faq/utf_bom.html␊ |
54 | // Including the basic of this table, which determines the #bytes in the␊ |
55 | // sequence from the lead byte. 1 placed for invalid sequences --␊ |
56 | // although the result will be junk, pass it through as much as possible.␊ |
57 | // Beware of the non-characters in UTF-8:␉␊ |
58 | //␉␉␉␉ef bb bf (Microsoft "lead bytes")␊ |
59 | //␉␉␉␉ef bf be␊ |
60 | //␉␉␉␉ef bf bf ␊ |
61 | ␊ |
// Bytes of the 3-byte sequence EF BB BF (the Microsoft "lead bytes").
// The first byte, EF, also starts the EF BF BE and EF BF BF sequences
// that the parser treats as zero-width.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
65 | ␊ |
66 | const int TiXmlBase::utf8ByteTable[256] = ␊ |
67 | {␊ |
68 | ␉//␉0␉1␉2␉3␉4␉5␉6␉7␉8␉9␉a␉b␉c␉d␉e␉f␊ |
69 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x00␊ |
70 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x10␊ |
71 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x20␊ |
72 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x30␊ |
73 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x40␊ |
74 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x50␊ |
75 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x60␊ |
76 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x70␉End of ASCII range␊ |
77 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x80 0x80 to 0xc1 invalid␊ |
78 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0x90 ␊ |
79 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0xa0 ␊ |
80 | ␉␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉// 0xb0 ␊ |
81 | ␉␉1,␉1,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉// 0xc0 0xc2 to 0xdf 2 byte␊ |
82 | ␉␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉2,␉// 0xd0␊ |
83 | ␉␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉3,␉// 0xe0 0xe0 to 0xef 3 byte␊ |
84 | ␉␉4,␉4,␉4,␉4,␉4,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1,␉1␉// 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid␊ |
85 | };␊ |
86 | ␊ |
87 | ␊ |
88 | void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )␊ |
89 | {␊ |
90 | ␉const unsigned long BYTE_MASK = 0xBF;␊ |
91 | ␉const unsigned long BYTE_MARK = 0x80;␊ |
92 | ␉const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };␊ |
93 | ␊ |
94 | ␉if (input < 0x80) ␊ |
95 | ␉␉*length = 1;␊ |
96 | ␉else if ( input < 0x800 )␊ |
97 | ␉␉*length = 2;␊ |
98 | ␉else if ( input < 0x10000 )␊ |
99 | ␉␉*length = 3;␊ |
100 | ␉else if ( input < 0x200000 )␊ |
101 | ␉␉*length = 4;␊ |
102 | ␉else␊ |
103 | ␉␉{ *length = 0; return; }␉// This code won't covert this correctly anyway.␊ |
104 | ␊ |
105 | ␉output += *length;␊ |
106 | ␊ |
107 | ␉// Scary scary fall throughs.␊ |
108 | ␉switch (*length) ␊ |
109 | ␉{␊ |
110 | ␉␉case 4:␊ |
111 | ␉␉␉--output; ␊ |
112 | ␉␉␉*output = (char)((input | BYTE_MARK) & BYTE_MASK); ␊ |
113 | ␉␉␉input >>= 6;␊ |
114 | ␉␉case 3:␊ |
115 | ␉␉␉--output; ␊ |
116 | ␉␉␉*output = (char)((input | BYTE_MARK) & BYTE_MASK); ␊ |
117 | ␉␉␉input >>= 6;␊ |
118 | ␉␉case 2:␊ |
119 | ␉␉␉--output; ␊ |
120 | ␉␉␉*output = (char)((input | BYTE_MARK) & BYTE_MASK); ␊ |
121 | ␉␉␉input >>= 6;␊ |
122 | ␉␉case 1:␊ |
123 | ␉␉␉--output; ␊ |
124 | ␉␉␉*output = (char)(input | FIRST_BYTE_MARK[*length]);␊ |
125 | ␉}␊ |
126 | }␊ |
127 | ␊ |
128 | ␊ |
129 | /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )␊ |
130 | {␊ |
131 | ␉// This will only work for low-ascii, everything else is assumed to be a valid␊ |
132 | ␉// letter. I'm not sure this is the best approach, but it is quite tricky trying␊ |
133 | ␉// to figure out alhabetical vs. not across encoding. So take a very ␊ |
134 | ␉// conservative approach.␊ |
135 | ␊ |
136 | //␉if ( encoding == TIXML_ENCODING_UTF8 )␊ |
137 | //␉{␊ |
138 | ␉␉if ( anyByte < 127 )␊ |
139 | ␉␉␉return isalpha( anyByte );␊ |
140 | ␉␉else␊ |
141 | ␉␉␉return 1;␉// What else to do? The unicode set is huge...get the english ones right.␊ |
142 | //␉}␊ |
143 | //␉else␊ |
144 | //␉{␊ |
145 | //␉␉return isalpha( anyByte );␊ |
146 | //␉}␊ |
147 | }␊ |
148 | ␊ |
149 | ␊ |
150 | /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )␊ |
151 | {␊ |
152 | ␉// This will only work for low-ascii, everything else is assumed to be a valid␊ |
153 | ␉// letter. I'm not sure this is the best approach, but it is quite tricky trying␊ |
154 | ␉// to figure out alhabetical vs. not across encoding. So take a very ␊ |
155 | ␉// conservative approach.␊ |
156 | ␊ |
157 | //␉if ( encoding == TIXML_ENCODING_UTF8 )␊ |
158 | //␉{␊ |
159 | ␉␉if ( anyByte < 127 )␊ |
160 | ␉␉␉return isalnum( anyByte );␊ |
161 | ␉␉else␊ |
162 | ␉␉␉return 1;␉// What else to do? The unicode set is huge...get the english ones right.␊ |
163 | //␉}␊ |
164 | //␉else␊ |
165 | //␉{␊ |
166 | //␉␉return isalnum( anyByte );␊ |
167 | //␉}␊ |
168 | }␊ |
169 | ␊ |
170 | ␊ |
171 | class TiXmlParsingData␊ |
172 | {␊ |
173 | ␉friend class TiXmlDocument;␊ |
174 | public:␊ |
175 | ␉void Stamp( const char* now, TiXmlEncoding encoding );␊ |
176 | ␊ |
177 | ␉const TiXmlCursor& Cursor() const␉{ return cursor; }␊ |
178 | ␊ |
179 | private:␊ |
180 | ␉// Only used by the document!␊ |
181 | ␉TiXmlParsingData( const char* start, int _tabsize, int row, int col )␊ |
182 | ␉{␊ |
183 | ␉␉assert( start );␊ |
184 | ␉␉stamp = start;␊ |
185 | ␉␉tabsize = _tabsize;␊ |
186 | ␉␉cursor.row = row;␊ |
187 | ␉␉cursor.col = col;␊ |
188 | ␉}␊ |
189 | ␊ |
190 | ␉TiXmlCursor␉␉cursor;␊ |
191 | ␉const char*␉␉stamp;␊ |
192 | ␉int␉␉␉␉tabsize;␊ |
193 | };␊ |
194 | ␊ |
195 | ␊ |
196 | void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )␊ |
197 | {␊ |
198 | ␉assert( now );␊ |
199 | ␊ |
200 | ␉// Do nothing if the tabsize is 0.␊ |
201 | ␉if ( tabsize < 1 )␊ |
202 | ␉{␊ |
203 | ␉␉return;␊ |
204 | ␉}␊ |
205 | ␊ |
206 | ␉// Get the current row, column.␊ |
207 | ␉int row = cursor.row;␊ |
208 | ␉int col = cursor.col;␊ |
209 | ␉const char* p = stamp;␊ |
210 | ␉assert( p );␊ |
211 | ␊ |
212 | ␉while ( p < now )␊ |
213 | ␉{␊ |
214 | ␉␉// Treat p as unsigned, so we have a happy compiler.␊ |
215 | ␉␉const unsigned char* pU = (const unsigned char*)p;␊ |
216 | ␊ |
217 | ␉␉// Code contributed by Fletcher Dunn: (modified by lee)␊ |
218 | ␉␉switch (*pU) {␊ |
219 | ␉␉␉case 0:␊ |
220 | ␉␉␉␉// We *should* never get here, but in case we do, don't␊ |
221 | ␉␉␉␉// advance past the terminating null character, ever␊ |
222 | ␉␉␉␉return;␊ |
223 | ␊ |
224 | ␉␉␉case '\r':␊ |
225 | ␉␉␉␉// bump down to the next line␊ |
226 | ␉␉␉␉++row;␊ |
227 | ␉␉␉␉col = 0;␉␉␉␉␊ |
228 | ␉␉␉␉// Eat the character␊ |
229 | ␉␉␉␉++p;␊ |
230 | ␊ |
231 | ␉␉␉␉// Check for \r\n sequence, and treat this as a single character␊ |
232 | ␉␉␉␉if (*p == '\n') {␊ |
233 | ␉␉␉␉␉++p;␊ |
234 | ␉␉␉␉}␊ |
235 | ␉␉␉␉break;␊ |
236 | ␊ |
237 | ␉␉␉case '\n':␊ |
238 | ␉␉␉␉// bump down to the next line␊ |
239 | ␉␉␉␉++row;␊ |
240 | ␉␉␉␉col = 0;␊ |
241 | ␊ |
242 | ␉␉␉␉// Eat the character␊ |
243 | ␉␉␉␉++p;␊ |
244 | ␊ |
245 | ␉␉␉␉// Check for \n\r sequence, and treat this as a single␊ |
246 | ␉␉␉␉// character. (Yes, this bizarre thing does occur still␊ |
247 | ␉␉␉␉// on some arcane platforms...)␊ |
248 | ␉␉␉␉if (*p == '\r') {␊ |
249 | ␉␉␉␉␉++p;␊ |
250 | ␉␉␉␉}␊ |
251 | ␉␉␉␉break;␊ |
252 | ␊ |
253 | ␉␉␉case '\t':␊ |
254 | ␉␉␉␉// Eat the character␊ |
255 | ␉␉␉␉++p;␊ |
256 | ␊ |
257 | ␉␉␉␉// Skip to next tab stop␊ |
258 | ␉␉␉␉col = (col / tabsize + 1) * tabsize;␊ |
259 | ␉␉␉␉break;␊ |
260 | ␊ |
261 | ␉␉␉case TIXML_UTF_LEAD_0:␊ |
262 | ␉␉␉␉if ( encoding == TIXML_ENCODING_UTF8 )␊ |
263 | ␉␉␉␉{␊ |
264 | ␉␉␉␉␉if ( *(p+1) && *(p+2) )␊ |
265 | ␉␉␉␉␉{␊ |
266 | ␉␉␉␉␉␉// In these cases, don't advance the column. These are␊ |
267 | ␉␉␉␉␉␉// 0-width spaces.␊ |
268 | ␉␉␉␉␉␉if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )␊ |
269 | ␉␉␉␉␉␉␉p += 3;␉␊ |
270 | ␉␉␉␉␉␉else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )␊ |
271 | ␉␉␉␉␉␉␉p += 3;␉␊ |
272 | ␉␉␉␉␉␉else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )␊ |
273 | ␉␉␉␉␉␉␉p += 3;␉␊ |
274 | ␉␉␉␉␉␉else␊ |
275 | ␉␉␉␉␉␉␉{ p +=3; ++col; }␉// A normal character.␊ |
276 | ␉␉␉␉␉}␊ |
277 | ␉␉␉␉}␊ |
278 | ␉␉␉␉else␊ |
279 | ␉␉␉␉{␊ |
280 | ␉␉␉␉␉++p;␊ |
281 | ␉␉␉␉␉++col;␊ |
282 | ␉␉␉␉}␊ |
283 | ␉␉␉␉break;␊ |
284 | ␊ |
285 | ␉␉␉default:␊ |
286 | ␉␉␉␉if ( encoding == TIXML_ENCODING_UTF8 )␊ |
287 | ␉␉␉␉{␊ |
288 | ␉␉␉␉␉// Eat the 1 to 4 byte utf8 character.␊ |
289 | ␉␉␉␉␉int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];␊ |
290 | ␉␉␉␉␉if ( step == 0 )␊ |
291 | ␉␉␉␉␉␉step = 1;␉␉// Error case from bad encoding, but handle gracefully.␊ |
292 | ␉␉␉␉␉p += step;␊ |
293 | ␊ |
294 | ␉␉␉␉␉// Just advance one column, of course.␊ |
295 | ␉␉␉␉␉++col;␊ |
296 | ␉␉␉␉}␊ |
297 | ␉␉␉␉else␊ |
298 | ␉␉␉␉{␊ |
299 | ␉␉␉␉␉++p;␊ |
300 | ␉␉␉␉␉++col;␊ |
301 | ␉␉␉␉}␊ |
302 | ␉␉␉␉break;␊ |
303 | ␉␉}␊ |
304 | ␉}␊ |
305 | ␉cursor.row = row;␊ |
306 | ␉cursor.col = col;␊ |
307 | ␉assert( cursor.row >= -1 );␊ |
308 | ␉assert( cursor.col >= -1 );␊ |
309 | ␉stamp = p;␊ |
310 | ␉assert( stamp );␊ |
311 | }␊ |
312 | ␊ |
313 | ␊ |
314 | const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )␊ |
315 | {␊ |
316 | ␉if ( !p || !*p )␊ |
317 | ␉{␊ |
318 | ␉␉return 0;␊ |
319 | ␉}␊ |
320 | ␉if ( encoding == TIXML_ENCODING_UTF8 )␊ |
321 | ␉{␊ |
322 | ␉␉while ( *p )␊ |
323 | ␉␉{␊ |
324 | ␉␉␉const unsigned char* pU = (const unsigned char*)p;␊ |
325 | ␉␉␉␊ |
326 | ␉␉␉// Skip the stupid Microsoft UTF-8 Byte order marks␊ |
327 | ␉␉␉if (␉*(pU+0)==TIXML_UTF_LEAD_0␊ |
328 | ␉␉␉␉ && *(pU+1)==TIXML_UTF_LEAD_1 ␊ |
329 | ␉␉␉␉ && *(pU+2)==TIXML_UTF_LEAD_2 )␊ |
330 | ␉␉␉{␊ |
331 | ␉␉␉␉p += 3;␊ |
332 | ␉␉␉␉continue;␊ |
333 | ␉␉␉}␊ |
334 | ␉␉␉else if(*(pU+0)==TIXML_UTF_LEAD_0␊ |
335 | ␉␉␉␉ && *(pU+1)==0xbfU␊ |
336 | ␉␉␉␉ && *(pU+2)==0xbeU )␊ |
337 | ␉␉␉{␊ |
338 | ␉␉␉␉p += 3;␊ |
339 | ␉␉␉␉continue;␊ |
340 | ␉␉␉}␊ |
341 | ␉␉␉else if(*(pU+0)==TIXML_UTF_LEAD_0␊ |
342 | ␉␉␉␉ && *(pU+1)==0xbfU␊ |
343 | ␉␉␉␉ && *(pU+2)==0xbfU )␊ |
344 | ␉␉␉{␊ |
345 | ␉␉␉␉p += 3;␊ |
346 | ␉␉␉␉continue;␊ |
347 | ␉␉␉}␊ |
348 | ␊ |
349 | ␉␉␉if ( IsWhiteSpace( *p ) )␉␉// Still using old rules for white space.␊ |
350 | ␉␉␉␉++p;␊ |
351 | ␉␉␉else␊ |
352 | ␉␉␉␉break;␊ |
353 | ␉␉}␊ |
354 | ␉}␊ |
355 | ␉else␊ |
356 | ␉{␊ |
357 | ␉␉while ( *p && IsWhiteSpace( *p ) )␊ |
358 | ␉␉␉++p;␊ |
359 | ␉}␊ |
360 | ␊ |
361 | ␉return p;␊ |
362 | }␊ |
363 | ␊ |
364 | #ifdef TIXML_USE_STL␊ |
365 | /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )␊ |
366 | {␊ |
367 | ␉for( ;; )␊ |
368 | ␉{␊ |
369 | ␉␉if ( !in->good() ) return false;␊ |
370 | ␊ |
371 | ␉␉int c = in->peek();␊ |
372 | ␉␉// At this scope, we can't get to a document. So fail silently.␊ |
373 | ␉␉if ( !IsWhiteSpace( c ) || c <= 0 )␊ |
374 | ␉␉␉return true;␊ |
375 | ␊ |
376 | ␉␉*tag += (char) in->get();␊ |
377 | ␉}␊ |
378 | }␊ |
379 | ␊ |
380 | /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )␊ |
381 | {␊ |
382 | ␉//assert( character > 0 && character < 128 );␉// else it won't work in utf-8␊ |
383 | ␉while ( in->good() )␊ |
384 | ␉{␊ |
385 | ␉␉int c = in->peek();␊ |
386 | ␉␉if ( c == character )␊ |
387 | ␉␉␉return true;␊ |
388 | ␉␉if ( c <= 0 )␉␉// Silent failure: can't get document at this scope␊ |
389 | ␉␉␉return false;␊ |
390 | ␊ |
391 | ␉␉in->get();␊ |
392 | ␉␉*tag += (char) c;␊ |
393 | ␉}␊ |
394 | ␉return false;␊ |
395 | }␊ |
396 | #endif␊ |
397 | ␊ |
398 | // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The␊ |
399 | // "assign" optimization removes over 10% of the execution time.␊ |
400 | //␊ |
401 | const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )␊ |
402 | {␊ |
403 | ␉// Oddly, not supported on some comilers,␊ |
404 | ␉//name->clear();␊ |
405 | ␉// So use this:␊ |
406 | ␉*name = "";␊ |
407 | ␉assert( p );␊ |
408 | ␊ |
409 | ␉// Names start with letters or underscores.␊ |
410 | ␉// Of course, in unicode, tinyxml has no idea what a letter *is*. The␊ |
411 | ␉// algorithm is generous.␊ |
412 | ␉//␊ |
413 | ␉// After that, they can be letters, underscores, numbers,␊ |
414 | ␉// hyphens, or colons. (Colons are valid ony for namespaces,␊ |
415 | ␉// but tinyxml can't tell namespaces from names.)␊ |
416 | ␉if ( p && *p ␊ |
417 | ␉␉ && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )␊ |
418 | ␉{␊ |
419 | ␉␉const char* start = p;␊ |
420 | ␉␉while(␉␉p && *p␊ |
421 | ␉␉␉␉&&␉(␉␉IsAlphaNum( (unsigned char ) *p, encoding ) ␊ |
422 | ␉␉␉␉␉␉ || *p == '_'␊ |
423 | ␉␉␉␉␉␉ || *p == '-'␊ |
424 | ␉␉␉␉␉␉ || *p == '.'␊ |
425 | ␉␉␉␉␉␉ || *p == ':' ) )␊ |
426 | ␉␉{␊ |
427 | ␉␉␉//(*name) += *p; // expensive␊ |
428 | ␉␉␉++p;␊ |
429 | ␉␉}␊ |
430 | ␉␉if ( p-start > 0 ) {␊ |
431 | ␉␉␉name->assign( start, p-start );␊ |
432 | ␉␉}␊ |
433 | ␉␉return p;␊ |
434 | ␉}␊ |
435 | ␉return 0;␊ |
436 | }␊ |
437 | ␊ |
438 | const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )␊ |
439 | {␊ |
440 | ␉// Presume an entity, and pull it out.␊ |
441 | TIXML_STRING ent;␊ |
442 | ␉int i;␊ |
443 | ␉*length = 0;␊ |
444 | ␊ |
445 | ␉if ( *(p+1) && *(p+1) == '#' && *(p+2) )␊ |
446 | ␉{␊ |
447 | ␉␉unsigned long ucs = 0;␊ |
448 | ␉␉ptrdiff_t delta = 0;␊ |
449 | ␉␉unsigned mult = 1;␊ |
450 | ␊ |
451 | ␉␉if ( *(p+2) == 'x' )␊ |
452 | ␉␉{␊ |
453 | ␉␉␉// Hexadecimal.␊ |
454 | ␉␉␉if ( !*(p+3) ) return 0;␊ |
455 | ␊ |
456 | ␉␉␉const char* q = p+3;␊ |
457 | ␉␉␉q = strchr( q, ';' );␊ |
458 | ␊ |
459 | ␉␉␉if ( !q || !*q ) return 0;␊ |
460 | ␊ |
461 | ␉␉␉delta = q-p;␊ |
462 | ␉␉␉--q;␊ |
463 | ␊ |
464 | ␉␉␉while ( *q != 'x' )␊ |
465 | ␉␉␉{␊ |
466 | ␉␉␉␉if ( *q >= '0' && *q <= '9' )␊ |
467 | ␉␉␉␉␉ucs += mult * (*q - '0');␊ |
468 | ␉␉␉␉else if ( *q >= 'a' && *q <= 'f' )␊ |
469 | ␉␉␉␉␉ucs += mult * (*q - 'a' + 10);␊ |
470 | ␉␉␉␉else if ( *q >= 'A' && *q <= 'F' )␊ |
471 | ␉␉␉␉␉ucs += mult * (*q - 'A' + 10 );␊ |
472 | ␉␉␉␉else ␊ |
473 | ␉␉␉␉␉return 0;␊ |
474 | ␉␉␉␉mult *= 16;␊ |
475 | ␉␉␉␉--q;␊ |
476 | ␉␉␉}␊ |
477 | ␉␉}␊ |
478 | ␉␉else␊ |
479 | ␉␉{␊ |
480 | ␉␉␉// Decimal.␊ |
481 | ␉␉␉if ( !*(p+2) ) return 0;␊ |
482 | ␊ |
483 | ␉␉␉const char* q = p+2;␊ |
484 | ␉␉␉q = strchr( q, ';' );␊ |
485 | ␊ |
486 | ␉␉␉if ( !q || !*q ) return 0;␊ |
487 | ␊ |
488 | ␉␉␉delta = q-p;␊ |
489 | ␉␉␉--q;␊ |
490 | ␊ |
491 | ␉␉␉while ( *q != '#' )␊ |
492 | ␉␉␉{␊ |
493 | ␉␉␉␉if ( *q >= '0' && *q <= '9' )␊ |
494 | ␉␉␉␉␉ucs += mult * (*q - '0');␊ |
495 | ␉␉␉␉else ␊ |
496 | ␉␉␉␉␉return 0;␊ |
497 | ␉␉␉␉mult *= 10;␊ |
498 | ␉␉␉␉--q;␊ |
499 | ␉␉␉}␊ |
500 | ␉␉}␊ |
501 | ␉␉if ( encoding == TIXML_ENCODING_UTF8 )␊ |
502 | ␉␉{␊ |
503 | ␉␉␉// convert the UCS to UTF-8␊ |
504 | ␉␉␉ConvertUTF32ToUTF8( ucs, value, length );␊ |
505 | ␉␉}␊ |
506 | ␉␉else␊ |
507 | ␉␉{␊ |
508 | ␉␉␉*value = (char)ucs;␊ |
509 | ␉␉␉*length = 1;␊ |
510 | ␉␉}␊ |
511 | ␉␉return p + delta + 1;␊ |
512 | ␉}␊ |
513 | ␊ |
514 | ␉// Now try to match it.␊ |
515 | ␉for( i=0; i<NUM_ENTITY; ++i )␊ |
516 | ␉{␊ |
517 | ␉␉if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )␊ |
518 | ␉␉{␊ |
519 | ␉␉␉assert( strlen( entity[i].str ) == entity[i].strLength );␊ |
520 | ␉␉␉*value = entity[i].chr;␊ |
521 | ␉␉␉*length = 1;␊ |
522 | ␉␉␉return ( p + entity[i].strLength );␊ |
523 | ␉␉}␊ |
524 | ␉}␊ |
525 | ␊ |
526 | ␉// So it wasn't an entity, its unrecognized, or something like that.␊ |
527 | ␉*value = *p;␉// Don't put back the last one, since we return it!␊ |
528 | ␉//*length = 1;␉// Leave unrecognized entities - this doesn't really work.␊ |
529 | ␉␉␉␉␉// Just writes strange XML.␊ |
530 | ␉return p+1;␊ |
531 | }␊ |
532 | ␊ |
533 | ␊ |
534 | bool TiXmlBase::StringEqual( const char* p,␊ |
535 | ␉␉␉␉␉␉␉ const char* tag,␊ |
536 | ␉␉␉␉␉␉␉ bool ignoreCase,␊ |
537 | ␉␉␉␉␉␉␉ TiXmlEncoding encoding )␊ |
538 | {␊ |
539 | ␉assert( p );␊ |
540 | ␉assert( tag );␊ |
541 | ␉if ( !p || !*p )␊ |
542 | ␉{␊ |
543 | ␉␉assert( 0 );␊ |
544 | ␉␉return false;␊ |
545 | ␉}␊ |
546 | ␊ |
547 | ␉const char* q = p;␊ |
548 | ␊ |
549 | ␉if ( ignoreCase )␊ |
550 | ␉{␊ |
551 | ␉␉while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )␊ |
552 | ␉␉{␊ |
553 | ␉␉␉++q;␊ |
554 | ␉␉␉++tag;␊ |
555 | ␉␉}␊ |
556 | ␊ |
557 | ␉␉if ( *tag == 0 )␊ |
558 | ␉␉␉return true;␊ |
559 | ␉}␊ |
560 | ␉else␊ |
561 | ␉{␊ |
562 | ␉␉while ( *q && *tag && *q == *tag )␊ |
563 | ␉␉{␊ |
564 | ␉␉␉++q;␊ |
565 | ␉␉␉++tag;␊ |
566 | ␉␉}␊ |
567 | ␊ |
568 | ␉␉if ( *tag == 0 )␉␉// Have we found the end of the tag, and everything equal?␊ |
569 | ␉␉␉return true;␊ |
570 | ␉}␊ |
571 | ␉return false;␊ |
572 | }␊ |
573 | ␊ |
574 | const char* TiXmlBase::ReadText(␉const char* p, ␊ |
575 | ␉␉␉␉␉␉␉␉␉TIXML_STRING * text, ␊ |
576 | ␉␉␉␉␉␉␉␉␉bool trimWhiteSpace, ␊ |
577 | ␉␉␉␉␉␉␉␉␉const char* endTag, ␊ |
578 | ␉␉␉␉␉␉␉␉␉bool caseInsensitive,␊ |
579 | ␉␉␉␉␉␉␉␉␉TiXmlEncoding encoding )␊ |
580 | {␊ |
581 | *text = "";␊ |
582 | ␉if ( !trimWhiteSpace␉␉␉// certain tags always keep whitespace␊ |
583 | ␉␉ || !condenseWhiteSpace )␉// if true, whitespace is always kept␊ |
584 | ␉{␊ |
585 | ␉␉// Keep all the white space.␊ |
586 | ␉␉while (␉ p && *p␊ |
587 | ␉␉␉␉&& !StringEqual( p, endTag, caseInsensitive, encoding )␊ |
588 | ␉␉␉ )␊ |
589 | ␉␉{␊ |
590 | ␉␉␉int len;␊ |
591 | ␉␉␉char cArr[4] = { 0, 0, 0, 0 };␊ |
592 | ␉␉␉p = GetChar( p, cArr, &len, encoding );␊ |
593 | ␉␉␉text->append( cArr, len );␊ |
594 | ␉␉}␊ |
595 | ␉}␊ |
596 | ␉else␊ |
597 | ␉{␊ |
598 | ␉␉bool whitespace = false;␊ |
599 | ␊ |
600 | ␉␉// Remove leading white space:␊ |
601 | ␉␉p = SkipWhiteSpace( p, encoding );␊ |
602 | ␉␉while (␉ p && *p␊ |
603 | ␉␉␉␉&& !StringEqual( p, endTag, caseInsensitive, encoding ) )␊ |
604 | ␉␉{␊ |
605 | ␉␉␉if ( *p == '\r' || *p == '\n' )␊ |
606 | ␉␉␉{␊ |
607 | ␉␉␉␉whitespace = true;␊ |
608 | ␉␉␉␉++p;␊ |
609 | ␉␉␉}␊ |
610 | ␉␉␉else if ( IsWhiteSpace( *p ) )␊ |
611 | ␉␉␉{␊ |
612 | ␉␉␉␉whitespace = true;␊ |
613 | ␉␉␉␉++p;␊ |
614 | ␉␉␉}␊ |
615 | ␉␉␉else␊ |
616 | ␉␉␉{␊ |
617 | ␉␉␉␉// If we've found whitespace, add it before the␊ |
618 | ␉␉␉␉// new character. Any whitespace just becomes a space.␊ |
619 | ␉␉␉␉if ( whitespace )␊ |
620 | ␉␉␉␉{␊ |
621 | ␉␉␉␉␉(*text) += ' ';␊ |
622 | ␉␉␉␉␉whitespace = false;␊ |
623 | ␉␉␉␉}␊ |
624 | ␉␉␉␉int len;␊ |
625 | ␉␉␉␉char cArr[4] = { 0, 0, 0, 0 };␊ |
626 | ␉␉␉␉p = GetChar( p, cArr, &len, encoding );␊ |
627 | ␉␉␉␉if ( len == 1 )␊ |
628 | ␉␉␉␉␉(*text) += cArr[0];␉// more efficient␊ |
629 | ␉␉␉␉else␊ |
630 | ␉␉␉␉␉text->append( cArr, len );␊ |
631 | ␉␉␉}␊ |
632 | ␉␉}␊ |
633 | ␉}␊ |
634 | ␉if ( p && *p )␊ |
635 | ␉␉p += strlen( endTag );␊ |
636 | ␉return ( p && *p ) ? p : 0;␊ |
637 | }␊ |
638 | ␊ |
639 | #ifdef TIXML_USE_STL␊ |
640 | ␊ |
641 | void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )␊ |
642 | {␊ |
643 | ␉// The basic issue with a document is that we don't know what we're␊ |
644 | ␉// streaming. Read something presumed to be a tag (and hope), then␊ |
645 | ␉// identify it, and call the appropriate stream method on the tag.␊ |
646 | ␉//␊ |
647 | ␉// This "pre-streaming" will never read the closing ">" so the␊ |
648 | ␉// sub-tag can orient itself.␊ |
649 | ␊ |
650 | ␉if ( !StreamTo( in, '<', tag ) ) ␊ |
651 | ␉{␊ |
652 | ␉␉SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
653 | ␉␉return;␊ |
654 | ␉}␊ |
655 | ␊ |
656 | ␉while ( in->good() )␊ |
657 | ␉{␊ |
658 | ␉␉int tagIndex = (int) tag->length();␊ |
659 | ␉␉while ( in->good() && in->peek() != '>' )␊ |
660 | ␉␉{␊ |
661 | ␉␉␉int c = in->get();␊ |
662 | ␉␉␉if ( c <= 0 )␊ |
663 | ␉␉␉{␊ |
664 | ␉␉␉␉SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
665 | ␉␉␉␉break;␊ |
666 | ␉␉␉}␊ |
667 | ␉␉␉(*tag) += (char) c;␊ |
668 | ␉␉}␊ |
669 | ␊ |
670 | ␉␉if ( in->good() )␊ |
671 | ␉␉{␊ |
672 | ␉␉␉// We now have something we presume to be a node of ␊ |
673 | ␉␉␉// some sort. Identify it, and call the node to␊ |
674 | ␉␉␉// continue streaming.␊ |
675 | ␉␉␉TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );␊ |
676 | ␊ |
677 | ␉␉␉if ( node )␊ |
678 | ␉␉␉{␊ |
679 | ␉␉␉␉node->StreamIn( in, tag );␊ |
680 | ␉␉␉␉bool isElement = node->ToElement() != 0;␊ |
681 | ␉␉␉␉delete node;␊ |
682 | ␉␉␉␉node = 0;␊ |
683 | ␊ |
684 | ␉␉␉␉// If this is the root element, we're done. Parsing will be␊ |
685 | ␉␉␉␉// done by the >> operator.␊ |
686 | ␉␉␉␉if ( isElement )␊ |
687 | ␉␉␉␉{␊ |
688 | ␉␉␉␉␉return;␊ |
689 | ␉␉␉␉}␊ |
690 | ␉␉␉}␊ |
691 | ␉␉␉else␊ |
692 | ␉␉␉{␊ |
693 | ␉␉␉␉SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
694 | ␉␉␉␉return;␊ |
695 | ␉␉␉}␊ |
696 | ␉␉}␊ |
697 | ␉}␊ |
698 | ␉// We should have returned sooner.␊ |
699 | ␉SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
700 | }␊ |
701 | ␊ |
702 | #endif␊ |
703 | ␊ |
704 | const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )␊ |
705 | {␊ |
706 | ␉ClearError();␊ |
707 | ␊ |
708 | ␉// Parse away, at the document level. Since a document␊ |
709 | ␉// contains nothing but other tags, most of what happens␊ |
710 | ␉// here is skipping white space.␊ |
711 | ␉if ( !p || !*p )␊ |
712 | ␉{␊ |
713 | ␉␉SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
714 | ␉␉return 0;␊ |
715 | ␉}␊ |
716 | ␊ |
717 | ␉// Note that, for a document, this needs to come␊ |
718 | ␉// before the while space skip, so that parsing␊ |
719 | ␉// starts from the pointer we are given.␊ |
720 | ␉location.Clear();␊ |
721 | ␉if ( prevData )␊ |
722 | ␉{␊ |
723 | ␉␉location.row = prevData->cursor.row;␊ |
724 | ␉␉location.col = prevData->cursor.col;␊ |
725 | ␉}␊ |
726 | ␉else␊ |
727 | ␉{␊ |
728 | ␉␉location.row = 0;␊ |
729 | ␉␉location.col = 0;␊ |
730 | ␉}␊ |
731 | ␉TiXmlParsingData data( p, TabSize(), location.row, location.col );␊ |
732 | ␉location = data.Cursor();␊ |
733 | ␊ |
734 | ␉if ( encoding == TIXML_ENCODING_UNKNOWN )␊ |
735 | ␉{␊ |
736 | ␉␉// Check for the Microsoft UTF-8 lead bytes.␊ |
737 | ␉␉const unsigned char* pU = (const unsigned char*)p;␊ |
738 | ␉␉if (␉*(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0␊ |
739 | ␉␉␉ && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1␊ |
740 | ␉␉␉ && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )␊ |
741 | ␉␉{␊ |
742 | ␉␉␉encoding = TIXML_ENCODING_UTF8;␊ |
743 | ␉␉␉useMicrosoftBOM = true;␊ |
744 | ␉␉}␊ |
745 | ␉}␊ |
746 | ␊ |
747 | p = SkipWhiteSpace( p, encoding );␊ |
748 | ␉if ( !p )␊ |
749 | ␉{␊ |
750 | ␉␉SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
751 | ␉␉return 0;␊ |
752 | ␉}␊ |
753 | ␊ |
754 | ␉while ( p && *p )␊ |
755 | ␉{␊ |
756 | ␉␉TiXmlNode* node = Identify( p, encoding );␊ |
757 | ␉␉if ( node )␊ |
758 | ␉␉{␊ |
759 | ␉␉␉p = node->Parse( p, &data, encoding );␊ |
760 | ␉␉␉LinkEndChild( node );␊ |
761 | ␉␉}␊ |
762 | ␉␉else␊ |
763 | ␉␉{␊ |
764 | ␉␉␉break;␊ |
765 | ␉␉}␊ |
766 | ␊ |
767 | ␉␉// Did we get encoding info?␊ |
768 | ␉␉if ( encoding == TIXML_ENCODING_UNKNOWN␊ |
769 | ␉␉␉ && node->ToDeclaration() )␊ |
770 | ␉␉{␊ |
771 | ␉␉␉TiXmlDeclaration* dec = node->ToDeclaration();␊ |
772 | ␉␉␉const char* enc = dec->Encoding();␊ |
773 | ␉␉␉assert( enc );␊ |
774 | ␊ |
775 | ␉␉␉if ( *enc == 0 )␊ |
776 | ␉␉␉␉encoding = TIXML_ENCODING_UTF8;␊ |
777 | ␉␉␉else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )␊ |
778 | ␉␉␉␉encoding = TIXML_ENCODING_UTF8;␊ |
779 | ␉␉␉else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )␊ |
780 | ␉␉␉␉encoding = TIXML_ENCODING_UTF8;␉// incorrect, but be nice␊ |
781 | ␉␉␉else ␊ |
782 | ␉␉␉␉encoding = TIXML_ENCODING_LEGACY;␊ |
783 | ␉␉}␊ |
784 | ␊ |
785 | ␉␉p = SkipWhiteSpace( p, encoding );␊ |
786 | ␉}␊ |
787 | ␊ |
788 | ␉// Was this empty?␊ |
789 | ␉if ( !firstChild ) {␊ |
790 | ␉␉SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );␊ |
791 | ␉␉return 0;␊ |
792 | ␉}␊ |
793 | ␊ |
794 | ␉// All is well.␊ |
795 | ␉return p;␊ |
796 | }␊ |
797 | ␊ |
798 | void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
799 | {␉␊ |
800 | ␉// The first error in a chain is more accurate - don't set again!␊ |
801 | ␉if ( error )␊ |
802 | ␉␉return;␊ |
803 | ␊ |
804 | ␉assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );␊ |
805 | ␉error = true;␊ |
806 | ␉errorId = err;␊ |
807 | ␉errorDesc = errorString[ errorId ];␊ |
808 | ␊ |
809 | ␉errorLocation.Clear();␊ |
810 | ␉if ( pError && data )␊ |
811 | ␉{␊ |
812 | ␉␉data->Stamp( pError, encoding );␊ |
813 | ␉␉errorLocation = data->Cursor();␊ |
814 | ␉}␊ |
815 | }␊ |
816 | ␊ |
817 | ␊ |
818 | TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )␊ |
819 | {␊ |
820 | ␉TiXmlNode* returnNode = 0;␊ |
821 | ␊ |
822 | ␉p = SkipWhiteSpace( p, encoding );␊ |
823 | ␉if( !p || !*p || *p != '<' )␊ |
824 | ␉{␊ |
825 | ␉␉return 0;␊ |
826 | ␉}␊ |
827 | ␊ |
828 | ␉p = SkipWhiteSpace( p, encoding );␊ |
829 | ␊ |
830 | ␉if ( !p || !*p )␊ |
831 | ␉{␊ |
832 | ␉␉return 0;␊ |
833 | ␉}␊ |
834 | ␊ |
835 | ␉// What is this thing? ␊ |
836 | ␉// - Elements start with a letter or underscore, but xml is reserved.␊ |
837 | ␉// - Comments: <!--␊ |
838 | ␉// - Decleration: <?xml␊ |
839 | ␉// - Everthing else is unknown to tinyxml.␊ |
840 | ␉//␊ |
841 | ␊ |
842 | ␉const char* xmlHeader = { "<?xml" };␊ |
843 | ␉const char* commentHeader = { "<!--" };␊ |
844 | ␉const char* dtdHeader = { "<!" };␊ |
845 | ␉const char* cdataHeader = { "<![CDATA[" };␊ |
846 | ␊ |
847 | ␉if ( StringEqual( p, xmlHeader, true, encoding ) )␊ |
848 | ␉{␊ |
849 | ␉␉#ifdef DEBUG_PARSER␊ |
850 | ␉␉␉TIXML_LOG( "XML parsing Declaration\n" );␊ |
851 | ␉␉#endif␊ |
852 | ␉␉returnNode = new TiXmlDeclaration();␊ |
853 | ␉}␊ |
854 | ␉else if ( StringEqual( p, commentHeader, false, encoding ) )␊ |
855 | ␉{␊ |
856 | ␉␉#ifdef DEBUG_PARSER␊ |
857 | ␉␉␉TIXML_LOG( "XML parsing Comment\n" );␊ |
858 | ␉␉#endif␊ |
859 | ␉␉returnNode = new TiXmlComment();␊ |
860 | ␉}␊ |
861 | ␉else if ( StringEqual( p, cdataHeader, false, encoding ) )␊ |
862 | ␉{␊ |
863 | ␉␉#ifdef DEBUG_PARSER␊ |
864 | ␉␉␉TIXML_LOG( "XML parsing CDATA\n" );␊ |
865 | ␉␉#endif␊ |
866 | ␉␉TiXmlText* text = new TiXmlText( "" );␊ |
867 | ␉␉text->SetCDATA( true );␊ |
868 | ␉␉returnNode = text;␊ |
869 | ␉}␊ |
870 | ␉else if ( StringEqual( p, dtdHeader, false, encoding ) )␊ |
871 | ␉{␊ |
872 | ␉␉#ifdef DEBUG_PARSER␊ |
873 | ␉␉␉TIXML_LOG( "XML parsing Unknown(1)\n" );␊ |
874 | ␉␉#endif␊ |
875 | ␉␉returnNode = new TiXmlUnknown();␊ |
876 | ␉}␊ |
877 | ␉else if ( IsAlpha( *(p+1), encoding )␊ |
878 | ␉␉␉ || *(p+1) == '_' )␊ |
879 | ␉{␊ |
880 | ␉␉#ifdef DEBUG_PARSER␊ |
881 | ␉␉␉TIXML_LOG( "XML parsing Element\n" );␊ |
882 | ␉␉#endif␊ |
883 | ␉␉returnNode = new TiXmlElement( "" );␊ |
884 | ␉}␊ |
885 | ␉else␊ |
886 | ␉{␊ |
887 | ␉␉#ifdef DEBUG_PARSER␊ |
888 | ␉␉␉TIXML_LOG( "XML parsing Unknown(2)\n" );␊ |
889 | ␉␉#endif␊ |
890 | ␉␉returnNode = new TiXmlUnknown();␊ |
891 | ␉}␊ |
892 | ␊ |
893 | ␉if ( returnNode )␊ |
894 | ␉{␊ |
895 | ␉␉// Set the parent, so it can report errors␊ |
896 | ␉␉returnNode->parent = this;␊ |
897 | ␉}␊ |
898 | ␉return returnNode;␊ |
899 | }␊ |
900 | ␊ |
901 | #ifdef TIXML_USE_STL␊ |
902 | ␊ |
903 | void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)␊ |
904 | {␊ |
905 | ␉// We're called with some amount of pre-parsing. That is, some of "this"␊ |
906 | ␉// element is in "tag". Go ahead and stream to the closing ">"␊ |
907 | ␉while( in->good() )␊ |
908 | ␉{␊ |
909 | ␉␉int c = in->get();␊ |
910 | ␉␉if ( c <= 0 )␊ |
911 | ␉␉{␊ |
912 | ␉␉␉TiXmlDocument* document = GetDocument();␊ |
913 | ␉␉␉if ( document )␊ |
914 | ␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
915 | ␉␉␉return;␊ |
916 | ␉␉}␊ |
917 | ␉␉(*tag) += (char) c ;␊ |
918 | ␉␉␊ |
919 | ␉␉if ( c == '>' )␊ |
920 | ␉␉␉break;␊ |
921 | ␉}␊ |
922 | ␊ |
923 | ␉if ( tag->length() < 3 ) return;␊ |
924 | ␊ |
925 | ␉// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.␊ |
926 | ␉// If not, identify and stream.␊ |
927 | ␊ |
928 | ␉if ( tag->at( tag->length() - 1 ) == '>' ␊ |
929 | ␉␉ && tag->at( tag->length() - 2 ) == '/' )␊ |
930 | ␉{␊ |
931 | ␉␉// All good!␊ |
932 | ␉␉return;␊ |
933 | ␉}␊ |
934 | ␉else if ( tag->at( tag->length() - 1 ) == '>' )␊ |
935 | ␉{␊ |
936 | ␉␉// There is more. Could be:␊ |
937 | ␉␉//␉␉text␊ |
938 | ␉␉//␉␉cdata text (which looks like another node)␊ |
939 | ␉␉//␉␉closing tag␊ |
940 | ␉␉//␉␉another node.␊ |
941 | ␉␉for ( ;; )␊ |
942 | ␉␉{␊ |
943 | ␉␉␉StreamWhiteSpace( in, tag );␊ |
944 | ␊ |
945 | ␉␉␉// Do we have text?␊ |
946 | ␉␉␉if ( in->good() && in->peek() != '<' ) ␊ |
947 | ␉␉␉{␊ |
948 | ␉␉␉␉// Yep, text.␊ |
949 | ␉␉␉␉TiXmlText text( "" );␊ |
950 | ␉␉␉␉text.StreamIn( in, tag );␊ |
951 | ␊ |
952 | ␉␉␉␉// What follows text is a closing tag or another node.␊ |
953 | ␉␉␉␉// Go around again and figure it out.␊ |
954 | ␉␉␉␉continue;␊ |
955 | ␉␉␉}␊ |
956 | ␊ |
957 | ␉␉␉// We now have either a closing tag...or another node.␊ |
958 | ␉␉␉// We should be at a "<", regardless.␊ |
959 | ␉␉␉if ( !in->good() ) return;␊ |
960 | ␉␉␉assert( in->peek() == '<' );␊ |
961 | ␉␉␉int tagIndex = (int) tag->length();␊ |
962 | ␊ |
963 | ␉␉␉bool closingTag = false;␊ |
964 | ␉␉␉bool firstCharFound = false;␊ |
965 | ␊ |
966 | ␉␉␉for( ;; )␊ |
967 | ␉␉␉{␊ |
968 | ␉␉␉␉if ( !in->good() )␊ |
969 | ␉␉␉␉␉return;␊ |
970 | ␊ |
971 | ␉␉␉␉int c = in->peek();␊ |
972 | ␉␉␉␉if ( c <= 0 )␊ |
973 | ␉␉␉␉{␊ |
974 | ␉␉␉␉␉TiXmlDocument* document = GetDocument();␊ |
975 | ␉␉␉␉␉if ( document )␊ |
976 | ␉␉␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
977 | ␉␉␉␉␉return;␊ |
978 | ␉␉␉␉}␊ |
979 | ␉␉␉␉␊ |
980 | ␉␉␉␉if ( c == '>' )␊ |
981 | ␉␉␉␉␉break;␊ |
982 | ␊ |
983 | ␉␉␉␉*tag += (char) c;␊ |
984 | ␉␉␉␉in->get();␊ |
985 | ␊ |
986 | ␉␉␉␉// Early out if we find the CDATA id.␊ |
987 | ␉␉␉␉if ( c == '[' && tag->size() >= 9 )␊ |
988 | ␉␉␉␉{␊ |
989 | ␉␉␉␉␉size_t len = tag->size();␊ |
990 | ␉␉␉␉␉const char* start = tag->c_str() + len - 9;␊ |
991 | ␉␉␉␉␉if ( strcmp( start, "<![CDATA[" ) == 0 ) {␊ |
992 | ␉␉␉␉␉␉assert( !closingTag );␊ |
993 | ␉␉␉␉␉␉break;␊ |
994 | ␉␉␉␉␉}␊ |
995 | ␉␉␉␉}␊ |
996 | ␊ |
997 | ␉␉␉␉if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )␊ |
998 | ␉␉␉␉{␊ |
999 | ␉␉␉␉␉firstCharFound = true;␊ |
1000 | ␉␉␉␉␉if ( c == '/' )␊ |
1001 | ␉␉␉␉␉␉closingTag = true;␊ |
1002 | ␉␉␉␉}␊ |
1003 | ␉␉␉}␊ |
1004 | ␉␉␉// If it was a closing tag, then read in the closing '>' to clean up the input stream.␊ |
1005 | ␉␉␉// If it was not, the streaming will be done by the tag.␊ |
1006 | ␉␉␉if ( closingTag )␊ |
1007 | ␉␉␉{␊ |
1008 | ␉␉␉␉if ( !in->good() )␊ |
1009 | ␉␉␉␉␉return;␊ |
1010 | ␊ |
1011 | ␉␉␉␉int c = in->get();␊ |
1012 | ␉␉␉␉if ( c <= 0 )␊ |
1013 | ␉␉␉␉{␊ |
1014 | ␉␉␉␉␉TiXmlDocument* document = GetDocument();␊ |
1015 | ␉␉␉␉␉if ( document )␊ |
1016 | ␉␉␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
1017 | ␉␉␉␉␉return;␊ |
1018 | ␉␉␉␉}␊ |
1019 | ␉␉␉␉assert( c == '>' );␊ |
1020 | ␉␉␉␉*tag += (char) c;␊ |
1021 | ␊ |
1022 | ␉␉␉␉// We are done, once we've found our closing tag.␊ |
1023 | ␉␉␉␉return;␊ |
1024 | ␉␉␉}␊ |
1025 | ␉␉␉else␊ |
1026 | ␉␉␉{␊ |
1027 | ␉␉␉␉// If not a closing tag, id it, and stream.␊ |
1028 | ␉␉␉␉const char* tagloc = tag->c_str() + tagIndex;␊ |
1029 | ␉␉␉␉TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );␊ |
1030 | ␉␉␉␉if ( !node )␊ |
1031 | ␉␉␉␉␉return;␊ |
1032 | ␉␉␉␉node->StreamIn( in, tag );␊ |
1033 | ␉␉␉␉delete node;␊ |
1034 | ␉␉␉␉node = 0;␊ |
1035 | ␊ |
1036 | ␉␉␉␉// No return: go around from the beginning: text, closing tag, or node.␊ |
1037 | ␉␉␉}␊ |
1038 | ␉␉}␊ |
1039 | ␉}␊ |
1040 | }␊ |
1041 | #endif␊ |
1042 | ␊ |
1043 | const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
1044 | {␊ |
1045 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1046 | ␉TiXmlDocument* document = GetDocument();␊ |
1047 | ␊ |
1048 | ␉if ( !p || !*p )␊ |
1049 | ␉{␊ |
1050 | ␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );␊ |
1051 | ␉␉return 0;␊ |
1052 | ␉}␊ |
1053 | ␊ |
1054 | ␉if ( data )␊ |
1055 | ␉{␊ |
1056 | ␉␉data->Stamp( p, encoding );␊ |
1057 | ␉␉location = data->Cursor();␊ |
1058 | ␉}␊ |
1059 | ␊ |
1060 | ␉if ( *p != '<' )␊ |
1061 | ␉{␊ |
1062 | ␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );␊ |
1063 | ␉␉return 0;␊ |
1064 | ␉}␊ |
1065 | ␊ |
1066 | ␉p = SkipWhiteSpace( p+1, encoding );␊ |
1067 | ␊ |
1068 | ␉// Read the name.␊ |
1069 | ␉const char* pErr = p;␊ |
1070 | ␊ |
1071 | p = ReadName( p, &value, encoding );␊ |
1072 | ␉if ( !p || !*p )␊ |
1073 | ␉{␊ |
1074 | ␉␉if ( document )␉document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );␊ |
1075 | ␉␉return 0;␊ |
1076 | ␉}␊ |
1077 | ␊ |
1078 | TIXML_STRING endTag ("</");␊ |
1079 | ␉endTag += value;␊ |
1080 | ␊ |
1081 | ␉// Check for and read attributes. Also look for an empty␊ |
1082 | ␉// tag or an end tag.␊ |
1083 | ␉while ( p && *p )␊ |
1084 | ␉{␊ |
1085 | ␉␉pErr = p;␊ |
1086 | ␉␉p = SkipWhiteSpace( p, encoding );␊ |
1087 | ␉␉if ( !p || !*p )␊ |
1088 | ␉␉{␊ |
1089 | ␉␉␉if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );␊ |
1090 | ␉␉␉return 0;␊ |
1091 | ␉␉}␊ |
1092 | ␉␉if ( *p == '/' )␊ |
1093 | ␉␉{␊ |
1094 | ␉␉␉++p;␊ |
1095 | ␉␉␉// Empty tag.␊ |
1096 | ␉␉␉if ( *p != '>' )␊ |
1097 | ␉␉␉{␊ |
1098 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );␉␉␊ |
1099 | ␉␉␉␉return 0;␊ |
1100 | ␉␉␉}␊ |
1101 | ␉␉␉return (p+1);␊ |
1102 | ␉␉}␊ |
1103 | ␉␉else if ( *p == '>' )␊ |
1104 | ␉␉{␊ |
1105 | ␉␉␉// Done with attributes (if there were any.)␊ |
1106 | ␉␉␉// Read the value -- which can include other␊ |
1107 | ␉␉␉// elements -- read the end tag, and return.␊ |
1108 | ␉␉␉++p;␊ |
1109 | ␉␉␉p = ReadValue( p, data, encoding );␉␉// Note this is an Element method, and will set the error if one happens.␊ |
1110 | ␉␉␉if ( !p || !*p ) {␊ |
1111 | ␉␉␉␉// We were looking for the end tag, but found nothing.␊ |
1112 | ␉␉␉␉// Fix for [ 1663758 ] Failure to report error on bad XML␊ |
1113 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );␊ |
1114 | ␉␉␉␉return 0;␊ |
1115 | ␉␉␉}␊ |
1116 | ␊ |
1117 | ␉␉␉// We should find the end tag now␊ |
1118 | ␉␉␉// note that:␊ |
1119 | ␉␉␉// </foo > and␊ |
1120 | ␉␉␉// </foo> ␊ |
1121 | ␉␉␉// are both valid end tags.␊ |
1122 | ␉␉␉if ( StringEqual( p, endTag.c_str(), false, encoding ) )␊ |
1123 | ␉␉␉{␊ |
1124 | ␉␉␉␉p += endTag.length();␊ |
1125 | ␉␉␉␉p = SkipWhiteSpace( p, encoding );␊ |
1126 | ␉␉␉␉if ( p && *p && *p == '>' ) {␊ |
1127 | ␉␉␉␉␉++p;␊ |
1128 | ␉␉␉␉␉return p;␊ |
1129 | ␉␉␉␉}␊ |
1130 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );␊ |
1131 | ␉␉␉␉return 0;␊ |
1132 | ␉␉␉}␊ |
1133 | ␉␉␉else␊ |
1134 | ␉␉␉{␊ |
1135 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );␊ |
1136 | ␉␉␉␉return 0;␊ |
1137 | ␉␉␉}␊ |
1138 | ␉␉}␊ |
1139 | ␉␉else␊ |
1140 | ␉␉{␊ |
1141 | ␉␉␉// Try to read an attribute:␊ |
1142 | ␉␉␉TiXmlAttribute* attrib = new TiXmlAttribute();␊ |
1143 | ␉␉␉if ( !attrib )␊ |
1144 | ␉␉␉{␊ |
1145 | ␉␉␉␉return 0;␊ |
1146 | ␉␉␉}␊ |
1147 | ␊ |
1148 | ␉␉␉attrib->SetDocument( document );␊ |
1149 | ␉␉␉pErr = p;␊ |
1150 | ␉␉␉p = attrib->Parse( p, data, encoding );␊ |
1151 | ␊ |
1152 | ␉␉␉if ( !p || !*p )␊ |
1153 | ␉␉␉{␊ |
1154 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );␊ |
1155 | ␉␉␉␉delete attrib;␊ |
1156 | ␉␉␉␉return 0;␊ |
1157 | ␉␉␉}␊ |
1158 | ␊ |
1159 | ␉␉␉// Handle the strange case of double attributes:␊ |
1160 | ␉␉␉#ifdef TIXML_USE_STL␊ |
1161 | ␉␉␉TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );␊ |
1162 | ␉␉␉#else␊ |
1163 | ␉␉␉TiXmlAttribute* node = attributeSet.Find( attrib->Name() );␊ |
1164 | ␉␉␉#endif␊ |
1165 | ␉␉␉if ( node )␊ |
1166 | ␉␉␉{␊ |
1167 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );␊ |
1168 | ␉␉␉␉delete attrib;␊ |
1169 | ␉␉␉␉return 0;␊ |
1170 | ␉␉␉}␊ |
1171 | ␊ |
1172 | ␉␉␉attributeSet.Add( attrib );␊ |
1173 | ␉␉}␊ |
1174 | ␉}␊ |
1175 | ␉return p;␊ |
1176 | }␊ |
1177 | ␊ |
1178 | ␊ |
1179 | const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
1180 | {␊ |
1181 | ␉TiXmlDocument* document = GetDocument();␊ |
1182 | ␊ |
1183 | ␉// Read in text and elements in any order.␊ |
1184 | ␉const char* pWithWhiteSpace = p;␊ |
1185 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1186 | ␊ |
1187 | ␉while ( p && *p )␊ |
1188 | ␉{␊ |
1189 | ␉␉if ( *p != '<' )␊ |
1190 | ␉␉{␊ |
1191 | ␉␉␉// Take what we have, make a text element.␊ |
1192 | ␉␉␉TiXmlText* textNode = new TiXmlText( "" );␊ |
1193 | ␊ |
1194 | ␉␉␉if ( !textNode )␊ |
1195 | ␉␉␉{␊ |
1196 | ␉␉␉ return 0;␊ |
1197 | ␉␉␉}␊ |
1198 | ␊ |
1199 | ␉␉␉if ( TiXmlBase::IsWhiteSpaceCondensed() )␊ |
1200 | ␉␉␉{␊ |
1201 | ␉␉␉␉p = textNode->Parse( p, data, encoding );␊ |
1202 | ␉␉␉}␊ |
1203 | ␉␉␉else␊ |
1204 | ␉␉␉{␊ |
1205 | ␉␉␉␉// Special case: we want to keep the white space␊ |
1206 | ␉␉␉␉// so that leading spaces aren't removed.␊ |
1207 | ␉␉␉␉p = textNode->Parse( pWithWhiteSpace, data, encoding );␊ |
1208 | ␉␉␉}␊ |
1209 | ␊ |
1210 | ␉␉␉if ( !textNode->Blank() )␊ |
1211 | ␉␉␉␉LinkEndChild( textNode );␊ |
1212 | ␉␉␉else␊ |
1213 | ␉␉␉␉delete textNode;␊ |
1214 | ␉␉} ␊ |
1215 | ␉␉else ␊ |
1216 | ␉␉{␊ |
1217 | ␉␉␉// We hit a '<'␊ |
1218 | ␉␉␉// Have we hit a new element or an end tag? This could also be␊ |
1219 | ␉␉␉// a TiXmlText in the "CDATA" style.␊ |
1220 | ␉␉␉if ( StringEqual( p, "</", false, encoding ) )␊ |
1221 | ␉␉␉{␊ |
1222 | ␉␉␉␉return p;␊ |
1223 | ␉␉␉}␊ |
1224 | ␉␉␉else␊ |
1225 | ␉␉␉{␊ |
1226 | ␉␉␉␉TiXmlNode* node = Identify( p, encoding );␊ |
1227 | ␉␉␉␉if ( node )␊ |
1228 | ␉␉␉␉{␊ |
1229 | ␉␉␉␉␉p = node->Parse( p, data, encoding );␊ |
1230 | ␉␉␉␉␉LinkEndChild( node );␊ |
1231 | ␉␉␉␉}␉␉␉␉␊ |
1232 | ␉␉␉␉else␊ |
1233 | ␉␉␉␉{␊ |
1234 | ␉␉␉␉␉return 0;␊ |
1235 | ␉␉␉␉}␊ |
1236 | ␉␉␉}␊ |
1237 | ␉␉}␊ |
1238 | ␉␉pWithWhiteSpace = p;␊ |
1239 | ␉␉p = SkipWhiteSpace( p, encoding );␊ |
1240 | ␉}␊ |
1241 | ␊ |
1242 | ␉if ( !p )␊ |
1243 | ␉{␊ |
1244 | ␉␉if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );␊ |
1245 | ␉}␉␊ |
1246 | ␉return p;␊ |
1247 | }␊ |
1248 | ␊ |
1249 | ␊ |
1250 | #ifdef TIXML_USE_STL␊ |
1251 | void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )␊ |
1252 | {␊ |
1253 | ␉while ( in->good() )␊ |
1254 | ␉{␊ |
1255 | ␉␉int c = in->get();␉␊ |
1256 | ␉␉if ( c <= 0 )␊ |
1257 | ␉␉{␊ |
1258 | ␉␉␉TiXmlDocument* document = GetDocument();␊ |
1259 | ␉␉␉if ( document )␊ |
1260 | ␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
1261 | ␉␉␉return;␊ |
1262 | ␉␉}␊ |
1263 | ␉␉(*tag) += (char) c;␊ |
1264 | ␊ |
1265 | ␉␉if ( c == '>' )␊ |
1266 | ␉␉{␊ |
1267 | ␉␉␉// All is well.␊ |
1268 | ␉␉␉return;␉␉␊ |
1269 | ␉␉}␊ |
1270 | ␉}␊ |
1271 | }␊ |
1272 | #endif␊ |
1273 | ␊ |
1274 | ␊ |
1275 | const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
1276 | {␊ |
1277 | ␉TiXmlDocument* document = GetDocument();␊ |
1278 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1279 | ␊ |
1280 | ␉if ( data )␊ |
1281 | ␉{␊ |
1282 | ␉␉data->Stamp( p, encoding );␊ |
1283 | ␉␉location = data->Cursor();␊ |
1284 | ␉}␊ |
1285 | ␉if ( !p || !*p || *p != '<' )␊ |
1286 | ␉{␊ |
1287 | ␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );␊ |
1288 | ␉␉return 0;␊ |
1289 | ␉}␊ |
1290 | ␉++p;␊ |
1291 | value = "";␊ |
1292 | ␊ |
1293 | ␉while ( p && *p && *p != '>' )␊ |
1294 | ␉{␊ |
1295 | ␉␉value += *p;␊ |
1296 | ␉␉++p;␊ |
1297 | ␉}␊ |
1298 | ␊ |
1299 | ␉if ( !p )␊ |
1300 | ␉{␊ |
1301 | ␉␉if ( document )␉␊ |
1302 | ␉␉␉document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );␊ |
1303 | ␉}␊ |
1304 | ␉if ( p && *p == '>' )␊ |
1305 | ␉␉return p+1;␊ |
1306 | ␉return p;␊ |
1307 | }␊ |
1308 | ␊ |
1309 | #ifdef TIXML_USE_STL␊ |
1310 | void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )␊ |
1311 | {␊ |
1312 | ␉while ( in->good() )␊ |
1313 | ␉{␊ |
1314 | ␉␉int c = in->get();␉␊ |
1315 | ␉␉if ( c <= 0 )␊ |
1316 | ␉␉{␊ |
1317 | ␉␉␉TiXmlDocument* document = GetDocument();␊ |
1318 | ␉␉␉if ( document )␊ |
1319 | ␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
1320 | ␉␉␉return;␊ |
1321 | ␉␉}␊ |
1322 | ␊ |
1323 | ␉␉(*tag) += (char) c;␊ |
1324 | ␊ |
1325 | ␉␉if ( c == '>' ␊ |
1326 | ␉␉␉ && tag->at( tag->length() - 2 ) == '-'␊ |
1327 | ␉␉␉ && tag->at( tag->length() - 3 ) == '-' )␊ |
1328 | ␉␉{␊ |
1329 | ␉␉␉// All is well.␊ |
1330 | ␉␉␉return;␉␉␊ |
1331 | ␉␉}␊ |
1332 | ␉}␊ |
1333 | }␊ |
1334 | #endif␊ |
1335 | ␊ |
1336 | ␊ |
1337 | const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
1338 | {␊ |
1339 | ␉TiXmlDocument* document = GetDocument();␊ |
1340 | ␉value = "";␊ |
1341 | ␊ |
1342 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1343 | ␊ |
1344 | ␉if ( data )␊ |
1345 | ␉{␊ |
1346 | ␉␉data->Stamp( p, encoding );␊ |
1347 | ␉␉location = data->Cursor();␊ |
1348 | ␉}␊ |
1349 | ␉const char* startTag = "<!--";␊ |
1350 | ␉const char* endTag = "-->";␊ |
1351 | ␊ |
1352 | ␉if ( !StringEqual( p, startTag, false, encoding ) )␊ |
1353 | ␉{␊ |
1354 | ␉␉if ( document )␊ |
1355 | ␉␉␉document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );␊ |
1356 | ␉␉return 0;␊ |
1357 | ␉}␊ |
1358 | ␉p += strlen( startTag );␊ |
1359 | ␊ |
1360 | ␉// [ 1475201 ] TinyXML parses entities in comments␊ |
1361 | ␉// Oops - ReadText doesn't work, because we don't want to parse the entities.␊ |
1362 | ␉// p = ReadText( p, &value, false, endTag, false, encoding );␊ |
1363 | ␉//␊ |
1364 | ␉// from the XML spec:␊ |
1365 | ␉/*␊ |
1366 | ␉ [Definition: Comments may appear anywhere in a document outside other markup; in addition, ␊ |
1367 | ␉ they may appear within the document type declaration at places allowed by the grammar. ␊ |
1368 | ␉␉␉␉ They are not part of the document's character data; an XML processor MAY, but need not, ␊ |
1369 | ␉␉␉␉ make it possible for an application to retrieve the text of comments. For compatibility, ␊ |
1370 | ␉␉␉␉ the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity ␊ |
1371 | ␉␉␉␉ references MUST NOT be recognized within comments.␊ |
1372 | ␊ |
1373 | ␉␉␉␉ An example of a comment:␊ |
1374 | ␊ |
1375 | ␉␉␉␉ <!-- declarations for <head> & <body> -->␊ |
1376 | ␉*/␊ |
1377 | ␊ |
1378 | value = "";␊ |
1379 | ␉// Keep all the white space.␊ |
1380 | ␉while (␉p && *p && !StringEqual( p, endTag, false, encoding ) )␊ |
1381 | ␉{␊ |
1382 | ␉␉value.append( p, 1 );␊ |
1383 | ␉␉++p;␊ |
1384 | ␉}␊ |
1385 | ␉if ( p && *p ) ␊ |
1386 | ␉␉p += strlen( endTag );␊ |
1387 | ␊ |
1388 | ␉return p;␊ |
1389 | }␊ |
1390 | ␊ |
1391 | ␊ |
1392 | const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
1393 | {␊ |
1394 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1395 | ␉if ( !p || !*p ) return 0;␊ |
1396 | ␊ |
1397 | ␉if ( data )␊ |
1398 | ␉{␊ |
1399 | ␉␉data->Stamp( p, encoding );␊ |
1400 | ␉␉location = data->Cursor();␊ |
1401 | ␉}␊ |
1402 | ␉// Read the name, the '=' and the value.␊ |
1403 | ␉const char* pErr = p;␊ |
1404 | ␉p = ReadName( p, &name, encoding );␊ |
1405 | ␉if ( !p || !*p )␊ |
1406 | ␉{␊ |
1407 | ␉␉if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );␊ |
1408 | ␉␉return 0;␊ |
1409 | ␉}␊ |
1410 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1411 | ␉if ( !p || !*p || *p != '=' )␊ |
1412 | ␉{␊ |
1413 | ␉␉if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );␊ |
1414 | ␉␉return 0;␊ |
1415 | ␉}␊ |
1416 | ␊ |
1417 | ␉++p;␉// skip '='␊ |
1418 | ␉p = SkipWhiteSpace( p, encoding );␊ |
1419 | ␉if ( !p || !*p )␊ |
1420 | ␉{␊ |
1421 | ␉␉if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );␊ |
1422 | ␉␉return 0;␊ |
1423 | ␉}␊ |
1424 | ␉␊ |
1425 | ␉const char* end;␊ |
1426 | ␉const char SINGLE_QUOTE = '\'';␊ |
1427 | ␉const char DOUBLE_QUOTE = '\"';␊ |
1428 | ␊ |
1429 | ␉if ( *p == SINGLE_QUOTE )␊ |
1430 | ␉{␊ |
1431 | ␉␉++p;␊ |
1432 | ␉␉end = "\'";␉␉// single quote in string␊ |
1433 | ␉␉p = ReadText( p, &value, false, end, false, encoding );␊ |
1434 | ␉}␊ |
1435 | ␉else if ( *p == DOUBLE_QUOTE )␊ |
1436 | ␉{␊ |
1437 | ␉␉++p;␊ |
1438 | ␉␉end = "\"";␉␉// double quote in string␊ |
1439 | ␉␉p = ReadText( p, &value, false, end, false, encoding );␊ |
1440 | ␉}␊ |
1441 | ␉else␊ |
1442 | ␉{␊ |
1443 | ␉␉// All attribute values should be in single or double quotes.␊ |
1444 | ␉␉// But this is such a common error that the parser will try␊ |
1445 | ␉␉// its best, even without them.␊ |
1446 | ␉␉value = "";␊ |
1447 | ␉␉while ( p && *p␉␉␉␉␉␉␉␉␉␉␉// existence␊ |
1448 | ␉␉␉␉&& !IsWhiteSpace( *p )␉␉␉␉␉␉␉␉// whitespace␊ |
1449 | ␉␉␉␉&& *p != '/' && *p != '>' )␉␉␉␉␉␉␉// tag end␊ |
1450 | ␉␉{␊ |
1451 | ␉␉␉if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {␊ |
1452 | ␉␉␉␉// [ 1451649 ] Attribute values with trailing quotes not handled correctly␊ |
1453 | ␉␉␉␉// We did not have an opening quote but seem to have a ␊ |
1454 | ␉␉␉␉// closing one. Give up and throw an error.␊ |
1455 | ␉␉␉␉if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );␊ |
1456 | ␉␉␉␉return 0;␊ |
1457 | ␉␉␉}␊ |
1458 | ␉␉␉value += *p;␊ |
1459 | ␉␉␉++p;␊ |
1460 | ␉␉}␊ |
1461 | ␉}␊ |
1462 | ␉return p;␊ |
1463 | }␊ |
1464 | ␊ |
1465 | #ifdef TIXML_USE_STL␊ |
1466 | void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )␊ |
1467 | {␊ |
1468 | ␉while ( in->good() )␊ |
1469 | ␉{␊ |
1470 | ␉␉int c = in->peek();␉␊ |
1471 | ␉␉if ( !cdata && (c == '<' ) ) ␊ |
1472 | ␉␉{␊ |
1473 | ␉␉␉return;␊ |
1474 | ␉␉}␊ |
1475 | ␉␉if ( c <= 0 )␊ |
1476 | ␉␉{␊ |
1477 | ␉␉␉TiXmlDocument* document = GetDocument();␊ |
1478 | ␉␉␉if ( document )␊ |
1479 | ␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
1480 | ␉␉␉return;␊ |
1481 | ␉␉}␊ |
1482 | ␊ |
1483 | ␉␉(*tag) += (char) c;␊ |
1484 | ␉␉in->get();␉// "commits" the peek made above␊ |
1485 | ␊ |
1486 | ␉␉if ( cdata && c == '>' && tag->size() >= 3 ) {␊ |
1487 | ␉␉␉size_t len = tag->size();␊ |
1488 | ␉␉␉if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {␊ |
1489 | ␉␉␉␉// terminator of cdata.␊ |
1490 | ␉␉␉␉return;␊ |
1491 | ␉␉␉}␊ |
1492 | ␉␉} ␊ |
1493 | ␉}␊ |
1494 | }␊ |
1495 | #endif␊ |
1496 | ␊ |
1497 | const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )␊ |
1498 | {␊ |
1499 | ␉value = "";␊ |
1500 | ␉TiXmlDocument* document = GetDocument();␊ |
1501 | ␊ |
1502 | ␉if ( data )␊ |
1503 | ␉{␊ |
1504 | ␉␉data->Stamp( p, encoding );␊ |
1505 | ␉␉location = data->Cursor();␊ |
1506 | ␉}␊ |
1507 | ␊ |
1508 | ␉const char* const startTag = "<![CDATA[";␊ |
1509 | ␉const char* const endTag = "]]>";␊ |
1510 | ␊ |
1511 | ␉if ( cdata || StringEqual( p, startTag, false, encoding ) )␊ |
1512 | ␉{␊ |
1513 | ␉␉cdata = true;␊ |
1514 | ␊ |
1515 | ␉␉if ( !StringEqual( p, startTag, false, encoding ) )␊ |
1516 | ␉␉{␊ |
1517 | ␉␉␉if ( document )␊ |
1518 | ␉␉␉␉document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );␊ |
1519 | ␉␉␉return 0;␊ |
1520 | ␉␉}␊ |
1521 | ␉␉p += strlen( startTag );␊ |
1522 | ␊ |
1523 | ␉␉// Keep all the white space, ignore the encoding, etc.␊ |
1524 | ␉␉while (␉ p && *p␊ |
1525 | ␉␉␉␉&& !StringEqual( p, endTag, false, encoding )␊ |
1526 | ␉␉␉ )␊ |
1527 | ␉␉{␊ |
1528 | ␉␉␉value += *p;␊ |
1529 | ␉␉␉++p;␊ |
1530 | ␉␉}␊ |
1531 | ␊ |
1532 | ␉␉TIXML_STRING dummy; ␊ |
1533 | ␉␉p = ReadText( p, &dummy, false, endTag, false, encoding );␊ |
1534 | ␉␉return p;␊ |
1535 | ␉}␊ |
1536 | ␉else␊ |
1537 | ␉{␊ |
1538 | ␉␉bool ignoreWhite = true;␊ |
1539 | ␊ |
1540 | ␉␉const char* end = "<";␊ |
1541 | ␉␉p = ReadText( p, &value, ignoreWhite, end, false, encoding );␊ |
1542 | ␉␉if ( p && *p )␊ |
1543 | ␉␉␉return p-1;␉// don't truncate the '<'␊ |
1544 | ␉␉return 0;␊ |
1545 | ␉}␊ |
1546 | }␊ |
1547 | ␊ |
1548 | #ifdef TIXML_USE_STL␊ |
1549 | void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )␊ |
1550 | {␊ |
1551 | ␉while ( in->good() )␊ |
1552 | ␉{␊ |
1553 | ␉␉int c = in->get();␊ |
1554 | ␉␉if ( c <= 0 )␊ |
1555 | ␉␉{␊ |
1556 | ␉␉␉TiXmlDocument* document = GetDocument();␊ |
1557 | ␉␉␉if ( document )␊ |
1558 | ␉␉␉␉document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );␊ |
1559 | ␉␉␉return;␊ |
1560 | ␉␉}␊ |
1561 | ␉␉(*tag) += (char) c;␊ |
1562 | ␊ |
1563 | ␉␉if ( c == '>' )␊ |
1564 | ␉␉{␊ |
1565 | ␉␉␉// All is well.␊ |
1566 | ␉␉␉return;␊ |
1567 | ␉␉}␊ |
1568 | ␉}␊ |
1569 | }␊ |
1570 | #endif␊ |
1571 | ␊ |
1572 | const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )␊ |
1573 | {␊ |
1574 | ␉p = SkipWhiteSpace( p, _encoding );␊ |
1575 | ␉// Find the beginning, find the end, and look for␊ |
1576 | ␉// the stuff in-between.␊ |
1577 | ␉TiXmlDocument* document = GetDocument();␊ |
1578 | ␉if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )␊ |
1579 | ␉{␊ |
1580 | ␉␉if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );␊ |
1581 | ␉␉return 0;␊ |
1582 | ␉}␊ |
1583 | ␉if ( data )␊ |
1584 | ␉{␊ |
1585 | ␉␉data->Stamp( p, _encoding );␊ |
1586 | ␉␉location = data->Cursor();␊ |
1587 | ␉}␊ |
1588 | ␉p += 5;␊ |
1589 | ␊ |
1590 | ␉version = "";␊ |
1591 | ␉encoding = "";␊ |
1592 | ␉standalone = "";␊ |
1593 | ␊ |
1594 | ␉while ( p && *p )␊ |
1595 | ␉{␊ |
1596 | ␉␉if ( *p == '>' )␊ |
1597 | ␉␉{␊ |
1598 | ␉␉␉++p;␊ |
1599 | ␉␉␉return p;␊ |
1600 | ␉␉}␊ |
1601 | ␊ |
1602 | ␉␉p = SkipWhiteSpace( p, _encoding );␊ |
1603 | ␉␉if ( StringEqual( p, "version", true, _encoding ) )␊ |
1604 | ␉␉{␊ |
1605 | ␉␉␉TiXmlAttribute attrib;␊ |
1606 | ␉␉␉p = attrib.Parse( p, data, _encoding );␉␉␊ |
1607 | ␉␉␉version = attrib.Value();␊ |
1608 | ␉␉}␊ |
1609 | ␉␉else if ( StringEqual( p, "encoding", true, _encoding ) )␊ |
1610 | ␉␉{␊ |
1611 | ␉␉␉TiXmlAttribute attrib;␊ |
1612 | ␉␉␉p = attrib.Parse( p, data, _encoding );␉␉␊ |
1613 | ␉␉␉encoding = attrib.Value();␊ |
1614 | ␉␉}␊ |
1615 | ␉␉else if ( StringEqual( p, "standalone", true, _encoding ) )␊ |
1616 | ␉␉{␊ |
1617 | ␉␉␉TiXmlAttribute attrib;␊ |
1618 | ␉␉␉p = attrib.Parse( p, data, _encoding );␉␉␊ |
1619 | ␉␉␉standalone = attrib.Value();␊ |
1620 | ␉␉}␊ |
1621 | ␉␉else␊ |
1622 | ␉␉{␊ |
1623 | ␉␉␉// Read over whatever it is.␊ |
1624 | ␉␉␉while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )␊ |
1625 | ␉␉␉␉++p;␊ |
1626 | ␉␉}␊ |
1627 | ␉}␊ |
1628 | ␉return 0;␊ |
1629 | }␊ |
1630 | ␊ |
1631 | bool TiXmlText::Blank() const␊ |
1632 | {␊ |
1633 | ␉for ( unsigned i=0; i<value.length(); i++ )␊ |
1634 | ␉␉if ( !IsWhiteSpace( value[i] ) )␊ |
1635 | ␉␉␉return false;␊ |
1636 | ␉return true;␊ |
1637 | }␊ |
1638 | ␊ |
1639 |