1 // Written in the D programming language.
2 
3 /**
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5       current standards. It will be removed from Phobos in 2.101.0.
6       If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD))
7 
8 Classes and functions for creating and parsing XML
9 
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
17 
18 Example: This example creates a DOM (Document Object Model) tree
19     from an XML file.
20 ------------------------------------------------------------------------------
21 import std.xml;
22 import std.stdio;
23 import std.string;
24 import std.file;
25 
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
28 //
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30 
31 void main()
32 {
33     string s = cast(string) std.file.read("books.xml");
34 
35     // Check for well-formedness
36     check(s);
37 
38     // Make a DOM tree
39     auto doc = new Document(s);
40 
41     // Plain-print it
42     writeln(doc);
43 }
44 ------------------------------------------------------------------------------
45 
46 Example: This example does much the same thing, except that the file is
47     deconstructed and reconstructed by hand. This is more work, but the
48     techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
50 import std.xml;
51 import std.stdio;
import std.string;
import std.file;
53 
54 struct Book
55 {
56     string id;
57     string author;
58     string title;
59     string genre;
60     string price;
61     string pubDate;
62     string description;
63 }
64 
65 void main()
66 {
67     string s = cast(string) std.file.read("books.xml");
68 
69     // Check for well-formedness
70     check(s);
71 
72     // Take it apart
73     Book[] books;
74 
75     auto xml = new DocumentParser(s);
76     xml.onStartTag["book"] = (ElementParser xml)
77     {
78         Book book;
79         book.id = xml.tag.attr["id"];
80 
81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
87 
88         xml.parse();
89 
90         books ~= book;
91     };
92     xml.parse();
93 
94     // Put it back together again;
95     auto doc = new Document(new Tag("catalog"));
96     foreach (book;books)
97     {
98         auto element = new Element("book");
99         element.tag.attr["id"] = book.id;
100 
101         element ~= new Element("author",      book.author);
102         element ~= new Element("title",       book.title);
103         element ~= new Element("genre",       book.genre);
104         element ~= new Element("price",       book.price);
105         element ~= new Element("publish-date",book.pubDate);
106         element ~= new Element("description", book.description);
107 
108         doc ~= element;
109     }
110 
111     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
113 }
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors:   Janice Caron
118 Source:    $(PHOBOSSRC std/xml.d)
119 */
120 /*
121          Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123    (See accompanying file LICENSE_1_0.txt or copy at
124          http://www.boost.org/LICENSE_1_0.txt)
125 */
126 module appbase.utils.xml;
127 
// Literal text that opens an XML CDATA section.
enum string cdata = "<![CDATA[";
129 
130 /**
131  * Returns true if the character is a character according to the XML standard
132  *
133  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
134  *
135  * Params:
136  *    c = the character to be tested
137  */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Everything up to U+D7FF: accepted from U+0020 upwards, plus the three
    // control characters TAB (0x9), LF (0xA) and CR (0xD).
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // U+D800..U+DFFF and anything beyond U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Masking off the lowest bit rejects U+FFFE and U+FFFF with one compare.
    return (c & 0x1FFFFE) != 0xFFFE;
}
161 
@safe @nogc nothrow pure unittest
{
    // Code points that isChar must accept.
    assert( isChar(cast(dchar) 0x9));
    assert( isChar(cast(dchar) 0xA));
    assert( isChar(cast(dchar) 0xD));
    assert( isChar(cast(dchar) 0x20));
    assert( isChar('J'));
    assert( isChar(cast(dchar) 0xD7FF));
    assert( isChar(cast(dchar) 0xE000));
    assert( isChar(cast(dchar) 0xFFFD));
    assert( isChar(cast(dchar) 0x10000));
    assert( isChar(cast(dchar) 0x10FFFF));

    // Code points that isChar must reject.
    assert(!isChar(cast(dchar) 0x8));
    assert(!isChar(cast(dchar) 0xB));
    assert(!isChar(cast(dchar) 0xC));
    assert(!isChar(cast(dchar) 0xE));
    assert(!isChar(cast(dchar) 0x1F));
    assert(!isChar(cast(dchar) 0xD800));
    assert(!isChar(cast(dchar) 0xDFFF));
    assert(!isChar(cast(dchar) 0xFFFE));
    assert(!isChar(cast(dchar) 0xFFFF));
    assert(!isChar(cast(dchar) 0x110000));

    // Exhaustive cross-check against CharTable; compiled only when the
    // stdxml_TestHardcodedChecks debug identifier is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            assert(isChar(codePoint) == lookup(CharTable, codePoint));
        }
    }
}
191 
192 /**
193  * Returns true if the character is whitespace according to the XML standard
194  *
195  * Only the following characters are considered whitespace in XML - space, tab,
196  * carriage return and linefeed
197  *
198  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
199  *
200  * Params:
201  *    c = the character to be tested
202  */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    // Exactly four characters count: space, tab, linefeed, carriage return.
    switch (c)
    {
    case '\u0020', '\u0009', '\u000A', '\u000D':
        return true;
    default:
        return false;
    }
}
207 
208 /**
209  * Returns true if the character is a digit according to the XML standard
210  *
211  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
212  *
213  * Params:
214  *    c = the character to be tested
215  */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // U+0030..U+0039 is answered directly; every other code point is
    // resolved through DigitTable.
    immutable inAsciiRange = c >= 0x0030 && c <= 0x0039;
    return inAsciiRange || lookup(DigitTable, c);
}
223 
@safe @nogc nothrow pure unittest
{
    // Exhaustive cross-check of the hard-coded fast path against DigitTable;
    // compiled only when the stdxml_TestHardcodedChecks debug identifier is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            assert(isDigit(codePoint) == lookup(DigitTable, codePoint));
        }
    }
}
232 
233 /**
234  * Returns true if the character is a letter according to the XML standard
235  *
236  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
237  *
238  * Params:
239  *    c = the character to be tested
240  */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // A letter is either an ideographic character or a base character.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
245 
246 /**
247  * Returns true if the character is an ideographic character according to the
248  * XML standard
249  *
250  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
251  *
252  * Params:
253  *    c = the character to be tested
254  */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    // One isolated code point plus two closed ranges.
    immutable inLowRange  = c >= 0x3021 && c <= 0x3029;
    immutable inHighRange = c >= 0x4E00 && c <= 0x9FA5;
    return c == 0x3007 || inLowRange || inHighRange;
}
265 
@safe @nogc nothrow pure unittest
{
    // The isolated code point and both ends of each accepted range.
    static immutable dchar[] accepted =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (codePoint; accepted)
    {
        assert(isIdeographic(codePoint));
    }

    // Exhaustive cross-check against IdeographicTable; compiled only when
    // the stdxml_TestHardcodedChecks debug identifier is set.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (codePoint; 0 .. dchar.max + 1)
        {
            assert(isIdeographic(codePoint) == lookup(IdeographicTable, codePoint));
        }
    }
}
280 
281 /**
282  * Returns true if the character is a base character according to the XML
283  * standard
284  *
285  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
286  *
287  * Params:
288  *    c = the character to be tested
289  */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by BaseCharTable.
    immutable inTable = lookup(BaseCharTable, c);
    return inTable;
}
294 
295 /**
296  * Returns true if the character is a combining character according to the
297  * XML standard
298  *
299  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
300  *
301  * Params:
302  *    c = the character to be tested
303  */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by CombiningCharTable.
    immutable inTable = lookup(CombiningCharTable, c);
    return inTable;
}
308 
309 /**
310  * Returns true if the character is an extender according to the XML standard
311  *
312  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
313  *
314  * Params:
315  *    c = the character to be tested
316  */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    // Membership is decided entirely by ExtenderTable.
    immutable inTable = lookup(ExtenderTable, c);
    return inTable;
}
321 
322 /**
323  * Encodes a string by replacing all characters which need to be escaped with
324  * appropriate predefined XML entities.
325  *
326  * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
327  * and greater-than), and similarly, decode() unescapes them. These functions
328  * are provided for convenience only. You do not need to use them when using
329  * the std.xml classes, because then all the encoding and decoding will be done
330  * for you automatically.
331  *
332  * If the string is not modified, the original will be returned.
333  *
334  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
335  *
336  * Params:
337  *      s = The string to be encoded
338  *
339  * Returns: The encoded string
340  *
341  * Example:
342  * --------------
343  * writefln(encode("a > b")); // writes "a &gt; b"
344  * --------------
345  */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedUpTo = 0;    // index just past the last escaped character
    bool escapedAny = false;  // stays false when the input needs no change

    foreach (idx, ch; s)
    {
        // Pick the entity for this character; anything else is left for the
        // bulk copy that happens at the next escape (or at the very end).
        string entity;
        switch (ch)
        {
        case '<':  entity = "&lt;";   break;
        case '>':  entity = "&gt;";   break;
        case '&':  entity = "&amp;";  break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        default:   continue;
        }

        // Flush the untouched run before this character, then the entity.
        output.put(s[copiedUpTo .. idx]);
        output.put(entity);
        copiedUpTo = idx + 1;
        escapedAny = true;
    }

    // Nothing was escaped: hand back the caller's own string.
    if (!escapedAny)
        return s;

    output.put(s[copiedUpTo .. $]);
    return output.data;
}
375 
@safe pure unittest
{
    // A string with nothing to escape comes back as the very same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // Each row is (raw input, expected escaped output).
    static immutable string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];
    foreach (row; cases)
    {
        string raw = row[0];
        string expected = row[1];
        string actual = encode(raw);
        assert(actual == expected, actual);
    }
}
386 
387 /**
388  * Mode to use for decoding.
389  *
390  * $(DDOC_ENUM_MEMBERS NONE) Do not decode
391  * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
392  * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
393  */
enum DecodeMode
{
    NONE,   // do not decode
    LOOSE,  // decode, but ignore errors
    STRICT  // decode, and throw exception on error
}
398 
399 /**
400  * Decodes a string by unescaping all predefined XML entities.
401  *
402  * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
403  * and greater-than), and similarly, decode() unescapes them. These functions
404  * are provided for convenience only. You do not need to use them when using
405  * the std.xml classes, because then all the encoding and decoding will be done
406  * for you automatically.
407  *
408  * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;,
410  * as well as decimal and hexadecimal entities such as &amp;#x20AC;
411  *
412  * If the string does not contain an ampersand, the original will be returned.
413  *
414  * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
415  * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
416  * (decode, and throw a DecodeException in the event of an error).
417  *
418  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
419  *
420  * Params:
421  *      s = The string to be decoded
422  *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
423  *
424  * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
425  *
426  * Returns: The decoded string
427  *
428  * Example:
429  * --------------
430  * writefln(decode("a &gt; b")); // writes "a > b"
431  * --------------
432  */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    immutable strict = (mode == DecodeMode.STRICT);

    // Stays empty until the first '&' is met, so that input without any
    // ampersand can be returned as-is at the end.
    string buffer;

    for (size_t i = 0; i < s.length; ++i)
    {
        immutable char c = s[i];

        if (c != '&')
        {
            // Ordinary character: copy it only once a new string is being built.
            if (buffer.length != 0) buffer ~= c;
            continue;
        }

        // First ampersand: seed the buffer with everything that preceded it.
        if (buffer.length == 0)
        {
            buffer = s[0 .. i].dup;
        }

        string rest = s[i .. $];

        if (startsWith(rest, "&#"))
        {
            // Numeric character reference.
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);     // advances t past what it consumed
                char[4] temp;
                import std.utf : encode;
                buffer ~= temp[0 .. encode(temp, d)];
                // Land on the last consumed character; the loop's ++i moves on.
                i = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (strict)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
        // Named entities: each branch skips the rest of the entity, leaving
        // the final step to the loop's ++i.
        else if (startsWith(rest, "&amp;"))  { buffer ~= '&';  i += 4; }
        else if (startsWith(rest, "&quot;")) { buffer ~= '"';  i += 5; }
        else if (startsWith(rest, "&apos;")) { buffer ~= '\''; i += 5; }
        else if (startsWith(rest, "&lt;"))   { buffer ~= '<';  i += 3; }
        else if (startsWith(rest, "&gt;"))   { buffer ~= '>';  i += 3; }
        else
        {
            // A bare ampersand: an error in STRICT mode, kept verbatim otherwise.
            if (strict)
                throw new DecodeException("Unescaped &");
            buffer ~= '&';
        }
    }

    return (buffer.length == 0) ? s : buffer;
}
487 
@safe pure unittest
{
    // True when strict decoding of `input` raises DecodeException.
    bool strictRejects(string input) pure
    {
        try
        {
            decode(input, DecodeMode.STRICT);
        }
        catch (DecodeException e)
        {
            return true;
        }
        return false;
    }

    // Assert that things that should work, do
    auto untouched = "hello";
    assert(decode(untouched, DecodeMode.STRICT) is untouched);

    // Each row is (encoded input, expected decoded output).
    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (row; wellFormed)
    {
        assert(decode(row[0], DecodeMode.STRICT) == row[1]);
    }

    // Malformed input passes through LOOSE mode unchanged ...
    static immutable string[] malformed = [
        "cat & dog",
        "a &gt b",
        "&#;",
        "&#x;",
        "&#2G;",
        "&#x2G;",
    ];
    foreach (text; malformed)
    {
        assert(decode(text, DecodeMode.LOOSE) == text);
    }

    // Assert that things that shouldn't work, don't
    foreach (text; malformed)
    {
        assert(strictRejects(text), text);
    }
}
523 
524 /**
525  * Class representing an XML document.
526  *
527  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
528  *
529  */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Builds a complete DOM (Document Object Model) tree by parsing XML text.
     *
     * The input MUST be valid XML and must not be empty; the latter is
     * asserted by this constructor's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);

        // rootTagText is expected to be a slice of s, so the pointer
        // difference is the offset at which the root tag begins.
        prolog = s[0 .. rootTagText.ptr - s.ptr];

        parse(parser);

        // Whatever text the parser has left after parsing becomes the epilog.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    /**
     * Compares two Documents for equality: prolog, element content and
     * epilog must all match.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Document)(o);

        if (prolog != other.prolog)
            return false;
        if (!(cast(const) this).Element.opEquals(cast(const) other))
            return false;
        return epilog == other.epilog;
    }

    /**
     * Compares two Documents, ordering by prolog, then by element content,
     * then by epilog.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const other = toType!(const Document)(o);

        if (prolog != other.prolog)
            return prolog < other.prolog ? -1 : 1;

        immutable byContent = this.Element.opCmp(other);
        if (byContent != 0)
            return byContent;

        if (epilog == other.epilog)
            return 0;
        return epilog < other.epilog ? -1 : 1;
    }

    /**
     * Returns the hash of a Document, combining the prolog, the epilog and
     * the hash of the element content.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override size_t toHash() scope const @trusted
    {
        immutable contentHash = (cast() this).Element.toHash();
        immutable tailHash = hash(epilog, contentHash);
        return hash(prolog, tailHash);
    }

    /**
     * Returns the string representation of a Document. (That is, the
     * complete XML of a document).
     */
    override string toString() scope const @safe
    {
        return prolog
            ~ super.toString()
            ~ epilog;
    }
}
645 
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text are equal, not ordered, and
    // interchangeable as associative array keys.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] byDocument;
    byDocument[first] = 1;
    assert(byDocument[second] == 1);

    // Appending a child to one of them makes it compare greater.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
663 
664 /**
665  * Class representing an XML element.
666  *
667  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
668  */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior. When empty, no Text
     *          item is appended.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opOpAssign!("~")(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and tagString). The copy starts
     * out as TagType.EMPTY and is switched to TagType.START by appendItem
     * once a non-empty item is added.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opOpAssign(string op)(Text item) @safe pure
        if (op == "~")
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opOpAssign(string op)(CData item) @safe pure
        if (op == "~")
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opOpAssign(string op)(Comment item) @safe pure
        if (op == "~")
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opOpAssign(string op)(ProcessingInstruction item) @safe pure
        if (op == "~")
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opOpAssign(string op)(Element item) @safe pure
        if (op == "~")
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order and, on the first non-empty item,
    // turns an EMPTY tag into a START tag.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Installs handlers on the parser that append every text, CDATA, comment,
    // processing instruction and child element to this element, then runs
    // the parser. Child elements are built recursively.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
        xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
        xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
        xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            opOpAssign!("~")(e);
        };

        xml.parse();
    }

    // Orders two attribute tables deterministically: by size first, then by
    // their sorted keys, then by the value stored under each key.
    // Returns 0 only when both tables hold the same key/value pairs.
    private static int compareAttributes(const(string[string]) lhs,
                                         const(string[string]) rhs) @safe
    {
        import std.algorithm.sorting : sort;

        if (lhs.length != rhs.length)
            return lhs.length < rhs.length ? -1 : 1;

        // Associative array iteration order is unspecified, so compare the
        // keys in sorted order.
        string[] lhsKeys;
        string[] rhsKeys;
        foreach (k, v; lhs) lhsKeys ~= k;
        foreach (k, v; rhs) rhsKeys ~= k;
        sort(lhsKeys);
        sort(rhsKeys);

        foreach (i; 0 .. lhsKeys.length)
        {
            if (lhsKeys[i] != rhsKeys[i])
                return lhsKeys[i] < rhsKeys[i] ? -1 : 1;

            string lhsValue = lhs[lhsKeys[i]];
            string rhsValue = rhs[rhsKeys[i]];
            if (lhsValue != rhsValue)
                return lhsValue < rhsValue ? -1 : 1;
        }
        return 0;
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements are equal when their tag names match, their attributes
     * match, and their items are pairwise equal. The tag is included so that
     * equality agrees with toHash, which also takes the tag into account.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const element = toType!(const Element)(o);

        // Elements with different names or attributes are never equal, even
        // when their contents coincide (e.g. <a/> versus <b/>).
        if (tag.name != element.tag.name) return false;
        if (tag.attr != element.tag.attr) return false;

        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Elements are ordered by their items first. When the items give no
     * ordering, the tag name and then the attributes decide, so that the
     * result is 0 exactly when opEquals reports equality.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);

        // Item-by-item comparison; a shorter item list sorts first.
        for (size_t i = 0; ; ++i)
        {
            if (i == items.length && i == element.items.length) break;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }

        // Items are identical: fall back to the tag so that elements which
        // differ only in name or attributes do not compare as equal.
        if (tag.name != element.tag.name)
            return tag.name < element.tag.name ? -1 : 1;
        return compareAttributes(tag.attr, element.tag.attr);
    }

    /**
     * Returns the hash of an Element
     *
     * The hash is the tag's hash plus the hash of every item.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = tag.toHash();
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if the element
         *      contains an item that is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                Text t = cast(Text) item;
                // Any non-Text item makes the interior undecodable.
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields a single line; so does an element whose
         * only item is a Text. Otherwise the start tag, each (indented) line
         * of every item, and the end tag are returned as separate lines.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Pad on the left by `indent` columns.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        /// True when the element has no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
977 
978 /**
979  * Tag types.
980  *
981  * $(DDOC_ENUM_MEMBERS START) Used for start tags
982  * $(DDOC_ENUM_MEMBERS END) Used for end tags
983  * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
984  *
985  */
enum TagType
{
    START, // used for start tags
    END,   // used for end tags
    EMPTY  // used for empty tags
}
987 
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        // checkName takes its input by reference, so validate a copy
        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException carrying the text consumed so far if the input is
     * not a well-formed tag. This includes input that ends before the tag is
     * complete.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil reports "not found" as -1. Slicing with that value
        // raises a RangeError, so convert it into a TagException here; the
        // catch block below rethrows it with the consumed text attached.
        static size_t found(ptrdiff_t index) @safe pure
        {
            if (index < 0) throw new TagException("");
            return index;
        }

        // Advances text past any leading whitespace
        static void skipWhite(ref string text) @safe pure
        {
            text = text[found(text.byCodeUnit.countUntil!(a => !isWhite(a))) .. $];
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter or whitespace
            size_t i = found(s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            skipWhite(s);

            // Attributes: key, optional space, '=', optional space, quoted value
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = found(s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                skipWhite(s);
                reqc(s,'=');
                skipWhite(s);

                immutable char quote = requireOneOf(s,"'\"");
                i = found(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                skipWhite(s);
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a valid tag
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the text that was consumed
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two Tags are equal when their names, attributes and types are all
         * equal.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name && attr == tag.attr && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name, then by attributes, then by type.
         * Attribute tables are ordered by number of entries and then by
         * their key/value pairs taken in sorted key order, so the result does
         * not depend on where the tables happen to live in memory.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            import std.algorithm.sorting : sort;

            const tag = toType!(const Tag)(o);

            if (name != tag.name) return name < tag.name ? -1 : 1;

            if (attr != tag.attr)
            {
                if (attr.length != tag.attr.length)
                    return attr.length < tag.attr.length ? -1 : 1;

                string[] mine, theirs;
                foreach (key; attr.byKey) mine ~= key;
                foreach (key; tag.attr.byKey) theirs ~= key;
                sort(mine);
                sort(theirs);

                foreach (i, key; mine)
                {
                    // Once the keys at this position match, the key is
                    // present in both tables and the values can be compared
                    if (key != theirs[i]) return key < theirs[i] ? -1 : 1;
                    const lhs = attr[key];
                    const rhs = tag.attr[key];
                    if (lhs != rhs) return lhs < rhs ? -1 : 1;
                }
            }

            if (type != tag.type) return type < tag.type ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return .hashOf(name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Renders "<name key="value" ..." without the closing delimiter
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1237 
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * Returns a negative value, zero or a positive value according to how the
     * two bodies compare. An item which is not a Comment compares as 0.
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // Written as statements on purpose: an expression of the form
        // `t !is null && (...)` is a bool, which can never yield -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1320 
// https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" is rejected
    assertThrown!CommentException(new Comment("--"));
}
1329 
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * Returns a negative value, zero or a positive value according to how the
     * two bodies compare. An item which is not a CData compares as 0.
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // Written as statements on purpose: an expression of the form
        // `t !is null && (...)` is a bool, which can never yield -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1409 
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * Returns a negative value, zero or a positive value according to how the
     * two (encoded) contents compare. An item which is not a Text compares
     * as 0.
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // Written as statements on purpose: an expression of the form
        // `t !is null && (...)` is a bool, which can never yield -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1489 
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * Returns a negative value, zero or a positive value according to how the
     * two bodies compare. An item which is not an XMLInstruction compares
     * as 0.
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // Written as statements on purpose: an expression of the form
        // `t !is null && (...)` is a bool, which can never yield -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XMLInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1569 
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * Returns a negative value, zero or a positive value according to how the
     * two bodies compare. An item which is not a ProcessingInstruction
     * compares as 0.
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // Written as statements on purpose: an expression of the form
        // `t !is null && (...)` is a bool, which can never yield -1.
        if (t is null || content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1649 
/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation strips the result of toString and returns it
     * as a single line, or returns no lines at all when nothing is left
     * after stripping. It does not use indent itself.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string stripped = strip(toString());
        if (stripped.length == 0)
            return [];
        return [ stripped ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1683 
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which asserts that the
     * text is non-empty and that check() accepts it.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length > 0);
        try
        {
            // Well-formedness check; throws on invalid input
            check(xmlText_);
        }
        catch (CheckException failure)
        {
            // Surface the reason to the user through the assertion message
            assert(false, "\n" ~ failure.toString());
        }
    }
    do
    {
        this.xmlText = xmlText_;
        // The cursor must point at the text before super() runs, because the
        // base constructor reads through it
        this.s = &this.xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1735 
@system unittest
{
    // The root element becomes the Document itself, so only <child> is listed
    auto document = new Document("<root><child><grandchild/></child></root>");
    auto children = document.elements;
    assert(children.length == 1);
    assert(children[0].tag.name == "child");
    assert(document.items == children);
}
1743 
1744 /**
1745  * Class for parsing an XML element.
1746  *
1747  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1748  *
1749  * Note that you cannot construct instances of this class directly. You can
1750  * construct a DocumentParser (which is a subclass of ElementParser), but
1751  * otherwise, Instances of ElementParser will be created for you by the
1752  * library, and passed your way via onStartTag handlers.
1753  *
1754  */
1755 class ElementParser
1756 {
1757     alias Handler = void delegate(string);
1758     alias ElementHandler = void delegate(in Element element);
1759     alias ParserHandler = void delegate(ElementParser parser);
1760 
1761     private
1762     {
1763         Tag tag_;
1764         string elementStart;
1765         string* s;
1766 
1767         Handler commentHandler = null;
1768         Handler cdataHandler = null;
1769         Handler xiHandler = null;
1770         Handler piHandler = null;
1771         Handler rawTextHandler = null;
1772         Handler textHandler = null;
1773 
1774         // Private constructor for start tags
1775         this(ElementParser parent) @safe @nogc pure nothrow
1776         {
1777             s = parent.s;
1778             this();
1779             tag_ = parent.tag_;
1780         }
1781 
1782         // Private constructor for empty tags
1783         this(Tag tag, string* t) @safe @nogc pure nothrow
1784         {
1785             s = t;
1786             this();
1787             tag_ = tag;
1788         }
1789     }
1790 
1791     /**
1792      * The Tag at the start of the element being parsed. You can read this to
1793      * determine the tag's name and attributes.
1794      */
1795     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1796 
1797     /**
1798      * Register a handler which will be called whenever a start tag is
1799      * encountered which matches the specified name. You can also pass null as
1800      * the name, in which case the handler will be called for any unmatched
1801      * start tag.
1802      *
1803      * Example:
1804      * --------------
1805      * // Call this function whenever a <podcast> start tag is encountered
1806      * onStartTag["podcast"] = (ElementParser xml)
1807      * {
1808      *     // Your code here
1809      *     //
     *     // This is a closure, so code here may reference
1811      *     // variables which are outside of this scope
1812      * };
1813      *
1814      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1815      * // start tag is encountered
1816      * onStartTag["episode"] = &myEpisodeStartHandler;
1817      *
1818      * // call delegate dg for all other start tags
1819      * onStartTag[null] = dg;
1820      * --------------
1821      *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
1826      *
1827      * Note that your function will be called for both start tags and empty
1828      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1829      * and &lt;br/&gt;.
1830      */
1831     ParserHandler[string] onStartTag;
1832 
1833     /**
1834      * Register a handler which will be called whenever an end tag is
1835      * encountered which matches the specified name. You can also pass null as
1836      * the name, in which case the handler will be called for any unmatched
1837      * end tag.
1838      *
1839      * Example:
1840      * --------------
1841      * // Call this function whenever a </podcast> end tag is encountered
1842      * onEndTag["podcast"] = (in Element e)
1843      * {
1844      *     // Your code here
1845      *     //
     *     // This is a closure, so code here may reference
1847      *     // variables which are outside of this scope
1848      * };
1849      *
1850      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1851      * // end tag is encountered
1852      * onEndTag["episode"] = &myEpisodeEndHandler;
1853      *
1854      * // call delegate dg for all other end tags
1855      * onEndTag[null] = dg;
1856      * --------------
1857      *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
     * and &lt;br/&gt;.
1861      */
1862     ElementHandler[string] onEndTag;
1863 
1864     protected this() @safe @nogc pure nothrow
1865     {
1866         elementStart = *s;
1867     }
1868 
1869     /**
1870      * Register a handler which will be called whenever text is encountered.
1871      *
1872      * Example:
1873      * --------------
1874      * // Call this function whenever text is encountered
1875      * onText = (string s)
1876      * {
1877      *     // Your code here
1878      *
1879      *     // The passed parameter s will have been decoded by the time you see
1880      *     // it, and so may contain any character.
1881      *     //
1882      *     // This is a a closure, so code here may reference
1883      *     // variables which are outside of this scope
1884      * };
1885      * --------------
1886      */
1887     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1888 
1889     /**
1890      * Register an alternative handler which will be called whenever text
1891      * is encountered. This differs from onText in that onText will decode
1892      * the text, whereas onTextRaw will not. This allows you to make design
1893      * choices, since onText will be more accurate, but slower, while
1894      * onTextRaw will be faster, but less accurate. Of course, you can
1895      * still call decode() within your handler, if you want, but you'd
1896      * probably want to use onTextRaw only in circumstances where you
1897      * know that decoding is unnecessary.
1898      *
1899      * Example:
1900      * --------------
1901      * // Call this function whenever text is encountered
1902      * onText = (string s)
1903      * {
1904      *     // Your code here
1905      *
1906      *     // The passed parameter s will NOT have been decoded.
1907      *     //
1908      *     // This is a a closure, so code here may reference
1909      *     // variables which are outside of this scope
1910      * };
1911      * --------------
1912      */
1913     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1914 
1915     /**
1916      * Register a handler which will be called whenever a character data
1917      * segment is encountered.
1918      *
1919      * Example:
1920      * --------------
1921      * // Call this function whenever a CData section is encountered
1922      * onCData = (string s)
1923      * {
1924      *     // Your code here
1925      *
1926      *     // The passed parameter s does not include the opening <![CDATA[
1927      *     // nor closing ]]>
1928      *     //
1929      *     // This is a a closure, so code here may reference
1930      *     // variables which are outside of this scope
1931      * };
1932      * --------------
1933      */
1934     @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1935 
1936     /**
1937      * Register a handler which will be called whenever a comment is
1938      * encountered.
1939      *
1940      * Example:
1941      * --------------
1942      * // Call this function whenever a comment is encountered
1943      * onComment = (string s)
1944      * {
1945      *     // Your code here
1946      *
1947      *     // The passed parameter s does not include the opening <!-- nor
1948      *     // closing -->
1949      *     //
1950      *     // This is a a closure, so code here may reference
1951      *     // variables which are outside of this scope
1952      * };
1953      * --------------
1954      */
1955     @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1956 
1957     /**
1958      * Register a handler which will be called whenever a processing
1959      * instruction is encountered.
1960      *
1961      * Example:
1962      * --------------
1963      * // Call this function whenever a processing instruction is encountered
1964      * onPI = (string s)
1965      * {
1966      *     // Your code here
1967      *
1968      *     // The passed parameter s does not include the opening <? nor
1969      *     // closing ?>
1970      *     //
1971      *     // This is a a closure, so code here may reference
1972      *     // variables which are outside of this scope
1973      * };
1974      * --------------
1975      */
1976     @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1977 
1978     /**
1979      * Register a handler which will be called whenever an XML instruction is
1980      * encountered.
1981      *
1982      * Example:
1983      * --------------
1984      * // Call this function whenever an XML instruction is encountered
1985      * // (Note: XML instructions may only occur preceding the root tag of a
1986      * // document).
1987      * onPI = (string s)
1988      * {
1989      *     // Your code here
1990      *
1991      *     // The passed parameter s does not include the opening <! nor
1992      *     // closing >
1993      *     //
1994      *     // This is a a closure, so code here may reference
1995      *     // variables which are outside of this scope
1996      * };
1997      * --------------
1998      */
1999     @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
2000 
2001     /**
2002      * Parse an XML element.
2003      *
2004      * Parsing will continue until the end of the current element. Any items
2005      * encountered for which a handler has been registered will invoke that
2006      * handler.
2007      *
2008      * Throws: various kinds of XMLException
2009      */
2010     void parse()
2011     {
2012         import std.algorithm.searching : startsWith;
2013         import std.string : indexOf;
2014 
2015         string t;
2016         const Tag root = tag_;
2017         Tag[string] startTags;
2018         if (tag_ !is null) startTags[tag_.name] = tag_;
2019 
2020         while (s.length != 0)
2021         {
2022             if (startsWith(*s,"<!--"))
2023             {
2024                 chop(*s,4);
2025                 t = chop(*s,indexOf(*s,"-->"));
2026                 if (commentHandler.funcptr !is null) commentHandler(t);
2027                 chop(*s,3);
2028             }
2029             else if (startsWith(*s,"<![CDATA["))
2030             {
2031                 chop(*s,9);
2032                 t = chop(*s,indexOf(*s,"]]>"));
2033                 if (cdataHandler.funcptr !is null) cdataHandler(t);
2034                 chop(*s,3);
2035             }
2036             else if (startsWith(*s,"<!"))
2037             {
2038                 chop(*s,2);
2039                 t = chop(*s,indexOf(*s,">"));
2040                 if (xiHandler.funcptr !is null) xiHandler(t);
2041                 chop(*s,1);
2042             }
2043             else if (startsWith(*s,"<?"))
2044             {
2045                 chop(*s,2);
2046                 t = chop(*s,indexOf(*s,"?>"));
2047                 if (piHandler.funcptr !is null) piHandler(t);
2048                 chop(*s,2);
2049             }
2050             else if (startsWith(*s,"<"))
2051             {
2052                 tag_ = new Tag(*s,true);
2053                 if (root is null)
2054                     return; // Return to constructor of derived class
2055 
2056                 if (tag_.isStart)
2057                 {
2058                     startTags[tag_.name] = tag_;
2059 
2060                     auto parser = new ElementParser(this);
2061 
2062                     auto handler = tag_.name in onStartTag;
2063                     if (handler !is null) (*handler)(parser);
2064                     else
2065                     {
2066                         handler = null in onStartTag;
2067                         if (handler !is null) (*handler)(parser);
2068                     }
2069                 }
2070                 else if (tag_.isEnd)
2071                 {
2072                     const startTag = startTags[tag_.name];
2073                     string text;
2074 
2075                     if (startTag.tagString.length == 0)
2076                         assert(0);
2077 
2078                     immutable(char)* p = startTag.tagString.ptr
2079                         + startTag.tagString.length;
2080                     immutable(char)* q = &tag_.tagString[0];
2081                     text = decode(p[0..(q-p)], DecodeMode.LOOSE);
2082 
2083                     auto element = new Element(startTag);
2084                     if (text.length != 0) element ~= new Text(text);
2085 
2086                     auto handler = tag_.name in onEndTag;
2087                     if (handler !is null) (*handler)(element);
2088                     else
2089                     {
2090                         handler = null in onEndTag;
2091                         if (handler !is null) (*handler)(element);
2092                     }
2093 
2094                     if (tag_.name == root.name) return;
2095                 }
2096                 else if (tag_.isEmpty)
2097                 {
2098                     Tag startTag = new Tag(tag_.name);
2099 
2100                     // FIX by hed010gy
2101                     // https://issues.dlang.org/show_bug.cgi?id=2979
2102                     if (tag_.attr.length > 0)
2103                           foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2104                     // END FIX
2105 
2106                     // Handle the pretend start tag
2107                     string s2;
2108                     auto parser = new ElementParser(startTag,&s2);
2109                     auto handler1 = startTag.name in onStartTag;
2110                     if (handler1 !is null) (*handler1)(parser);
2111                     else
2112                     {
2113                         handler1 = null in onStartTag;
2114                         if (handler1 !is null) (*handler1)(parser);
2115                     }
2116 
2117                     // Handle the pretend end tag
2118                     auto element = new Element(startTag);
2119                     auto handler2 = tag_.name in onEndTag;
2120                     if (handler2 !is null) (*handler2)(element);
2121                     else
2122                     {
2123                         handler2 = null in onEndTag;
2124                         if (handler2 !is null) (*handler2)(element);
2125                     }
2126                 }
2127             }
2128             else
2129             {
2130                 t = chop(*s,indexOf(*s,"<"));
2131                 if (rawTextHandler.funcptr !is null)
2132                     rawTextHandler(t);
2133                 else if (textHandler.funcptr !is null)
2134                     textHandler(decode(t,DecodeMode.LOOSE));
2135             }
2136         }
2137     }
2138 
2139     /**
2140      * Returns that part of the element which has already been parsed
2141      */
2142     override string toString() const @nogc @safe pure nothrow
2143     {
2144         assert(elementStart.length >= s.length);
2145         return elementStart[0 .. elementStart.length - s.length];
2146     }
2147 
2148 }
2149 
2150 private
2151 {
2152     template Check(string msg)
2153     {
2154         string old = s;
2155 
2156         void fail() @safe pure
2157         {
2158             s = old;
2159             throw new Err(s,msg);
2160         }
2161 
2162         void fail(Err e) @safe pure
2163         {
2164             s = old;
2165             throw new Err(s,msg,e);
2166         }
2167 
2168         void fail(string msg2) @safe pure
2169         {
2170             fail(new Err(s,msg2));
2171         }
2172     }
2173 
2174     void checkMisc(ref string s) @safe pure // rule 27
2175     {
2176         import std.algorithm.searching : startsWith;
2177 
2178         mixin Check!("Misc");
2179 
2180         try
2181         {
2182                  if (s.startsWith("<!--")) { checkComment(s); }
2183             else if (s.startsWith("<?"))   { checkPI(s); }
2184             else                           { checkSpace(s); }
2185         }
2186         catch (Err e) { fail(e); }
2187     }
2188 
2189     void checkDocument(ref string s) @safe pure // rule 1
2190     {
2191         mixin Check!("Document");
2192         try
2193         {
2194             checkProlog(s);
2195             checkElement(s);
2196             star!(checkMisc)(s);
2197         }
2198         catch (Err e) { fail(e); }
2199     }
2200 
2201     void checkChars(ref string s) @safe pure // rule 2
2202     {
2203         // TO DO - Fix std.utf stride and decode functions, then use those
2204         // instead
2205         import std.format : format;
2206 
2207         mixin Check!("Chars");
2208 
2209         dchar c;
2210         ptrdiff_t n = -1;
2211         // 'i' must not be smaller than size_t because size_t is used internally in
2212         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2213         foreach (size_t i, dchar d; s)
2214         {
2215             if (!isChar(d))
2216             {
2217                 c = d;
2218                 n = i;
2219                 break;
2220             }
2221         }
2222         if (n != -1)
2223         {
2224             s = s[n..$];
2225             fail(format("invalid character: U+%04X",c));
2226         }
2227     }
2228 
2229     void checkSpace(ref string s) @safe pure // rule 3
2230     {
2231         import std.algorithm.searching : countUntil;
2232         import std.ascii : isWhite;
2233         import std.utf : byCodeUnit;
2234 
2235         mixin Check!("Whitespace");
2236         ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2237         if (i == -1 && s.length > 0 && isWhite(s[0]))
2238             s = s[$ .. $];
2239         else if (i > -1)
2240             s = s[i .. $];
2241         if (s is old) fail();
2242     }
2243 
2244     void checkName(ref string s, out string name) @safe pure // rule 5
2245     {
2246         mixin Check!("Name");
2247 
2248         if (s.length == 0) fail();
2249         ptrdiff_t n;
2250         // 'i' must not be smaller than size_t because size_t is used internally in
2251         // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2252         foreach (size_t i, dchar c; s)
2253         {
2254             if (c == '_' || c == ':' || isLetter(c)) continue;
2255             if (i == 0) fail();
2256             if (c == '-' || c == '.' || isDigit(c)
2257                 || isCombiningChar(c) || isExtender(c)) continue;
2258             n = i;
2259             break;
2260         }
2261         name = s[0 .. n];
2262         s = s[n..$];
2263     }
2264 
2265     void checkAttValue(ref string s) @safe pure // rule 10
2266     {
2267         import std.algorithm.searching : countUntil;
2268         import std.utf : byCodeUnit;
2269 
2270         mixin Check!("AttValue");
2271 
2272         if (s.length == 0) fail();
2273         char c = s[0];
2274         if (c != '\u0022' && c != '\u0027')
2275             fail("attribute value requires quotes");
2276         s = s[1..$];
2277         for (;;)
2278         {
2279             s = s[s.byCodeUnit.countUntil(c) .. $];
2280             if (s.length == 0) fail("unterminated attribute value");
2281             if (s[0] == '<') fail("< found in attribute value");
2282             if (s[0] == c) break;
2283             try { checkReference(s); } catch (Err e) { fail(e); }
2284         }
2285         s = s[1..$];
2286     }
2287 
2288     void checkCharData(ref string s) @safe pure // rule 14
2289     {
2290         import std.algorithm.searching : startsWith;
2291 
2292         mixin Check!("CharData");
2293 
2294         while (s.length != 0)
2295         {
2296             if (s.startsWith("&")) break;
2297             if (s.startsWith("<")) break;
2298             if (s.startsWith("]]>")) fail("]]> found within char data");
2299             s = s[1..$];
2300         }
2301     }
2302 
2303     void checkComment(ref string s) @safe pure // rule 15
2304     {
2305         import std.string : indexOf;
2306 
2307         mixin Check!("Comment");
2308 
2309         try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2310         ptrdiff_t n = s.indexOf("--");
2311         if (n == -1) fail("unterminated comment");
2312         s = s[n..$];
2313         try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2314     }
2315 
2316     void checkPI(ref string s) @safe pure // rule 16
2317     {
2318         mixin Check!("PI");
2319 
2320         try
2321         {
2322             checkLiteral("<?",s);
2323             checkEnd("?>",s);
2324         }
2325         catch (Err e) { fail(e); }
2326     }
2327 
2328     void checkCDSect(ref string s) @safe pure // rule 18
2329     {
2330         mixin Check!("CDSect");
2331 
2332         try
2333         {
2334             checkLiteral(cdata,s);
2335             checkEnd("]]>",s);
2336         }
2337         catch (Err e) { fail(e); }
2338     }
2339 
2340     void checkProlog(ref string s) @safe pure // rule 22
2341     {
2342         mixin Check!("Prolog");
2343 
2344         try
2345         {
2346             /* The XML declaration is optional
2347              * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2348              */
2349             opt!(checkXMLDecl)(s);
2350 
2351             star!(checkMisc)(s);
2352             opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2353         }
2354         catch (Err e) { fail(e); }
2355     }
2356 
2357     void checkXMLDecl(ref string s) @safe pure // rule 23
2358     {
2359         mixin Check!("XMLDecl");
2360 
2361         try
2362         {
2363             checkLiteral("<?xml",s);
2364             checkVersionInfo(s);
2365             opt!(checkEncodingDecl)(s);
2366             opt!(checkSDDecl)(s);
2367             opt!(checkSpace)(s);
2368             checkLiteral("?>",s);
2369         }
2370         catch (Err e) { fail(e); }
2371     }
2372 
2373     void checkVersionInfo(ref string s) @safe pure // rule 24
2374     {
2375         mixin Check!("VersionInfo");
2376 
2377         try
2378         {
2379             checkSpace(s);
2380             checkLiteral("version",s);
2381             checkEq(s);
2382             quoted!(checkVersionNum)(s);
2383         }
2384         catch (Err e) { fail(e); }
2385     }
2386 
2387     void checkEq(ref string s) @safe pure // rule 25
2388     {
2389         mixin Check!("Eq");
2390 
2391         try
2392         {
2393             opt!(checkSpace)(s);
2394             checkLiteral("=",s);
2395             opt!(checkSpace)(s);
2396         }
2397         catch (Err e) { fail(e); }
2398     }
2399 
2400     void checkVersionNum(ref string s) @safe pure // rule 26
2401     {
2402         import std.algorithm.searching : countUntil;
2403         import std.utf : byCodeUnit;
2404 
2405         mixin Check!("VersionNum");
2406 
2407         s = s[s.byCodeUnit.countUntil('\"') .. $];
2408         if (s is old) fail();
2409     }
2410 
2411     void checkDocTypeDecl(ref string s) @safe pure // rule 28
2412     {
2413         mixin Check!("DocTypeDecl");
2414 
2415         try
2416         {
2417             checkLiteral("<!DOCTYPE",s);
2418             //
2419             // TO DO -- ensure DOCTYPE is well formed
2420             // (But not yet. That's one of our "future directions")
2421             //
2422             checkEnd(">",s);
2423         }
2424         catch (Err e) { fail(e); }
2425     }
2426 
2427     void checkSDDecl(ref string s) @safe pure // rule 32
2428     {
2429         import std.algorithm.searching : startsWith;
2430 
2431         mixin Check!("SDDecl");
2432 
2433         try
2434         {
2435             checkSpace(s);
2436             checkLiteral("standalone",s);
2437             checkEq(s);
2438         }
2439         catch (Err e) { fail(e); }
2440 
2441         int n = 0;
2442              if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2443         else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2444         else fail("standalone attribute value must be 'yes', \"yes\","~
2445             " 'no' or \"no\"");
2446         s = s[n..$];
2447     }
2448 
2449     void checkElement(ref string s) @safe pure // rule 39
2450     {
2451         mixin Check!("Element");
2452 
2453         string sname,ename,t;
2454         try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2455 
2456         if (t == "STag")
2457         {
2458             try
2459             {
2460                 checkContent(s);
2461                 t = s;
2462                 checkETag(s,ename);
2463             }
2464             catch (Err e) { fail(e); }
2465 
2466             if (sname != ename)
2467             {
2468                 s = t;
2469                 fail("end tag name \"" ~ ename
2470                     ~ "\" differs from start tag name \""~sname~"\"");
2471             }
2472         }
2473     }
2474 
2475     // rules 40 and 44
2476     void checkTag(ref string s, out string type, out string name) @safe pure
2477     {
2478         mixin Check!("Tag");
2479 
2480         try
2481         {
2482             type = "STag";
2483             checkLiteral("<",s);
2484             checkName(s,name);
2485             star!(seq!(checkSpace,checkAttribute))(s);
2486             opt!(checkSpace)(s);
2487             if (s.length != 0 && s[0] == '/')
2488             {
2489                 s = s[1..$];
2490                 type = "ETag";
2491             }
2492             checkLiteral(">",s);
2493         }
2494         catch (Err e) { fail(e); }
2495     }
2496 
2497     void checkAttribute(ref string s) @safe pure // rule 41
2498     {
2499         mixin Check!("Attribute");
2500 
2501         try
2502         {
2503             string name;
2504             checkName(s,name);
2505             checkEq(s);
2506             checkAttValue(s);
2507         }
2508         catch (Err e) { fail(e); }
2509     }
2510 
2511     void checkETag(ref string s, out string name) @safe pure // rule 42
2512     {
2513         mixin Check!("ETag");
2514 
2515         try
2516         {
2517             checkLiteral("</",s);
2518             checkName(s,name);
2519             opt!(checkSpace)(s);
2520             checkLiteral(">",s);
2521         }
2522         catch (Err e) { fail(e); }
2523     }
2524 
2525     void checkContent(ref string s) @safe pure // rule 43
2526     {
2527         import std.algorithm.searching : startsWith;
2528 
2529         mixin Check!("Content");
2530 
2531         try
2532         {
2533             while (s.length != 0)
2534             {
2535                 old = s;
2536                      if (s.startsWith("&"))        { checkReference(s); }
2537                 else if (s.startsWith("<!--"))     { checkComment(s); }
2538                 else if (s.startsWith("<?"))       { checkPI(s); }
2539                 else if (s.startsWith(cdata)) { checkCDSect(s); }
2540                 else if (s.startsWith("</"))       { break; }
2541                 else if (s.startsWith("<"))        { checkElement(s); }
2542                 else                               { checkCharData(s); }
2543             }
2544         }
2545         catch (Err e) { fail(e); }
2546     }
2547 
2548     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2549     {
2550         import std.format : format;
2551 
2552         mixin Check!("CharRef");
2553 
2554         c = 0;
2555         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2556         int radix = 10;
2557         if (s.length != 0 && s[0] == 'x')
2558         {
2559             s = s[1..$];
2560             radix = 16;
2561         }
2562         if (s.length == 0) fail("unterminated character reference");
2563         if (s[0] == ';')
2564             fail("character reference must have at least one digit");
2565         while (s.length != 0)
2566         {
2567             immutable char d = s[0];
2568             int n = 0;
2569             switch (d)
2570             {
2571                 case 'F','f': ++n;      goto case;
2572                 case 'E','e': ++n;      goto case;
2573                 case 'D','d': ++n;      goto case;
2574                 case 'C','c': ++n;      goto case;
2575                 case 'B','b': ++n;      goto case;
2576                 case 'A','a': ++n;      goto case;
2577                 case '9':     ++n;      goto case;
2578                 case '8':     ++n;      goto case;
2579                 case '7':     ++n;      goto case;
2580                 case '6':     ++n;      goto case;
2581                 case '5':     ++n;      goto case;
2582                 case '4':     ++n;      goto case;
2583                 case '3':     ++n;      goto case;
2584                 case '2':     ++n;      goto case;
2585                 case '1':     ++n;      goto case;
2586                 case '0':     break;
2587                 default: n = 100; break;
2588             }
2589             if (n >= radix) break;
2590             c *= radix;
2591             c += n;
2592             s = s[1..$];
2593         }
2594         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2595         if (s.length == 0 || s[0] != ';') fail("expected ;");
2596         else s = s[1..$];
2597     }
2598 
2599     void checkReference(ref string s) @safe pure // rule 67
2600     {
2601         import std.algorithm.searching : startsWith;
2602 
2603         mixin Check!("Reference");
2604 
2605         try
2606         {
2607             dchar c;
2608             if (s.startsWith("&#")) checkCharRef(s,c);
2609             else checkEntityRef(s);
2610         }
2611         catch (Err e) { fail(e); }
2612     }
2613 
2614     void checkEntityRef(ref string s) @safe pure // rule 68
2615     {
2616         mixin Check!("EntityRef");
2617 
2618         try
2619         {
2620             string name;
2621             checkLiteral("&",s);
2622             checkName(s,name);
2623             checkLiteral(";",s);
2624         }
2625         catch (Err e) { fail(e); }
2626     }
2627 
2628     void checkEncName(ref string s) @safe pure // rule 81
2629     {
2630         import std.algorithm.searching : countUntil;
2631         import std.ascii : isAlpha;
2632         import std.utf : byCodeUnit;
2633 
2634         mixin Check!("EncName");
2635 
2636         s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2637         if (s is old) fail();
2638         s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2639     }
2640 
2641     void checkEncodingDecl(ref string s) @safe pure // rule 80
2642     {
2643         mixin Check!("EncodingDecl");
2644 
2645         try
2646         {
2647             checkSpace(s);
2648             checkLiteral("encoding",s);
2649             checkEq(s);
2650             quoted!(checkEncName)(s);
2651         }
2652         catch (Err e) { fail(e); }
2653     }
2654 
2655     // Helper functions
2656 
2657     void checkLiteral(string literal,ref string s) @safe pure
2658     {
2659         import std.string : startsWith;
2660 
2661         mixin Check!("Literal");
2662 
2663         if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2664         s = s[literal.length..$];
2665     }
2666 
2667     void checkEnd(string end,ref string s) @safe pure
2668     {
2669         import std.string : indexOf;
2670         // Deliberately no mixin Check here.
2671 
2672         auto n = s.indexOf(end);
2673         if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2674         s = s[n..$];
2675         checkLiteral(end,s);
2676     }
2677 
2678     // Metafunctions -- none of these use mixin Check
2679 
2680     void opt(alias f)(ref string s)
2681     {
2682         try { f(s); } catch (Err e) {}
2683     }
2684 
2685     void plus(alias f)(ref string s)
2686     {
2687         f(s);
2688         star!(f)(s);
2689     }
2690 
2691     void star(alias f)(ref string s)
2692     {
2693         while (s.length != 0)
2694         {
2695             try { f(s); }
2696             catch (Err e) { return; }
2697         }
2698     }
2699 
2700     void quoted(alias f)(ref string s)
2701     {
2702         import std.string : startsWith;
2703 
2704         if (s.startsWith("'"))
2705         {
2706             checkLiteral("'",s);
2707             f(s);
2708             checkLiteral("'",s);
2709         }
2710         else
2711         {
2712             checkLiteral("\"",s);
2713             f(s);
2714             checkLiteral("\"",s);
2715         }
2716     }
2717 
2718     void seq(alias f,alias g)(ref string s)
2719     {
2720         f(s);
2721         g(s);
2722     }
2723 }
2724 
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level. Positions are always
 * computed against the complete document, including for junk found after
 * the document element.
 */
void check(string s) @safe pure
{
    // Keep the whole document: s is consumed while checking, and line and
    // column numbers must be measured from the true beginning.
    immutable string entire = s;
    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        e.complete(entire);
        throw e;
    }
}
2751 
// check() must reject a document whose end tag name differs from the start
// tag name, and the error text must name both tags.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        // Reaching this point means the malformed document was accepted.
        assert(false);
    }
    catch (CheckException e)
    {
        // The <genre>...</genres> mismatch in the second book must be reported.
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2800 
// A well-formed document with a comment between child elements must pass
// check(); any CheckException fails the test with its full text.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        assert(0, e.toString());
    }
}
2820 
// Attribute values may contain the other kind of quote: parse a tag that
// mixes single- and double-quoted values and verify what the start-tag
// handler sees.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    // Set by the handler, so the test also fails if the handler never runs.
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}
2840 
// The &amp; entity must arrive decoded both in attribute values (start-tag
// handler) and in element text (end-tag handler).
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}
2859 
// Building a Document from an empty-element tag whose attribute contains
// entities and printing it again must reproduce the input exactly.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}
2866 
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Creates the exception with `msg` as its message.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2869 
2870 // Other exceptions
2871 
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2875 
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2879 
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2883 
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2887 
/// Thrown during Text constructor
class TextException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2891 
/// Thrown during decode()
class DecodeException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2895 
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2899 
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Private constructor: passes the message on to XMLException.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2903 
/**
 * Thrown during check()
 *
 * Instances form a chain through `err`; toString() prints the chained
 * exception first and this one last, one message per line.
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // input that was still unread when the failure occurred
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    // Records the unread input, the message and the optional chained error.
    // line and column stay 0 until complete() is called.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Fills in line and column for this exception and for every exception
    // chained through err. `entire` is the whole document; the failure
    // position is its length minus the length of the recorded tail.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        string consumed = entire[0 .. $ - tail.length];
        // Index just past the last newline, or 0 when there is none.
        const lineStart = consumed.lastIndexOf('\n') + 1;
        line = consumed.count("\n") + 1;
        // Columns count code points, hence the conversion to UTF-32.
        column = toUTF32(consumed[lineStart .. $]).length + 1;
        if (err !is null)
            err.complete(entire);
    }

    // Formats "Line L, column C: msg" (the position only once it has been
    // computed), preceded by the text of the chained exception, if any.
    override string toString() const @safe pure
    {
        import std.format : format;

        string own;
        if (line != 0)
            own = format("Line %d, column %d: ",line,column);
        own ~= msg;
        own ~= '\n';
        return err is null ? own : err.toString() ~ own;
    }
}
2952 
// Short module-private name for CheckException; it is the type of the
// optional `err` parameter of CheckException's constructor.
private alias Err = CheckException;
2954 
2955 // Private helper functions
2956 
2957 private
2958 {
2959     inout(T) toType(T)(return inout Object o) @safe
2960     {
2961         T t = cast(T)(o);
2962         if (t is null)
2963         {
2964             throw new InvalidTypeException("Attempt to compare a "
2965                 ~ T.stringof ~ " with an instance of another type");
2966         }
2967         return t;
2968     }
2969 
2970     string chop(ref string s, size_t n) @safe pure nothrow
2971     {
2972         if (n == -1) n = s.length;
2973         string t = s[0 .. n];
2974         s = s[n..$];
2975         return t;
2976     }
2977 
2978     bool optc(ref string s, char c) @safe pure nothrow
2979     {
2980         immutable bool b = s.length != 0 && s[0] == c;
2981         if (b) s = s[1..$];
2982         return b;
2983     }
2984 
2985     void reqc(ref string s, char c) @safe pure
2986     {
2987         if (s.length == 0 || s[0] != c) throw new TagException("");
2988         s = s[1..$];
2989     }
2990 
2991     char requireOneOf(ref string s, string chars) @safe pure
2992     {
2993         import std.string : indexOf;
2994 
2995         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2996             throw new TagException("");
2997         immutable char ch = s[0];
2998         s = s[1..$];
2999         return ch;
3000     }
3001 
    // Module-private shorthand: `hash` names the `hashOf` found at module
    // scope (the leading dot forces module-scope lookup).
    alias hash = .hashOf;
3003 
    // Definitions from the XML specification
    //
    // Each table below is a flat array of (low, high) code point pairs in
    // ascending order: element 2*i is the low bound and element 2*i+1 the
    // high bound of range i. This is the layout lookup() searches, treating
    // both bounds as inclusive.
    //
    // CharTable: 0x9, 0xA, 0xD, 0x20-0xD7FF, 0xE000-0xFFFD, 0x10000-0x10FFFF.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    // Ascending (low, high) code point pairs, both bounds inclusive when
    // searched with lookup(). Presumably the BaseChar character class of the
    // XML specification -- confirm against the spec before editing.
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    // Ascending (low, high) code point pairs, both bounds inclusive when
    // searched with lookup(): 0x3007, 0x3021-0x3029, 0x4E00-0x9FA5.
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    // Ascending (low, high) code point pairs, both bounds inclusive when
    // searched with lookup(). Presumably the CombiningChar character class of
    // the XML specification -- confirm against the spec before editing.
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    // Ascending (low, high) code point pairs, both bounds inclusive when
    // searched with lookup(). The first pair, 0x0030-0x0039, is ASCII '0'-'9'.
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    // Ascending (low, high) code point pairs, both bounds inclusive when
    // searched with lookup(); most entries are single code points (low == high).
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];
3076 
3077     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3078     {
3079         while (table.length != 0)
3080         {
3081             auto m = (table.length >> 1) & ~1;
3082             if (c < table[m])
3083             {
3084                 table = table[0 .. m];
3085             }
3086             else if (c > table[m+1])
3087             {
3088                 table = table[m+2..$];
3089             }
3090             else return true;
3091         }
3092         return false;
3093     }
3094 
3095     string startOf(string s) @safe nothrow pure
3096     {
3097         string r;
3098         foreach (char c;s)
3099         {
3100             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3101             if (r.length >= 40) { r ~= "___"; break; }
3102         }
3103         return r;
3104     }
3105 
3106     void exit(string s=null)
3107     {
3108         throw new XMLException(s);
3109     }
3110 }