1 // Written in the D programming language. 2 3 /** 4 $(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will be removed from Phobos in 2.101.0. 6 If you still need it, go to $(LINK https://github.com/DigitalMars/undeaD)) 7 8 Classes and functions for creating and parsing XML 9 10 The basic architecture of this module is that there are standalone functions, 11 classes for constructing an XML document from scratch (Tag, Element and 12 Document), and also classes for parsing a pre-existing XML file (ElementParser 13 and DocumentParser). The parsing classes <i>may</i> be used to build a 14 Document, but that is not their primary purpose. The handling capabilities of 15 DocumentParser and ElementParser are sufficiently customizable that you can 16 make them do pretty much whatever you want. 17 18 Example: This example creates a DOM (Document Object Model) tree 19 from an XML file. 20 ------------------------------------------------------------------------------ 21 import std.xml; 22 import std.stdio; 23 import std.string; 24 import std.file; 25 26 // books.xml is used in various samples throughout the Microsoft XML Core 27 // Services (MSXML) SDK. 28 // 29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 30 31 void main() 32 { 33 string s = cast(string) std.file.read("books.xml"); 34 35 // Check for well-formedness 36 check(s); 37 38 // Make a DOM tree 39 auto doc = new Document(s); 40 41 // Plain-print it 42 writeln(doc); 43 } 44 ------------------------------------------------------------------------------ 45 46 Example: This example does much the same thing, except that the file is 47 deconstructed and reconstructed by hand. This is more work, but the 48 techniques involved offer vastly more power. 
------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;

struct Book
{
    string id;
    string author;
    string title;
    string genre;
    string price;
    string pubDate;
    string description;
}

void main()
{
    string s = cast(string) std.file.read("books.xml");

    // Check for well-formedness
    check(s);

    // Take it apart
    Book[] books;

    auto xml = new DocumentParser(s);
    xml.onStartTag["book"] = (ElementParser xml)
    {
        Book book;
        book.id = xml.tag.attr["id"];

        xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
        xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
        xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
        xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
        xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
        xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };

        xml.parse();

        books ~= book;
    };
    xml.parse();

    // Put it back together again
    auto doc = new Document(new Tag("catalog"));
    foreach (book;books)
    {
        auto element = new Element("book");
        element.tag.attr["id"] = book.id;

        element ~= new Element("author",      book.author);
        element ~= new Element("title",       book.title);
        element ~= new Element("genre",       book.genre);
        element ~= new Element("price",       book.price);
        element ~= new Element("publish-date",book.pubDate);
        element ~= new Element("description", book.description);

        doc ~= element;
    }

    // Pretty-print it (writeln, not writefln: the text is data, not a format string)
    writeln(join(doc.pretty(3),"\n"));
}
-------------------------------------------------------------------------------
Copyright: Copyright Janice Caron 2008 - 2009.
License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors:   Janice Caron
Source:    $(PHOBOSSRC std/xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
module appbase.utils.xml;

// Opening delimiter of a CDATA section.
enum cdata = "<![CDATA[";

/**
 * Tests whether a code point is a legal character according to the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is tab, line feed, carriage return, or lies in
 *          U+0020 .. U+D7FF, U+E000 .. U+FFFD or U+10000 .. U+10FFFF
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below U+0020 only the three whitespace controls are permitted.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // Everything from U+0020 up to the last code point before the surrogates.
    if (c <= 0xD7FF)
        return true;

    // Surrogates (U+D800 .. U+DFFF) and values past U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // U+FFFE and U+FFFF are the only exclusions in the remaining range.
    return c != 0xFFFE && c != 0xFFFF;
}

@safe @nogc nothrow pure unittest
{
    // Values are stored as uint because not every one is a valid dchar literal.
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (value; accepted)
        assert( isChar(cast(dchar) value));
    foreach (value; rejected)
        assert(!isChar(cast(dchar) value));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}

/**
 * Tests whether a character is whitespace according to the XML standard.
 *
 * XML recognises exactly four whitespace characters: space, tab,
 * carriage return and linefeed.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case 0x20: // space
        case 0x09: // tab
        case 0x0A: // linefeed
        case 0x0D: // carriage return
            return true;
        default:
            return false;
    }
}

/**
 * Tests whether a character is a digit according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an ASCII digit or is found in DigitTable
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // ASCII '0' .. '9' is answered directly; everything else goes to the table.
    const isAsciiDigit = c >= 0x0030 && c <= 0x0039;
    return isAsciiDigit || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}

/**
 * Tests whether a character is a letter according to the XML standard,
 * i.e. either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}

/**
 * Tests whether a character is an ideographic character according to the
 * XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is U+3007 or lies in U+3021 .. U+3029 or
 *          U+4E00 .. U+9FA5
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    static immutable dchar[] samples =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (sample; samples)
        assert(isIdeographic(sample));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}

/**
 * Tests whether a character is a base character according to the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return BaseCharTable.lookup(c);
}

/**
 * Tests whether a character is a combining character according to the
 * XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return CombiningCharTable.lookup(c);
}

/**
 * Tests whether a character is an extender according to the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return ExtenderTable.lookup(c);
}

/**
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
333 * 334 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 335 * 336 * Params: 337 * s = The string to be encoded 338 * 339 * Returns: The encoded string 340 * 341 * Example: 342 * -------------- 343 * writefln(encode("a > b")); // writes "a > b" 344 * -------------- 345 */ 346 S encode(S)(S s) 347 { 348 import std.array : appender; 349 350 string r; 351 size_t lastI; 352 auto result = appender!S(); 353 354 foreach (i, c; s) 355 { 356 switch (c) 357 { 358 case '&': r = "&"; break; 359 case '"': r = """; break; 360 case '\'': r = "'"; break; 361 case '<': r = "<"; break; 362 case '>': r = ">"; break; 363 default: continue; 364 } 365 // Replace with r 366 result.put(s[lastI .. i]); 367 result.put(r); 368 lastI = i + 1; 369 } 370 371 if (!result.data.ptr) return s; 372 result.put(s[lastI .. $]); 373 return result.data; 374 } 375 376 @safe pure unittest 377 { 378 auto s = "hello"; 379 assert(encode(s) is s); 380 assert(encode("a > b") == "a > b", encode("a > b")); 381 assert(encode("a < b") == "a < b"); 382 assert(encode("don't") == "don't"); 383 assert(encode("\"hi\"") == ""hi"", encode("\"hi\"")); 384 assert(encode("cat & dog") == "cat & dog"); 385 } 386 387 /** 388 * Mode to use for decoding. 389 * 390 * $(DDOC_ENUM_MEMBERS NONE) Do not decode 391 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors 392 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error 393 */ 394 enum DecodeMode 395 { 396 NONE, LOOSE, STRICT 397 } 398 399 /** 400 * Decodes a string by unescaping all predefined XML entities. 401 * 402 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than 403 * and greater-than), and similarly, decode() unescapes them. These functions 404 * are provided for convenience only. You do not need to use them when using 405 * the std.xml classes, because then all the encoding and decoding will be done 406 * for you automatically. 
407 * 408 * This function decodes the entities &amp;, &quot;, &apos;, 409 * &lt; and &gt, 410 * as well as decimal and hexadecimal entities such as &#x20AC; 411 * 412 * If the string does not contain an ampersand, the original will be returned. 413 * 414 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not 415 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT 416 * (decode, and throw a DecodeException in the event of an error). 417 * 418 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 419 * 420 * Params: 421 * s = The string to be decoded 422 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 423 * 424 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails 425 * 426 * Returns: The decoded string 427 * 428 * Example: 429 * -------------- 430 * writefln(decode("a > b")); // writes "a > b" 431 * -------------- 432 */ 433 string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure 434 { 435 import std.algorithm.searching : startsWith; 436 437 if (mode == DecodeMode.NONE) return s; 438 439 string buffer; 440 foreach (ref i; 0 .. s.length) 441 { 442 char c = s[i]; 443 if (c != '&') 444 { 445 if (buffer.length != 0) buffer ~= c; 446 } 447 else 448 { 449 if (buffer.length == 0) 450 { 451 buffer = s[0 .. i].dup; 452 } 453 if (startsWith(s[i..$],"&#")) 454 { 455 try 456 { 457 dchar d; 458 string t = s[i..$]; 459 checkCharRef(t, d); 460 char[4] temp; 461 import std.utf : encode; 462 buffer ~= temp[0 .. 
encode(temp, d)]; 463 i = s.length - t.length - 1; 464 } 465 catch (Err e) 466 { 467 if (mode == DecodeMode.STRICT) 468 throw new DecodeException("Unescaped &"); 469 buffer ~= '&'; 470 } 471 } 472 else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; } 473 else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; } 474 else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; } 475 else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; } 476 else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; } 477 else 478 { 479 if (mode == DecodeMode.STRICT) 480 throw new DecodeException("Unescaped &"); 481 buffer ~= '&'; 482 } 483 } 484 } 485 return (buffer.length == 0) ? s : buffer; 486 } 487 488 @safe pure unittest 489 { 490 void assertNot(string s) pure 491 { 492 bool b = false; 493 try { decode(s,DecodeMode.STRICT); } 494 catch (DecodeException e) { b = true; } 495 assert(b,s); 496 } 497 498 // Assert that things that should work, do 499 auto s = "hello"; 500 assert(decode(s, DecodeMode.STRICT) is s); 501 assert(decode("a > b", DecodeMode.STRICT) == "a > b"); 502 assert(decode("a < b", DecodeMode.STRICT) == "a < b"); 503 assert(decode("don't", DecodeMode.STRICT) == "don't"); 504 assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\""); 505 assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog"); 506 assert(decode("*", DecodeMode.STRICT) == "*"); 507 assert(decode("*", DecodeMode.STRICT) == "*"); 508 assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog"); 509 assert(decode("a > b", DecodeMode.LOOSE) == "a > b"); 510 assert(decode("&#;", DecodeMode.LOOSE) == "&#;"); 511 assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;"); 512 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 513 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 514 515 // Assert that things that shouldn't work, don't 516 assertNot("cat & dog"); 517 assertNot("a > b"); 518 assertNot("&#;"); 519 assertNot("&#x;"); 520 assertNot("G;"); 521 assertNot("G;"); 522 } 523 524 /** 525 * Class 
representing an XML document.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * All text that precedes the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";
    /**
     * All text that follows the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Builds a Document, i.e. a complete DOM (Document Object Model) tree,
     * by parsing XML text.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    do
    {
        auto parser = new DocumentParser(s);
        // Captured before delegating: its position inside `s` marks where the
        // prolog ends.
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        immutable prologLength = rootTagText.ptr - s.ptr;
        prolog = s[0 .. prologLength];
        parse(parser);
        // Whatever the parser has not consumed is the epilog.
        epilog = *parser.s;
    }

    /**
     * Builds a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Tests two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /**
         * Orders two Documents by prolog, then element content, then epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog < other.prolog) return -1;
            if (prolog > other.prolog) return 1;

            immutable elementOrder = this.Element.opCmp(other);
            if (elementOrder != 0)
                return elementOrder;

            if (epilog < other.epilog) return -1;
            if (epilog > other.epilog) return 1;
            return 0;
        }

        /**
         * Computes the hash of a Document from its prolog, epilog and the
         * hash of its element content.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            const elementHash = (cast() this).Element.toHash();
            const epilogHash = hash(epilog, elementHash);
            return hash(prolog, epilogHash);
        }

        /**
         * Produces the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString() scope const @safe
        {
            return prolog ~ super.toString() ~ epilog;
        }
    }
}

@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    auto a = new Document(source);
    auto b = new Document(source);
    assert(a == b);
    assert(!(a < b));
    int[Document] aa;
    aa[a] = 1;
    assert(aa[b] == 1);

    b ~= new Element("b");
    assert(a < b);
    assert(b > a);
}

/**
 * Class representing an XML element.
666 * 667 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 668 */ 669 class Element : Item 670 { 671 Tag tag; /// The start tag of the element 672 Item[] items; /// The element's items 673 Text[] texts; /// The element's text items 674 CData[] cdatas; /// The element's CData items 675 Comment[] comments; /// The element's comments 676 ProcessingInstruction[] pis; /// The element's processing instructions 677 Element[] elements; /// The element's child elements 678 679 /** 680 * Constructs an Element given a name and a string to be used as a Text 681 * interior. 682 * 683 * Params: 684 * name = the name of the element. 685 * interior = (optional) the string interior. 686 * 687 * Example: 688 * ------------------------------------------------------- 689 * auto element = new Element("title","Serenity") 690 * // constructs the element <title>Serenity</title> 691 * ------------------------------------------------------- 692 */ 693 this(string name, string interior=null) @safe pure 694 { 695 this(new Tag(name)); 696 if (interior.length != 0) opOpAssign!("~")(new Text(interior)); 697 } 698 699 /** 700 * Constructs an Element from a Tag. 701 * 702 * Params: 703 * tag_ = the start or empty tag of the element. 704 */ 705 this(const(Tag) tag_) @safe pure 706 { 707 this.tag = new Tag(tag_.name); 708 tag.type = TagType.EMPTY; 709 foreach (k,v;tag_.attr) tag.attr[k] = v; 710 tag.tagString = tag_.tagString; 711 } 712 713 /** 714 * Append a text item to the interior of this element 715 * 716 * Params: 717 * item = the item you wish to append. 718 * 719 * Example: 720 * -------------- 721 * Element element; 722 * element ~= new Text("hello"); 723 * -------------- 724 */ 725 void opOpAssign(string op)(Text item) @safe pure 726 if (op == "~") 727 { 728 texts ~= item; 729 appendItem(item); 730 } 731 732 /** 733 * Append a CData item to the interior of this element 734 * 735 * Params: 736 * item = the item you wish to append. 
737 * 738 * Example: 739 * -------------- 740 * Element element; 741 * element ~= new CData("hello"); 742 * -------------- 743 */ 744 void opOpAssign(string op)(CData item) @safe pure 745 if (op == "~") 746 { 747 cdatas ~= item; 748 appendItem(item); 749 } 750 751 /** 752 * Append a comment to the interior of this element 753 * 754 * Params: 755 * item = the item you wish to append. 756 * 757 * Example: 758 * -------------- 759 * Element element; 760 * element ~= new Comment("hello"); 761 * -------------- 762 */ 763 void opOpAssign(string op)(Comment item) @safe pure 764 if (op == "~") 765 { 766 comments ~= item; 767 appendItem(item); 768 } 769 770 /** 771 * Append a processing instruction to the interior of this element 772 * 773 * Params: 774 * item = the item you wish to append. 775 * 776 * Example: 777 * -------------- 778 * Element element; 779 * element ~= new ProcessingInstruction("hello"); 780 * -------------- 781 */ 782 void opOpAssign(string op)(ProcessingInstruction item) @safe pure 783 if (op == "~") 784 { 785 pis ~= item; 786 appendItem(item); 787 } 788 789 /** 790 * Append a complete element to the interior of this element 791 * 792 * Params: 793 * item = the item you wish to append. 
794 * 795 * Example: 796 * -------------- 797 * Element element; 798 * Element other = new Element("br"); 799 * element ~= other; 800 * // appends element representing <br /> 801 * -------------- 802 */ 803 void opOpAssign(string op)(Element item) @safe pure 804 if (op == "~") 805 { 806 elements ~= item; 807 appendItem(item); 808 } 809 810 private void appendItem(Item item) @safe pure 811 { 812 items ~= item; 813 if (tag.type == TagType.EMPTY && !item.isEmptyXML) 814 tag.type = TagType.START; 815 } 816 817 private void parse(ElementParser xml) 818 { 819 xml.onText = (string s) { opOpAssign!("~")(new Text(s)); }; 820 xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); }; 821 xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); }; 822 xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); }; 823 824 xml.onStartTag[null] = (ElementParser xml) 825 { 826 auto e = new Element(xml.tag); 827 e.parse(xml); 828 opOpAssign!("~")(e); 829 }; 830 831 xml.parse(); 832 } 833 834 /** 835 * Compares two Elements for equality 836 * 837 * Example: 838 * -------------- 839 * Element e1,e2; 840 * if (e1 == e2) { } 841 * -------------- 842 */ 843 override bool opEquals(scope const Object o) const 844 { 845 const element = toType!(const Element)(o); 846 immutable len = items.length; 847 if (len != element.items.length) return false; 848 foreach (i; 0 .. len) 849 { 850 if (!items[i].opEquals(element.items[i])) return false; 851 } 852 return true; 853 } 854 855 /** 856 * Compares two Elements 857 * 858 * You should rarely need to call this function. It exists so that Elements 859 * can be used as associative array keys. 
860 * 861 * Example: 862 * -------------- 863 * Element e1,e2; 864 * if (e1 < e2) { } 865 * -------------- 866 */ 867 override int opCmp(scope const Object o) @safe const 868 { 869 const element = toType!(const Element)(o); 870 for (uint i=0; ; ++i) 871 { 872 if (i == items.length && i == element.items.length) return 0; 873 if (i == items.length) return -1; 874 if (i == element.items.length) return 1; 875 if (!items[i].opEquals(element.items[i])) 876 return items[i].opCmp(element.items[i]); 877 } 878 } 879 880 /** 881 * Returns the hash of an Element 882 * 883 * You should rarely need to call this function. It exists so that Elements 884 * can be used as associative array keys. 885 */ 886 override size_t toHash() scope const @safe 887 { 888 size_t hash = tag.toHash(); 889 foreach (item;items) hash += item.toHash(); 890 return hash; 891 } 892 893 const 894 { 895 /** 896 * Returns the decoded interior of an element. 897 * 898 * The element is assumed to contain text <i>only</i>. So, for 899 * example, given XML such as "<title>Good &amp; 900 * Bad</title>", will return "Good & Bad". 901 * 902 * Params: 903 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 904 * 905 * Throws: DecodeException if decode fails 906 */ 907 string text(DecodeMode mode=DecodeMode.LOOSE) 908 { 909 string buffer; 910 foreach (item;items) 911 { 912 Text t = cast(Text) item; 913 if (t is null) throw new DecodeException(item.toString()); 914 buffer ~= decode(t.toString(),mode); 915 } 916 return buffer; 917 } 918 919 /** 920 * Returns an indented string representation of this item 921 * 922 * Params: 923 * indent = (optional) number of spaces by which to indent this 924 * element. Defaults to 2. 
925 */ 926 override string[] pretty(uint indent=2) scope 927 { 928 import std.algorithm.searching : count; 929 import std.string : rightJustify; 930 931 if (isEmptyXML) return [ tag.toEmptyString() ]; 932 933 if (items.length == 1) 934 { 935 auto t = cast(const(Text))(items[0]); 936 if (t !is null) 937 { 938 return [tag.toStartString() ~ t.toString() ~ tag.toEndString()]; 939 } 940 } 941 942 string[] a = [ tag.toStartString() ]; 943 foreach (item;items) 944 { 945 string[] b = item.pretty(indent); 946 foreach (s;b) 947 { 948 a ~= rightJustify(s,count(s) + indent); 949 } 950 } 951 a ~= tag.toEndString(); 952 return a; 953 } 954 955 /** 956 * Returns the string representation of an Element 957 * 958 * Example: 959 * -------------- 960 * auto element = new Element("br"); 961 * writefln(element.toString()); // writes "<br />" 962 * -------------- 963 */ 964 override string toString() scope @safe 965 { 966 if (isEmptyXML) return tag.toEmptyString(); 967 968 string buffer = tag.toStartString(); 969 foreach (item;items) { buffer ~= item.toString(); } 970 buffer ~= tag.toEndString(); 971 return buffer; 972 } 973 974 override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; } 975 } 976 } 977 978 /** 979 * Tag types. 980 * 981 * $(DDOC_ENUM_MEMBERS START) Used for start tags 982 * $(DDOC_ENUM_MEMBERS END) Used for end tags 983 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags 984 * 985 */ 986 enum TagType { START, END, EMPTY } 987 988 /** 989 * Class representing an XML tag. 
990 * 991 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 992 * 993 * The class invariant guarantees 994 * <ul> 995 * <li> that $(B type) is a valid enum TagType value</li> 996 * <li> that $(B name) consists of valid characters</li> 997 * <li> that each attribute name consists of valid characters</li> 998 * </ul> 999 */ 1000 class Tag 1001 { 1002 TagType type = TagType.START; /// Type of tag 1003 string name; /// Tag name 1004 string[string] attr; /// Associative array of attributes 1005 private string tagString; 1006 1007 invariant() 1008 { 1009 string s; 1010 string t; 1011 1012 assert(type == TagType.START 1013 || type == TagType.END 1014 || type == TagType.EMPTY); 1015 1016 s = name; 1017 try { checkName(s,t); } 1018 catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); } 1019 1020 foreach (k,v;attr) 1021 { 1022 s = k; 1023 try { checkName(s,t); } 1024 catch (Err e) 1025 { assert(false,"Invalid attribute name:" ~ e.toString()); } 1026 } 1027 } 1028 1029 /** 1030 * Constructs an instance of Tag with a specified name and type 1031 * 1032 * The constructor does not initialize the attributes. To initialize the 1033 * attributes, you access the $(B attr) member variable. 1034 * 1035 * Params: 1036 * name = the Tag's name 1037 * type = (optional) the Tag's type. If omitted, defaults to 1038 * TagType.START. 1039 * 1040 * Example: 1041 * -------------- 1042 * auto tag = new Tag("img",Tag.EMPTY); 1043 * tag.attr["src"] = "http://example.com/example.jpg"; 1044 * -------------- 1045 */ 1046 this(string name, TagType type=TagType.START) @safe pure 1047 { 1048 this.name = name; 1049 this.type = type; 1050 } 1051 1052 /* Private constructor (so don't ddoc this!) 1053 * 1054 * Constructs a Tag by parsing the string representation, e.g. "<html>". 1055 * 1056 * The string is passed by reference, and is advanced over all characters 1057 * consumed. 
1058 * 1059 * The second parameter is a dummy parameter only, required solely to 1060 * distinguish this constructor from the public one. 1061 */ 1062 private this(ref string s, bool dummy) @safe pure 1063 { 1064 import std.algorithm.searching : countUntil; 1065 import std.ascii : isWhite; 1066 import std.utf : byCodeUnit; 1067 1068 tagString = s; 1069 try 1070 { 1071 reqc(s,'<'); 1072 if (optc(s,'/')) type = TagType.END; 1073 ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"); 1074 name = s[0 .. i]; 1075 s = s[i .. $]; 1076 1077 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1078 s = s[i .. $]; 1079 1080 while (s.length > 0 && s[0] != '>' && s[0] != '/') 1081 { 1082 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"); 1083 string key = s[0 .. i]; 1084 s = s[i .. $]; 1085 1086 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1087 s = s[i .. $]; 1088 reqc(s,'='); 1089 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1090 s = s[i .. $]; 1091 1092 immutable char quote = requireOneOf(s,"'\""); 1093 i = s.byCodeUnit.countUntil(quote); 1094 string val = decode(s[0 .. i], DecodeMode.LOOSE); 1095 s = s[i .. $]; 1096 reqc(s,quote); 1097 1098 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1099 s = s[i .. $]; 1100 attr[key] = val; 1101 } 1102 if (optc(s,'/')) 1103 { 1104 if (type == TagType.END) throw new TagException(""); 1105 type = TagType.EMPTY; 1106 } 1107 reqc(s,'>'); 1108 tagString.length = tagString.length - s.length; 1109 } 1110 catch (XMLException e) 1111 { 1112 tagString.length = tagString.length - s.length; 1113 throw new TagException(tagString); 1114 } 1115 } 1116 1117 const 1118 { 1119 /** 1120 * Compares two Tags for equality 1121 * 1122 * You should rarely need to call this function. It exists so that Tags 1123 * can be used as associative array keys. 
1124 * 1125 * Example: 1126 * -------------- 1127 * Tag tag1,tag2 1128 * if (tag1 == tag2) { } 1129 * -------------- 1130 */ 1131 override bool opEquals(scope Object o) 1132 { 1133 const tag = toType!(const Tag)(o); 1134 return 1135 (name != tag.name) ? false : ( 1136 (attr != tag.attr) ? false : ( 1137 (type != tag.type) ? false : ( 1138 true ))); 1139 } 1140 1141 /** 1142 * Compares two Tags 1143 * 1144 * Example: 1145 * -------------- 1146 * Tag tag1,tag2 1147 * if (tag1 < tag2) { } 1148 * -------------- 1149 */ 1150 override int opCmp(Object o) 1151 { 1152 const tag = toType!(const Tag)(o); 1153 // Note that attr is an AA, so the comparison is nonsensical (bug 10381) 1154 return 1155 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) : 1156 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) : 1157 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) : 1158 0 ))); 1159 } 1160 1161 /** 1162 * Returns the hash of a Tag 1163 * 1164 * You should rarely need to call this function. It exists so that Tags 1165 * can be used as associative array keys. 1166 */ 1167 override size_t toHash() 1168 { 1169 return .hashOf(name); 1170 } 1171 1172 /** 1173 * Returns the string representation of a Tag 1174 * 1175 * Example: 1176 * -------------- 1177 * auto tag = new Tag("book",TagType.START); 1178 * writefln(tag.toString()); // writes "<book>" 1179 * -------------- 1180 */ 1181 override string toString() @safe 1182 { 1183 if (isEmpty) return toEmptyString(); 1184 return (isEnd) ? 
toEndString() : toStartString();
        }

        private
        {
            // Builds `<name key="value" ...` with no closing delimiter; every
            // attribute value is passed through encode() before being quoted.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}

/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Returns: true only if `o` is a Comment with identical content
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this comment's
     * content sorts before, equal to or after the other's. If `o` is an Item
     * of a different class the result is 0 (legacy behaviour, kept for
     * compatibility).
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // Must not be folded into `t !is null && (...)`: that expression is a
        // bool, which collapsed the -1 result to 1.
        if (t is null) return 0;
        return content != t.content ? (content < t.content ? -1 : 1) : 0;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

// https://issues.dlang.org/show_bug.cgi?id=16241
@safe unittest
{
    import std.exception : assertThrown;
    auto c = new Comment("==");
    assert(c.content == "==");
    assertThrown!CommentException(new Comment("--"));
}

// opCmp must report "less than" as a negative value, not as 1
@system unittest
{
    auto a = new Comment("a");
    auto b = new Comment("b");
    assert(a.opCmp(b) < 0);
    assert(b.opCmp(a) > 0);
    assert(a.opCmp(new Comment("a")) == 0);

    auto x = new CData("a");
    auto y = new CData("b");
    assert(x.opCmp(y) < 0);
    assert(y.opCmp(x) > 0);
}

/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Returns: true only if `o` is a CData with identical content
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * content sorts before, equal to or after the other's. If `o` is an Item
     * of a different class the result is 0 (legacy behaviour, kept for
     * compatibility).
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        // Kept out of a `&&` chain so the -1 result is not coerced to bool
        if (t is null) return 0;
        return content != t.content ? (content < t.content ? -1 : 1) : 0;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // stores encode("a < b")
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Returns: true only if `o` is a Text with identical (encoded) content
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * (encoded) content sorts before, equal to or after the other's. If `o`
     * is an Item of a different class the result is 0 (legacy behaviour, kept
     * for compatibility).
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        // Kept out of a `&&` chain so the -1 result is not coerced to bool
        if (t is null) return 0;
        return content != t.content ? (content < t.content ? -1 : 1) : 0;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}

/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Returns: true only if `o` is an XMLInstruction with identical content
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's content sorts before, equal to or after the other's. If
     * `o` is an Item of a different class the result is 0 (legacy behaviour,
     * kept for compatibility).
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        // Kept out of a `&&` chain so the -1 result is not coerced to bool
        if (t is null) return 0;
        return content != t.content ? (content < t.content ? -1 : 1) : 0;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Returns: true only if `o` is a ProcessingInstruction with identical
     * content
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's content sorts before, equal to or after the other's. If
     * `o` is an Item of a different class the result is 0 (legacy behaviour,
     * kept for compatibility).
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        // Kept out of a `&&` chain so the -1 result is not coerced to bool
        if (t is null) return 0;
        return content != t.content ? (content < t.content ? -1 : 1) : 0;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}

/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation returns the stripped toString() as a single
     * line, or an empty array when that is empty; `indent` is not used here.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;
        string s = strip(toString());
        return s.length == 0 ? [] : [ s ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}

/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract, which runs check() on
     * the text and asserts with the CheckException's message on failure.
     * Note that in contracts are only evaluated in builds that have
     * contracts enabled.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Confirm that the input is valid XML
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // And if it's not, tell the user why not
            assert(false, "\n" ~ e.toString());
        }
    }
    do
    {
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}

@system unittest
{
    auto doc = new Document("<root><child><grandchild/></child></root>");
    assert(doc.elements.length == 1);
    assert(doc.elements[0].tag.name == "child");
    assert(doc.items == doc.elements);
}

/**
 * Class for parsing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Note that you cannot construct instances of this class directly. You can
 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, Instances of ElementParser will be created for you by the
 * library, and passed your way via onStartTag handlers.
 *
 */
class ElementParser
{
    alias Handler = void delegate(string);
    alias ElementHandler = void delegate(in Element element);
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;               // tag most recently read by parse()
        string elementStart;    // snapshot of *s taken at construction
        string* s;              // remaining input, shared with parent/child parsers

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            s = t;
            this();
            tag_ = tag;
        }
    }

    /**
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }

    /**
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
     *
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /**
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
     *
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    // Records where this parser started so toString() can report the part of
    // the input consumed since then. `s` must already be set by the caller.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }

    /**
     * Register a handler which will be called whenever text is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }

    /**
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate. Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * When both handlers are registered, parse() calls only the raw one.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /**
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /**
     * Register a handler which will be called whenever a comment is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }

    /**
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }

    /**
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }

    /**
     * Parse an XML element.
     *
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * The input is assumed to be well-formed (DocumentParser checks this in
     * its in contract); the terminators searched for below are not
     * re-validated here.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        // null only when called from a constructor before any tag was read
        const Tag root = tag_;
        // Most recent start tag seen for each element name
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto parser = new ElementParser(this);

                    // Named handler first, then the null (catch-all) handler
                    auto handler = tag_.name in onStartTag;
                    if (handler !is null) (*handler)(parser);
                    else
                    {
                        handler = null in onStartTag;
                        if (handler !is null) (*handler)(parser);
                    }
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element's raw text is whatever lies between the end
                    // of the start tag and the beginning of this end tag
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    auto handler = tag_.name in onEndTag;
                    if (handler !is null) (*handler)(element);
                    else
                    {
                        handler = null in onEndTag;
                        if (handler !is null) (*handler)(element);
                    }

                    // Closing the element this parser was created for
                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy
                    // https://issues.dlang.org/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    auto handler1 = startTag.name in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                    else
                    {
                        handler1 = null in onStartTag;
                        if (handler1 !is null) (*handler1)(parser);
                    }

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    auto handler2 = tag_.name in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                    else
                    {
                        handler2 = null in onEndTag;
                        if (handler2 !is null) (*handler2)(element);
                    }
                }
            }
            else
            {
                t = chop(*s,indexOf(*s,"<"));
                // The raw handler takes precedence over the decoding one
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }

    /**
     * Returns that part of the element which has already been parsed
     */
    override string toString() const @nogc @safe pure nothrow
    {
        assert(elementStart.length >= s.length);
        return elementStart[0 .. elementStart.length - s.length];
    }

}

private
{
    // Mixed into every checkXxx function. Captures the input position on
    // entry (`old`) and provides fail() overloads which rewind `s` to that
    // position and throw an Err labelled with `msg`.
    template Check(string msg)
    {
        string old = s;

        void fail() @safe pure
        {
            s = old;
            throw new Err(s,msg);
        }

        void fail(Err e) @safe pure
        {
            s = old;
            throw new Err(s,msg,e);
        }

        void fail(string msg2) @safe pure
        {
            fail(new Err(s,msg2));
        }
    }

    // A comment, a processing instruction, or whitespace
    void checkMisc(ref string s) @safe pure // rule 27
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Misc");

        try
        {
                 if (s.startsWith("<!--")) { checkComment(s); }
            else if (s.startsWith("<?"))   { checkPI(s); }
            else                           { checkSpace(s); }
        }
        catch (Err e) { fail(e); }
    }

    // Prolog, root element, then any number of Misc items
    void checkDocument(ref string s) @safe pure // rule 1
    {
        mixin Check!("Document");
        try
        {
            checkProlog(s);
            checkElement(s);
            star!(checkMisc)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Fails at the first code point for which isChar() is false
    void checkChars(ref string s) @safe pure // rule 2
    {
        // TO DO - Fix std.utf stride and decode functions, then use those
        // instead
        import std.format : format;

        mixin Check!("Chars");

        dchar c;
        ptrdiff_t n = -1;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
        foreach (size_t i, dchar d; s)
        {
            if (!isChar(d))
            {
                c = d;
                n = i;
                break;
            }
        }
        if (n != -1)
        {
            s = s[n..$];
            fail(format("invalid character: U+%04X",c));
        }
    }

    // Consumes one or more leading whitespace characters; fails if there are
    // none
    void checkSpace(ref string s) @safe pure // rule 3
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        mixin Check!("Whitespace");
        ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
        // -1 with a white first character means the whole input is whitespace
        if (i == -1 && s.length > 0 && isWhite(s[0]))
            s = s[$ .. $];
        else if (i > -1)
            s = s[i .. $];
        if (s is old) fail();
    }

    // Consumes a leading XML name from `s` and returns it in `name`
    void checkName(ref string s, out string name) @safe pure // rule 5
    {
        mixin Check!("Name");

        if (s.length == 0) fail();
        // Default to the whole input: when every character is a name
        // character the loop below never reaches `break`, and the name must
        // then extend to the end of `s` rather than be empty.
        ptrdiff_t n = s.length;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
        foreach (size_t i, dchar c; s)
        {
            if (c == '_' || c == ':' || isLetter(c)) continue;
            if (i == 0) fail();
            if (c == '-' || c == '.' || isDigit(c)
                || isCombiningChar(c) || isExtender(c)) continue;
            n = i;
            break;
        }
        name = s[0 .. n];
        s = s[n..$];
    }

    // Consumes a quoted attribute value, including both quotes
    void checkAttValue(ref string s) @safe pure // rule 10
    {
        import std.algorithm.searching : countUntil;
        import std.utf : byCodeUnit;

        mixin Check!("AttValue");

        if (s.length == 0) fail();
        char c = s[0];
        if (c != '\u0022' && c != '\u0027')
            fail("attribute value requires quotes");
        s = s[1..$];
        for (;;)
        {
            // countUntil yields -1 when the closing quote is missing; slicing
            // with it would raise a RangeError, so consume everything and let
            // the length check below report the problem as an Err.
            immutable idx = s.byCodeUnit.countUntil(c);
            s = idx < 0 ? s[$ .. $] : s[idx .. $];
            if (s.length == 0) fail("unterminated attribute value");
            // NOTE(review): after skipping to the quote, s[0] is always `c`,
            // so the '<' test and the reference check below cannot be
            // reached. Left as-is to avoid making check() stricter; confirm
            // whether '<' and '&' are meant to be validated here.
            if (s[0] == '<') fail("< found in attribute value");
            if (s[0] == c) break;
            try { checkReference(s); } catch (Err e) { fail(e); }
        }
        s = s[1..$];
    }

    // Consumes character data up to the next '&' or '<'; "]]>" is an error
    void checkCharData(ref string s) @safe pure // rule 14
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("CharData");

        while (s.length != 0)
        {
            if (s.startsWith("&")) break;
            if (s.startsWith("<")) break;
            if (s.startsWith("]]>")) fail("]]> found within char data");
            s = s[1..$];
        }
    }

    // "<!--", body, then "-->" at the first occurrence of "--"
    void checkComment(ref string s) @safe pure // rule 15
    {
        import std.string : indexOf;

        mixin Check!("Comment");

        try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
        ptrdiff_t n = s.indexOf("--");
        if (n == -1) fail("unterminated comment");
        s = s[n..$];
        try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
    }

    void checkPI(ref string s) @safe pure // rule 16
    {
        mixin Check!("PI");

        try
        {
            checkLiteral("<?",s);
            checkEnd("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkCDSect(ref string s) @safe pure // rule 18
    {
        mixin Check!("CDSect");

        try
        {
            checkLiteral(cdata,s);
            checkEnd("]]>",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkProlog(ref string s) @safe pure // rule 22
    {
        mixin Check!("Prolog");

        try
        {
            /* The XML declaration is optional
             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
             */
            opt!(checkXMLDecl)(s);

            star!(checkMisc)(s);
            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkXMLDecl(ref string s) @safe pure // rule 23
    {
        mixin Check!("XMLDecl");

        try
        {
            checkLiteral("<?xml",s);
            checkVersionInfo(s);
            opt!(checkEncodingDecl)(s);
            opt!(checkSDDecl)(s);
            opt!(checkSpace)(s);
            checkLiteral("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkVersionInfo(ref string s) @safe pure // rule 24
    {
        mixin Check!("VersionInfo");

        try
        {
            checkSpace(s);
            checkLiteral("version",s);
            checkEq(s);
            quoted!(checkVersionNum)(s);
        }
        catch (Err e) { fail(e); }
    }

    // '=' with optional whitespace on either side
    void checkEq(ref string s) @safe pure // rule 25
    {
        mixin Check!("Eq");

        try
        {
            opt!(checkSpace)(s);
            checkLiteral("=",s);
            opt!(checkSpace)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes everything up to (not including) the next double quote; fails
    // if that is nothing at all
    void checkVersionNum(ref string s) @safe pure // rule 26
    {
        import std.algorithm.searching : countUntil;
        import std.utf : byCodeUnit;

        mixin Check!("VersionNum");

        // -1: no double quote in the remaining input (slicing with it would
        // raise a RangeError); 0: empty version number. Both are failures.
        // NOTE(review): only '"' is searched for, so a single-quoted version
        // number is not recognised here — confirm against quoted!().
        immutable n = s.byCodeUnit.countUntil('\"');
        if (n <= 0) fail();
        s = s[n .. $];
    }

    void checkDocTypeDecl(ref string s) @safe pure // rule 28
    {
        mixin Check!("DocTypeDecl");

        try
        {
            checkLiteral("<!DOCTYPE",s);
            //
            // TO DO -- ensure DOCTYPE is well formed
            // (But not yet. That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkSDDecl(ref string s) @safe pure // rule 32
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch (Err e) { fail(e); }

        // n is the length of the quoted value, quotes included
        int n = 0;
             if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
        else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
        else fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
        s = s[n..$];
    }

    // A start tag with content and a matching end tag, or an empty tag
    void checkElement(ref string s) @safe pure // rule 39
    {
        mixin Check!("Element");

        string sname,ename,t;
        try { checkTag(s,t,sname); } catch (Err e) { fail(e); }

        if (t == "STag")
        {
            try
            {
                checkContent(s);
                // Remember where the end tag starts for the error below
                t = s;
                checkETag(s,ename);
            }
            catch (Err e) { fail(e); }

            if (sname != ename)
            {
                s = t;
                fail("end tag name \"" ~ ename
                    ~ "\" differs from start tag name \""~sname~"\"");
            }
        }
    }

    // rules 40 and 44
    // Consumes a start tag or an empty-element tag. `type` is "STag" for a
    // start tag and "ETag" when the tag is closed with "/>".
    void checkTag(ref string s, out string type, out string name) @safe pure
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);
            if (s.length != 0 && s[0] == '/')
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkAttribute(ref string s) @safe pure // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string name;
            checkName(s,name);
            checkEq(s);
            checkAttValue(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkETag(ref string s, out string name) @safe pure // rule 42
    {
        mixin Check!("ETag");

        try
        {
            checkLiteral("</",s);
            checkName(s,name);
            opt!(checkSpace)(s);
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes element content up to (not including) the enclosing end tag
    void checkContent(ref string s) @safe pure // rule 43
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Content");

        try
        {
            while (s.length != 0)
            {
                // Rewind point for a failure is the start of the current item
                old = s;
                     if (s.startsWith("&"))        { checkReference(s); }
                else if (s.startsWith("<!--"))     { checkComment(s); }
                else if (s.startsWith("<?"))       { checkPI(s); }
                else if (s.startsWith(cdata)) { checkCDSect(s); }
                else if (s.startsWith("</"))       { break; }
                else if (s.startsWith("<"))        { checkElement(s); }
                else                               { checkCharData(s); }
            }
        }
        catch (Err e) { fail(e); }
    }

    void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
    {
        import std.format : format;

        mixin Check!("CharRef");

        c = 0;
        try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
        int radix = 10;
        if (s.length != 0 && s[0] == 'x')
        {
            s = s[1..$];
            radix = 16;
        }
        if (s.length == 0) fail("unterminated character reference");
        if (s[0] == ';')
            fail("character reference must have at least one digit");
        while (s.length != 0)
        {
            immutable char d = s[0];
            int n = 0;
            switch (d)
            {
                case 'F','f': ++n;      goto case;
                case 'E','e': ++n;      goto case;
                case 'D','d': ++n;      goto case;
                case 'C','c': ++n;      goto case;
                case 'B','b': ++n;      goto case;
                case 'A','a': ++n;      goto case;
                case '9':     ++n;      goto case;
                case '8':     ++n;      goto case;
                case '7':     ++n;      goto case;
                case '6':     ++n;      goto case;
                case '5':     ++n;      goto case;
                case '4':     ++n;      goto case;
                case '3':     ++n;      goto case;
                case '2':     ++n;      goto case;
                case '1':     ++n;      goto case;
                case '0':     break;
                default: n = 100; break;
            }
            if (n >= radix) break;
            c *= radix;
c += n; 2592 s = s[1..$]; 2593 } 2594 if (!isChar(c)) fail(format("U+%04X is not a legal character",c)); 2595 if (s.length == 0 || s[0] != ';') fail("expected ;"); 2596 else s = s[1..$]; 2597 } 2598 2599 void checkReference(ref string s) @safe pure // rule 67 2600 { 2601 import std.algorithm.searching : startsWith; 2602 2603 mixin Check!("Reference"); 2604 2605 try 2606 { 2607 dchar c; 2608 if (s.startsWith("&#")) checkCharRef(s,c); 2609 else checkEntityRef(s); 2610 } 2611 catch (Err e) { fail(e); } 2612 } 2613 2614 void checkEntityRef(ref string s) @safe pure // rule 68 2615 { 2616 mixin Check!("EntityRef"); 2617 2618 try 2619 { 2620 string name; 2621 checkLiteral("&",s); 2622 checkName(s,name); 2623 checkLiteral(";",s); 2624 } 2625 catch (Err e) { fail(e); } 2626 } 2627 2628 void checkEncName(ref string s) @safe pure // rule 81 2629 { 2630 import std.algorithm.searching : countUntil; 2631 import std.ascii : isAlpha; 2632 import std.utf : byCodeUnit; 2633 2634 mixin Check!("EncName"); 2635 2636 s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $]; 2637 if (s is old) fail(); 2638 s = s[s.byCodeUnit.countUntil('\"', '\'') .. $]; 2639 } 2640 2641 void checkEncodingDecl(ref string s) @safe pure // rule 80 2642 { 2643 mixin Check!("EncodingDecl"); 2644 2645 try 2646 { 2647 checkSpace(s); 2648 checkLiteral("encoding",s); 2649 checkEq(s); 2650 quoted!(checkEncName)(s); 2651 } 2652 catch (Err e) { fail(e); } 2653 } 2654 2655 // Helper functions 2656 2657 void checkLiteral(string literal,ref string s) @safe pure 2658 { 2659 import std.string : startsWith; 2660 2661 mixin Check!("Literal"); 2662 2663 if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\""); 2664 s = s[literal.length..$]; 2665 } 2666 2667 void checkEnd(string end,ref string s) @safe pure 2668 { 2669 import std.string : indexOf; 2670 // Deliberately no mixin Check here. 
2671 2672 auto n = s.indexOf(end); 2673 if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\""); 2674 s = s[n..$]; 2675 checkLiteral(end,s); 2676 } 2677 2678 // Metafunctions -- none of these use mixin Check 2679 2680 void opt(alias f)(ref string s) 2681 { 2682 try { f(s); } catch (Err e) {} 2683 } 2684 2685 void plus(alias f)(ref string s) 2686 { 2687 f(s); 2688 star!(f)(s); 2689 } 2690 2691 void star(alias f)(ref string s) 2692 { 2693 while (s.length != 0) 2694 { 2695 try { f(s); } 2696 catch (Err e) { return; } 2697 } 2698 } 2699 2700 void quoted(alias f)(ref string s) 2701 { 2702 import std.string : startsWith; 2703 2704 if (s.startsWith("'")) 2705 { 2706 checkLiteral("'",s); 2707 f(s); 2708 checkLiteral("'",s); 2709 } 2710 else 2711 { 2712 checkLiteral("\"",s); 2713 f(s); 2714 checkLiteral("\"",s); 2715 } 2716 } 2717 2718 void seq(alias f,alias g)(ref string s) 2719 { 2720 f(s); 2721 g(s); 2722 } 2723 } 2724 2725 /** 2726 * Check an entire XML document for well-formedness 2727 * 2728 * Params: 2729 * s = the document to be checked, passed as a string 2730 * 2731 * Throws: CheckException if the document is not well formed 2732 * 2733 * CheckException's toString() method will yield the complete hierarchy of 2734 * parse failure (the XML equivalent of a stack trace), giving the line and 2735 * column number of every failure at every level. 
2736 */ 2737 void check(string s) @safe pure 2738 { 2739 try 2740 { 2741 checkChars(s); 2742 checkDocument(s); 2743 if (s.length != 0) throw new Err(s,"Junk found after document"); 2744 } 2745 catch (Err e) 2746 { 2747 e.complete(s); 2748 throw e; 2749 } 2750 } 2751 2752 @system pure unittest 2753 { 2754 import std.string : indexOf; 2755 2756 try 2757 { 2758 check(q"[<?xml version="1.0"?> 2759 <catalog> 2760 <book id="bk101"> 2761 <author>Gambardella, Matthew</author> 2762 <title>XML Developer's Guide</title> 2763 <genre>Computer</genre> 2764 <price>44.95</price> 2765 <publish_date>2000-10-01</publish_date> 2766 <description>An in-depth look at creating applications 2767 with XML.</description> 2768 </book> 2769 <book id="bk102"> 2770 <author>Ralls, Kim</author> 2771 <title>Midnight Rain</title> 2772 <genre>Fantasy</genres> 2773 <price>5.95</price> 2774 <publish_date>2000-12-16</publish_date> 2775 <description>A former architect battles corporate zombies, 2776 an evil sorceress, and her own childhood to become queen 2777 of the world.</description> 2778 </book> 2779 <book id="bk103"> 2780 <author>Corets, Eva</author> 2781 <title>Maeve Ascendant</title> 2782 <genre>Fantasy</genre> 2783 <price>5.95</price> 2784 <publish_date>2000-11-17</publish_date> 2785 <description>After the collapse of a nanotechnology 2786 society in England, the young survivors lay the 2787 foundation for a new society.</description> 2788 </book> 2789 </catalog> 2790 ]"); 2791 assert(false); 2792 } 2793 catch (CheckException e) 2794 { 2795 auto n = e.toString().indexOf("end tag name \"genres\" differs"~ 2796 " from start tag name \"genre\""); 2797 assert(n != -1); 2798 } 2799 } 2800 2801 @system unittest 2802 { 2803 string s = q"EOS 2804 <?xml version="1.0"?> 2805 <set> 2806 <one>A</one> 2807 <!-- comment --> 2808 <two>B</two> 2809 </set> 2810 EOS"; 2811 try 2812 { 2813 check(s); 2814 } 2815 catch (CheckException e) 2816 { 2817 assert(0, e.toString()); 2818 } 2819 } 2820 2821 @system unittest 
2822 { 2823 string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream 2824 xmlns:stream="http://etherx.'jabber'.org/streams" 2825 xmlns="jabber:'client'" from='jid.pl' id="587a5767" 2826 xml:lang="en" version="1.0" attr='a"b"c'> 2827 </stream:stream></r>`; 2828 2829 DocumentParser parser = new DocumentParser(test_xml); 2830 bool tested = false; 2831 parser.onStartTag["stream:stream"] = (ElementParser p) { 2832 assert(p.tag.attr["xmlns"] == "jabber:'client'"); 2833 assert(p.tag.attr["from"] == "jid.pl"); 2834 assert(p.tag.attr["attr"] == "a\"b\"c"); 2835 tested = true; 2836 }; 2837 parser.parse(); 2838 assert(tested); 2839 } 2840 2841 @system unittest 2842 { 2843 string s = q"EOS 2844 <?xml version="1.0" encoding="utf-8"?> <Tests> 2845 <Test thing="What & Up">What & Up Second</Test> 2846 </Tests> 2847 EOS"; 2848 auto xml = new DocumentParser(s); 2849 2850 xml.onStartTag["Test"] = (ElementParser xml) { 2851 assert(xml.tag.attr["thing"] == "What & Up"); 2852 }; 2853 2854 xml.onEndTag["Test"] = (in Element e) { 2855 assert(e.text() == "What & Up Second"); 2856 }; 2857 xml.parse(); 2858 } 2859 2860 @system unittest 2861 { 2862 string s = `<tag attr=""value>" />`; 2863 auto doc = new Document(s); 2864 assert(doc.toString() == s); 2865 } 2866 2867 /** The base class for exceptions thrown by this module */ 2868 class XMLException : Exception { this(string msg) @safe pure { super(msg); } } 2869 2870 // Other exceptions 2871 2872 /// Thrown during Comment constructor 2873 class CommentException : XMLException 2874 { private this(string msg) @safe pure { super(msg); } } 2875 2876 /// Thrown during CData constructor 2877 class CDataException : XMLException 2878 { private this(string msg) @safe pure { super(msg); } } 2879 2880 /// Thrown during XMLInstruction constructor 2881 class XIException : XMLException 2882 { private this(string msg) @safe pure { super(msg); } } 2883 2884 /// Thrown during ProcessingInstruction constructor 2885 class PIException : 
XMLException 2886 { private this(string msg) @safe pure { super(msg); } } 2887 2888 /// Thrown during Text constructor 2889 class TextException : XMLException 2890 { private this(string msg) @safe pure { super(msg); } } 2891 2892 /// Thrown during decode() 2893 class DecodeException : XMLException 2894 { private this(string msg) @safe pure { super(msg); } } 2895 2896 /// Thrown if comparing with wrong type 2897 class InvalidTypeException : XMLException 2898 { private this(string msg) @safe pure { super(msg); } } 2899 2900 /// Thrown when parsing for Tags 2901 class TagException : XMLException 2902 { private this(string msg) @safe pure { super(msg); } } 2903 2904 /** 2905 * Thrown during check() 2906 */ 2907 class CheckException : XMLException 2908 { 2909 CheckException err; /// Parent in hierarchy 2910 private string tail; 2911 /** 2912 * Name of production rule which failed to parse, 2913 * or specific error message 2914 */ 2915 string msg; 2916 size_t line = 0; /// Line number at which parse failure occurred 2917 size_t column = 0; /// Column number at which parse failure occurred 2918 2919 private this(string tail,string msg,Err err=null) @safe pure 2920 { 2921 super(null); 2922 this.tail = tail; 2923 this.msg = msg; 2924 this.err = err; 2925 } 2926 2927 private void complete(string entire) @safe pure 2928 { 2929 import std.string : count, lastIndexOf; 2930 import std.utf : toUTF32; 2931 2932 string head = entire[0..$-tail.length]; 2933 ptrdiff_t n = head.lastIndexOf('\n') + 1; 2934 line = head.count("\n") + 1; 2935 dstring t = toUTF32(head[n..$]); 2936 column = t.length + 1; 2937 if (err !is null) err.complete(entire); 2938 } 2939 2940 override string toString() const @safe pure 2941 { 2942 import std.format : format; 2943 2944 string s; 2945 if (line != 0) s = format("Line %d, column %d: ",line,column); 2946 s ~= msg; 2947 s ~= '\n'; 2948 if (err !is null) s = err.toString() ~ s; 2949 return s; 2950 } 2951 } 2952 2953 private alias Err = CheckException; 2954 
// Private helper functions

private
{
    // Casts o to T and returns it; throws InvalidTypeException when the
    // cast yields null.
    inout(T) toType(T)(return inout Object o) @safe
    {
        T t = cast(T)(o);
        if (t is null)
        {
            throw new InvalidTypeException("Attempt to compare a "
                ~ T.stringof ~ " with an instance of another type");
        }
        return t;
    }

    // Removes the first n code units from s and returns them. An n of -1
    // (i.e. size_t.max, as produced by a failed indexOf) means "all of s".
    string chop(ref string s, size_t n) @safe pure nothrow
    {
        if (n == -1) n = s.length;
        string t = s[0 .. n];
        s = s[n..$];
        return t;
    }

    // Optionally consumes c: when s starts with c it is removed and true is
    // returned; otherwise s is left untouched and false is returned.
    bool optc(ref string s, char c) @safe pure nothrow
    {
        immutable bool b = s.length != 0 && s[0] == c;
        if (b) s = s[1..$];
        return b;
    }

    // Requires s to start with c and consumes it.
    // Throws: TagException when s is empty or starts with something else.
    void reqc(ref string s, char c) @safe pure
    {
        // Name the expected character; an empty message gives the caller
        // nothing to diagnose the malformed tag with.
        if (s.length == 0 || s[0] != c)
            throw new TagException("Expected '" ~ c ~ "'");
        s = s[1..$];
    }

    // Requires s to start with one of the code units in chars, consumes it
    // and returns it.
    // Throws: TagException when s is empty or starts with something else.
    char requireOneOf(ref string s, string chars) @safe pure
    {
        import std.string : indexOf;

        if (s.length == 0 || indexOf(chars,s[0]) == -1)
            throw new TagException("Expected one of \"" ~ chars ~ "\"");
        immutable char ch = s[0];
        s = s[1..$];
        return ch;
    }

    alias hash = .hashOf;

    // Definitions from the XML specification
    // Each table is a flat list of inclusive (low, high) code point pairs in
    // ascending order, the layout that lookup() below searches.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
        0x3099,0x3099,0x309A,0x309A];
    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
        0x0ED9,0x0F20,0x0F29];
    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
        0x3035,0x309D,0x309E,0x30FC,0x30FE];

    // Binary search over a table of (low, high) pairs: returns true when c
    // lies within one of the inclusive ranges.
    bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
    {
        while (table.length != 0)
        {
            // Midpoint rounded down to an even index, so that m always
            // addresses the low end of a pair and m+1 its high end.
            auto m = (table.length >> 1) & ~1;
            if (c < table[m])
            {
                table = table[0 .. m];
            }
            else if (c > table[m+1])
            {
                table = table[m+2..$];
            }
            else return true;
        }
        return false;
    }

    // Returns a short preview of s: code units below 0x20 or above 0x7F are
    // shown as '.', and once 40 code units have been produced "___" is
    // appended and the rest of s is dropped.
    string startOf(string s) @safe nothrow pure
    {
        string r;
        foreach (char c;s)
        {
            r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
            if (r.length >= 40) { r ~= "___"; break; }
        }
        return r;
    }

    // Aborts by throwing an XMLException carrying s as its message.
    void exit(string s=null)
    {
        throw new XMLException(s);
    }
}