/* xml.js a simple XML DOM, encoder and parser for morphic.js written by Jens Mönig jens@moenig.org Copyright (C) 2020 by Jens Mönig This file is part of Snap!. Snap! is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . prerequisites: -------------- needs morphic.js hierarchy --------- the following tree lists all constructors hierarchically, indentation indicating inheritance. Refer to this list to get a contextual overview: Node* XML_Element ReadStream * defined in morphic.js toc --- the following list shows the order in which all constructors are defined. Use this list to locate code in this document: ReadStream XML_Element credits ------- Nathan Dinsmore contributed to the design and implemented a first working version of a complete XMLSerializer. I have taken much of the overall design and many of the functions and methods in this file from Nathan's fine original prototype. Recently Nathan has once again worked his magic on the parser and optimized it by an order of magnitude. */ /*global modules, detect, Node, isNil*/ /*jshint esversion: 6*/ // Global stuff //////////////////////////////////////////////////////// modules.xml = '2021-July-05'; // Declarations var ReadStream; var XML_Element; // ReadStream //////////////////////////////////////////////////////////// // I am a sequential reading interface to an Array or String // ReadStream instance creation: function ReadStream(arrayOrString) { this.contents = arrayOrString || ''; this.index = 0; } // ReadStream constants: ReadStream.prototype.nonSpace = /\S|$/g; ReadStream.prototype.nonWord = /[\s\>\/\=]|$/g; // ReadStream accessing: ReadStream.prototype.next = function (count) { var element, start; if (count === undefined) { element = this.contents[this.index]; this.index += 1; return element; } start = this.index; this.index += count; return this.contents.slice(start, this.index); }; ReadStream.prototype.peek = function () { return this.contents[this.index]; }; ReadStream.prototype.skip = function (count) { this.index += count || 1; }; ReadStream.prototype.atEnd = function () { return this.index > (this.contents.length - 1); }; // ReadStream accessing String contents: ReadStream.prototype.upTo = function (str) { var i = this.contents.indexOf(str, this.index); return i === -1 ? '' : this.contents.slice(this.index, this.index = i); }; ReadStream.prototype.peekUpTo = function (str) { var i = this.contents.indexOf(str, this.index); return i === -1 ? '' : this.contents.slice(this.index, i); }; ReadStream.prototype.skipSpace = function () { this.nonSpace.lastIndex = this.index; var result = this.nonSpace.exec(this.contents); if (result) this.index = result.index; }; ReadStream.prototype.word = function () { this.nonWord.lastIndex = this.index; var result = this.nonWord.exec(this.contents); return result ? this.contents.slice(this.index, this.index = result.index) : ''; }; // XML_Element /////////////////////////////////////////////////////////// /* I am a DOM-Node which can encode itself to as well as parse itself from a well-formed XML string. Note that there is no separate parser object, all the parsing can be done in a single object. */ // XML_Element inherits from Node: XML_Element.prototype = Object.create(Node.prototype); XML_Element.prototype.constructor = XML_Element; XML_Element.uber = Node.prototype; // XML_Element preferences settings: XML_Element.prototype.indentation = ' '; // XML_Element instance creation: function XML_Element(tag, contents, parent) { this.init(tag, contents, parent); } XML_Element.prototype.init = function (tag, contents, parent) { // additional properties: this.tag = tag || 'unnamed'; this.attributes = {}; this.contents = contents || ''; // initialize inherited properties: XML_Element.uber.init.call(this); // override inherited properties if (parent) parent.addChild(this); }; // XML_Element DOM navigation: (aside from what's inherited from Node) XML_Element.prototype.require = function (tagName, fallback) { // answer the first direct child with the specified tagName. // if it doesn't exist execute the fallback function or return the // fallback value, otherwise throw an error var child = this.childNamed(tagName); if (!child) { if (fallback instanceof Function) { return fallback(); } if (!isNil(fallback)) { return fallback; } throw new Error('Missing required element <' + tagName + '>!'); } return child; }; XML_Element.prototype.childNamed = function (tagName) { // answer the first direct child with the specified tagName, or null return detect( this.children, child => child.tag === tagName ); }; XML_Element.prototype.childrenNamed = function (tagName) { // answer all direct children with the specified tagName return this.children.filter(child => child.tag === tagName); }; XML_Element.prototype.parentNamed = function (tagName) { // including myself if (this.tag === tagName) { return this; } if (!this.parent) { return null; } return this.parent.parentNamed(tagName); }; // XML_Element output: XML_Element.prototype.toString = function (isFormatted, indentationLevel) { var result = '', indent = '', level = indentationLevel || 0, key, i; // spaces for indentation, if any if (isFormatted) { for (i = 0; i < level; i += 1) { indent += this.indentation; } result += indent; } // opening tag result += ('<' + this.tag); // attributes, if any for (key in this.attributes) { if (Object.prototype.hasOwnProperty.call(this.attributes, key) && this.attributes[key]) { result += ' ' + key + '="' + this.escape(this.attributes[key]) + '"'; } } // contents, subnodes, and closing tag if (!this.contents.length && !this.children.length) { result += '/>'; } else { result += '>'; result += this.escape(this.contents); this.children.forEach(element => { if (isFormatted) { result += '\n'; } result += element.toString(isFormatted, level + 1); }); if (isFormatted && this.children.length) { result += ('\n' + indent); } result += ''; } return result; }; XML_Element.prototype.escape = function (string, ignoreQuotes) { var src = isNil(string) ? '' : string.toString(), result = '', i, ch; for (i = 0; i < src.length; i += 1) { ch = src[i]; switch (ch) { case '\'': result += '''; break; case '\"': result += ignoreQuotes ? ch : '"'; break; case '<': result += '<'; break; case '>': result += '>'; break; case '&': result += '&'; break; case '\n': // escape CR b/c of export to URL feature result += ' '; break; case '~': // escape tilde b/c it's overloaded in serializer.store() result += '~'; break; default: result += ch; } } return result; }; XML_Element.prototype.unescape = function (string) { return string.replace(/&(amp|apos|quot|lt|gt|#xD|#126);/g, (_, name) => { switch (name) { case 'amp': return '&'; case 'apos': return '\''; case 'quot': return '"'; case 'lt': return '<'; case 'gt': return '>'; case '#xD': return '\n'; case '#126': return '~'; default: console.warn('unreachable'); } }); }; // XML_Element parsing: XML_Element.prototype.parseString = function (string) { var stream = new ReadStream(string); stream.upTo('<'); stream.skip(); this.parseStream(stream); }; XML_Element.prototype.parseStream = function (stream) { var key, value, ch, child; // tag: this.tag = stream.word(); stream.skipSpace(); // attributes: ch = stream.peek(); while (ch !== '>' && ch !== '/') { key = stream.word(); stream.skipSpace(); if (stream.next() !== '=') { throw new Error('Expected "=" after attribute name'); } stream.skipSpace(); ch = stream.next(); if (ch !== '"' && ch !== "'") { throw new Error('Expected single- or double-quoted attribute value'); } value = stream.upTo(ch); stream.skip(1); stream.skipSpace(); this.attributes[key] = this.unescape(value); ch = stream.peek(); } // empty tag: if (ch === '/') { stream.skip(); if (stream.next() !== '>') { throw new Error('Expected ">" after "/" in empty tag'); } return; } if (stream.next() !== '>') { throw new Error('Expected ">" after tag name and attributes'); } // contents and children while (!stream.atEnd()) { ch = stream.next(); if (ch === '<') { if (stream.peek() === '/') { // closing tag stream.skip(); if (stream.word() !== this.tag) { throw new Error('Expected to close ' + this.tag); } stream.upTo('>'); stream.skip(); this.contents = this.unescape(this.contents); return; } child = new XML_Element(null, null, this); child.parseStream(stream); } else { this.contents += ch; } } };