/*
 * MIT License
 *
 * Copyright (c) 2019 papnkukn
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
  MailStringUtils: "resource:///modules/MailStringUtils.sys.mjs",
});

/**
 * Matches a maximal run of "$xx" hex byte escapes whose first "$" is not
 * escaped by a preceding backslash.
 */
const HEX_ESCAPE_RUN_RE = /(?<!\\)(?:\$[0-9A-F]{2})+/gi;

/**
 * Strict UTF-8 decoder: decode() throws on malformed input, which lets
 * fixEncoding() fall back to a byte-per-character interpretation.
 */
const STRICT_UTF8_DECODER = new TextDecoder("utf-8", { fatal: true });

/**
 * MorkParser. Reads Mork formatted data from a (.msf) file and transforms it
 * to an object that can then be JSON'd.
 *
 * Example usage:
 *
 * var { MorkParser } = ChromeUtils.importESModule("resource:///modules/MorkParser.sys.mjs");
 * await MorkParser.dumpFile("~/.thunderbird/qyjeoqu0.conv/ImapMail/raspberrypi-1.local/INBOX.msf");
 *
 * @see https://github.com/papnkukn/mork-parser
 */
export class MorkParser {
  constructor() {
    // All tables are (re)created by parseContent().
    this.key_table = null;
    this.val_table = null;
    this.row_hash = null;
    this.skipped = 0;
    this.total = 0;
  }

  /**
   * Convenience method to print the JSON-ified Mork data to stdout.
   *
   * @param {string} path - Path to the .msf file to dump.
   * @param {boolean} [prettify=false] - Show human readable msg hdr data.
   * @param {string} [folderURI=null] - Folder URI.
   * @returns {Promise<object[]>} the parsed Mork data.
   */
  static async dumpFile(path, prettify = false, folderURI = null) {
    const msf = lazy.MailStringUtils.uint8ArrayToByteString(
      await IOUtils.read(path)
    );
    let data = new MorkParser().parseContent(msf);
    if (prettify) {
      // Only rows carrying a message-id are converted.
      data = data
        .filter(o => "message-id" in o)
        .map(o => MorkParser.readableMsgHdrData(o, folderURI));
    }
    dump(JSON.stringify(data, null, 2) + "\n");
    return data;
  }

  /**
   * Map known header data to a more human readable format.
   *
   * @param {object} o - Object from Mork.
   * @param {string} [folderURI=null] - Folder URI. If set will be used to form
   *   message uri one can use to display a message.
   * @returns {object} the readable header data. Fields that are missing in
   *   `o` are undefined, null or an empty array depending on the field.
   */
  static readableMsgHdrData(o, folderURI = null) {
    // Hexadecimal string -> integer; undefined when the field is missing.
    const hexToInt = v => (v ? parseInt(v, 16) : undefined);
    // Hexadecimal seconds since the epoch -> Date; null when missing.
    const hexToDate = v => (v ? new Date(parseInt(v, 16) * 1000) : null);
    // TODO: use real header parsing!
    const splitAddressList = v =>
      (v || "")
        .split(",")
        .map(r => r.trim())
        .filter(Boolean);

    // mailbox:// -> mailbox-message://, imap:// -> imap-message:// etc.
    const baseMessageURI = folderURI?.replace(
      /^(.*):\/\/(.+)/,
      "$1-message://$2"
    );

    return {
      uri: baseMessageURI
        ? `${baseMessageURI}#${parseInt(o["@id"], 16)}`
        : undefined,
      messageId: o["message-id"],
      references: (o.references || "")
        .trim()
        .replaceAll(/\s+/g, " ")
        .replaceAll(/[<>]/g, "")
        .split(" ")
        .filter(Boolean),
      date: hexToDate(o.date),
      received: hexToDate(o.dateReceived),
      subject: o.subject,
      from: o.sender ? [o.sender] : [],
      recipients: splitAddressList(o.recipients),
      ccList: splitAddressList(o.ccList),
      flags: hexToInt(o.flags),
      priority: hexToInt(o.priority),
      size: hexToInt(o.size),
      storeToken: o.storeToken,
      offlineMsgSize: hexToInt(o.offlineMsgSize),
      numLines: hexToInt(o.numLines),
      preview: o.preview || undefined,
      junkscoreorigin: o.junkscoreorigin || null,
      junkpercent: o.junkpercent || undefined,
      senderName: o.sender_name || undefined,
      prevkeywords: o.prevkeywords || undefined,
      keywords: o.keywords || undefined,
      remoteContentPolicy: hexToInt(o.remoteContentPolicy),
      protoThreadFlags: hexToInt(o.ProtoThreadFlags),
      account: o.account || undefined,
      glodaId: hexToInt(o["gloda-id"]),
      xGmMsgId: o["X-GM-MSGID"] || undefined,
      xGmThrId: o["X-GM-THRID"] || undefined,
      xGmLabels: o["X-GM-LABELS"] || undefined,
      pseudoHdr: hexToInt(o.pseudoHdr),
      enigmail: hexToInt(o.enigmail),
      notAPhishMessage: hexToInt(o.notAPhishMessage),
    };
  }

  /**
   * Parse mork content and return an array of objects.
   *
   * Resets key_table, val_table, row_hash and the counters before parsing.
   *
   * @param {string} body
   * @returns {object[]} one object per row, each with its row id stored under
   *   "@id". If a segment cannot be parsed, a single-element array holding
   *   an `{ error, section, segment }` object is returned instead.
   */
  parseContent(body) {
    // Reset parser state.
    this.key_table = {};
    this.val_table = {};
    this.row_hash = {};
    this.skipped = 0;
    this.total = 0;

    let sectionEndRe = null;
    let section = "top level";

    let rest = body
      // Windows Mozilla uses \r\n
      .replace(/\r\n/g, "\n")
      // Presumably Mac Mozilla is similarly dumb
      .replace(/\r/g, "\n")
      // Sometimes backslash is quoted with a backslash; convert to hex.
      // ("$$" is a literal "$" in a replacement string.)
      .replace(/\\\\/g, "$$5C")
      // Close-paren is quoted with a backslash; convert to hex.
      .replace(/\\\)/g, "$$29")
      // Backslash at end of line is continuation.
      .replace(/\\\n/g, "");

    // Figure out what we're looking at, and parse it.
    while (rest.trim()) {
      // Comment. Also accepted at the very end of the input, where there is
      // no terminating newline.
      let m = /^\s*\/\/.*(?:\n|$)/.exec(rest);
      if (m) {
        rest = rest.substring(m[0].length);
        continue;
      }

      // Key table <(a=c)>.
      m = /^\s*<\s*<\(a=c\)>[\S\s]+?(([^>]*))>\s*/.exec(rest);
      if (m) {
        rest = rest.substring(m[0].length);
        this.parseKeyTable(section, m[1]);
        continue;
      }

      // Values <...>.
      m = /^\s*<([\S\s]*?\))>\s*/.exec(rest);
      if (m) {
        rest = rest.substring(m[0].length);
        this.parseValueTable(section, m[1]);
        continue;
      }

      // Table {...}.
      m =
        /^\s*\{-?[\dA-F]+:[\S\s]*?\{(([\S\s]*?\})([\S\s]*?\}[^,\\)}]))\s*/i.exec(
          rest
        );
      if (m) {
        rest = rest.substring(m[0].length);
        this.parseTable(section, m[1]);
        continue;
      }

      // Rows (-> table) [...].
      m = /^\s*((\[[\S\s]*?\]\s*)+)/.exec(rest);
      if (m) {
        rest = rest.substring(m[0].length);
        this.parseTable(section, m[1]);
        continue;
      }

      // Section end.
      if (sectionEndRe) {
        m = sectionEndRe.exec(rest);
        if (m) {
          rest = rest.substring(m[0].length);
          sectionEndRe = null;
          section = "top level";
          continue;
        }
      }

      // Section begin. This pattern is not anchored, so the marker is cut
      // out from wherever it was found.
      m = /\@\$\$\{([\dA-F]+)\{\@\s*/i.exec(rest);
      if (m) {
        section = m[1];
        rest = rest.slice(0, m.index) + rest.slice(m.index + m[0].length);
        sectionEndRe = new RegExp(
          "^\\s*\\@\\$\\$\\}" + section + "\\}\\@\\s*"
        );
        continue;
      }

      // Unknown segment: report at most the first 255 characters.
      const segment = rest.slice(0, 255);
      console.error(`Cannot parse ${section}: ${segment}`);
      return [{ error: "Cannot parse!", section, segment }];
    }

    if (sectionEndRe) {
      console.error("Unterminated section " + section);
    }

    // Convert dictionary to array.
    const list = [];
    for (const [id, row] of Object.entries(this.row_hash)) {
      row["@id"] = id;
      list.push(row);
    }
    return list;
  }

  /**
   * Parse a row and column table. Every [...] block found is merged into
   * row_hash under its row id, and `total` is incremented per block.
   *
   * @param {string} section
   * @param {string} table_part
   */
  parseTable(section, table_part) {
    // Assumption: no relevant spaces in values in this section. Note that
    // this strips whitespace from inline (^key=value) cells as well.
    const compact = table_part.replace(/\s+/g, "");

    // Grab each complete [...] block.
    for (const [, row] of compact.matchAll(/[^[]*\[([\S\s]+?)\]/g)) {
      // Break up the row - it consists of an id and the rest are cells.
      const [rawId, ...cells] = row.split(/[()]+/);

      // Trim junk: a leading "-" and everything from the first ":" on.
      const id = rawId.replace(/^-/, "").replace(/:[\S\s]*/, "");

      // Merge into the row collected so far for this id, if any.
      const hash = this.row_hash[id] || {};

      for (const cell of cells) {
        // Skip empty record.
        if (!cell.trim()) {
          continue;
        }

        // Extract key and value: "^key^valueRef" or "^key=literal".
        const cm = /^\^([-\dA-F]+)([\^=])([\S\s]*)$/i.exec(cell);
        if (!cm) {
          continue;
        }
        const [, keyi, which, vali] = cm;

        // Ignore the key if it isn't in the key table.
        const key = this.key_table[keyi];
        if (!key) {
          continue;
        }

        // "=" carries the value inline, "^" references the value table.
        const val = which === "=" ? vali : this.val_table[vali];

        // Fix character encoding.
        hash[key] = this.fixEncoding(val);
      }

      this.total++;
      this.row_hash[id] = hash;
    }
  }

  /**
   * Parse a values table and add its (id=value) pairs to val_table.
   *
   * @param {string} section
   * @param {string} val_part
   * @returns {object} val_table, or an empty object if val_part is empty.
   */
  parseValueTable(section, val_part) {
    if (!val_part) {
      return {};
    }

    // Extract pairs (key=value).
    for (const pair of val_part.split(/\(([^\)]+)\)/)) {
      // Skip the (blank) text between pairs.
      if (!pair.trim()) {
        continue;
      }

      const m = /([\dA-F]*)[\t\n ]*=[\t\n ]*([\S\s]*)/i.exec(pair);
      if (!m) {
        continue;
      }

      const [, key, val] = m;
      // Blank values are not recorded.
      if (!val.trim()) {
        continue;
      }

      // Stored undecoded; parseTable() applies fixEncoding() on lookup.
      this.val_table[key] = val;
    }

    return this.val_table;
  }

  /**
   * Parse a key table and add its (id=name) pairs to key_table.
   *
   * @param {string} section
   * @param {string} key_part
   * @returns {object} key_table.
   */
  parseKeyTable(section, key_part) {
    // Remove comments (starting with "//" until the end of the line).
    const withoutComments = key_part.replace(/\s*\/\/.*$/gm, "");

    // Extract pairs (key=value) and convert to dictionary object.
    for (const pair of withoutComments.split(/\(([^\)]+)\)/)) {
      // Skip the (blank) text between pairs.
      if (!pair.trim()) {
        continue;
      }

      // Parse key-value pairs.
      const m = /([\dA-F]+)\s*=\s*([\S\s]*)/i.exec(pair);
      if (m) {
        const [, key, val] = m;
        this.key_table[key] = val;
      }
    }

    return this.key_table;
  }

  /**
   * Fix character encoding: decode "$xx" hex byte escapes, e.g. remove $00
   * but keep \$ff (escaped with slash).
   *
   * Each run of consecutive escapes is decoded as a whole, so multi-byte
   * UTF-8 sequences of any length, several sequences in a row and sequences
   * at the start of the value are all handled. NUL bytes are dropped. A run
   * that is not valid UTF-8 is interpreted as one character per byte.
   * A "$" not followed by two hex digits is left untouched.
   *
   * @param {string} value
   * @returns {string} the decoded value; falsy values are returned as is.
   */
  fixEncoding(value) {
    if (!value || !value.includes("$")) {
      return value;
    }

    return value.replace(HEX_ESCAPE_RUN_RE, run => {
      // "$E2$80$93" -> [0xE2, 0x80, 0x93], minus any NUL bytes.
      const bytes = Uint8Array.from(
        run
          .slice(1)
          .split("$")
          .map(hex => parseInt(hex, 16))
          .filter(byte => byte !== 0)
      );
      try {
        return STRICT_UTF8_DECODER.decode(bytes);
      } catch {
        // Not valid UTF-8: deliberately fall back to treating every byte as
        // a single character code (e.g. lone $E9 -> "é").
        return String.fromCharCode(...bytes);
      }
    });
  }
}