/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ // /////////////////////////////////////////////////////////////////////////// // Below is nsMsgBodyHandler2, a copy of nsMsgBodyHandler that we use to parse // an entire decrypted message in a string. // // This .cpp file is included in nsMsgBodyHandler.cpp. // /////////////////////////////////////////////////////////////////////////// class nsMsgBodyHandler2 { public: explicit nsMsgBodyHandler2(const nsCString& buf); virtual ~nsMsgBodyHandler2(); int32_t GetNextLine(nsCString& buf, nsCString& charset); bool IsQP() { return m_partIsQP; } protected: void Initialize(); // common initialization code int32_t GetNextLocalLine(nsCString& buf); const char* m_currInput; const char* m_currInputEnd; // Transformations // With the exception of m_isMultipart, these all apply to the various parts bool m_EOF; bool m_pastPartHeaders; // true if we've already skipped over the part // headers bool m_partIsQP; // true if the Content-Transfer-Encoding header claims // quoted-printable bool m_partIsHtml; // true if the Content-type header claims text/html bool m_base64part; // true if the current part is in base64 bool m_isMultipart; // true if the message is a multipart/* message bool m_partIsText; // true if the current part is text/* bool m_inMessageAttachment; // true if current part is message/* nsTArray m_boundaries; // The boundary strings to look for nsCString m_partCharset; // The charset found in the part // See implementation for comments int32_t ApplyTransformations(const nsCString& line, int32_t length, bool& returnThisLine, nsCString& buf); void SniffPossibleMIMEHeader(const nsCString& line); }; nsMsgBodyHandler2::nsMsgBodyHandler2(const nsCString& buf) { m_currInput = buf.BeginReading(); m_currInputEnd = m_currInput + buf.Length(); Initialize(); } void nsMsgBodyHandler2::Initialize() { // Default transformations for local message search and MAPI access m_EOF = false; m_partIsHtml = false; m_base64part = false; m_partIsQP = false; m_isMultipart = false; m_partIsText = true; // Default is text/plain, maybe proven otherwise later. m_pastPartHeaders = false; m_inMessageAttachment = false; } nsMsgBodyHandler2::~nsMsgBodyHandler2() {} int32_t nsMsgBodyHandler2::GetNextLine(nsCString& buf, nsCString& charset) { if (m_EOF) return -1; int32_t length = -1; // length of incoming line or -1 eof int32_t outLength = -1; // length of outgoing line or -1 eof bool eatThisLine = true; nsAutoCString nextLine; while (eatThisLine) { length = GetNextLocalLine(nextLine); if (length < 0) break; // eof in outLength = ApplyTransformations(nextLine, length, eatThisLine, buf); } if (outLength < 0) return -1; // eof out // For non-multipart messages, the entire message minus headers is encoded. if (!m_isMultipart && m_base64part) { nsMsgBodyHandler::Base64Decode(buf); outLength = buf.Length(); m_base64part = false; } // Process aggregated HTML. if (!m_isMultipart && m_partIsHtml) { nsMsgBodyHandler::StripHtml(buf); outLength = buf.Length(); } charset = m_partCharset; return outLength; } // return -1 if no more local lines, length of next line otherwise. int32_t nsMsgBodyHandler2::GetNextLocalLine(nsCString& buf) { if (m_EOF) return -1; if (m_currInput >= m_currInputEnd) return -1; const char* q = m_currInput; // Deliver the next line. while (q < m_currInputEnd && *q && *q != '\r' && *q != '\n') q++; if (!*q && q < m_currInputEnd) { NS_WARNING("nsMsgBodyHandler2: null byte found in message buffer"); m_EOF = true; } int32_t l = q - m_currInput; buf.Assign(m_currInput, q - m_currInput); // This mimicks nsILineInputStream.readLine() which claims to skip // LF, CR, CRLF and LFCR. if (*q == '\r' && (q + 1) < m_currInputEnd && *(q + 1) == '\n') { q += 2; } else if (*q == '\n' && (q + 1) < m_currInputEnd && *(q + 1) == '\r') { q += 2; } else if (*q) { q++; } m_currInput = q; return l; } /** * This method applies a sequence of transformations to the line. * * It applies the following sequences in order * * Removes headers if the searcher doesn't want them * (sets m_past*Headers) * * Determines the current MIME type. * (via SniffPossibleMIMEHeader) * * Strips any HTML if the searcher doesn't want it * * Strips non-text parts * * Decodes any base64 part * (resetting part variables: m_base64part, m_pastPartHeaders, m_partIsHtml, * m_partIsText) * * @param line (in) the current line * @param length (in) the length of said line * @param eatThisLine (out) whether or not to ignore this line * @param buf (inout) if m_base64part, the current part as needed for * decoding; else, it is treated as an out param (a * redundant version of line). * @return the length of the line after applying transformations */ int32_t nsMsgBodyHandler2::ApplyTransformations(const nsCString& line, int32_t length, bool& eatThisLine, nsCString& buf) { eatThisLine = false; if (!m_pastPartHeaders) // line is a line from the part headers { eatThisLine = true; // We have already grabbed all worthwhile information from the headers, // so there is no need to keep track of the current lines buf.Assign(line); SniffPossibleMIMEHeader(buf); if (buf.IsEmpty() || buf.First() == '\r' || buf.First() == '\n') { if (!m_inMessageAttachment) { m_pastPartHeaders = true; } else { // We're in a message attachment and have just read past the // part header for the attached message. We now need to read // the message headers and any part headers. // We can now forget about the special handling of attached messages. m_inMessageAttachment = false; } } return length; } // Check to see if this is one of our boundary strings. bool matchedBoundary = false; if (m_isMultipart && m_boundaries.Length() > 0) { for (int32_t i = (int32_t)m_boundaries.Length() - 1; i >= 0; i--) { if (StringBeginsWith(line, m_boundaries[i])) { matchedBoundary = true; // If we matched a boundary, we won't need the nested/later ones any // more. m_boundaries.SetLength(i + 1); break; } } } if (matchedBoundary) { if (m_base64part && m_partIsText) { nsMsgBodyHandler::Base64Decode(buf); // Work on the parsed string if (!buf.Length()) { NS_WARNING("Trying to transform an empty buffer"); eatThisLine = true; } else { // Avoid spurious failures eatThisLine = false; } } else if (!m_partIsHtml) { buf.Truncate(); eatThisLine = true; // We have no content... } if (m_partIsHtml) { nsMsgBodyHandler::StripHtml(buf); } // Reset all assumed headers m_base64part = false; m_pastPartHeaders = false; m_partIsHtml = false; // If we ever see a multipart message, each part needs to set // 'm_partIsText', so no more defaulting to 'true' when the part is done. m_partIsText = false; m_partIsQP = false; return buf.Length(); } if (!m_partIsText) { // Ignore non-text parts buf.Truncate(); eatThisLine = true; return 0; } // Accumulate base64 parts and HTML parts for later decoding or tag stripping. if (m_base64part || m_partIsHtml) { if (m_partIsHtml && !m_base64part) { size_t bufLength = buf.Length(); if (!m_partIsQP || bufLength == 0 || !StringEndsWith(buf, "="_ns)) { // Replace newline in HTML with a space. buf.Append(' '); } else { // Strip the soft line break. buf.SetLength(bufLength - 1); } } buf.Append(line); eatThisLine = true; return buf.Length(); } buf.Assign(line); return buf.Length(); } /** * Determines the MIME type, if present, from the current line. * * m_partIsHtml, m_isMultipart, m_partIsText, m_base64part, and boundary are * all set by this method at various points in time. * * @param line (in) a header line that may contain a MIME header */ void nsMsgBodyHandler2::SniffPossibleMIMEHeader(const nsCString& line) { // Some parts of MIME are case-sensitive and other parts are case-insensitive; // specifically, the headers are all case-insensitive and the values we care // about are also case-insensitive, with the sole exception of the boundary // string, so we can't just take the input line and make it lower case. nsCString lowerCaseLine; ToLowerCase(line, lowerCaseLine); if (StringBeginsWith(lowerCaseLine, "content-transfer-encoding:"_ns)) m_partIsQP = lowerCaseLine.Find("quoted-printable") != -1; if (StringBeginsWith(lowerCaseLine, "content-type:"_ns)) { if (lowerCaseLine.Find("text/html") != kNotFound) { m_partIsText = true; m_partIsHtml = true; } else if (lowerCaseLine.Find("multipart/") != kNotFound) { if (m_isMultipart) { // Nested multipart, get ready for new headers. m_base64part = false; m_partIsQP = false; m_pastPartHeaders = false; m_partIsHtml = false; m_partIsText = false; } m_isMultipart = true; m_partCharset.Truncate(); } else if (lowerCaseLine.Find("message/") != kNotFound) { // Initialise again. m_base64part = false; m_partIsQP = false; m_pastPartHeaders = false; m_partIsHtml = false; m_partIsText = true; // Default is text/plain, maybe proven otherwise later. m_inMessageAttachment = true; } else if (lowerCaseLine.Find("text/") != kNotFound) m_partIsText = true; else if (lowerCaseLine.Find("text/") == kNotFound) m_partIsText = false; // We have disproven our assumption. } int32_t start; if (m_isMultipart && (start = lowerCaseLine.Find("boundary=")) != kNotFound) { start += 9; // strlen("boundary=") if (line[start] == '\"') start++; int32_t end = line.RFindChar('\"'); if (end == -1) end = line.Length(); // Collect all boundaries. Since we only react to crossing a boundary, // we can simply collect the boundaries instead of forming a tree // structure from the message. Keep it simple ;-) nsCString boundary; boundary.AssignLiteral("--"); boundary.Append(Substring(line, start, end - start)); if (!m_boundaries.Contains(boundary)) m_boundaries.AppendElement(boundary); } if ((start = lowerCaseLine.Find("charset=")) != kNotFound) { start += 8; // strlen("charset=") bool foundQuote = false; if (line[start] == '\"') { start++; foundQuote = true; } int32_t end = line.FindChar(foundQuote ? '\"' : ';', start); if (end == -1) end = line.Length(); m_partCharset.Assign(Substring(line, start, end - start)); } if (StringBeginsWith(lowerCaseLine, "content-transfer-encoding:"_ns) && lowerCaseLine.Find(ENCODING_BASE64) != kNotFound) m_base64part = true; }