to \[img\]. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] and [3] are attribute text, [2] is the image URL)
 * @return string Substituted text
 *
 * @ignore
 */
function _img_tag_fixup(array $matches) : string
{
    // Merge the attribute text of both capture groups into one string that carries a single leading space
    $params = trim($matches[1]);
    if ($params != '') {
        $params .= ' ';
    }
    $params .= trim($matches[3]);
    if ($params != '') {
        $params = ' ' . $params;
    }

    // Rename/convert the attributes that are carried over on to the [img] tag
    $params = str_replace('alt="', 'param="', $params);
    $params = preg_replace('#style="[^"]*vertical-align: ([^;"]+)(;[^"]*)?;?"#i', 'align="${1}"', $params);

    // Remove every attribute that is not carried over
    $params = str_replace(' class="c-img"', '', $params);
    $extraneous = ['border', 'height', 'hspace', 'ismap', 'longdesc', 'usemap', 'vspace', 'width', 'id', 'class', 'title', 'style', 'lang'];
    foreach ($extraneous as $ex) {
        $params = preg_replace('# ' . $ex . '="[^"]*"#', '', $params);
    }
    $params = str_replace(' ismap', '', $params); // Value-less form of the attribute
    $params = preg_replace('#\sdata-[\w\-]+="[^"]*"#', '', $params);

    /*$referer = post_param_string('http_referer', $_SERVER['HTTP_REFERER']);*/ // CKEditor allows us to specify the base, so we know get_base_url() is right
    $caller_url = /*looks_like_url($referer) ? preg_replace('#/[^/]*$#', '', $referer) : */get_base_url();

    // URLs containing these Tempcode symbols are left untouched; anything else is qualified against the base URL
    if ((strpos($matches[2], '{$FIND_SCRIPT') === false) && (strpos($matches[2], '{$IMG') === false)) {
        $new_url = qualify_url($matches[2], $caller_url);
    } else {
        $new_url = $matches[2];
    }

    return '[img' . rtrim($params) . ']' . $new_url . '[/img]';
}

/**
 * Used by semihtml_to_comcode to fix URLs to be absolute. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([2] is the entity-encoded URL)
 * @return string Substituted text
 *
 * @ignore
 */
function _img_tag_fixup_raw(array $matches) : string
{
    /*$referer = post_param_string('http_referer', $_SERVER['HTTP_REFERER']);*/ // CKEditor allows us to specify the base, so we know get_base_url() is right
    $caller_url = /*looks_like_url($referer) ? preg_replace('#/[^/]*$#', '', $referer) : */get_base_url();

    $matches[2] = html_entity_decode($matches[2], ENT_QUOTES);

    // URLs containing these Tempcode symbols are left untouched; anything else is qualified against the base URL
    if ((strpos($matches[2], '{$FIND_SCRIPT') === false) && (strpos($matches[2], '{$IMG') === false)) {
        $new_url = qualify_url($matches[2], $caller_url);
    } else {
        $new_url = $matches[2];
    }

    // NOTE(review): $new_url is computed but the returned string is empty in this copy of the file.
    // It looks like the markup that was built here has been lost - confirm against the upstream source.
    $ret = '';
    return $ret;
}

/**
 * Used by semihtml_to_comcode to fix tag links. preg_replace_callback callback.
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _a_tag_link_fixup(array $matches) : string
{
    // Use the directory part of the posted/HTTP referer as the caller URL when it looks like a URL, else the base URL
    $referer = post_param_string('http_referer', $_SERVER['HTTP_REFERER']);
    $caller_url = looks_like_url($referer) ? preg_replace('#/[^/]*$#', '', $referer) : get_base_url();

    // NOTE(review): $caller_url is computed but the returned string is empty in this copy of the file.
    // It looks like the markup that was built here has been lost - confirm against the upstream source.
    $ret = '';
    return $ret;
}

/**
 * Used by semihtml_to_comcode to fix CSS colours away from RGB notation. preg_replace_callback callback.
 *
 * Each channel is clamped into the 0-255 range before being written as two hex digits, so that an
 * out-of-range value (e.g. 300, or a negative number) cannot produce a malformed colour code.
 *
 * @param array $matches Array of matches ([1] is the text before the colour, [2]/[3]/[4] are the red/green/blue values, [5] is the text after)
 * @return string Substituted text
 *
 * @ignore
 */
function _css_color_fixup(array $matches) : string
{
    $hex = '';
    foreach ([2, 3, 4] as $index) {
        $channel = max(0, min(255, intval(trim($matches[$index]))));
        $hex .= str_pad(dechex($channel), 2, '0', STR_PAD_LEFT);
    }

    return $matches[1] . '#' . $hex . $matches[5];
}

/**
 * Used by semihtml_to_comcode to make it so inline CSS with quotes uses single quotes. preg_replace_callback callback.
 *
 * @param array $matches Array of matches (only the whole match, [0], is used)
 * @return string Substituted text
 *
 * @ignore
 */
function _css_quot_fixup(array $matches) : string
{
    return str_replace('"', '\'', $matches[0]);
}

/**
 * Apply temporary ad hoc-escaping to a CDATA area (we'll reverse convert later). preg_replace_callback callback.
*
 * @param array $matches Array of matches ([1] and [3] are passed through unchanged, [2] is the text to process)
 * @return string Substituted text
 *
 * @ignore
 */
function _cdata_protect(array $matches) : string
{
    $new = $matches[2];
    // We use a closing tag, as we can't just type these in the HTML normally (even in CDATA) - so they are safe unused strings
    // NOTE(review): every replacement string below is empty in this copy of the file, so the characters are
    // deleted rather than swapped for a reversible marker as the comment above describes - confirm against upstream.
    $new = str_replace(' ', '', $new);
    $new = str_replace("\t", '', $new);
    $new = str_replace("\n", '', $new);
    $new = str_replace("\r", '', $new);
    $new = str_replace('&', '', $new);
    return $matches[1] . $new . $matches[3];
}

/**
 * Apply temporary ad hoc-escaping to a code tags (we'll reverse convert later). preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] and [3] are passed through unchanged, [2] has its angle brackets replaced by placeholders)
 * @return string Substituted text
 *
 * @ignore
 */
function _codetag_protect(array $matches) : string
{
    $new = $matches[2];
    $new = str_replace('<', '___lt___', $new);
    $new = str_replace('>', '___gt___', $new);
    return $matches[1] . $new . $matches[3];
}

/**
 * Remove temporary ad hoc-escaping to a code tags. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] and [3] are passed through unchanged, [2] has its placeholders turned back into angle brackets)
 * @return string Substituted text
 *
 * @ignore
 */
function _codetag_unprotect(array $matches) : string
{
    $new = $matches[2];
    $new = str_replace('___lt___', '<', $new);
    $new = str_replace('___gt___', '>', $new);
    return $matches[1] . $new . $matches[3];
}

/**
 * Reorder XHTML attributes alphabetically, so our regexp's match better. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] is the tag name, [2] is the attribute text)
 * @return string Substituted text
 *
 * @ignore
 */
function _reorder_xhtml_attributes(array $matches) : string
{
    $middle = trim($matches[2]);

    // Detect a self-closing tag and cut the trailing "/" (or " /") off the attribute text
    $short = (substr($middle, -1) == '/');
    if ($short) {
        if (substr($middle, -2) != ' /') {
            $middle = substr($middle, 0, strlen($middle) - 1);
        } else {
            $middle = substr($middle, 0, strlen($middle) - 2);
        }
    }

    // Split on each ' name="' opener, keeping the captured 'name=' parts; the leading space guarantees the first attribute is matched too
    $bits = array_map('trim', preg_split('#\s(\w+=)\s*"#', ' ' . $middle, -1, PREG_SPLIT_DELIM_CAPTURE));
    array_shift($bits); // Discard whatever came before the first attribute

    // Even positions now hold 'name=', odd positions hold the value text that followed; glue each pair back together
    $bits2 = [];
    $cnt = count($bits);
    for ($i = 0; $i < $cnt; $i++) {
        if ($i % 2 == 0) {
            $bits2[] = $bits[$i];
        } else {
            $bits2[intval($i / 2)] .= '"' . $bits[$i];
        }
    }

    sort($bits2);
    $middle = implode(' ', $bits2);
    return '<' . $matches[1] . ' ' . $middle . ($short ? ' /' : '') . '>';
}

/**
 * Reorder style properties alphabetically, so our regexp's match better. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] and [3] are passed through unchanged, [2] is the semicolon-separated property list)
 * @return string Substituted text
 *
 * @ignore
 */
function _reorder_css_properties(array $matches) : string
{
    $middle = $matches[2];
    $bits = array_map('trim', explode(';', $middle));
    sort($bits);

    // Rebuild the list, skipping empty entries and separating the rest with '; '
    $middle = '';
    foreach ($bits as $bit) {
        if (trim($bit) == '') {
            continue;
        }
        if ($middle != '') {
            $middle .= '; ';
        }
        $middle .= trim($bit);
    }
    return $matches[1] . $middle . $matches[3];
}

/**
 * Convert Semi-HTML into Comcode. Cleanup where possible. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([2] is converted; [1] and [3] are re-attached around it unless the result is semihtml-wrapped)
 * @return string Substituted text
 *
 * @ignore
 */
function _semihtml_to_comcode_wrap(array $matches) : string
{
    $middle = semihtml_to_comcode($matches[2]);
    if (substr($middle, 0, 10) == '[semihtml]') {
        // Drop the first 10 characters ('[semihtml]') and the last 11 (presumably the matching '[/semihtml]')
        return substr($middle, 10, strlen($middle) - 21);
    }
    return $matches[1] . $middle . $matches[3];
}

/**
 * Extract underlying Comcode from an editor Comcode-management button. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] is entity-decoded and returned)
 * @return string Substituted text
 *
 * @ignore
 */
function _debuttonise(array $matches) : string
{
    return html_entity_decode($matches[1], ENT_QUOTES);
}

/**
 * Extract underlying Comcode from an editor XML tag. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] is the tag name, [2] is the optional attribute text)
 * @return string Substituted text
 *
 * @ignore
 */
function _detagonise(array $matches) : string
{
    $tag = $matches[1];
    // Backslash-escape the double quotes in the attribute text (if any was captured), then decode HTML entities
    $attributes = html_entity_decode(str_replace('"', '\"', isset($matches[2]) ? $matches[2] : ''), ENT_QUOTES);
    $attributes = preg_replace('# id="[^"]*"#', '', $attributes); // IDs aren't a real Comcode attribute
    return '[' . $tag . $attributes . ']';
}

/**
 * Extract underlying Tempcode directive from an editor XML tag. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] is the attribute text, [2] is the tag contents)
 * @return string Substituted text
 *
 * @ignore
 */
function _dedirectiveise(array $matches) : string
{
    // Collect the name="value" pairs of the tag into a map
    $attributes_arr = [];
    $matches_attributes = [];
    $num_matches_attributes = preg_match_all('#\s+([\w\-]+)\s*=\s*"([^"]*)"#', $matches[1], $matches_attributes);
    for ($i = 0; $i < $num_matches_attributes; $i++) {
        $attributes_arr[$matches_attributes[1][$i]] = $matches_attributes[2][$i];
    }

    // The @ hides the notice raised when there is no 'params' attribute at all
    if (@cms_empty_safe($attributes_arr['params'])) {
        return ''; // Should not happen
    }

    $directive_opener = html_entity_decode($attributes_arr['params'], ENT_QUOTES);
    // NOTE(review): the search string below is a literal line break in this copy of the file - possibly
    // a line-break tag that was lost; confirm against upstream.
    $directive_middle = html_entity_decode(str_replace('
', '', $matches[2]), ENT_QUOTES);
    $directive_closer = '{+END}';
    $directive = $directive_opener . $directive_middle . $directive_closer;

    return $directive;
}

/**
 * Extract underlying Tempcode symbol from an editor XML tag. preg_replace_callback callback.
 *
 * @param array $matches Array of matches ([1] is entity-decoded and returned)
 * @return string Substituted text
 *
 * @ignore
 */
function _desymbolise(array $matches) : string
{
    return html_entity_decode($matches[1], ENT_QUOTES);
}

/**
 * Cleanup HTML coming out of the WYSIWYG editor, converting represented Comcode back to proper Comcode.
 *
 * @param string $semihtml Semi-HTML (passed by reference and modified in place)
 */
function remove_wysiwyg_comcode_markup(string &$semihtml)
{
    // Our invisible characters isolating the cms Keep markers from style run-off
    $semihtml = str_replace('​', '', $semihtml);
    $array_html_preg_replace = [];
    if (get_charset() == 'utf-8') {
        $semihtml = str_replace("\u{200B}", '', $semihtml);
    }

    // NOTE(review): from here to the end of the function the HTML-like text inside the string literals has been
    // lost from this copy of the file, which leaves unbalanced quotes and braces. The statements are kept exactly
    // as found; restore them from the upstream source before relying on this function.
    if (stripos($semihtml, ']*class="cms-keep-ui-controlled" [^>]*title="([^"]*)" [^>]*type="button" [^>]*value="[^"]*"[^>]*/?' . '>#siU', '_debuttonise', $semihtml);
        } while ($semihtml != $semihtml_before);
    }

    // Our Comcode tag start/end markers
    $array_html_preg_replace[] = ['#^]*class="(cms-keep|cms-keep-block)"[^>]*>(.*)$#siU', "\${2}"];
    $semihtml = array_html_preg_replace('kbd', $array_html_preg_replace, $semihtml);

    // Our wrapper tags
    if (stripos($semihtml, ']*)?' . '>#', '_detagonise', $semihtml);
            $semihtml = preg_replace('##', '[/' . $tag . ']', $semihtml);
        }
    }

    if (stripos($semihtml, ']*)' . '>(.*)#Us', '_dedirectiveise', $semihtml);
    }

    // Our symbols as meta tags
    $semihtml = preg_replace_callback('##', '_desymbolise', $semihtml);
}

/**
 * Convert HTML headers to Comcode titles.
*
 * @param string $semihtml Semi-HTML
 * @param boolean $forceful Whether to force conversion on all header tags, even if they don't match Comcode-style/simple headers exactly
 * @return string Semi-HTML, with headers converted to titles
 */
function convert_html_headers_to_titles(string $semihtml, bool $forceful) : string
{
    // NOTE(review): the header markup inside the pattern strings of this function has been lost from this copy of
    // the file. The first statements below are left with unbalanced quotes and line breaks inside their literals,
    // so they are reproduced exactly as found (continuation lines must stay unindented). Restore from upstream.
    if (stripos($semihtml, ']*>\s*(\s*
)?\s*(.*?)\s*\s*(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        $array_html_preg_replace[] = ['#\s*

]*>\s*(\s*)?\s*(.*?)\s*\s*

(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        $array_html_preg_replace[] = ['#\s*

\s*(\s*)?\s*(.*?)\s*\s*

(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        $array_html_preg_replace[] = ['#\s*

]*>(\s*)?\s*(.*?)\s*

(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        $array_html_preg_replace[] = ['#\s*

]*>(\s*)?\s*(.*?)\s*

(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        $array_html_preg_replace[] = ['#\s*

]*>(\s*)?\s*(.*?)\s*

(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        $array_html_preg_replace[] = ['#\s*

(\s*)?\s*(.*?)\s*

(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        // The extra catch-all level-1 rule is only added in forceful mode
        if ($forceful) {
            $array_html_preg_replace[] = ['#\s*]*>(\s*)?\s*(.*?)\s*(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];
        }
        $array_html_preg_replace[] = ['#\s*]+>\s*(.*?)\s*(\s*)?\s*#si', '[title="1"]${2}[/title]' . "\n"];

        // Levels 2 to 6 get the same kind of rules, with the level written into the title tag
        for ($i = 2; $i <= 6; $i++) {
            $array_html_preg_replace[] = ['#\s*\s*(\s*)?\s*(.*?)\s*\s*(\s*)?\s*#si', '[title="' . strval($i) . '"]${2}[/title]' . "\n"];
            $array_html_preg_replace[] = ['#\s*(\s*)?\s*(.*?)\s*(\s*)?\s*#si', '[title="' . strval($i) . '"]${2}[/title]' . "\n"];
            if ($forceful) {
                $array_html_preg_replace[] = ['#\s*]*>(\s*)?\s*(.*?)\s*(\s*)?\s*#si', '[title="' . strval($i) . '"]${2}[/title]' . "\n"];
            }
        }

        // Apply every collected pattern/replacement pair in the order it was added
        foreach ($array_html_preg_replace as $bits) {
            list($regexp, $replace) = $bits;
            $semihtml = preg_replace($regexp, $replace, $semihtml);
        }
    }

    return $semihtml;
}

/**
 * Convert HTML-filled Comcode to cleaner Comcode.
 *
 * Only Comcode that is entirely one [semihtml] or one [html] block, with no further [semihtml]/[html]
 * opening tag inside it, is converted; anything else is returned unchanged.
 *
 * @param LONG_TEXT $comcode The messy Comcode
 * @return LONG_TEXT The cleaned Comcode
 */
function force_clean_comcode(string $comcode) : string
{
    $matches = [];

    // Whole text is a single [semihtml] block
    if (preg_match('#^\[semihtml\](.*)\[/semihtml\]$#s', $comcode, $matches) != 0) {
        if ((strpos($matches[1], '[semihtml]') === false) && (strpos($matches[1], '[html]') === false)) {
            return semihtml_to_comcode($matches[1], true);
        }
    }

    // Whole text is a single [html] block
    if (preg_match('#^\[html\](.*)\[/html\]$#s', $comcode, $matches) != 0) {
        if ((strpos($matches[1], '[semihtml]') === false) && (strpos($matches[1], '[html]') === false)) {
            return html_to_comcode($matches[1], true);
        }
    }

    return $comcode;
}

/**
 * Strip down the contents of the media_set tag for easier WYSIWYG-editing.
*
 * @param LONG_TEXT $semihtml The Semi-HTML to be converted
 * @return LONG_TEXT The equivalent Comcode
 */
function wysiwygify_media_set(string $semihtml) : string
{
    // Media set contents doesn't need any divs, which get left from native attachments
    $offset = 0;
    while (true) {
        $open_pos = strpos($semihtml, '[media_set', $offset);
        $close_pos = strpos($semihtml, '[/media_set]', $offset);

        // Stop as soon as there is no opener followed by a closer after the current offset
        if (($open_pos === false) || ($close_pos === false) || ($close_pos <= $open_pos)) {
            break;
        }

        // Everything from the opening tag up to (not including) the closing tag
        $original = substr($semihtml, $open_pos, $close_pos - $open_pos);

        // NOTE(review): both patterns are reproduced exactly as found; they look truncated in this copy of the file - confirm against upstream
        $stripped = preg_replace('#]*)?' . '>#', '', $original);
        $stripped = preg_replace('#]*)? ' . '>.*#Us', '', $stripped);

        $semihtml = substr($semihtml, 0, $open_pos) . $stripped . substr($semihtml, $close_pos);

        // Continue one character past where the closing tag now starts, allowing for the text that was removed
        $offset = $close_pos - (strlen($original) - strlen($stripped)) + 1;
    }

    return $semihtml;
}

/**
 * Convert Semi-HTML into Comcode. Cleanup where possible.
 *
 * @param LONG_TEXT $semihtml The Semi-HTML to be converted
 * @param boolean $force Whether to force full conversion regardless of settings
 * @param boolean $quick Whether to trust the HTML is valid rather than cleaning it up (e.g.
for software-generated HTML) * @param ?MEMBER $member_id Member to do as (null: current member) * @return LONG_TEXT The equivalent Comcode */ function semihtml_to_comcode(string $semihtml, bool $force = false, bool $quick = false, ?int $member_id = null) : string { if ($member_id === null) { $member_id = get_member(); } // Optimisations $matches = []; if (preg_match('#^\[semihtml\]([^\[\]<>]*)\[\/semihtml\]$#', $semihtml, $matches) != 0) { // Already have semihtml tags return $matches[1]; } if (preg_match('#^([^\[\]<>\{\}&]*)$#', $semihtml) != 0) { // Plain-text return $semihtml; } $semihtml = trim($semihtml); require_code('templates'); // Optimisation, not long enough to clean up if (cms_trim($semihtml, strlen($semihtml) < 30) === '') { return ''; } $decoded = html_entity_decode($semihtml, ENT_QUOTES); if ((strpos($decoded, '<') === false) && (strpos($decoded, '[') === false) && (strpos($decoded, '{') === false) && (strpos($decoded, '&') === false)) { return $decoded; } require_code('crypt'); cms_ini_set('pcre.backtrack_limit', '10000000'); // Special clean up we always do regardless... // Software markers remove_wysiwyg_comcode_markup($semihtml); // Links should be kept from being base URL-specific $semihtml = reinstate_static_tempcode($semihtml); // Empty comments $semihtml = str_replace('#sU', '', $semihtml); $semihtml = preg_replace('##sU', '', $semihtml); $semihtml = str_replace(' $easy_replace) $easy_replace = true; $on_closer = true; $pos = 0; do { $pos_opener_1 = strpos($semihtml, '<' . $element . '>', $pos); $pos_opener_2 = strpos($semihtml, '<' . $element . ' ', $pos); $pos_opener = (($pos_opener_1 !== false) && (($pos_opener_2 === false) || ($pos_opener_1 < $pos_opener_2))) ? 
$pos_opener_1 : $pos_opener_2; if ($pos_opener === false) { break; } if ($pos == 0) { // First iteration is just to find first opener $pos = $pos_opener + 1; continue; } $pos_closer_1 = strpos($semihtml, '', $pos); $pos_closer_2 = strpos($semihtml, ']#', $semihtml, $matches, PREG_OFFSET_CAPTURE); $tags = []; for ($i = 0; $i < $count; $i++) { $is_closer = ($matches[1][$i][0] == '/'); $tags[] = [ $is_closer ? -1 : 1, // Balancer $matches[0][$i][1], // Offset strlen($matches[0][$i][0]), // Length ]; } $num_tags = count($tags); foreach ($array as $index => $temp) { list($pattern, $replacement) = $temp; foreach ($tags as $i => $tag) { if ($tag[0] == 1) { $start = $tag[1]; // Find the matching end position $end = null; $balance = 0; for ($j = $i; $j < $num_tags; $j++) { $balance += $tags[$j][0]; if ($balance == 0) { $end = $tags[$j][1]; $length = $tags[$j][2]; break; } } if ($end === null) { break; } // Process segment $segment = substr($semihtml, $start, $end + $length - $start); $before = substr($semihtml, 0, $start); $after = substr($semihtml, $end + $length); $subbed = cms_preg_replace_safe($pattern . 'A', $replacement, $segment); $semihtml = $before . $subbed . $after; if ($semihtml != $old_semihtml) { break 2; // We need to start again now as the offsets have all changed } } } unset($array[$index]); // If we are going to recurse, we don't want extra work -- let's record that this one completed } } while (cms_preg_replace_safe('#(\s|]*>| )#i', '', $semihtml) != cms_preg_replace_safe('#(\s|]*>| )#i', '', $old_semihtml) && cms_preg_safety_guard_ok($safety_guard)); return $semihtml; }