to \[img\]. preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _img_tag_fixup($matches)
{
    // Join the attributes found before ($matches[1]) and after ($matches[3]) the URL into one string with a leading space
    $params = trim($matches[1]);
    if ($params != '') {
        $params .= ' ';
    }
    $params .= trim($matches[3]);
    if ($params != '') {
        $params = ' ' . $params;
    }

    // Rename/translate the attributes we keep
    $params = str_replace('alt="', 'param="', $params);
    $params = preg_replace('#style="[^"]*vertical-align: ([^;"]+)(;[^"]*)?;?"#i', 'align="${1}"', $params);
    $params = str_replace(' class="c_img"', '', $params);

    // Drop every attribute that is not carried over into the [img] tag
    $extraneous = array('border', 'height', 'hspace', 'ismap', 'longdesc', 'usemap', 'vspace', 'width', 'id', 'class', 'title', 'style', 'lang');
    foreach ($extraneous as $ex) {
        $params = preg_replace('# ' . $ex . '="[^"]*"#', '', $params);
    }
    $params = str_replace(' ismap', '', $params); // Value-less form of the attribute

    /*$referer = post_param_string('http_referer', cms_srv('HTTP_REFERER'));*/ // CKEditor allows us to specify the base, so we know get_base_url() is right
    $caller_url = /*looks_like_url($referer) ? preg_replace('#/[^/]*$#', '', $referer) : */get_base_url();

    // URLs that contain Tempcode symbols are left exactly as they are; anything else is qualified against the base URL
    if ((strpos($matches[2], '{$FIND_SCRIPT') === false) && (strpos($matches[2], '{$IMG') === false)) {
        $new_url = qualify_url($matches[2], $caller_url);
    } else {
        $new_url = $matches[2];
    }

    return '[img' . rtrim($params) . ']' . $new_url . '[/img]';
}

/**
 * Used by semihtml_to_comcode to fix URLs to be absolute. preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _img_tag_fixup_raw($matches)
{
    /*$referer = post_param_string('http_referer', cms_srv('HTTP_REFERER'));*/ // CKEditor allows us to specify the base, so we know get_base_url() is right
    $caller_url = /*looks_like_url($referer) ? preg_replace('#/[^/]*$#', '', $referer) : */get_base_url();

    $matches[2] = html_entity_decode($matches[2], ENT_QUOTES, get_charset());

    // URLs that contain Tempcode symbols are left exactly as they are; anything else is qualified against the base URL
    if ((strpos($matches[2], '{$FIND_SCRIPT') === false) && (strpos($matches[2], '{$IMG') === false)) {
        $new_url = qualify_url($matches[2], $caller_url);
    } else {
        $new_url = $matches[2];
    }

    // NOTE(review): $new_url is computed but never used and an empty string is returned, so as written the
    // matched text is simply deleted. The returned literal looks like it has lost its contents in this copy
    // of the file -- TODO confirm against the canonical source before relying on this function.
    $ret = '';
    return $ret;
}

/**
 * Used by semihtml_to_comcode to fix tag links. preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _a_tag_link_fixup($matches)
{
    // Use the directory of the referring page as the base when it looks like a URL, otherwise the site base URL
    $referer = post_param_string('http_referer', cms_srv('HTTP_REFERER'));
    $caller_url = looks_like_url($referer) ? preg_replace('#/[^/]*$#', '', $referer) : get_base_url();

    // NOTE(review): $caller_url is computed but never used and an empty string is returned. The returned
    // literal looks like it has lost its contents in this copy of the file -- TODO confirm against the
    // canonical source before relying on this function.
    $ret = '';
    return $ret;
}

/**
 * Used by semihtml_to_comcode to fix CSS colours away from RGB notation. preg_replace_callback callback
 *
 * Matches 2, 3 and 4 are the red, green and blue components; matches 1 and 5 are the text either side,
 * which is passed through unchanged. Each component is clamped to 0-255 so that the result is always
 * exactly six hex digits.
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _css_color_fixup($matches)
{
    $hex = '';
    foreach (array(2, 3, 4) as $group) {
        // Out-of-range components (e.g. 300) would otherwise yield three hex digits and a malformed colour
        $component = min(255, max(0, intval(trim($matches[$group]))));
        $hex .= str_pad(dechex($component), 2, '0', STR_PAD_LEFT);
    }

    return $matches[1] . '#' . $hex . $matches[5];
}

/**
 * Used by semihtml_to_comcode to make it so inline CSS with quotes uses single quotes. preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _css_quot_fixup($matches)
{
    return str_replace('"', '\'', $matches[0]);
}

/**
 * Apply temporary ad hoc-escaping to a CDATA area (we'll reverse convert later).
preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _cdata_protect($matches)
{
    // NOTE(review): every replacement string is empty, so as written this deletes spaces, tabs, newlines,
    // carriage returns and ampersands from the protected area. The previous comment here described
    // closing-tag placeholder strings, so the placeholders may have been lost from this copy -- TODO confirm.
    $protected = str_replace(array(' ', "\t", "\n", "\r", '&'), '', $matches[2]);

    return $matches[1] . $protected . $matches[3];
}

/**
 * Temporarily escape angle brackets inside a code tag (reversed later by _codetag_unprotect).
 * preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _codetag_protect($matches)
{
    $escaped = str_replace(array('<', '>'), array('___lt___', '___gt___'), $matches[2]);

    return $matches[1] . $escaped . $matches[3];
}

/**
 * Reverse the temporary escaping applied to a code tag by _codetag_protect.
 * preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _codetag_unprotect($matches)
{
    $restored = str_replace(array('___lt___', '___gt___'), array('<', '>'), $matches[2]);

    return $matches[1] . $restored . $matches[3];
}

/**
 * Put the attributes of an XHTML tag into alphabetical order, so that later regexps match more reliably.
 * preg_replace_callback callback
 *
 * @param array $matches Array of matches (1 = tag name, 2 = attribute text)
 * @return string Substituted text
 *
 * @ignore
 */
function _reorder_xhtml_attributes($matches)
{
    $attrs_text = trim($matches[2]);

    // Remember whether the tag was self-closing, and take the trailing slash (with its space, if any) off
    $self_closing = (substr($attrs_text, -1) == '/');
    if ($self_closing) {
        $strip = (substr($attrs_text, -2) == ' /') ? 2 : 1;
        $attrs_text = substr($attrs_text, 0, strlen($attrs_text) - $strip);
    }

    // Split into alternating "name=" / "value" pieces; the leading piece before the first attribute is discarded
    $pieces = array_map('trim', preg_split('#\s(\w+=)\s*"#', ' ' . $attrs_text, -1, PREG_SPLIT_DELIM_CAPTURE));
    array_shift($pieces);

    // Glue each name back on to its value
    $attrs = array();
    foreach ($pieces as $idx => $piece) {
        if ($idx % 2 == 0) {
            $attrs[] = $piece;
        } else {
            $attrs[intval($idx / 2)] .= '"' . $piece;
        }
    }
    sort($attrs);

    return '<' . $matches[1] . ' ' . implode(' ', $attrs) . ($self_closing ? ' /' : '') . '>';
}

/**
 * Put the declarations of an inline style into alphabetical order, so that later regexps match more reliably.
 * preg_replace_callback callback
 *
 * @param array $matches Array of matches (2 = the declarations, 1 and 3 = surrounding text)
 * @return string Substituted text
 *
 * @ignore
 */
function _reorder_css_properties($matches)
{
    $props = array_map('trim', explode(';', $matches[2]));
    sort($props);

    // Keep only the non-blank declarations
    $declarations = array();
    foreach ($props as $prop) {
        $prop = trim($prop);
        if ($prop != '') {
            $declarations[] = $prop;
        }
    }

    return $matches[1] . implode('; ', $declarations) . $matches[3];
}

/**
 * Run the matched middle section through semihtml_to_comcode. preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _semihtml_to_comcode_wrap($matches)
{
    $converted = semihtml_to_comcode($matches[2]);

    if (substr($converted, 0, 10) != '[semihtml]') {
        return $matches[1] . $converted . $matches[3];
    }

    // Result starts with a [semihtml] opener: return it with 10 characters taken off the front and 11 off the end
    return substr($converted, 10, strlen($converted) - 21);
}

/**
 * Extract underlying Comcode from an editor Comcode-management button. preg_replace_callback callback
 *
 * @param array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _debuttonise($matches)
{
    $encoded = $matches[1];

    return html_entity_decode($encoded, ENT_QUOTES, get_charset());
}

/**
 * Extract underlying Comcode from an editor XML tag. preg_replace_callback callback
 *
 * @param array $matches Array of matches (1 = tag name, 2 = attribute text, if any)
 * @return string Substituted text
 *
 * @ignore
 */
function _detagonise($matches)
{
    $raw_attributes = isset($matches[2]) ? $matches[2] : '';

    // Double quotes are backslash-escaped first, then entities are decoded
    // NOTE(review): as written the search string is a literal double quote -- TODO confirm that is intended
    $decoded = html_entity_decode(str_replace('"', '\"', $raw_attributes), ENT_QUOTES, get_charset());

    // IDs aren't a real Comcode attribute
    $without_id = preg_replace('# id="[^"]*"#', '', $decoded);

    return '[' . $matches[1] . $without_id . ']';
}

/**
 * Extract underlying Tempcode directive from an editor XML tag. preg_replace_callback callback
 *
 * @param array $matches Array of matches (1 = attribute text, if any)
 * @return string Substituted text
 *
 * @ignore
 */
function _dedirectiveise($matches)
{
    $xml = isset($matches[1]) ? $matches[1] : '';

    // Parse name="value" pairs into a map (a repeated name keeps its last value)
    $found = array();
    $total = preg_match_all('#\s+([\w\-]+)\s*=\s*"([^"]*)"#', $xml, $found);
    $map = array();
    $idx = 0;
    while ($idx < $total) {
        $map[$found[1][$idx]] = $found[2][$idx];
        $idx++;
    }

    // Only the 'params' attribute contributes to the result
    if (empty($map['params'])) {
        return '';
    }

    return html_entity_decode($map['params'], ENT_QUOTES, get_charset());
}

/** * Cleanup HTML coming out of the WYSIWYG editor, converting represented Comcode back to proper Comcode * * @param string $semihtml Semi-HTML */ function remove_wysiwyg_comcode_markup(&$semihtml) { // Our invisible characters isolating the cms Keep markers from style run-off $semihtml = str_replace('​', '', $semihtml); $array_html_preg_replace = array(); if (get_charset() == 'utf-8') { $semihtml = str_replace(chr(hexdec('e2')) . chr(hexdec('80')) . chr(hexdec('8b')), '', $semihtml); } if (stripos($semihtml, ']*class="cms_keep_ui_controlled" [^>]*title="([^"]*)" [^>]*type="button" [^>]*value="[^"]*"[^>]*/?' . '>#siU', '_debuttonise', $semihtml); } while ($semihtml != $semihtml_before); } // Our Comcode tag start/end markers $array_html_preg_replace[] = array('#^]*class="(cms_keep|cms_keep_block)"[^>]*>(.*)$#siU', "\${2}"); $semihtml = array_html_preg_replace('kbd', $array_html_preg_replace, $semihtml); // Our wrapper tags if (stripos($semihtml, ']*)?' . '>#', '_detagonise', $semihtml); $semihtml = preg_replace('##', '[/' . $tag . 
']', $semihtml); } } if (stripos($semihtml, ']*)' . '>\s*#', '_dedirectiveise', $semihtml); $semihtml = preg_replace('##', '{+END}', $semihtml); } }
/**
 * Convert HTML headers to Comcode titles
 *
 * Level 1 is handled first, producing [title="1"]...[/title]; a loop then repeats the process for
 * levels 2 to 4. Each level's pattern/replacement pairs are applied via array_html_preg_replace.
 * When $forceful is set, one extra, looser pattern is added per level.
 *
 * NOTE(review): the regular-expression literals in this function look truncated in this copy of the
 * file (the tag portions of the patterns are missing, quotes are unbalanced, and several patterns are
 * split across lines). Restore them from the canonical source before changing anything here.
 *
 * @param string $semihtml Semi-HTML
 * @param boolean $forceful Whether to force conversion on all header tags, even if they don't match Comcode-style/simple headers exactly
 * @return string Semi-HTML, with headers converted to titles
 */
function convert_html_headers_to_titles($semihtml, $forceful) { if (stripos($semihtml, ']*>\s*(.*)\s*\s*$#siU', '[title="1"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*

]*>\s*(.*)\s*

\s*$#siU', '[title="1"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*

\s*(.*)\s*

\s*$#siU', '[title="1"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*

]*>(.*)

\s*$#siU', '[title="1"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*

]*>(.*)

\s*$#siU', '[title="1"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*

]*>(.*)

\s*$#siU', '[title="1"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*

(.*)

\s*$#siU', '[title="1"]${1}[/title]' . "\n"); if ($forceful) { $array_html_preg_replace[] = array('#^\s*]*>(.*)\s*$#siU', '[title="1"]${1}[/title]' . "\n"); } $semihtml = array_html_preg_replace('h1', $array_html_preg_replace, $semihtml); $semihtml = preg_replace('#^\s*]+>(.*)\s*#siU', '[title="1"]${1}[/title]' . "\n", $semihtml); for ($i = 2; $i <= 4; $i++) { $array_html_preg_replace = array(); $array_html_preg_replace[] = array('#^\s*(.*)\s*$#siU', '[title="' . strval($i) . '"]${1}[/title]' . "\n"); $array_html_preg_replace[] = array('#^\s*(.*)\s*$#siU', '[title="' . strval($i) . '"]${1}[/title]' . "\n"); if ($forceful) { $array_html_preg_replace[] = array('#^\s*]*>(.*)\s*$#siU', '[title="' . strval($i) . '"]${1}[/title]' . "\n"); } $semihtml = array_html_preg_replace('h' . strval($i) . '', $array_html_preg_replace, $semihtml); } } return $semihtml; }
/**
 * Convert HTML-filled Comcode to cleaner Comcode.
 *
 * @param LONG_TEXT $comcode The messy Comcode.
 * @return LONG_TEXT The cleaned Comcode.
*/
function force_clean_comcode($comcode)
{
    $found = array();

    // Entirely wrapped in [semihtml], with no nested [semihtml]/[html] opener inside: convert the inside
    if (preg_match('#^\[semihtml\](.*)\[/semihtml\]$#s', $comcode, $found) != 0) {
        $inner = $found[1];
        $has_nested = (strpos($inner, '[semihtml]') !== false) || (strpos($inner, '[html]') !== false);
        if (!$has_nested) {
            return semihtml_to_comcode($inner, true);
        }
    }

    // Same again for a full [html] wrapper
    if (preg_match('#^\[html\](.*)\[/html\]$#s', $comcode, $found) != 0) {
        $inner = $found[1];
        $has_nested = (strpos($inner, '[semihtml]') !== false) || (strpos($inner, '[html]') !== false);
        if (!$has_nested) {
            return html_to_comcode($inner, true);
        }
    }

    // Anything else is handed back untouched
    return $comcode;
}

/**
 * Strip down the contents of the media_set tag for easier WYSIWYG-editing
 *
 * Works through each [media_set ... [/media_set] span in turn, applies two regexp removals to the
 * span, and splices the result back in.
 *
 * @param LONG_TEXT $semihtml The Semi-HTML to be converted
 * @return LONG_TEXT The equivalent Comcode
 */
function wysiwygify_media_set($semihtml)
{
    // Media set contents doesn't need any divs, which get left from native attachments
    $offset = 0;
    while (true) {
        $open_at = strpos($semihtml, '[media_set', $offset);
        $close_at = strpos($semihtml, '[/media_set]', $offset);
        if ($open_at === false || $close_at === false || $close_at <= $open_at) {
            break;
        }

        $middle_before = substr($semihtml, $open_at, $close_at - $open_at);
        // NOTE(review): both patterns below look truncated in this copy of the file -- TODO confirm against the canonical source
        $middle_after = preg_replace('#]*)?' . '>#', '', $middle_before);
        $middle_after = preg_replace('#]*)? ' . '>.*#Us', '', $middle_after);

        $semihtml = substr($semihtml, 0, $open_at) . $middle_after . substr($semihtml, $close_at);

        // Resume just past where this set's closing tag now sits, allowing for the text that was removed
        $offset = $close_at - (strlen($middle_before) - strlen($middle_after)) + 1;
    }

    return $semihtml;
}

/**
 * Convert Semi-HTML into comcode. Cleanup where possible
 *
 * @param LONG_TEXT $semihtml The Semi-HTML to be converted
 * @param boolean $force Whether to force full conversion regardless of settings
 * @param boolean $quick Whether to trust the HTML is valid rather than cleaning it up (e.g.
for Composr-generated HTML)
 * @return LONG_TEXT The equivalent Comcode
 *
 * Returns early without full processing when the input is a [semihtml] wrapper around text containing
 * no square or angle brackets (the inner text is returned), when it contains no square, angle or curly
 * brackets at all (returned as-is), when it trims down to nothing (empty string returned), or when its
 * entity-decoded form has no '<', '[', '{' or '&' (the decoded text is returned).
 *
 * NOTE(review): from the "Empty comments" step onwards this copy of the file looks damaged: string
 * literals are truncated or unbalanced, and the rest of this function appears to have run together
 * with the body of a following helper (the code using $array, $tags and $old_semihtml) whose own
 * header is not present. Restore from the canonical source before changing anything here.
 */
function semihtml_to_comcode($semihtml, $force = false, $quick = false) { // Optimisations $matches = array(); if (preg_match('#^\[semihtml\]([^\[\]<>]*)\[\/semihtml\]$#', $semihtml, $matches) != 0) { return $matches[1]; } if (preg_match('#^([^\[\]<>\{\}]*)$#', $semihtml) != 0) { return $semihtml; } $semihtml = trim($semihtml); // Optimisation, not long enough to clean up if (cms_trim($semihtml, strlen($semihtml) < 30) === '') { return ''; } $decoded = html_entity_decode($semihtml, ENT_QUOTES, get_charset()); if ((strpos($decoded, '<') === false) && (strpos($decoded, '[') === false) && (strpos($decoded, '{') === false) && (strpos($decoded, '&') === false)) { return $decoded; } require_code('obfuscate'); safe_ini_set('pcre.backtrack_limit', '10000000'); // Special clean up we always do regardless... // Composr markers remove_wysiwyg_comcode_markup($semihtml); // Links should be kept from being base-URL-specific $semihtml = preg_replace('#(<[^<>]*)' . preg_quote(escape_html(get_base_url() . '/'), '#') . '([^<>]*>)#', '$1{$BASE_URL*}/$2', $semihtml); // Empty comments $semihtml = str_replace('#sU', '', $semihtml); $semihtml = preg_replace('##sU', '', $semihtml); $semihtml = str_replace(' $easy_replace) $easy_replace = true; $on_closer = true; $pos = 0; do { $pos_opener_1 = strpos($semihtml, '<' . $element . '>', $pos); $pos_opener_2 = strpos($semihtml, '<' . $element . ' ', $pos); $pos_opener = ($pos_opener_1 !== false && ($pos_opener_2 === false || $pos_opener_1 < $pos_opener_2)) ? $pos_opener_1 : $pos_opener_2; if ($pos_opener === false) { break; } if ($pos == 0) // First iteration is just to find first opener { $pos = $pos_opener + 1; continue; } $pos_closer_1 = strpos($semihtml, '', $pos); $pos_closer_2 = strpos($semihtml, ']#', $semihtml, $matches, PREG_OFFSET_CAPTURE); $tags = array(); for ($i = 0; $i < $count; $i++) { $is_closer = ($matches[1][$i][0] == '/'); $tags[] = array( $is_closer ? 
-1 : 1, // Balancer $matches[0][$i][1], // Offset strlen($matches[0][$i][0]), // Length ); } $num_tags = count($tags); foreach ($array as $index => $temp) { list($pattern, $replacement) = $temp; foreach ($tags as $i => $tag) { if ($tag[0] == 1) { $start = $tag[1]; // Find the matching end position $end = null; $balance = 0; for ($j = $i ; $j < $num_tags; $j++) { $balance += $tags[$j][0]; if ($balance == 0) { $end = $tags[$j][1]; $length = $tags[$j][2]; break; } } if ($end === null) { break; } // Process segment $segment = substr($semihtml, $start, $end + $length - $start); $before = substr($semihtml, 0, $start); $after = substr($semihtml, $end + $length); $subbed = preg_replace($pattern . 'A', $replacement, $segment); $semihtml = $before . $subbed . $after; if ($semihtml != $old_semihtml) { break 2; // We need to start again now as the offsets have all changed } } } unset($array[$index]); // If we are going to recurse, we don't want extra work -- let's record that this one completed } } while (preg_replace('#(\s|]*>| )#i', '', $semihtml) != preg_replace('#(\s|]*>| )#i', '', $old_semihtml)); return $semihtml; }