log = array(); // Dindent does not indent ', $input); } } // Removing double whitespaces to make the source code easier to read. // With exception of
// <pre> / CSS white-space changing the default behaviour, double whitespace is meaningless in HTML output.
// This reason alone is sufficient not to use Dindent in production.
$input = str_replace("\t", '', $input);
$input = preg_replace('/\s{2,}/', ' ', $input);
// Remove inline tags and replace them with text entities.
if (preg_match_all('/<(b|i|abbr|em|strong|a|span)[^>]*>(?:[^<]*)<\/\1>/', $input, $matches)) {
$this->temporary_replacements_inline = $matches[0];
foreach ($matches[0] as $i => $match) {
$input = str_replace($match, 'ᐃ' . ($i + 1) . 'ᐃ', $input);
}
}
$subject = $input;
$output = '';
$next_line_indentation_level = 0;
do {
$indentation_level = $next_line_indentation_level;
$patterns = array(
// block tag
'/^(<([a-z]+)(?:[^>]*)>(?:[^<]*)<\/(?:\2)>)/' => static::MATCH_INDENT_NO,
// DOCTYPE
'/^]*)>/' => static::MATCH_INDENT_NO,
// tag with implied closing
'/^<(input|link|meta|base|br|img|hr)([^>]*)>/' => static::MATCH_INDENT_NO,
// opening tag
'/^<[^\/]([^>]*)>/' => static::MATCH_INDENT_INCREASE,
// closing tag
'/^<\/([^>]*)>/' => static::MATCH_INDENT_DECREASE,
// self-closing tag
'/^<(.+)\/>/' => static::MATCH_INDENT_DECREASE,
// whitespace
'/^(\s+)/' => static::MATCH_DISCARD,
// text node
'/([^<]+)/' => static::MATCH_INDENT_NO
);
$rules = array('NO', 'DECREASE', 'INCREASE', 'DISCARD');
foreach ($patterns as $pattern => $rule) {
if ($match = preg_match($pattern, $subject, $matches)) {
$this->log[] = array(
'rule' => $rules[$rule],
'pattern' => $pattern,
'subject' => $subject,
'match' => $matches[0]
);
$subject = mb_substr($subject, mb_strlen($matches[0]));
if ($rule === static::MATCH_DISCARD) {
break;
}
if ($rule === static::MATCH_INDENT_NO) {
} else if ($rule === static::MATCH_INDENT_DECREASE) {
$next_line_indentation_level--;
$indentation_level--;
} else {
$next_line_indentation_level++;
}
if ($indentation_level < 0) {
$indentation_level = 0;
}
#$output .= str_repeat($this->indent, $indentation_level) . 'A:' . $indentation_level . "\n";
$output .= str_repeat($this->indent, $indentation_level) . $matches[0] . "\n";
break;
}
}
} while ($match);
$interpreted_input = '';
foreach ($this->log as $e) {
$interpreted_input .= $e['match'];
}
if ($interpreted_input !== $input) {
throw new \RuntimeException('Did not reproduce the exact input.');
}
$output = preg_replace('/(<(\w+)[^>]*>)\s*(<\/\2>)/', '\\1\\3', $output);
foreach ($this->temporary_replacements_script as $i => $original) {
$output = str_replace('', $original, $output);
}
foreach ($this->temporary_replacements_inline as $i => $original) {
$output = str_replace('ᐃ' . ($i + 1) . 'ᐃ', $original, $output);
}
return trim($output);
}
/**
* Debugging utility. Get log for the last indent operation.
*
* @return array
*/
public function getLog () {
    // Expose the entries accumulated in $this->log; PHP arrays are
    // assigned by value, so the caller gets its own copy of the list.
    $entries = $this->log;

    return $entries;
}
}