.
*
* ---------------------------------------------------------------------
*/
namespace Glpi\Console\Database;
use CommonDBTM;
use Glpi\Console\AbstractCommand;
use ITILFollowup;
use Plugin;
use Search;
use Session;
use Ticket;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Prior to GLPI 10.0, some HTML entities were not properly encoded.
 *
 * This CLI tool helps to fix items one by one or in small batches.
 */
final class FixHtmlEncodingCommand extends AbstractCommand
{
/**
 * Error code returned when a specified itemtype does not exist.
 *
 * @var int
 */
public const ERROR_ITEMTYPE_NOT_FOUND = 1;
/**
 * Error code returned when the update of an item failed.
 *
 * @var int
 */
public const ERROR_UPDATE_FAILED = 2;
/**
 * Error code returned when the rollback file could not be created.
 *
 * @var int
 */
public const ERROR_ROLLBACK_FILE_FAILED = 3;
/**
 * Error code returned when the rollback file path is not passed to the command.
 *
 * @var int
 */
public const ERROR_ROLLBACK_FILE_REQUIRED = 4;
/**
 * Items with invalid HTML.
 *
 * Indexed by itemtype, then by item ID; each entry is the list of field
 * names in which invalid HTML was detected for that item.
 *
 * @var array
 */
private array $invalid_items = [];
/**
 * Count of items with invalid HTML that have NOT been fixed.
 *
 * @var int
 */
private int $failed_items_count = 0;
/**
 * Columns which contain rich text, populated by analyzing search options.
 *
 * Indexed by itemtype; each entry is a list of column names.
 *
 * @var array
 */
private array $text_fields = [];
/**
 * Declare the command name, alias, description and its two options.
 *
 * - `--dump=<path>`: file that receives the rollback SQL queries.
 * - `--dump-only`: write the rollback file, then stop without fixing.
 *
 * @return void
 */
protected function configure()
{
    parent::configure();

    // NOTE(review): "roolback" below is a typo, but the literal is a
    // translation msgid; it is kept untouched here so existing locales
    // keep matching.
    $this
        ->setName('glpi:database:fix_html_encoding')
        ->setAliases(['db:fix_html'])
        ->setDescription(__('Fix HTML encoding issues in database.'))
        ->addOption(
            'dump',
            null,
            InputOption::VALUE_REQUIRED,
            __('Path of file where will be stored SQL queries that can be used to rollback changes')
        )
        ->addOption(
            'dump-only',
            null,
            InputOption::VALUE_NONE,
            __('Dump SQL roolback to file and exit')
        );
}
/**
 * Scan the database for badly encoded rich text, optionally dump rollback
 * SQL queries, then fix the detected items.
 *
 * @param InputInterface  $input
 * @param OutputInterface $output
 * @return int 0 on success, otherwise one of the self::ERROR_* codes
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    // `--dump-only` has nothing to write to without `--dump`. Reject the
    // combination before the (potentially long) scan instead of silently
    // doing nothing and reporting success.
    if ($input->getOption('dump-only') && !$input->getOption('dump')) {
        $output->writeln(
            '<error>' . __('The --dump option is required when --dump-only is used.') . '</error>',
            OutputInterface::VERBOSITY_QUIET
        );
        return self::ERROR_ROLLBACK_FILE_REQUIRED;
    }

    $this->warnAboutExecutionTime();
    $this->findTextFields();
    $this->scanItems();

    $count = $this->countItems($this->invalid_items);
    if ($count === 0) {
        $output->writeln('' . __('No item to fix.') . '');
        return 0;
    }
    $output->writeln('' . sprintf(_n('Found %d item to fix.', 'Found %d items to fix.', $count), $count) . '');

    $this->askForConfirmation();

    // The rollback dump is written before any row is modified.
    if ($input->getOption('dump')) {
        $this->dumpObjects();
    }
    if ($input->getOption('dump-only')) {
        return 0;
    }

    $this->fixItems();
    if ($this->failed_items_count > 0) {
        $this->output->writeln(
            '' . sprintf(__('Unable to update %s items'), $this->failed_items_count) . '',
            OutputInterface::VERBOSITY_QUIET
        );
        return self::ERROR_UPDATE_FAILED;
    }

    $output->writeln('' . __('HTML encoding has been fixed.') . '');
    return 0;
}
/**
 * Write, to the file given by the `dump` option, one SQL UPDATE query per
 * invalid item that restores the current value of every field about to be
 * fixed.
 *
 * @return void
 *
 * @throws \Glpi\Console\Exception\EarlyExitException when the file cannot be fully written
 */
private function dumpObjects(): void
{
    global $DB;

    $dump_content = '';
    foreach ($this->invalid_items as $itemtype => $items) {
        foreach ($items as $item_id => $fields) {
            // Load the current state of the item.
            $item = new $itemtype();
            if (!$item->getFromDB($item_id)) {
                // The item can no longer be loaded, so there is no state to
                // save for it; fixItems() will report it as a failure.
                continue;
            }

            // Collect the fields to save.
            $object_state = [];
            foreach ($fields as $field) {
                $object_state[$field] = $DB->escape($item->fields[$field]);
            }

            // Build the SQL query.
            $dump_content .= $DB->buildUpdate(
                $itemtype::getTable(),
                $object_state,
                ['id' => $item_id],
            ) . ';' . PHP_EOL;
        }
    }

    // Save the rollback SQL queries dump. The comparison is strict so that a
    // `false` return (write failure) can never be mistaken for "0 bytes
    // written".
    $dump_file_name = $this->input->getOption('dump');
    $written_bytes = @file_put_contents($dump_file_name, $dump_content);
    if ($written_bytes !== strlen($dump_content)) {
        throw new \Glpi\Console\Exception\EarlyExitException(
            '' . sprintf(__('Failed to write rollback SQL queries in "%s" file.'), $dump_file_name) . '',
            self::ERROR_ROLLBACK_FILE_FAILED
        );
    }

    $this->output->writeln(
        '' . sprintf(__('File %s contains SQL queries that can be used to rollback command.'), $dump_file_name) . '',
        OutputInterface::VERBOSITY_QUIET
    );
}
/**
 * Walk through every detected item, itemtype by itemtype, and fix it.
 *
 * Items that cannot be loaded are reported and counted as failures.
 *
 * @return void
 */
private function fixItems(): void
{
    foreach ($this->invalid_items as $itemtype => $items) {
        $this->outputMessage(
            '' . sprintf(__('Fixing %s...'), $itemtype::getTypeName(Session::getPluralNumber())) . '',
        );

        // Message shown for each item while iterating.
        $progress_message = fn (array $fields, int $id) => sprintf(
            __('Fixing %s with ID %s...'),
            $itemtype::getTypeName(1),
            $id
        );

        foreach ($this->iterate($items, $progress_message) as $item_id => $fields) {
            /* @var \CommonDBTM $item */
            $item = new $itemtype();
            if ($item->getFromDB($item_id)) {
                $this->fixOneItem($item, $fields);
                continue;
            }

            // The item could not be loaded: report it and move on.
            $this->outputMessage(
                '' . sprintf(__('Unable to fix %s with ID %s.'), $itemtype::getTypeName(1), $item_id) . '',
                OutputInterface::VERBOSITY_QUIET
            );
            $this->failed_items_count++;
        }
    }
}
/**
 * Fix the given fields of one item and write them back with a direct
 * database update.
 *
 * A failed update is reported and counted in $failed_items_count.
 *
 * @param CommonDBTM $item   item to fix
 * @param array      $fields names of the fields to fix
 * @return void
 */
private function fixOneItem(CommonDBTM $item, array $fields): void
{
    global $DB;

    $itemtype = $item::getType();

    // Compute the fixed, SQL-escaped value of each field.
    $update = [];
    foreach ($fields as $field) {
        $update[$field] = $DB->escape($this->fixOneField($item, $field));
    }

    $updated = $DB->update(
        $itemtype::getTable(),
        $update,
        ['id' => $item->fields['id']],
    );
    if ($updated) {
        return;
    }

    $this->outputMessage(
        '' . sprintf(__('Unable to fix %s with ID %s.'), $itemtype::getTypeName(1), $item->getID()) . '',
        OutputInterface::VERBOSITY_QUIET
    );
    $this->failed_items_count++;
}
/**
 * Compute the fixed value of one field of an item.
 *
 * The individual fixes are applied one after the other, each one working
 * on the result of the previous one. The email header fix only applies to
 * the `content` field of tickets and followups.
 *
 * @param CommonDBTM $item  item holding the value to fix
 * @param string     $field name of the field to fix
 * @return string the fixed value
 */
private function fixOneField(CommonDBTM $item, string $field): string
{
    $value = $item->fields[$field];

    $has_email_headers = in_array($item::getType(), [Ticket::getType(), ITILFollowup::getType()])
        && $field == 'content';
    if ($has_email_headers) {
        $value = $this->fixEmailHeadersEncoding($value);
    }

    // The ampersand pass runs last, on what the other passes left behind.
    $value = $this->fixQuoteEntityWithoutSemicolon($value);
    $value = $this->fixUnescapedLineBreak($value);
    $value = $this->fixRawGreaterThanSign($value);

    return $this->fixRawAmpersand($value);
}
/**
 * Fix over-encoded angle brackets around email addresses in old followups.
 * @see https://github.com/glpi-project/glpi/issues/8330
 *
 * `&amp;lt;user@example.com&amp;gt;` becomes `&lt;user@example.com&gt;`:
 * one level of encoding is removed and the result stays entity-encoded.
 *
 * @param string $input
 * @return string the fixed value, or $input unchanged if the regex engine fails
 */
private function fixEmailHeadersEncoding(string $input): string
{
    // Not very strict pattern for emails, but should be enough.
    // Capturing parentheses:
    // 1: over-encoded < character
    // 2: email address (also available as the named group "email")
    // 3: over-encoded > character
    $pattern = '/(&amp;lt;)(?<email>[^@]*?@[a-zA-Z0-9\-.]*?)(&amp;gt;)/';
    $replace = '&lt;${2}&gt;';

    // preg_replace() returns null on a PCRE error; keep the original value
    // in that case rather than violating the string return type.
    return preg_replace($pattern, $replace, $input) ?? $input;
}
/**
 * Fix the `&quot` HTML entity written without its final semicolon.
 * @see https://github.com/glpi-project/glpi/pull/6084
 *
 * Entities that already end with a semicolon are left untouched.
 *
 * @param string $input
 * @return string the fixed value, or $input unchanged if the regex engine fails
 */
private function fixQuoteEntityWithoutSemicolon(string $input): string
{
    // Add the missing semicolon: match `&quot` only when it is not already
    // followed by `;`.
    $pattern = '/&quot(?!;)/';
    $replace = '&quot;';

    // preg_replace() returns null on a PCRE error; keep the original value
    // in that case rather than violating the string return type.
    return preg_replace($pattern, $replace, $input) ?? $input;
}
/**
 * Fix raw `<br />` HTML tags. Caused by Formcreator plugin before GLPI 10.0
 * Impacts Tickets, Problems and Changes, in the content field.
 * Those items were generated with GLPI 9.5's flavor of HTML escaping.
 *
 * The raw tag is replaced by its entity-encoded form `&lt;br /&gt;`, like
 * the rest of the stored content.
 *
 * @param string $input
 * @return string
 */
private function fixUnescapedLineBreak(string $input): string
{
    // The needle is a fixed string, so no regular expression is needed.
    return str_replace('<br />', '&lt;br /&gt;', $input);
}
/**
 * Fix raw > character. Caused by Formcreator plugin before GLPI 10.0
 * Impacts Tickets, Problems and Changes, in the content field.
 * May happen with glpi object questions.
 * Those items were generated with GLPI 9.5's flavor of HTML escaping.
 *
 * Only a `>` surrounded by spaces is handled; it is replaced by the
 * `&gt;` entity, keeping the surrounding spaces.
 *
 * @param string $input
 * @return string
 */
private function fixRawGreaterThanSign(string $input): string
{
    // The needle is a fixed string, so no regular expression is needed.
    return str_replace(' > ', ' &gt; ', $input);
}
/**
 * Fix raw ampersands, i.e. `&` characters that do not start an HTML entity.
 *
 * A `&` is considered part of an entity when it is followed by an optional
 * `#`, one or more letters or digits, and a semicolon (e.g. `&amp;`,
 * `&#38;`). Any other `&` is replaced by the `&#38;` numeric entity.
 *
 * @param string $input
 * @return string the fixed value, or $input unchanged if the regex engine fails
 */
private function fixRawAmpersand(string $input): string
{
    $pattern = '/&(?!#?[a-z0-9]+;)/i';
    // The replacement must carry the ampersand itself: `#38;` alone would
    // drop the character instead of encoding it.
    $replace = '&#38;';

    // preg_replace() returns null on a PCRE error; keep the original value
    // in that case rather than violating the string return type.
    return preg_replace($pattern, $replace, $input) ?? $input;
}
/**
 * Find the rich text columns of every itemtype that has a database table.
 *
 * All tables are inspected; tables of inactive plugins and tables that do
 * not map to a CommonDBTM class are skipped. A column is kept when one of
 * the itemtype search options targets it on the itemtype's own table with
 * datatype `text` and `htmltext` set to true. Results are stored in
 * $text_fields, each column at most once per itemtype.
 *
 * @return void
 */
private function findTextFields(): void
{
    global $DB;

    $table_iterator = $DB->listTables();
    foreach ($table_iterator as $table_data) {
        $table = $table_data['TABLE_NAME'];

        // Ignore tables that belong to a plugin which is not active.
        if (
            preg_match("/^glpi_plugin_([a-z0-9]+)/", $table, $matches)
            && !Plugin::isPluginActive($matches[1])
        ) {
            continue;
        }

        $itemtype = getItemTypeForTable($table);
        if (!is_a($itemtype, CommonDBTM::class, true)) {
            continue;
        }

        $search_options = Search::getOptions($itemtype);
        foreach ($search_options as $search_option) {
            if (!isset($search_option['table'])) {
                continue;
            }

            $is_rich_text = $search_option['table'] === $table
                && ($search_option['datatype'] ?? '') === 'text'
                && ($search_option['htmltext'] ?? false) === true;
            if (!$is_rich_text) {
                continue;
            }

            // Several search options may target the same column; register
            // it once so that the table is not scanned repeatedly for it.
            if (!in_array($search_option['field'], $this->text_fields[$itemtype] ?? [], true)) {
                $this->text_fields[$itemtype][] = $search_option['field'];
            }
        }
    }
}
/**
 * Scan every known rich text column, of every itemtype, for bad HTML.
 *
 * @return void
 */
private function scanItems(): void
{
    $message = '' . __('Scanning database for items to fix...') . '';
    $this->outputMessage($message);

    foreach ($this->text_fields as $itemtype => $columns) {
        foreach ($columns as $column) {
            $this->scanField($itemtype, $column);
        }
    }
}
/**
 * Search for bad HTML in a single column of a table.
 *
 * Every matching row gets $field appended to its entry in $invalid_items.
 * The criteria mirror the fixes applied by fixOneField().
 *
 * @param string $itemtype
 * @param string $field
 * @return void
 */
private function scanField(string $itemtype, string $field): void
{
    global $DB;

    $searches = [
        // `&quot` entity without its final semicolon. LIKE cannot express
        // "not followed by", so a regular expression is used; it is written
        // without lookahead: any character but `;`, or the end of the value.
        [$field => ['REGEXP', '&quot([^;]|$)']],
        // Raw line break tag.
        [$field => ['LIKE', '%<br />%']],
        // '>' is not allowed in encoded HTML
        // May happen when using a select field with a value containing a '>'
        // Known to happen with GLPI select questions, then the symbol is
        // surrounded with ' '
        [$field => ['LIKE', '% > %']],
        // Ampersand that does not start an HTML entity.
        [$field => ['REGEXP', '&(?!#?[a-z0-9]+;)']],
    ];

    if (in_array($itemtype, [Ticket::getType(), ITILFollowup::getType()], true) && $field === 'content') {
        // Over-encoded angle brackets around an email address. A plain
        // capturing group is used, as nothing reads the captured value here.
        $searches[] = [
            $field => ['REGEXP', $DB->escape('(&amp;lt;)([^@]*?@[a-zA-Z0-9\-.]*?)(&amp;gt;)')]
        ];
    }

    $iterator = $DB->request([
        'SELECT' => 'id',
        'FROM'   => $itemtype::getTable(),
        'WHERE'  => [
            'OR' => $searches,
        ],
    ]);

    foreach ($iterator as $row) {
        $this->invalid_items[$itemtype][$row['id']][] = $field;
    }
}
/**
 * Count the items in a list of invalid items.
 *
 * @param array $items_array lists of items, indexed by itemtype
 * @return int total number of items, all itemtypes together
 */
private function countItems(array $items_array): int
{
    // Sum the size of each per-itemtype list; an empty input sums to 0.
    return array_sum(array_map('count', $items_array));
}
}