* @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

namespace Joomla\Component\Finder\Administrator\Indexer;

use Exception;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Factory;
use Joomla\CMS\Language\Text;
use Joomla\CMS\Uri\Uri;
use Joomla\Component\Finder\Administrator\Helper\LanguageHelper;
use Joomla\Component\Finder\Site\Helper\RouteHelper;
use Joomla\Database\DatabaseAwareTrait;
use Joomla\Database\DatabaseInterface;
use Joomla\Database\ParameterType;
use Joomla\Registry\Registry;
use Joomla\String\StringHelper;
use Joomla\Utilities\ArrayHelper;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects

/**
 * Query class for the Finder indexer package.
 *
 * Parses the raw search options (input string, taxonomy filters, date
 * filters) into included/excluded/ignored tokens and filter properties.
 *
 * @since  2.5
 */
class Query
{
    use DatabaseAwareTrait;

    /**
     * Flag to show whether the query can return results.
     *
     * @var    boolean
     * @since  2.5
     */
    public $search;

    /**
     * The query input string.
     *
     * @var    string
     * @since  2.5
     */
    public $input;

    /**
     * The language of the query.
     *
     * @var    string
     * @since  2.5
     */
    public $language;

    /**
     * The query string matching mode.
     *
     * @var    string
     * @since  2.5
     */
    public $mode;

    /**
     * The included tokens.
     *
     * @var    Token[]
     * @since  2.5
     */
    public $included = [];

    /**
     * The excluded tokens.
     *
     * @var    Token[]
     * @since  2.5
     */
    public $excluded = [];

    /**
     * The tokens to ignore because no matches exist.
     *
     * @var    Token[]
     * @since  2.5
     */
    public $ignored = [];

    /**
     * The operators used in the query input string.
     *
     * @var    array
     * @since  2.5
     */
    public $operators = [];

    /**
     * The terms to highlight as matches.
     *
     * @var    array
     * @since  2.5
     */
    public $highlight = [];

    /**
     * The number of matching terms for the query input.
     *
     * @var    integer
     * @since  2.5
     */
    public $terms;

    /**
     * Allow empty searches
     *
     * @var    boolean
     * @since  4.0.0
     */
    public $empty;

    /**
     * The static filter id.
     *
     * @var    string
     * @since  2.5
     */
    public $filter;

    /**
     * The taxonomy filters. This is a multi-dimensional array of taxonomy
     * branches as the first level and then the taxonomy nodes as the values.
     *
     * For example:
     * $filters = array(
     *     'Type' = array(10, 32, 29, 11, ...);
     *     'Label' = array(20, 314, 349, 91, 82, ...);
     *        ...
     * );
     *
     * @var    array
     * @since  2.5
     */
    public $filters = [];

    /**
     * The start date filter.
     *
     * @var    string
     * @since  2.5
     */
    public $date1;

    /**
     * The end date filter.
     *
     * @var    string
     * @since  2.5
     */
    public $date2;

    /**
     * The start date filter modifier.
     *
     * @var    string
     * @since  2.5
     */
    public $when1;

    /**
     * The end date filter modifier.
     *
     * @var    string
     * @since  2.5
     */
    public $when2;

    /**
     * Match search terms exactly or with a LIKE scheme
     *
     * @var    string
     * @since  4.2.0
     */
    public $wordmode;

    /**
     * The dates Registry. Only used as temporary storage while the
     * constructor runs; it is unset before the constructor returns.
     *
     * @var    Registry
     * @since  4.3.0
     */
    public $dates;

    /**
     * Method to instantiate the query object.
     *
     * @param   array                   $options  An array of query options. Keys read here: input, empty,
     *                                            language, word_match, date1, date2, when1, when2, filter, filters.
     * @param   DatabaseInterface|null  $db       The database driver. When omitted it is fetched from the
     *                                            DI container and a deprecation notice is triggered.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    public function __construct($options, ?DatabaseInterface $db = null)
    {
        if ($db === null) {
            @trigger_error('Database will be mandatory in 5.0.', E_USER_DEPRECATED);
            $db = Factory::getContainer()->get(DatabaseInterface::class);
        }

        $this->setDatabase($db);

        // Get the input string.
        $this->input = $options['input'] ?? '';

        // Get the empty query setting.
        $this->empty = isset($options['empty']) ? (bool) $options['empty'] : false;

        // Get the input language.
        $this->language = !empty($options['language']) ? $options['language'] : Helper::getDefaultLanguage();

        // Get the matching mode.
        $this->mode = 'AND';

        // Set the word matching mode
        $this->wordmode = !empty($options['word_match']) ? $options['word_match'] : 'exact';

        // Initialize the temporary date storage.
        $this->dates = new Registry();

        // Populate the temporary date storage.
        foreach (['date1', 'date2', 'when1', 'when2'] as $dateKey) {
            if (!empty($options[$dateKey])) {
                $this->dates->set($dateKey, $options[$dateKey]);
            }
        }

        // Process the static taxonomy filters.
        if (!empty($options['filter'])) {
            $this->processStaticTaxonomy($options['filter']);
        }

        // Process the dynamic taxonomy filters.
        if (!empty($options['filters'])) {
            $this->processDynamicTaxonomy($options['filters']);
        }

        // Get the date filters.
        $d1 = $this->dates->get('date1');
        $d2 = $this->dates->get('date2');
        $w1 = $this->dates->get('when1');
        $w2 = $this->dates->get('when2');

        // Process the date filters.
        if (!empty($d1) || !empty($d2)) {
            $this->processDates($d1, $d2, $w1, $w2);
        }

        // Process the input string.
        $this->processString($this->input, $this->language, $this->mode);

        // Get the number of matching terms.
        foreach ($this->included as $token) {
            $this->terms += count($token->matches);
        }

        // Remove the temporary date storage.
        unset($this->dates);

        // Lastly, determine whether this query can return a result set.
        if (!empty($this->input)) {
            // We have a query string.
            $this->search = true;
        } elseif ($this->empty && (!empty($this->filter) || !empty($this->filters) || !empty($this->date1) || !empty($this->date2))) {
            // We can search without a query string because a filter is active.
            $this->search = true;
        } else {
            // We do not have a valid search query.
            $this->search = false;
        }
    }

    /**
     * Method to convert the query object into a URI string.
     *
     * @param   string  $base  The base URI. [optional]
     *
     * @return  string  The complete query URI (path and query parts only).
     *
     * @since   2.5
     */
    public function toUri($base = '')
    {
        // Set the base if not specified.
        if ($base === '') {
            $base = 'index.php?option=com_finder&view=search';
        }

        // Get the base URI.
        $uri = Uri::getInstance($base);

        // Add the static taxonomy filter if present.
        if ((bool) $this->filter) {
            $uri->setVar('f', $this->filter);
        }

        // Get the filters in the request.
        $t = Factory::getApplication()->getInput()->request->get('t', [], 'array');

        // Add the dynamic taxonomy filters if present.
        if ((bool) $this->filters) {
            foreach ($this->filters as $nodes) {
                foreach ($nodes as $node) {
                    // Loose comparison on purpose: node ids are integers here, request values are not cast.
                    if (!in_array($node, $t)) {
                        continue;
                    }

                    $uri->setVar('t[]', $node);
                }
            }
        }

        // Add the input string if present.
        if (!empty($this->input)) {
            $uri->setVar('q', $this->input);
        }

        // Add the start date if present.
        if (!empty($this->date1)) {
            $uri->setVar('d1', $this->date1);
        }

        // Add the end date if present.
        if (!empty($this->date2)) {
            $uri->setVar('d2', $this->date2);
        }

        // Add the start date modifier if present.
        if (!empty($this->when1)) {
            $uri->setVar('w1', $this->when1);
        }

        // Add the end date modifier if present.
        if (!empty($this->when2)) {
            $uri->setVar('w2', $this->when2);
        }

        // Add a menu item id if one is not present.
        if (!$uri->getVar('Itemid')) {
            // Get the menu item id.
            $query = [
                'view' => $uri->getVar('view'),
                'f'    => $uri->getVar('f'),
                'q'    => $uri->getVar('q'),
            ];

            $item = RouteHelper::getItemid($query);

            // Add the menu item id if present.
            if ($item !== null) {
                $uri->setVar('Itemid', $item);
            }
        }

        return $uri->toString(['path', 'query']);
    }

    /**
     * Method to get a list of excluded search term ids.
     *
     * @return  array  An array of excluded term ids.
     *
     * @since   2.5
     */
    public function getExcludedTermIds()
    {
        $results = [];

        // Iterate through the excluded tokens and compile the matching terms.
        foreach ($this->excluded as $token) {
            foreach ($token->matches as $match) {
                $results = array_merge($results, $match);
            }
        }

        // Sanitize the terms.
        $results = array_unique($results);

        return ArrayHelper::toInteger($results);
    }

    /**
     * Method to get a list of included search term ids.
     *
     * Tokens without any matches are skipped.
     *
     * @return  array  An array of included term ids, keyed by term.
     *
     * @since   2.5
     */
    public function getIncludedTermIds()
    {
        $results = [];

        // Iterate through the included tokens and compile the matching terms.
        foreach ($this->included as $token) {
            // Check if we have any terms.
            if (empty($token->matches)) {
                continue;
            }

            // Get the term.
            $term = $token->term;

            // Prepare the container for the term if necessary.
            if (!array_key_exists($term, $results)) {
                $results[$term] = [];
            }

            // Add the matches to the stack.
            foreach ($token->matches as $match) {
                $results[$term] = array_merge($results[$term], $match);
            }
        }

        // Sanitize the terms.
        foreach ($results as $key => $value) {
            $results[$key] = ArrayHelper::toInteger(array_unique($value));
        }

        return $results;
    }

    /**
     * Method to get a list of required search term ids.
     *
     * Unlike getIncludedTermIds(), a required token without matches still
     * gets an (empty) entry in the result.
     *
     * @return  array  An array of required term ids, keyed by term.
     *
     * @since   2.5
     */
    public function getRequiredTermIds()
    {
        $results = [];

        // Iterate through the included tokens and compile the matching terms.
        foreach ($this->included as $token) {
            // Check if the token is required.
            if (!$token->required) {
                continue;
            }

            // Get the term.
            $term = $token->term;

            // Prepare the container for the term if necessary.
            if (!array_key_exists($term, $results)) {
                $results[$term] = [];
            }

            // Add the matches to the stack.
            foreach ($token->matches as $match) {
                $results[$term] = array_merge($results[$term], $match);
            }
        }

        // Sanitize the terms.
        foreach ($results as $key => $value) {
            $results[$key] = ArrayHelper::toInteger(array_unique($value));
        }

        return $results;
    }

    /**
     * Method to process the static taxonomy input. The static taxonomy input
     * comes in the form of a pre-defined search filter that is assigned to the
     * search form.
     *
     * @param   integer  $filterId  The id of static filter.
     *
     * @return  boolean  True on success, false if the filter could not be loaded.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    protected function processStaticTaxonomy($filterId)
    {
        // Get the database object.
        $db = $this->getDatabase();

        // Initialize user variables
        $groups = implode(',', Factory::getUser()->getAuthorisedViewLevels());

        // Load the predefined filter.
        $query = $db->getQuery(true)
            ->select('f.data, f.params')
            ->from($db->quoteName('#__finder_filters') . ' AS f')
            ->where('f.filter_id = ' . (int) $filterId);

        $db->setQuery($query);
        $return = $db->loadObject();

        // Check the returned filter.
        if (empty($return)) {
            return false;
        }

        // Set the filter.
        $this->filter = (int) $filterId;

        // Get a parameter object for the filter date options.
        $params = new Registry($return->params);

        /*
         * Set the dates if not already set.
         *
         * NOTE(review): these defaults are stored under 'd1'/'d2'/'w1'/'w2' while the
         * constructor populates and reads 'date1'/'date2'/'when1'/'when2', so they do
         * not appear to reach processDates(). Confirm the intended keys (and the value
         * format of the stored modifiers) before changing this.
         */
        $this->dates->def('d1', $params->get('d1'));
        $this->dates->def('d2', $params->get('d2'));
        $this->dates->def('w1', $params->get('w1'));
        $this->dates->def('w2', $params->get('w2'));

        // Sanitize to integers, remove duplicates and drop every zero value.
        $filters = array_filter(array_unique(ArrayHelper::toInteger(explode(',', $return->data))));

        // Check if we have any real input.
        if (empty($filters)) {
            return true;
        }

        /*
         * Create the query to get filters from the database. We do this for
         * two reasons: one, it allows us to ensure that the filters being used
         * are real; two, we need to sort the filters by taxonomy branch.
         */
        $query->clear()
            ->select('t1.id, t1.title, t2.title AS branch')
            ->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
            ->leftJoin($db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.lft < t1.lft AND t1.rgt < t2.rgt AND t2.level = 1')
            ->where('t1.state = 1')
            ->where('t1.access IN (' . $groups . ')')
            ->where('t1.id IN (' . implode(',', $filters) . ')')
            ->where('t2.state = 1')
            ->where('t2.access IN (' . $groups . ')');

        // Load the filters.
        $db->setQuery($query);
        $results = $db->loadObjectList();

        // Sort the filter ids by branch.
        foreach ($results as $result) {
            $this->filters[$result->branch][$result->title] = (int) $result->id;
        }

        return true;
    }

    /**
     * Method to process the dynamic taxonomy input. The dynamic taxonomy input
     * comes in the form of select fields that the user chooses from. The
     * dynamic taxonomy input is processed AFTER the static taxonomy input
     * because the dynamic options can be used to further narrow a static
     * taxonomy filter.
     *
     * @param   array  $filters  An array of taxonomy node ids.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    protected function processDynamicTaxonomy($filters)
    {
        // Initialize user variables
        $groups = implode(',', Factory::getUser()->getAuthorisedViewLevels());

        // Sanitize to integers, remove duplicates and drop every zero value.
        $filters = array_filter(array_unique(ArrayHelper::toInteger($filters)));

        // Check if we have any real input.
        if (empty($filters)) {
            return true;
        }

        // Get the database object.
        $db    = $this->getDatabase();
        $query = $db->getQuery(true);

        /*
         * Create the query to get filters from the database. We do this for
         * two reasons: one, it allows us to ensure that the filters being used
         * are real; two, we need to sort the filters by taxonomy branch.
         */
        $query->select('t1.id, t1.title, t2.title AS branch')
            ->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
            ->leftJoin($db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.lft < t1.lft AND t1.rgt < t2.rgt AND t2.level = 1')
            ->where('t1.state = 1')
            ->where('t1.access IN (' . $groups . ')')
            ->where('t1.id IN (' . implode(',', $filters) . ')')
            ->where('t2.state = 1')
            ->where('t2.access IN (' . $groups . ')');

        // Load the filters.
        $db->setQuery($query);
        $results = $db->loadObjectList();

        // Cleared filter branches.
        $cleared = [];

        /*
         * Sort the filter ids by branch. Because these filters are designed to
         * override and further narrow the items selected in the static filter,
         * we will clear the values from the static filter on a branch by
         * branch basis before adding the dynamic filters. So, if the static
         * filter defines a type filter of "articles" and three "category"
         * filters but the user only limits the category further, the category
         * filters will be flushed but the type filters will not.
         */
        foreach ($results as $result) {
            // Check if the branch has been cleared.
            if (!in_array($result->branch, $cleared, true)) {
                // Clear the branch.
                $this->filters[$result->branch] = [];

                // Add the branch to the cleared list.
                $cleared[] = $result->branch;
            }

            // Add the filter to the list.
            $this->filters[$result->branch][$result->title] = (int) $result->id;
        }

        return true;
    }

    /**
     * Method to process the query date filters to determine start and end
     * date limitations.
     *
     * A date that is empty or cannot be parsed leaves the corresponding
     * date/modifier properties untouched. An unknown modifier falls back to
     * 'before'.
     *
     * @param   string  $date1  The first date filter.
     * @param   string  $date2  The second date filter.
     * @param   string  $when1  The first date modifier.
     * @param   string  $when2  The second date modifier.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     */
    protected function processDates($date1, $date2, $when1, $when2)
    {
        // Clean up the inputs. The casts keep null values away from the string helpers.
        $date1 = trim(StringHelper::strtolower((string) $date1));
        $date2 = trim(StringHelper::strtolower((string) $date2));
        $when1 = trim(StringHelper::strtolower((string) $when1));
        $when2 = trim(StringHelper::strtolower((string) $when2));

        // Get the time offset.
        $offset = Factory::getApplication()->get('offset');

        // Array of allowed when values.
        $whens = ['before', 'after', 'exact'];

        // Set the first date filter if it resolves to a date.
        $sqlDate = $this->resolveDateFilter($date1, $offset);

        if ($sqlDate !== null) {
            $this->date1 = $sqlDate;
            $this->when1 = in_array($when1, $whens, true) ? $when1 : 'before';
        }

        // Set the second date filter if it resolves to a date.
        $sqlDate = $this->resolveDateFilter($date2, $offset);

        if ($sqlDate !== null) {
            $this->date2 = $sqlDate;
            $this->when2 = in_array($when2, $whens, true) ? $when2 : 'before';
        }

        return true;
    }

    /**
     * Method to turn a cleaned date filter value into an SQL formatted date.
     *
     * @param   string  $value   The trimmed, lower-cased date filter value.
     * @param   mixed   $offset  The time zone value handed to Factory::getDate().
     *
     * @return  string|null  The SQL formatted date, or null when the value is empty or cannot be parsed.
     *
     * @since   __DEPLOY_VERSION__
     */
    private function resolveDateFilter($value, $offset)
    {
        // An empty value must not be parsed: the date classes would treat it as "now".
        if ($value === '') {
            return null;
        }

        // The value of 'today' is a special case that we need to handle.
        if ($value === StringHelper::strtolower(Text::_('COM_FINDER_QUERY_FILTER_TODAY'))) {
            // format() expects date()-style characters, not strftime placeholders.
            $value = Factory::getDate('now', $offset)->format('Y-m-d');
        }

        try {
            return Factory::getDate($value, $offset)->toSql();
        } catch (Exception $e) {
            // The value comes from user input; an unparseable date simply means "no date filter".
            return null;
        }
    }

    /**
     * Method to process the query input string and extract required, optional,
     * and excluded tokens; taxonomy filters; and date filters.
     *
     * @param   string  $input  The query input string.
     * @param   string  $lang   The query input language.
     * @param   string  $mode   The query matching mode.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    protected function processString($input, $lang, $mode)
    {
        if ($input === null) {
            $input = '';
        }

        // Clean up the input string.
        $input  = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
        $input  = StringHelper::strtolower($input);
        $input  = preg_replace('#\s+#mi', ' ', $input);
        $input  = trim($input);
        $debug  = Factory::getApplication()->get('debug_lang');
        $params = ComponentHelper::getParams('com_finder');

        /*
         * First, we need to handle string based modifiers. String based
         * modifiers could potentially include things like "category:blah" or
         * "before:2009-10-21" or "type:article", etc.
         */
        $patterns = [
            'before' => Text::_('COM_FINDER_FILTER_WHEN_BEFORE'),
            'after'  => Text::_('COM_FINDER_FILTER_WHEN_AFTER'),
        ];

        // Add the taxonomy branch titles to the possible patterns.
        foreach (Taxonomy::getBranchTitles() as $branch) {
            // Add the pattern.
            $patterns[$branch] = StringHelper::strtolower(Text::_(LanguageHelper::branchSingular($branch)));
        }

        // Container for search terms and phrases.
        $terms   = [];
        $phrases = [];

        // Cleared filter branches.
        $cleared = [];

        /*
         * Compile the suffix pattern. This is used to match the values of the
         * filter input string. Single words can be input directly, multi-word
         * values have to be wrapped in double quotes.
         */
        $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
        $suffix = '(([\w\d' . $quotes . '-]+)|\"([\w\d\s' . $quotes . '-]+)\")';

        /*
         * Iterate through the possible filter patterns and search for matches.
         * We need to match the key, colon, and a value pattern for the match
         * to be valid.
         */
        foreach ($patterns as $modifier => $pattern) {
            $matches = [];

            // With language debugging on, strip the two marker characters on each side of the string.
            if ($debug) {
                $pattern = substr($pattern, 2, -2);
            }

            // Check if the filter pattern is in the input string. The label is matched literally.
            if (preg_match('#' . preg_quote($pattern, '#') . '\s*:\s*' . $suffix . '#mi', $input, $matches)) {
                // Get the value given to the modifier (group 3 is the quoted form).
                $value = $matches[3] ?? $matches[1];

                // Now we have to handle the filter string.
                switch ($modifier) {
                    // Handle a before and after date filters.
                    case 'before':
                    case 'after':
                        // Get the time offset.
                        $offset = Factory::getApplication()->get('offset');

                        // Resolve the value ('today' included) to a date.
                        $sqlDate = $this->resolveDateFilter($value, $offset);

                        // Set the date filter if the date was parsed successfully.
                        if ($sqlDate !== null) {
                            $this->date1 = $sqlDate;
                            $this->when1 = $modifier;
                        }

                        break;

                    // Handle a taxonomy branch filter.
                    default:
                        // Try to find the node id.
                        $return = Taxonomy::getNodeByTitle($modifier, $value);

                        // Check if the node id was found.
                        if ($return) {
                            // Check if the branch has been cleared.
                            if (!in_array($modifier, $cleared, true)) {
                                // Clear the branch.
                                $this->filters[$modifier] = [];

                                // Add the branch to the cleared list.
                                $cleared[] = $modifier;
                            }

                            // Add the filter to the list.
                            $this->filters[$modifier][$return->title] = (int) $return->id;
                        }

                        break;
                }

                // Clean up the input string again.
                $input = str_replace($matches[0], '', $input);
                $input = preg_replace('#\s+#mi', ' ', $input);
                $input = trim($input);
            }
        }

        /*
         * Extract the tokens enclosed in double quotes so that we can handle
         * them as phrases.
         */
        if (StringHelper::strpos($input, '"') !== false) {
            $matches = [];

            // Extract the tokens enclosed in double quotes.
            if (preg_match_all('#\"([^"]+)\"#m', $input, $matches)) {
                /*
                 * One or more phrases were found so we need to iterate through
                 * them, tokenize them as phrases, and remove them from the raw
                 * input string before we move on to the next processing step.
                 */
                foreach ($matches[1] as $key => $match) {
                    // Find the complete phrase in the input string.
                    $pos = StringHelper::strpos($input, $matches[0][$key]);
                    $len = StringHelper::strlen($matches[0][$key]);

                    // Add any terms that are before this phrase to the stack.
                    if (trim(StringHelper::substr($input, 0, $pos))) {
                        $terms = array_merge($terms, explode(' ', trim(StringHelper::substr($input, 0, $pos))));
                    }

                    // Strip out everything up to and including the phrase.
                    $input = StringHelper::substr($input, $pos + $len);

                    // Clean up the input string again.
                    $input = preg_replace('#\s+#mi', ' ', $input);
                    $input = trim($input);

                    // Get the number of words in the phrase.
                    $parts      = explode(' ', $match);
                    $tuplecount = $params->get('tuplecount', 1);

                    // Check if the phrase is longer than our $tuplecount.
                    if (count($parts) > $tuplecount && $tuplecount > 1) {
                        $chunk = array_slice($parts, 0, $tuplecount);
                        $parts = array_slice($parts, $tuplecount);

                        // If the chunk is not empty, add it as a phrase.
                        if (count($chunk)) {
                            $phrases[] = implode(' ', $chunk);
                            $terms[]   = implode(' ', $chunk);
                        }

                        /*
                         * If the phrase is longer than $tuplecount words, we need to
                         * break it down into smaller chunks of phrases that
                         * are less than or equal to $tuplecount words. We overlap
                         * the chunks so that we can ensure that a match is
                         * found for the complete phrase and not just portions
                         * of it.
                         */
                        for ($i = 0, $c = count($parts); $i < $c; $i++) {
                            array_shift($chunk);
                            $chunk[] = array_shift($parts);

                            // If the chunk is not empty, add it as a phrase.
                            if (count($chunk)) {
                                $phrases[] = implode(' ', $chunk);
                                $terms[]   = implode(' ', $chunk);
                            }
                        }
                    } else {
                        // The phrase is <= $tuplecount words so we can use it as is.
                        $phrases[] = $match;
                        $terms[]   = $match;
                    }
                }
            }
        }

        // Add the remaining terms if present.
        if ((bool) $input) {
            $terms = array_merge($terms, explode(' ', $input));
        }

        // An array of our boolean operators. $operator => $translation
        $operators = [
            'AND' => StringHelper::strtolower(Text::_('COM_FINDER_QUERY_OPERATOR_AND')),
            'OR'  => StringHelper::strtolower(Text::_('COM_FINDER_QUERY_OPERATOR_OR')),
            'NOT' => StringHelper::strtolower(Text::_('COM_FINDER_QUERY_OPERATOR_NOT')),
        ];

        // If language debugging is enabled you need to ignore the debug strings in matching.
        if (JDEBUG) {
            $debugStrings = ['**', '??'];
            $operators    = str_replace($debugStrings, '', $operators);
        }

        /*
         * Iterate through the terms and perform any sorting that needs to be
         * done based on boolean search operators. Terms that are before an
         * and/or/not modifier have to be handled in relation to their operator.
         */
        for ($i = 0, $c = count($terms); $i < $c; $i++) {
            // Check if the term is followed by an operator that we understand.
            if (isset($terms[$i + 1]) && in_array($terms[$i + 1], $operators, true)) {
                // Get the operator mode.
                $op = array_search($terms[$i + 1], $operators, true);

                // Handle the AND operator.
                if ($op === 'AND' && isset($terms[$i + 2])) {
                    // Tokenize the current term.
                    $token = Helper::tokenize($terms[$i], $lang, true);

                    // @todo: The previous function call may return an array, which seems not to be handled by the next one, which expects an object
                    $token = $this->getTokenData(array_shift($token));

                    if ($params->get('filter_commonwords', 0) && $token->common) {
                        continue;
                    }

                    if ($params->get('filter_numeric', 0) && $token->numeric) {
                        continue;
                    }

                    // Set the required flag.
                    $token->required = true;

                    // Add the current token to the stack.
                    $this->included[] = $token;
                    $this->highlight  = array_merge($this->highlight, array_keys($token->matches));

                    // Skip the next token (the mode operator).
                    $this->operators[] = $terms[$i + 1];

                    // Tokenize the term after the next term (current plus two).
                    $other = Helper::tokenize($terms[$i + 2], $lang, true);
                    $other = $this->getTokenData(array_shift($other));

                    // Set the required flag.
                    $other->required = true;

                    // Add the token after the next token to the stack.
                    $this->included[] = $other;
                    $this->highlight  = array_merge($this->highlight, array_keys($other->matches));

                    // Remove the processed phrases if possible.
                    if (($pk = array_search($terms[$i], $phrases, true)) !== false) {
                        unset($phrases[$pk]);
                    }

                    if (($pk = array_search($terms[$i + 2], $phrases, true)) !== false) {
                        unset($phrases[$pk]);
                    }

                    // Remove the processed terms.
                    unset($terms[$i], $terms[$i + 1], $terms[$i + 2]);

                    // Adjust the loop.
                    $i += 2;
                } elseif ($op === 'OR' && isset($terms[$i + 2])) {
                    // Handle the OR operator.
                    // Tokenize the current term.
                    $token = Helper::tokenize($terms[$i], $lang, true);
                    $token = $this->getTokenData(array_shift($token));

                    if ($params->get('filter_commonwords', 0) && $token->common) {
                        continue;
                    }

                    if ($params->get('filter_numeric', 0) && $token->numeric) {
                        continue;
                    }

                    // Set the required flag.
                    $token->required = false;

                    // Add the current token to the stack.
                    if ((bool) $token->matches) {
                        $this->included[] = $token;
                        $this->highlight  = array_merge($this->highlight, array_keys($token->matches));
                    } else {
                        $this->ignored[] = $token;
                    }

                    // Skip the next token (the mode operator).
                    $this->operators[] = $terms[$i + 1];

                    // Tokenize the term after the next term (current plus two).
                    $other = Helper::tokenize($terms[$i + 2], $lang, true);
                    $other = $this->getTokenData(array_shift($other));

                    // Set the required flag.
                    $other->required = false;

                    // Add the token after the next token to the stack.
                    if ((bool) $other->matches) {
                        $this->included[] = $other;
                        $this->highlight  = array_merge($this->highlight, array_keys($other->matches));
                    } else {
                        $this->ignored[] = $other;
                    }

                    // Remove the processed phrases if possible.
                    if (($pk = array_search($terms[$i], $phrases, true)) !== false) {
                        unset($phrases[$pk]);
                    }

                    if (($pk = array_search($terms[$i + 2], $phrases, true)) !== false) {
                        unset($phrases[$pk]);
                    }

                    // Remove the processed terms.
                    unset($terms[$i], $terms[$i + 1], $terms[$i + 2]);

                    // Adjust the loop.
                    $i += 2;
                }
            } elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators, true) === 'OR') {
                // Handle an orphaned OR operator.
                // Skip the next token (the mode operator).
                $this->operators[] = $terms[$i];

                // Tokenize the next term (current plus one).
                $other = Helper::tokenize($terms[$i + 1], $lang, true);
                $other = $this->getTokenData(array_shift($other));

                if ($params->get('filter_commonwords', 0) && $other->common) {
                    continue;
                }

                if ($params->get('filter_numeric', 0) && $other->numeric) {
                    continue;
                }

                // Set the required flag.
                $other->required = false;

                // Add the token after the next token to the stack.
                if ((bool) $other->matches) {
                    $this->included[] = $other;
                    $this->highlight  = array_merge($this->highlight, array_keys($other->matches));
                } else {
                    $this->ignored[] = $other;
                }

                // Remove the processed phrase if possible.
                if (($pk = array_search($terms[$i + 1], $phrases, true)) !== false) {
                    unset($phrases[$pk]);
                }

                // Remove the processed terms.
                unset($terms[$i], $terms[$i + 1]);

                // Adjust the loop.
                $i++;
            } elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators, true) === 'NOT') {
                // Handle the NOT operator.
                // Skip the next token (the mode operator).
                $this->operators[] = $terms[$i];

                // Tokenize the next term (current plus one).
                $other = Helper::tokenize($terms[$i + 1], $lang, true);
                $other = $this->getTokenData(array_shift($other));

                if ($params->get('filter_commonwords', 0) && $other->common) {
                    continue;
                }

                if ($params->get('filter_numeric', 0) && $other->numeric) {
                    continue;
                }

                // Set the required flag.
                $other->required = false;

                // Add the next token to the stack.
                if ((bool) $other->matches) {
                    $this->excluded[] = $other;
                } else {
                    $this->ignored[] = $other;
                }

                // Remove the processed phrase if possible.
                if (($pk = array_search($terms[$i + 1], $phrases, true)) !== false) {
                    unset($phrases[$pk]);
                }

                // Remove the processed terms.
                unset($terms[$i], $terms[$i + 1]);

                // Adjust the loop.
                $i++;
            }
        }

        /*
         * Iterate through any search phrases and tokenize them. We handle
         * phrases as autonomous units and do not break them down into two and
         * three word combinations.
         *
         * The operator handling above may already have removed entries, so the
         * keys are not necessarily contiguous; iterate over the remaining
         * entries instead of counting indexes.
         */
        foreach ($phrases as $phraseKey => $phrase) {
            // Tokenize the phrase.
            $token = Helper::tokenize($phrase, $lang, true);

            if (!count($token)) {
                continue;
            }

            $token = $this->getTokenData(array_shift($token));

            if ($params->get('filter_commonwords', 0) && $token->common) {
                continue;
            }

            if ($params->get('filter_numeric', 0) && $token->numeric) {
                continue;
            }

            // Set the required flag.
            $token->required = true;

            // Add the current token to the stack.
            $this->included[] = $token;
            $this->highlight  = array_merge($this->highlight, array_keys($token->matches));

            // Remove the processed term if possible.
            if (($pk = array_search($phrase, $terms, true)) !== false) {
                unset($terms[$pk]);
            }

            // Remove the processed phrase.
            unset($phrases[$phraseKey]);
        }

        /*
         * Handle any remaining tokens using the standard processing mechanism.
         */
        if ((bool) $terms) {
            // Tokenize the terms.
            $terms  = implode(' ', $terms);
            $tokens = Helper::tokenize($terms, $lang, false);

            // Make sure we are working with an array.
            $tokens = is_array($tokens) ? $tokens : [$tokens];

            // Get the token data and required state for all the tokens.
            foreach ($tokens as $token) {
                // Get the token data.
                $token = $this->getTokenData($token);

                if ($params->get('filter_commonwords', 0) && $token->common) {
                    continue;
                }

                /*
                 * NOTE(review): this branch reads 'filter_numerics' while the operator and
                 * phrase branches above read 'filter_numeric'. Only one of the two can be
                 * the configured option name - confirm against the component configuration.
                 */
                if ($params->get('filter_numerics', 0) && $token->numeric) {
                    continue;
                }

                // Set the required flag for the token.
                $token->required = $mode === 'AND' ? (!$token->phrase) : false;

                // Add the token to the appropriate stack.
                if ($token->required || (bool) $token->matches) {
                    $this->included[] = $token;
                    $this->highlight  = array_merge($this->highlight, array_keys($token->matches));
                } else {
                    $this->ignored[] = $token;
                }
            }
        }

        return true;
    }

    /**
     * Method to get the base and similar term ids and, if necessary, suggested
     * term data from the database. The terms ids are identified based on a
     * 'like' match in MySQL and/or a common stem. If no term ids could be
     * found, then we know that we will not be able to return any results for
     * that term and we should try to find a similar term to use that we can
     * match so that we can suggest the alternative search query to the user.
     *
     * @param   Token  $token  A Token object.
     *
     * @return  Token  A Token object.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    protected function getTokenData($token)
    {
        // Get the database object.
        $db = $this->getDatabase();

        // Create a database query to build match the token.
        $query = $db->getQuery(true)
            ->select('t.term, t.term_id')
            ->from('#__finder_terms AS t');

        if ($token->phrase) {
            // Add the phrase to the query.
            $query->where('t.term = ' . $db->quote($token->term))
                ->where('t.phrase = 1');
        } else {
            // Add the term to the query.
            $searchTerm = $token->term;
            $searchStem = $token->stem;
            $term       = $db->quoteName('t.term');
            $stem       = $db->quoteName('t.stem');

            if ($this->wordmode === 'begin') {
                // Prefix match.
                $searchTerm .= '%';
                $searchStem .= '%';
                $query->where('(' . $term . ' LIKE :searchTerm OR ' . $stem . ' LIKE :searchStem)');
            } elseif ($this->wordmode === 'fuzzy') {
                // Substring match.
                $searchTerm = '%' . $searchTerm . '%';
                $searchStem = '%' . $searchStem . '%';
                $query->where('(' . $term . ' LIKE :searchTerm OR ' . $stem . ' LIKE :searchStem)');
            } else {
                // Exact match.
                $query->where('(' . $term . ' = :searchTerm OR ' . $stem . ' = :searchStem)');
            }

            $query->bind(':searchTerm', $searchTerm, ParameterType::STRING)
                ->bind(':searchStem', $searchStem, ParameterType::STRING);

            $query->where('t.phrase = 0')
                ->where('t.language IN (\'*\',' . $db->quote($token->language) . ')');
        }

        // Get the terms.
        $db->setQuery($query);
        $matches = $db->loadObjectList();

        // Check the matching terms.
        if ((bool) $matches) {
            // Add the matches to the token, grouped by term.
            foreach ($matches as $match) {
                if (!isset($token->matches[$match->term])) {
                    $token->matches[$match->term] = [];
                }

                $token->matches[$match->term][] = (int) $match->term_id;
            }
        }

        // If no matches were found, try to find a similar but better token.
        if (empty($token->matches)) {
            // Create a database query to get the similar terms.
            $query->clear()
                ->select('DISTINCT t.term_id AS id, t.term AS term')
                ->from('#__finder_terms AS t')
                // ->where('t.soundex = ' . soundex($db->quote($token->term)))
                ->where('t.soundex = SOUNDEX(' . $db->quote($token->term) . ')')
                ->where('t.phrase = ' . (int) $token->phrase);

            // Get the terms.
            $db->setQuery($query);
            $results = $db->loadObjectList();

            // Check if any similar terms were found.
            if (empty($results)) {
                return $token;
            }

            // Stack for sorting the similar terms.
            $suggestions = [];

            // Get the levenshtein distance for all suggested terms.
            foreach ($results as $sk => $st) {
                // Get the levenshtein distance between terms.
                $distance = levenshtein($st->term, $token->term);

                // Make sure the levenshtein distance isn't over 50.
                if ($distance < 50) {
                    $suggestions[$sk] = $distance;
                }
            }

            // Every candidate may have been rejected by the distance cut-off.
            if (empty($suggestions)) {
                return $token;
            }

            // Sort the suggestions.
            asort($suggestions, SORT_NUMERIC);

            // Get the closest match.
            $keys = array_keys($suggestions);
            $key  = $keys[0];

            // Add the suggested term.
            $token->suggestion = $results[$key]->term;
        }

        return $token;
    }
}