Current File : /home/pacjaorg/public_html/km/administrator/components/com_finder/src/Indexer/Helper.php |
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
namespace Joomla\Component\Finder\Administrator\Indexer;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Factory;
use Joomla\CMS\Language\Multilanguage;
use Joomla\CMS\Plugin\PluginHelper;
use Joomla\CMS\Table\Table;
use Joomla\Registry\Registry;
use Joomla\String\StringHelper;
// Stop execution immediately unless the _JEXEC constant has been defined.
// The surrounding phpcs markers switch the PSR-1 side-effects sniff off for
// this single statement and back on afterwards.
// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects
/**
* Helper class for the Finder indexer package.
*
* @since 2.5
*/
class Helper
{
/**
 * Convert raw input into plain text with the parser registered for a format.
 *
 * @param   string  $input   The raw input.
 * @param   string  $format  The format of the input. [optional]
 *
 * @return  string  The parsed input.
 *
 * @since   2.5
 * @throws  \Exception on invalid parser.
 */
public static function parse($input, $format = 'html')
{
    // Resolve the parser for the requested format, then hand the input over to it.
    $parser = Parser::getInstance($format);

    return $parser->parse($input);
}
/**
 * Split a text string into Token objects.
 *
 * Unless the input is handled as a phrase, one token is produced per term and,
 * when the com_finder "tuplecount" parameter is greater than 1, derived tokens
 * for runs of up to that many consecutive terms are appended. Tokens are kept
 * in a static per-language cache that is trimmed to 1024 entries on each call.
 *
 * @param   string   $input   The input to tokenize.
 * @param   string   $lang    The language of the input.
 * @param   boolean  $phrase  Flag to indicate whether input could be a phrase. [optional]
 *
 * @return  Token[]  An array of Token objects.
 *
 * @since   2.5
 */
public static function tokenize($input, $lang, $phrase = false)
{
    static $tokenCache = [];
    static $maxTupleLength;
    static $isMultilingual;
    static $fallbackLanguage;

    // Read the maximum tuple length from the component parameters once.
    if (!$maxTupleLength) {
        $maxTupleLength = ComponentHelper::getParams('com_finder')->get('tuplecount', 1);
    }

    // Resolve the fallback language object on the first call only.
    if ($isMultilingual === null) {
        $isMultilingual = Multilanguage::isEnabled();
        $settings       = ComponentHelper::getParams('com_finder');

        if ($settings->get('language_default', '') == '') {
            $fallbackCode = '*';
        } elseif ($settings->get('language_default', '') == '-1') {
            $fallbackCode = self::getDefaultLanguage();
        } else {
            $fallbackCode = $settings->get('language_default');
        }

        /*
         * The fallback language is always reported with the code '*'. Work on a
         * clone so the language code of the shared instance is not overwritten.
         */
        $fallbackLanguage           = clone Language::getInstance($fallbackCode);
        $fallbackLanguage->language = '*';
    }

    $language = (!$isMultilingual || $lang == '*') ? $fallbackLanguage : Language::getInstance($lang);

    if (!isset($tokenCache[$lang])) {
        $tokenCache[$lang] = [];
    }

    // @todo: array_filter removes any number 0's from the terms. Not sure this is entirely intended
    $terms  = array_values(array_filter($language->tokenise($input)));
    $tokens = [];

    if ($phrase === true && count($terms) > 1) {
        /*
         * A phrase of more than one word becomes a single token: the terms are
         * not tokenized individually and no term combinations are generated.
         */
        $tokens[] = new Token($terms, $language->language, $language->spacer);
    } else {
        // One token per term, reusing a cached token when one exists.
        foreach ($terms as $term) {
            if (!isset($tokenCache[$lang][$term])) {
                $tokenCache[$lang][$term] = new Token($term, $language->language);
            }

            $tokens[] = $tokenCache[$lang][$term];
        }

        // Append derived multi-word tokens built from consecutive single-word tokens.
        if ($maxTupleLength > 1) {
            // Fixed up front: tokens appended below must not be combined again.
            $wordCount = count($tokens);

            for ($start = 0; $start < $wordCount; $start++) {
                $parts = [$tokens[$start]->term];

                // Grow the tuple one word at a time, from 2 up to $maxTupleLength words.
                for ($offset = 1; $offset < $maxTupleLength; $offset++) {
                    $next = $start + $offset;

                    if ($next >= $wordCount || !isset($tokens[$next])) {
                        break;
                    }

                    $parts[] = $tokens[$next]->term;
                    $key     = implode('::', $parts);

                    if (!isset($tokenCache[$lang][$key])) {
                        $tuple                   = new Token($parts, $language->language, $language->spacer);
                        $tuple->derived          = true;
                        $tokenCache[$lang][$key] = $tuple;
                    }

                    $tokens[] = $tokenCache[$lang][$key];
                }
            }
        }
    }

    /*
     * Keep the cache from filling up memory. The most common words are expected
     * to show up early in the text, so the entries that were cached last are
     * the ones being dropped.
     */
    while (count($tokenCache[$lang]) > 1024) {
        array_pop($tokenCache[$lang]);
    }

    return $tokens;
}
/**
 * Reduce a token to its base word using the stemmer of a language.
 *
 * @param   string  $token  The token to stem.
 * @param   string  $lang   The language of the token.
 *
 * @return  string  The root token.
 *
 * @since   2.5
 */
public static function stem($token, $lang)
{
    static $isMultilingual;
    static $fallbackStemmer;

    // Resolve the multilanguage flag and the fallback stemmer on the first call only.
    if ($isMultilingual === null) {
        $isMultilingual = Multilanguage::isEnabled();
        $settings       = ComponentHelper::getParams('com_finder');

        if ($settings->get('language_default', '') == '') {
            $fallbackCode = '*';
        } elseif ($settings->get('language_default', '') == '-1') {
            $fallbackCode = self::getDefaultLanguage();
        } else {
            $fallbackCode = $settings->get('language_default');
        }

        $fallbackStemmer = Language::getInstance($fallbackCode);
    }

    // A specific language is only honoured on multilingual sites and when it is not '*'.
    if ($isMultilingual && $lang != '*') {
        return Language::getInstance($lang)->stem($token);
    }

    return $fallbackStemmer->stem($token);
}
/**
 * Return the id of a content type, inserting it into #__finder_types when it
 * is not known yet.
 *
 * The rows of #__finder_types are loaded once, keyed by title, and kept in a
 * static cache together with every type added through this method.
 *
 * @param   string  $title  The type of content. For example: PDF
 * @param   string  $mime   The mime type of the content. For example: PDF [optional]
 *
 * @return  integer  The id of the content type.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function addContentType($title, $mime = null)
{
    static $knownTypes;

    $db = Factory::getDbo();

    // Fill the cache from the database while it is still empty.
    if (empty($knownTypes)) {
        $select = $db->getQuery(true)
            ->select('*')
            ->from($db->quoteName('#__finder_types'));

        $db->setQuery($select);
        $knownTypes = $db->loadObjectList('title');
    }

    // An already registered type only needs its id returned.
    if (isset($knownTypes[$title])) {
        return (int) $knownTypes[$title]->id;
    }

    // A missing mime type is stored as an empty string.
    $mimeValue = $mime ?? '';

    $insert = $db->getQuery(true)
        ->insert($db->quoteName('#__finder_types'))
        ->columns([$db->quoteName('title'), $db->quoteName('mime')])
        ->values($db->quote($title) . ', ' . $db->quote($mimeValue));

    $db->setQuery($insert);
    $db->execute();

    // Remember the new row so later calls for the same title skip the insert.
    $record        = new \stdClass();
    $record->title = $title;
    $record->mime  = $mimeValue;
    $record->id    = (int) $db->insertid();

    $knownTypes[$title] = $record;

    return $record->id;
}
/**
 * Tell whether a token is one of the common terms of a language.
 *
 * The common terms of each language are loaded through getCommonWords() on
 * first use and kept in a static cache.
 *
 * @param   string  $token  The token to test.
 * @param   string  $lang   The language to reference.
 *
 * @return  boolean  True if common, false otherwise.
 *
 * @since   2.5
 */
public static function isCommon($token, $lang)
{
    static $commonWords = [];
    static $fallbackCode;
    static $isMultilingual;

    // Resolve the multilanguage flag and the fallback language code on the first call only.
    if ($isMultilingual === null) {
        $isMultilingual = Multilanguage::isEnabled();
        $settings       = ComponentHelper::getParams('com_finder');

        if ($settings->get('language_default', '') == '') {
            $fallbackCode = '*';
        } elseif ($settings->get('language_default', '') == '-1') {
            $fallbackCode = self::getPrimaryLanguage(self::getDefaultLanguage());
        } else {
            $fallbackCode = self::getPrimaryLanguage($settings->get('language_default'));
        }
    }

    // The requested language is only used on multilingual sites and when it is not '*'.
    $code = (!$isMultilingual || $lang == '*') ? $fallbackCode : $lang;

    // Load the common terms of that language the first time they are needed.
    if (!isset($commonWords[$code])) {
        $commonWords[$code] = self::getCommonWords($code);
    }

    // Strict comparison: the token has to match a stored term exactly.
    return in_array($token, $commonWords[$code], true);
}
/**
 * Load the common terms stored for a language in #__finder_terms_common.
 *
 * @param   string  $lang  The language to use.
 *
 * @return  array  Array of common terms.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function getCommonWords($lang)
{
    $db    = Factory::getDbo();
    $query = $db->getQuery(true);

    // Select only the term column of the rows that belong to the given language.
    $query->select($db->quoteName('term'));
    $query->from($db->quoteName('#__finder_terms_common'));
    $query->where($db->quoteName('language') . ' = ' . $db->quote($lang));

    $db->setQuery($query);
    $terms = $db->loadColumn();

    return $terms;
}
/**
 * Return the default language of the site.
 *
 * The value is the "site" parameter of com_languages ('en-GB' when that
 * parameter is not set) and is remembered after the first lookup.
 *
 * @return  string  The default language string.
 *
 * @since   2.5
 */
public static function getDefaultLanguage()
{
    static $siteLanguage;

    // Reuse the value found by an earlier call.
    if (!empty($siteLanguage)) {
        return $siteLanguage;
    }

    // com_languages holds the site default language; it is the best guess available.
    $siteLanguage = ComponentHelper::getParams('com_languages')->get('site', 'en-GB');

    return $siteLanguage;
}
/**
 * Reduce a language/locale key to its simple language string.
 *
 * Uses \Locale::getPrimaryLanguage() when it is callable and otherwise cuts
 * the key at its first '-'. Results are cached per key.
 *
 * @param   string  $lang  The language/locale key. For example: en-GB
 *
 * @return  string  The simple language string. For example: en
 *
 * @since   2.5
 */
public static function getPrimaryLanguage($lang)
{
    static $primaryByKey = [];

    // Answer from the cache when this key has been parsed before.
    if (isset($primaryByKey[$lang])) {
        return $primaryByKey[$lang];
    }

    if (is_callable(['Locale', 'getPrimaryLanguage'])) {
        // Preferred: let the Locale class extract the language.
        $primary = \Locale::getPrimaryLanguage($lang);
    } else {
        // Fallback: take the part of the key in front of the first hyphen.
        $hyphenAt = StringHelper::strpos($lang, '-');
        $primary  = StringHelper::substr($lang, 0, $hyphenAt);
    }

    $primaryByKey[$lang] = $primary;

    return $primary;
}
/**
 * Let the finder plugins add extra data to an item before it is indexed.
 * This is how Comments, Tags, Labels, etc. become available to Finder.
 *
 * Imports the "finder" plugin group and triggers the onPrepareFinderContent
 * event with the item.
 *
 * @param   Result  $item  The item to index as a Result object.
 *
 * @return  boolean  Always true.
 *
 * @since   2.5
 * @throws  \Exception on database error.
 */
public static function getContentExtras(Result $item)
{
    // Make sure the finder plugins are loaded before the event is fired.
    PluginHelper::importPlugin('finder');

    $app = Factory::getApplication();
    $app->triggerEvent('onPrepareFinderContent', [&$item]);

    return true;
}
/**
 * Method to process content text using the onContentPrepare event trigger.
 *
 * The text is placed on a mock Content table object, optionally enriched with
 * the data of the item being indexed, passed through the content plugins and
 * read back afterwards.
 *
 * @param   string    $text    The content to process.
 * @param   Registry  $params  The parameters object. [optional]
 * @param   ?Result   $item    The item which get prepared. [optional]
 *
 * @return  string  The processed content.
 *
 * @since   2.5
 */
public static function prepareContent($text, $params = null, ?Result $item = null)
{
    static $loaded;

    // Load the content plugins if necessary.
    if (empty($loaded)) {
        PluginHelper::importPlugin('content');
        $loaded = true;
    }

    // Instantiate the parameter object if necessary.
    if (!($params instanceof Registry)) {
        $params = new Registry($params);
    }

    // Create a mock content object.
    $content       = Table::getInstance('Content');
    $content->text = $text;

    if ($item) {
        // Copy the item's own properties first, then its elements on top of them.
        $content->bind((array) $item);
        $content->bind($item->getElements());

        // Carry the item's context over so plugins can see where the content comes from.
        if (!empty($item->context)) {
            $content->context = $item->context;
        }
    }

    // Fire the onContentPrepare event.
    Factory::getApplication()->triggerEvent('onContentPrepare', ['com_finder.indexer', &$content, &$params, 0]);

    return $content->text;
}
}