Current File : /home/pacjaorg/public_html/km/administrator/components/com_finder/src/Indexer/Helper.php
<?php

/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

namespace Joomla\Component\Finder\Administrator\Indexer;

use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Factory;
use Joomla\CMS\Language\Multilanguage;
use Joomla\CMS\Plugin\PluginHelper;
use Joomla\CMS\Table\Table;
use Joomla\Registry\Registry;
use Joomla\String\StringHelper;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects

/**
 * Helper class for the Finder indexer package.
 *
 * @since  2.5
 */
class Helper
{
    /**
     * Method to parse input into plain text.
     *
     * @param   string  $input   The raw input.
     * @param   string  $format  The format of the input. [optional]
     *
     * @return  string  The parsed input.
     *
     * @since   2.5
     * @throws  \Exception on invalid parser.
     */
    public static function parse($input, $format = 'html')
    {
        // Get a parser for the specified format and parse the input.
        return Parser::getInstance($format)->parse($input);
    }

    /**
     * Method to tokenize a text string.
     *
     * @param   string   $input   The input to tokenize.
     * @param   string   $lang    The language of the input.
     * @param   boolean  $phrase  Flag to indicate whether input could be a phrase. [optional]
     *
     * @return  Token[]  An array of Token objects.
     *
     * @since   2.5
     */
    public static function tokenize($input, $lang, $phrase = false)
    {
        static $cache = [], $tuplecount;
        static $multilingual;
        static $defaultLanguage;

        if (!$tuplecount) {
            $params     = ComponentHelper::getParams('com_finder');
            $tuplecount = $params->get('tuplecount', 1);
        }

        if (is_null($multilingual)) {
            $multilingual = Multilanguage::isEnabled();
            $config       = ComponentHelper::getParams('com_finder');

            if ($config->get('language_default', '') == '') {
                $defaultLang = '*';
            } elseif ($config->get('language_default', '') == '-1') {
                $defaultLang = self::getDefaultLanguage();
            } else {
                $defaultLang = $config->get('language_default');
            }

            /*
             * The default language always has the language code '*'.
             * In order to not overwrite the language code of the language
             * object that we are using, we are cloning it here.
             */
            $obj                       = Language::getInstance($defaultLang);
            $defaultLanguage           = clone $obj;
            $defaultLanguage->language = '*';
        }

        if (!$multilingual || $lang == '*') {
            $language = $defaultLanguage;
        } else {
            $language = Language::getInstance($lang);
        }

        if (!isset($cache[$lang])) {
            $cache[$lang] = [];
        }

        $tokens = [];
        $terms  = $language->tokenise($input);

        // @todo: array_filter removes any number 0's from the terms. Not sure this is entirely intended
        $terms = array_filter($terms);
        $terms = array_values($terms);

        /*
         * If we have to handle the input as a phrase, that means we don't
         * tokenize the individual terms and we do not create the two and three
         * term combinations. The phrase must contain more than one word!
         */
        if ($phrase === true && count($terms) > 1) {
            // Create tokens from the phrase.
            $tokens[] = new Token($terms, $language->language, $language->spacer);
        } else {
            // Create tokens from the terms.
            for ($i = 0, $n = count($terms); $i < $n; $i++) {
                if (isset($cache[$lang][$terms[$i]])) {
                    $tokens[] = $cache[$lang][$terms[$i]];
                } else {
                    $token                    = new Token($terms[$i], $language->language);
                    $tokens[]                 = $token;
                    $cache[$lang][$terms[$i]] = $token;
                }
            }

            // Create multi-word phrase tokens from the individual words.
            if ($tuplecount > 1) {
                for ($i = 0, $n = count($tokens); $i < $n; $i++) {
                    $temp = [$tokens[$i]->term];

                    // Create tokens for 2 to $tuplecount length phrases
                    for ($j = 1; $j < $tuplecount; $j++) {
                        if ($i + $j >= $n || !isset($tokens[$i + $j])) {
                            break;
                        }

                        $temp[] = $tokens[$i + $j]->term;
                        $key    = implode('::', $temp);

                        if (isset($cache[$lang][$key])) {
                            $tokens[] = $cache[$lang][$key];
                        } else {
                            $token              = new Token($temp, $language->language, $language->spacer);
                            $token->derived     = true;
                            $tokens[]           = $token;
                            $cache[$lang][$key] = $token;
                        }
                    }
                }
            }
        }

        // Prevent the cache to fill up the memory
        while (count($cache[$lang]) > 1024) {
            /**
             * We want to cache the most common words/tokens. At the same time
             * we don't want to cache too much. The most common words will also
             * be early in the text, so we are dropping all terms/tokens which
             * have been cached later.
             */
            array_pop($cache[$lang]);
        }

        return $tokens;
    }

    /**
     * Method to get the base word of a token.
     *
     * @param   string  $token  The token to stem.
     * @param   string  $lang   The language of the token.
     *
     * @return  string  The root token.
     *
     * @since   2.5
     */
    public static function stem($token, $lang)
    {
        static $multilingual;
        static $defaultStemmer;

        if (is_null($multilingual)) {
            $multilingual = Multilanguage::isEnabled();
            $config       = ComponentHelper::getParams('com_finder');

            if ($config->get('language_default', '') == '') {
                $defaultStemmer = Language::getInstance('*');
            } elseif ($config->get('language_default', '') == '-1') {
                $defaultStemmer = Language::getInstance(self::getDefaultLanguage());
            } else {
                $defaultStemmer = Language::getInstance($config->get('language_default'));
            }
        }

        if (!$multilingual || $lang == '*') {
            $language = $defaultStemmer;
        } else {
            $language = Language::getInstance($lang);
        }

        return $language->stem($token);
    }

    /**
     * Method to add a content type to the database.
     *
     * @param   string  $title  The type of content. For example: PDF
     * @param   string  $mime   The mime type of the content. For example: PDF [optional]
     *
     * @return  integer  The id of the content type.
     *
     * @since   2.5
     * @throws  \Exception on database error.
     */
    public static function addContentType($title, $mime = null)
    {
        static $types;

        $db    = Factory::getDbo();
        $query = $db->getQuery(true);

        // Check if the types are loaded.
        if (empty($types)) {
            // Build the query to get the types.
            $query->select('*')
                ->from($db->quoteName('#__finder_types'));

            // Get the types.
            $db->setQuery($query);
            $types = $db->loadObjectList('title');
        }

        // Check if the type already exists.
        if (isset($types[$title])) {
            return (int) $types[$title]->id;
        }

        // Add the type.
        $query->clear()
            ->insert($db->quoteName('#__finder_types'))
            ->columns([$db->quoteName('title'), $db->quoteName('mime')])
            ->values($db->quote($title) . ', ' . $db->quote($mime ?? ''));
        $db->setQuery($query);
        $db->execute();

        // Cache the result
        $type        = new \stdClass();
        $type->title = $title;
        $type->mime  = $mime ?? '';
        $type->id    = (int) $db->insertid();

        $types[$title] = $type;

        // Return the new id.
        return $type->id;
    }

    /**
     * Method to check if a token is common in a language.
     *
     * @param   string  $token  The token to test.
     * @param   string  $lang   The language to reference.
     *
     * @return  boolean  True if common, false otherwise.
     *
     * @since   2.5
     */
    public static function isCommon($token, $lang)
    {
        static $data = [], $default, $multilingual;

        if (is_null($multilingual)) {
            $multilingual = Multilanguage::isEnabled();
            $config       = ComponentHelper::getParams('com_finder');

            if ($config->get('language_default', '') == '') {
                $default = '*';
            } elseif ($config->get('language_default', '') == '-1') {
                $default = self::getPrimaryLanguage(self::getDefaultLanguage());
            } else {
                $default = self::getPrimaryLanguage($config->get('language_default'));
            }
        }

        if (!$multilingual || $lang == '*') {
            $lang = $default;
        }

        // Load the common tokens for the language if necessary.
        if (!isset($data[$lang])) {
            $data[$lang] = self::getCommonWords($lang);
        }

        // Check if the token is in the common array.
        return in_array($token, $data[$lang], true);
    }

    /**
     * Method to get an array of common terms for a language.
     *
     * @param   string  $lang  The language to use.
     *
     * @return  array  Array of common terms.
     *
     * @since   2.5
     * @throws  \Exception on database error.
     */
    public static function getCommonWords($lang)
    {
        $db = Factory::getDbo();

        // Create the query to load all the common terms for the language.
        $query = $db->getQuery(true)
            ->select($db->quoteName('term'))
            ->from($db->quoteName('#__finder_terms_common'))
            ->where($db->quoteName('language') . ' = ' . $db->quote($lang));

        // Load all of the common terms for the language.
        $db->setQuery($query);

        return $db->loadColumn();
    }

    /**
     * Method to get the default language for the site.
     *
     * @return  string  The default language string.
     *
     * @since   2.5
     */
    public static function getDefaultLanguage()
    {
        static $lang;

        // We need to go to com_languages to get the site default language, it's the best we can guess.
        if (empty($lang)) {
            $lang = ComponentHelper::getParams('com_languages')->get('site', 'en-GB');
        }

        return $lang;
    }

    /**
     * Method to parse a language/locale key and return a simple language string.
     *
     * @param   string  $lang  The language/locale key. For example: en-GB
     *
     * @return  string  The simple language string. For example: en
     *
     * @since   2.5
     */
    public static function getPrimaryLanguage($lang)
    {
        static $data = [];

        // Only parse the identifier if necessary.
        if (!isset($data[$lang])) {
            if (is_callable(['Locale', 'getPrimaryLanguage'])) {
                // Get the language key using the Locale package.
                $data[$lang] = \Locale::getPrimaryLanguage($lang);
            } else {
                // Get the language key using string position.
                $data[$lang] = StringHelper::substr($lang, 0, StringHelper::strpos($lang, '-'));
            }
        }

        return $data[$lang];
    }

    /**
     * Method to get extra data for a content before being indexed. This is how
     * we add Comments, Tags, Labels, etc. that should be available to Finder.
     *
     * @param   Result  $item  The item to index as a Result object.
     *
     * @return  boolean  True on success, false on failure.
     *
     * @since   2.5
     * @throws  \Exception on database error.
     */
    public static function getContentExtras(Result $item)
    {
        // Load the finder plugin group.
        PluginHelper::importPlugin('finder');

        Factory::getApplication()->triggerEvent('onPrepareFinderContent', [&$item]);

        return true;
    }

    /**
     * Method to process content text using the onContentPrepare event trigger.
     *
     * @param   string    $text    The content to process.
     * @param   Registry  $params  The parameters object. [optional]
     * @param   ?Result   $item    The item which get prepared. [optional]
     *
     * @return  string  The processed content.
     *
     * @since   2.5
     */
    public static function prepareContent($text, $params = null, Result $item = null)
    {
        static $loaded;

        // Load the content plugins if necessary.
        if (empty($loaded)) {
            PluginHelper::importPlugin('content');
            $loaded = true;
        }

        // Instantiate the parameter object if necessary.
        if (!($params instanceof Registry)) {
            $registry = new Registry($params);
            $params   = $registry;
        }

        // Create a mock content object.
        $content       = Table::getInstance('Content');
        $content->text = $text;

        if ($item) {
            $content->bind((array) $item);
            $content->bind($item->getElements());
        }

        if ($item && !empty($item->context)) {
            $content->context = $item->context;
        }

        // Fire the onContentPrepare event.
        Factory::getApplication()->triggerEvent('onContentPrepare', ['com_finder.indexer', &$content, &$params, 0]);

        return $content->text;
    }
}
Site is undergoing maintenance

PACJA Events

Maintenance mode is on

Site will be available soon. Thank you for your patience!