Current File : /home/pacjaorg/public_html/dnpsom/libraries/vendor/wamania/php-stemmer/src/Stemmer/Finnish.php
<?php
/**
 * Finnish Snowball Stemmer.
 *
 * @author msaari <mikko@mikkosaari.fi>
 */
namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Finnish Snowball Stemmer.
 *
 * @link http://snowball.tartarus.org/algorithms/finnish/stemmer.html
 * @author msaari
 */
class Finnish extends Stem
{
    /**
     * All swedish vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö');

    protected static $consonants = array('b', 'c', 'd', 'f', 'g', 'h', 'j',
    'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z');

    protected static $restrictedVowels = array('a', 'e', 'i', 'o', 'u', 'ä', 'ö');

    /**
     * Long restricted vowels, ie. doubled vowels.
     */
    protected static $longVowels = array('aa', 'ee', 'ii', 'oo', 'uu', 'ää', 'öö');

    private $_removedInStep3 = false;

    /**
     * {@inheritdoc}
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (! UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = Utf8::strtolower($word);

        // R1 and R2 are then defined in the usual way
        $this->r1();
        $this->r2();

        // Do each of steps 1, 2 3, 4, 5 and 6.

        $this->step1();
        $this->step2();
        $this->step3();
        $this->step4();
        $this->step5();
        $this->step6();

        return $this->word;
    }

    /**
     * Step 1
     *
     * Search for the longest among the following suffixes in R1, and perform
     * the action indicated.
     *
     * @return boolean True when something is done.
     */
    private function step1()
    {
        // (a) kin   kaan   kään   ko   kö   han   hän   pa   pä
        //      delete if preceded by n, t or a vowel
        if (($position = $this->searchIfInR1(array('kaan', 'kään', 'kin', 'han', 'hän', 'ko', 'kö', 'pa', 'pä'))) !== false) {
            $lastLetter = Utf8::substr($this->word, ($position-1), 1);

            if (in_array($lastLetter, array_merge(['t', 'n'], self::$vowels))) {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->r1();
                $this->r2();
            }

            return true;
        }

        //  sti
        //  delete if in R2
        if (($position = $this->searchIfInR1(array('sti'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->r1();
                $this->r2();
            }

            return true;
        }
    }

    /**
     * Step 2: possessives.
     *
     * Search for the longest among the following suffixes in R1, and perform
     * the action indicated.
     *
     * @return boolean True when something is done.
     */
    private function step2()
    {
        // si
        //  delete if not preceded by k
        if (($position = $this->searchIfInR1(array('si'))) !== false) {
            $lastLetter = Utf8::substr($this->word, ($position-1), 1);

            if ($lastLetter !== 'k') {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // ni
        //  delete
        if (($position = $this->searchIfInR1(array('ni'))) !== false) {
            $this->word = Utf8::substr($this->word, 0, $position);
            // if preceded by kse, replace with ksi
            if ( ($position = $this->search(array('kse'))) !== false) {
                $this->word = preg_replace('#(kse)$#u', 'ksi', $this->word);
            }
            $this->r1();
            $this->r2();
            return true;
        }

        // nsa   nsä   mme   nne
        //  delete
        if (($position = $this->searchIfInR1(array('nsa', 'nsä', 'mme', 'nne'))) !== false) {
            $this->word = Utf8::substr($this->word, 0, $position);
            $this->r1();
            $this->r2();
            return true;
        }

        // an
        //  delete if preceded by one of   ta   ssa   sta   lla   lta   na
        if (($position = $this->searchIfInR1(array('an'))) !== false) {
            $word = Utf8::substr($this->word, 0, $position);
            $lastThreeLetters = Utf8::substr($word, -3, 3);
            $lastTwoLetters = Utf8::substr($word, -2, 2);
            if (in_array($lastThreeLetters, array('ssa', 'sta', 'lla', 'lta'), true) || in_array($lastTwoLetters, array('na', 'ta'), true)) {
                $this->word = $word;
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // än
        // delete if preceded by one of   tä   ssä   stä   llä   ltä   nä
        if (($position = $this->searchIfInR1(array('än'))) !== false) {
            $word = Utf8::substr($this->word, 0, $position);
            $lastThreeLetters = Utf8::substr($word, -3, 3);
            $lastTwoLetters = Utf8::substr($word, -2, 2);
            if (in_array($lastThreeLetters, array('ssä', 'stä', 'llä', 'ltä'), true) || in_array($lastTwoLetters, array('nä', 'tä'), true)) {
                $this->word = $word;
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // en
        // delete if preceded by one of   lle   ine
        if (($position = $this->searchIfInR1(array('en'))) !== false) {
            $word = Utf8::substr($this->word, 0, $position);
            if (Utf8::strlen($this->word) > 4) {
                $lastThreeLetters = Utf8::substr($this->word, -5, 3);
                if (in_array($lastThreeLetters, array('lle', 'ine'), true)) {
                    $this->word = $word;
                    $this->r1();
                    $this->r2();
                    return true;
                }
            }
        }
    }

    /**
     * Step 3: cases
     *
     * Search for the longest among the following suffixes in R1, and perform
     * the action indicated.
     *
     * @return boolean True when something is done.
     */
    private function step3()
    {
        // hXn
        // delete if preceded by X, where X is a V other than u (a/han, e/hen etc)
        foreach (self::$restrictedVowels as $vowel) {
            if ($vowel === 'u') {
                continue;
            }
            if (($position = $this->searchIfInR1(array('h' . $vowel . 'n'))) !== false) {
                $lastLetter = Utf8::substr($this->word, $position-1, 1);
                if ($lastLetter === $vowel) {
                    $this->word = Utf8::substr($this->word, 0, $position);
                    $this->_removedInStep3 = true;
                    $this->r1();
                    $this->r2();
                }
                return true;
            }
        }

        // siin   den   tten
        // delete if preceded by Vi
        if (($position = $this->searchIfInR1(array('siin', 'den', 'tten'))) !== false) {
            $lastLetter = Utf8::substr($this->word, ($position-1), 1);
            if ($lastLetter === 'i') {
                $nextLastLetter = Utf8::substr($this->word, ($position-2), 1);
                if (in_array($nextLastLetter, self::$restrictedVowels, true)) {
                    $this->word = Utf8::substr($this->word, 0, $position);
                    $this->_removedInStep3 = true;
                    $this->r1();
                    $this->r2();
                    return true;
                }
            }
        }

        // seen
        // delete if preceded by LV
        if (($position = $this->searchIfInR1(array('seen'))) !== false) {
            $lastLetters = Utf8::substr($this->word, ($position-2), 2);

            if (in_array($lastLetters, self::$longVowels, true)) {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->_removedInStep3 = true;
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // tta    ttä
        // delete if preceded by e
        if (($position = $this->searchIfInR1(array('tta', 'ttä'))) !== false) {
            $lastLetter = Utf8::substr($this->word, ($position-1), 1);

            if ($lastLetter === 'e') {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->_removedInStep3 = true;
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // ta  tä  ssa  ssä  sta  stä  lla  llä  lta  ltä  lle  na  nä  ksi  ine
        // delete
        if (($position = $this->searchIfInR1(array('ssa', 'ssä', 'sta', 'stä', 'lla', 'llä', 'lta', 'ltä', 'lle', 'ksi', 'na', 'nä', 'ine', 'ta', 'tä'))) !== false) {
            $this->word = Utf8::substr($this->word, 0, $position);
            $this->_removedInStep3 = true;
            $this->r1();
            $this->r2();
            return true;
        }

        // a    ä
        // delete if preceded by cv
        if (($position = $this->searchIfInR1(array('a', 'ä'))) !== false) {
            $lastLetter = Utf8::substr($this->word, ($position-1), 1);
            $nextLastLetter = Utf8::substr($this->word, ($position-2), 1);

            if (in_array($lastLetter, self::$vowels, true) && in_array($nextLastLetter, self::$consonants, true)) {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->_removedInStep3 = true;
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // n
        // delete, and if preceded by LV or ie, delete the last vowel
        if (($position = $this->searchIfInR1(array('n'))) !== false) {
            $lastLetters = Utf8::substr($this->word, ($position-2), 2);

            if (in_array($lastLetters, self::$longVowels, true) || $lastLetters === 'ie') {
                $this->word = Utf8::substr($this->word, 0, $position-1);
            } else {
                $this->word = Utf8::substr($this->word, 0, $position);
            }
            $this->r1();
            $this->r2();
            $this->_removedInStep3 = true;
            return true;
        }
    }

    /**
     * Step 4: other endings
     *
     * Search for the longest among the following suffixes in R2, and perform
     * the action indicated
     *
     * @return boolean True when something is done.
     */
    private function step4()
    {
        // mpi   mpa   mpä   mmi   mma   mmä
        // delete if not preceded by po
        if (($position = $this->searchIfInR2(array('mpi', 'mpa', 'mpä', 'mmi', 'mma', 'mmä'))) !== false) {
            $lastLetters = Utf8::substr($this->word, ($position-2), 2);
            if ($lastLetters !== 'po') {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->r1();
                $this->r2();
                return true;
            }
        }

        // impi   impa   impä   immi   imma   immä   eja   ejä
        // delete
        if (($position = $this->searchIfInR2(array('impi', 'impa', 'impä', 'immi', 'imma', 'immä', 'eja', 'ejä'))) !== false) {
            $this->word = Utf8::substr($this->word, 0, $position);
            $this->r1();
            $this->r2();
            return true;
        }
    }

    /**
     * Step 5: plurals
     * If an ending was removed in step 3, delete a final i or j if in R1;
     * otherwise,
     * if an ending was not removed in step 3, delete a final t in R1 if it
     * follows a vowel, and, if a t is removed, delete a final mma or imma in
     * R2, unless the mma is preceded by po.
     *
     * @return boolean True when something is done.
     */
    private function step5()
    {
        if ($this->_removedInStep3) {
            if (($position = $this->searchIfInR1(array('i', 'j'))) !== false) {
                $this->word = Utf8::substr($this->word, 0, $position);
                $this->r1();
                $this->r2();
                return true;
            }
        } else {
            if (($position = $this->searchIfInR1(array('t'))) !== false) {
                $lastLetter = Utf8::substr($this->word, ($position-1), 1);
                if (in_array($lastLetter, self::$vowels, true)) {
                    $this->word = Utf8::substr($this->word, 0, $position);
                    $this->r1();
                    $this->r2();
                    if (($position2 = $this->searchIfInR2(array('imma'))) !== false) {
                        $this->word = Utf8::substr($this->word, 0, $position2);
                        $this->r1();
                        $this->r2();
                        return true;
                    } elseif (($position2 = $this->searchIfInR2(array('mma'))) !== false) {
                        $lastLetters = Utf8::substr($this->word, ($position2-2), 2);
                        if ($lastLetters !== 'po') {
                            $this->word = Utf8::substr($this->word, 0, $position2);
                            $this->r1();
                            $this->r2();
                            return true;
                        }
                    }
                }
            }
        }

    }

    /**
     * Step 6: tidying up
     *
     * Do in turn steps (a), (b), (c), (d), restricting all tests to the
     * region R1.
     */
    private function step6()
    {
        // a) If R1 ends LV
        // delete the last letter
        if (($position = $this->searchIfInR1(self::$longVowels)) !== false) {
            $this->word = Utf8::substr($this->word, 0, $position+1);
            $this->r1();
            $this->r2();
        }

        // b) If R1 ends cX, c a consonant and X one of   a   ä   e   i,
        // delete the last letter
        $lastLetter = Utf8::substr($this->r1, -1, 1);
        $secondToLastLetter = Utf8::substr($this->r1, -2, 1);
        if (in_array($secondToLastLetter, self::$consonants, true) && in_array($lastLetter, array('a', 'e', 'i', 'ä'))) {
            $this->word = Utf8::substr($this->word, 0, -1);
            $this->r1();
            $this->r2();
        }

        // c) If R1 ends oj or uj
        // delete the last letter
        $twoLastLetters = Utf8::substr($this->r1, -2, 2);
        if (in_array($twoLastLetters, array('oj', 'uj'))) {
            $this->word = Utf8::substr($this->word, 0, -1);
            $this->r1();
            $this->r2();
        }

        // d) If R1 ends jo
        // delete the last letter
        $twoLastLetters = Utf8::substr($this->r1, -2, 2);
        if ($twoLastLetters === 'jo') {
            $this->word = Utf8::substr($this->word, 0, -1);
            $this->r1();
            $this->r2();
        }

        // e) If the word ends with a double consonant followed by zero or more
        // vowels, remove the last consonant (so eläkk -> eläk,
        // aatonaatto -> aatonaato)
        $endVowels = '';
        for ($i = Utf8::strlen($this->word) - 1; $i > 0; $i--) {
            $letter = Utf8::substr($this->word, $i, 1);
            if (in_array($letter, self::$vowels, true)) {
                $endVowels = $letter . $endVowels;
            } else {
                // check for double consonant
                $prevLetter = Utf8::substr($this->word, $i-1, 1);
                if ($prevLetter === $letter) {
                    $this->word = Utf8::substr($this->word, 0, $i) . $endVowels;
                }
                break;
            }
        }
    }
}
Site is undergoing maintenance

PACJA Events

Maintenance mode is on

Site will be available soon. Thank you for your patience!