<?php
    
#
    # An implementation of the Porter Stemming Algorithm in PHP
    # ( see http://www.tartarus.org/~martin/PorterStemmer/ )
    #
    # (C)2005 Cal Henderson, <cal@iamcal.com>
    #
    # usage:   $stem = Porter_Stemmer::stem($word);
    #


    #
    # PHP4 doesn't have class variables, so we'll use the GLOBALS stash
    #

    
$GLOBALS['Porter_Stemmer_step2list'] = array(
        
'ational'=>'ate',
        
'tional'=>'tion',
        
'enci'=>'ence',
        
'anci'=>'ance',
        
'izer'=>'ize',
        
'bli'=>'ble',
        
'alli'=>'al',
        
'entli'=>'ent',
        
'eli'=>'e',
        
'ousli'=>'ous',
        
'ization'=>'ize',
        
'ation'=>'ate',
        
'ator'=>'ate',
        
'alism'=>'al',
        
'iveness'=>'ive',
        
'fulness'=>'ful',
        
'ousness'=>'ous',
        
'aliti'=>'al',
        
'iviti'=>'ive',
        
'biliti'=>'ble',
        
'logi'=>'log'
    
);

    
$GLOBALS['Porter_Stemmer_step3list'] = array(
        
'icate'=>'ic',
        
'ative'=>'',
        
'alize'=>'al',
        
'iciti'=>'ic',
        
'ical'=>'ic',
        
'ful'=>'',
        
'ness'=>''
    
);

    
$GLOBALS['Porter_Stemmer_r'] = array(
        
'c'    => '[^aeiou]',            # consonant
        
'v'    => '[aeiouy]',            # vowel
        
'c2'    => '[^aeiou][^aeiouy]*',    # consonant sequence
        
'v2'    => '[aeiouy][aeiou]*',        # vowel sequence

        
'mgr0'    => '^([^aeiou][^aeiouy]*)?[aeiouy][aeiou]*[^aeiou][^aeiouy]*',                        # [C]VC... is m>0
        
'meq1'    => '^([^aeiou][^aeiouy]*)?[aeiouy][aeiou]*[^aeiou][^aeiouy]*([aeiouy][aeiou]*)?$',            # [C]VC[V] is m=1
        
'mgr1'    => '^([^aeiou][^aeiouy]*)?[aeiouy][aeiou]*[^aeiou][^aeiouy]*[aeiouy][aeiou]*[^aeiou][^aeiouy]*',    # [C]VC[V] is m=1
        
'_v'    => '^([^aeiou][^aeiouy]*)?[aeiouy]',                                    # vowel in stem
    
);


    
#
    # the class really just puts the method in a namespace
    #

    
class Porter_Stemmer {

        
#
        # utility function to chop {x} chars from the end of a string
        #

        
function end_chop($str$len){
            return 
substr($str0strlen($str)-$len);
        }

        
#
        # the meat is here
        #

        
function stem($w){

            
# load into scope
            
$r =& $GLOBALS['Porter_Stemmer_r'];
            
$step2list =& $GLOBALS['Porter_Stemmer_step2list'];
            
$step3list =& $GLOBALS['Porter_Stemmer_step3list'];


            
$w StrToLower($w);

            
#
            # length at least 3
            #

            
if (strlen($w) < 3) {
                return 
$w;
            }

            
#
            # now map initial y to Y so that the patterns never treat it as vowel:
            #

            
$firstch substr($w01);

            if (
$firstch == 'y') {
                
$w ucfirst($w);
            }

            
#
            # Step 1a
            #

            
if (preg_match('/(ss|i)es$/'$w)){
                
$w Porter_Stemmer::end_chop($w2);
            }

            else if (
preg_match('/([^s])s$/'$w)){
                
$w Porter_Stemmer::end_chop($w1);
            }

            
#
            # Step 1b
            #

            
if (preg_match('/eed$/'$w)) {
                
$pre Porter_Stemmer::end_chop($w3);

                if (
preg_match("/$r[mgr0]/"$pre)) {
                    
$w Porter_Stemmer::end_chop($w1);
                }
            }

            else if (
preg_match('/(ed|ing)$/'$w$m1)){

                
$stem Porter_Stemmer::end_chop($wstrlen($m1[1]));

                if (
preg_match("/$r[_v]/"$stem)){

                    
$w $stem;

                    if (
preg_match('/(at|bl|iz)$/'$w)) {
                        
$w .= "e";
                    }

                    else if (
preg_match('/([^aeiouylsz])\1$/'$w)) {
                        
$w Porter_Stemmer::end_chop($w1);
                    }

                    else if (
preg_match("/^$r[c2]$r[v][^aeiouwxy]$/"$w)) {
                        
$w .= "e";
                    }
                }
            }

            
#
            # Step 1c
            #

            
if (preg_match('/y$/'$w)) {

                
$stem Porter_Stemmer::end_chop($w1);

                if (
preg_match("/$r[_v]/"$stem)){

                    
$w $stem."i";
                }
            }

            
#
            # Step 2
            #

            
if (preg_match('/(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/'$w$m1)){

                
$suffix $m1[1];
                
$stem Porter_Stemmer::end_chop($wstrlen($suffix));

                if (
preg_match("/$r[mgr0]/"$stem)) {
                    
$w $stem $step2list[$suffix];
                }
            }

            
#
            # Step 3
            #

            
if (preg_match('/(icate|ative|alize|iciti|ical|ful|ness)$/'$w$m1)){

                
$suffix $m1[1];
                
$stem Porter_Stemmer::end_chop($wstrlen($suffix));

                if (
preg_match("/$r[mgr0]/"$stem)) {
                    
$w $stem $step3list[$suffix];
                }
            }

            
#
            # Step 4
            #

            
if (preg_match('/(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/'$w$m1)){

                
$stem Porter_Stemmer::end_chop($wstrlen($m1[1]));

                if (
preg_match("/$r[mgr1]/"$stem)) {
                    
$w $stem;
                }
            }

            else if (
preg_match('/(s|t)(ion)$/'$w$m1)){

                
$stem Porter_Stemmer::end_chop($wstrlen($m1[2]));

                if (
preg_match("/$r[mgr1]/"$stem)) {
                    
$w $stem;
                }
            }

            
#
            #  Step 5
            #

            
if (preg_match('/e$/'$w)) {

                
$stem Porter_Stemmer::end_chop($w1);

                
$b1 preg_match("/$r[mgr1]/"$stem);
                
$b2 preg_match("/$r[meq1]/"$stem);
                
$b3 preg_match("/^$r[c2]$r[v][^aeiouwxy]$/"$stem);

                if (
$b1 || ($b2 && !$b3)) {
                    
$w $stem;
                }
            }

            if (
preg_match('/ll$/'$w) && preg_match("/$r[mgr1]/"$w)) {
                
$w Porter_Stemmer::end_chop($w1);
            }


            
#
            # and turn initial Y back to y
            #

            
if ($firstch == 'y') {
                
$w lcfirst($w);
            }

            return 
$w;
        }

    }

?>