routines ( prelude postlude mark_regions RV R1 R2 standard_suffix i_verb_suffix verb_suffix residual_suffix un_double un_accent ) externals ( stem ) integers ( pV p1 p2 ) groupings ( v keep_with_s ) stringescapes {} /* special characters */ stringdef a^ '{U+00E2}' // a-circumflex stringdef a` '{U+00E0}' // a-grave stringdef cc '{U+00E7}' // c-cedilla stringdef e" '{U+00EB}' // e-diaeresis (rare) stringdef e' '{U+00E9}' // e-acute stringdef e^ '{U+00EA}' // e-circumflex stringdef e` '{U+00E8}' // e-grave stringdef i" '{U+00EF}' // i-diaeresis stringdef i^ '{U+00EE}' // i-circumflex stringdef o^ '{U+00F4}' // o-circumflex stringdef u^ '{U+00FB}' // u-circumflex stringdef u` '{U+00F9}' // u-grave define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}' define prelude as repeat goto ( ( v [ ('u' ] v <- 'U') or ('i' ] v <- 'I') or ('y' ] <- 'Y') ) or ( [ '{e"}' ] <- 'He' ) or ( [ '{i"}' ] <- 'Hi' ) or ( ['y'] v <- 'Y' ) or ( 'q' ['u'] <- 'U' ) ) define mark_regions as ( \$pV = limit \$p1 = limit \$p2 = limit // defaults do ( ( v v next ) or among ( // this exception list begun Nov 2006 'par' // paris, parie, pari 'col' // colis 'tap' // tapis // extensions possible here ) or ( next gopast v ) setmark pV ) do ( gopast v gopast non-v setmark p1 gopast v gopast non-v setmark p2 ) ) define postlude as repeat ( [substring] among( 'I' (<- 'i') 'U' (<- 'u') 'Y' (<- 'y') 'He' (<- '{e"}') 'Hi' (<- '{i"}') 'H' (delete) '' (next) ) ) backwardmode ( define RV as \$pV <= cursor define R1 as \$p1 <= cursor define R2 as \$p2 <= cursor define standard_suffix as ( [substring] among( 'ance' 'iqUe' 'isme' 'able' 'iste' 'eux' 'ances' 'iqUes' 'ismes' 'ables' 'istes' ( R2 delete ) 'atrice' 'ateur' 'ation' 'atrices' 'ateurs' 'ations' ( R2 delete try ( ['ic'] (R2 delete) or <-'iqU' ) ) 'logie' 'logies' ( R2 <- 'log' ) 'usion' 'ution' 'usions' 'utions' ( R2 <- 'u' ) 'ence' 'ences' ( R2 <- 'ent' ) 'ement' 'ements' ( RV delete try ( [substring] among( 'iv' (R2 delete ['at'] R2 delete) 'eus' ((R2 delete) or (R1<-'eux')) 'abl' 'iqU' (R2 delete) 'i{e`}r' 'I{e`}r' //) (RV <-'i') //)--new 2 Sept 02 ) ) ) 'it{e'}' 'it{e'}s' ( R2 delete try ( [substring] among( 'abil' ((R2 delete) or <-'abl') 'ic' ((R2 delete) or <-'iqU') 'iv' (R2 delete) ) ) ) 'if' 'ive' 'ifs' 'ives' ( R2 delete try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' ) ) 'eaux' (<- 'eau') 'aux' (R1 <- 'al') 'euse' 'euses'((R2 delete) or (R1<-'eux')) 'issement' 'issements'(R1 non-v delete) // verbal // fail(...) below forces entry to verb_suffix. -ment typically // follows the p.p., e.g 'confus{e'}ment'. 'amment' (RV fail(<- 'ant')) 'emment' (RV fail(<- 'ent')) 'ment' 'ments' (test(v RV) fail(delete)) // v is e,i,u,{e'},I or U ) ) define i_verb_suffix as setlimit tomark pV for ( [substring] among ( '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai' 'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez' 'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait' 'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses' 'issez' 'issiez' 'issions' 'issons' 'it' (not 'H' non-v delete) ) ) define verb_suffix as setlimit tomark pV for ( [substring] among ( 'ions' (R2 delete) '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai' 'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions' 'erons' 'eront' 'ez' 'iez' // 'ons' //-best omitted (delete) '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant' 'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez' 'assions' (delete try(['e'] delete) ) ) ) define keep_with_s 'aiou{e`}s' define residual_suffix as ( try(['s'] test ('Hi' or non-keep_with_s) delete) setlimit tomark pV for ( [substring] among( 'ion' (R2 's' or 't' delete) 'ier' 'i{e`}re' 'Ier' 'I{e`}re' (<-'i') 'e' (delete) ) ) ) define un_double as ( test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete ) define un_accent as ( atleast 1 non-v [ '{e'}' or '{e`}' ] <-'e' ) ) define stem as ( do prelude do mark_regions backwards ( do ( ( ( standard_suffix or i_verb_suffix or verb_suffix ) and try( [ ('Y' ] <- 'i' ) or ('{cc}'] <- 'c' ) ) ) or residual_suffix ) // try(['ent'] RV delete) // is best omitted do un_double do un_accent ) do postlude )