/* Dutch stemming algorithm developed by Wessel Kraaij and Renée Pohlmann */

/*
 * Layout note: every comment in this file uses the block form so that a
 * comment can never absorb the code that follows it if line breaks are lost.
 */

strings ( ch )
integers ( p1 p2 )
booleans ( stemmed GE_removed )

routines (
    R1 R2
    C V VX
    lengthen_V
    Step_1 Step_2 Step_3 Step_4 Step_7
    Step_6 Step_1c
    Lose_prefix
    Lose_infix
    measure
)

externals ( stem )

groupings ( v v_WX A AEIOU AIOU E I O U )

stringescapes {}

/* special characters */

stringdef a`   '{U+00E0}'
stringdef a'   '{U+00E1}'
stringdef a^   '{U+00E2}'
stringdef a"   '{U+00E4}'
stringdef e`   '{U+00E8}'
stringdef e'   '{U+00E9}'
stringdef e^   '{U+00EA}'
stringdef e"   '{U+00EB}'
stringdef i`   '{U+00EC}'
stringdef i'   '{U+00ED}'
stringdef i^   '{U+00EE}'
stringdef i"   '{U+00EF}'
stringdef o`   '{U+00F2}'
stringdef o'   '{U+00F3}'
stringdef o^   '{U+00F4}'
stringdef o"   '{U+00F6}'
stringdef u`   '{U+00F9}'
stringdef u'   '{U+00FA}'
stringdef u^   '{U+00FB}'
stringdef u"   '{U+00FC}'

/* Per-letter vowel groupings: the plain vowel plus its accented forms. */
define A       'a{a"}{a'}{a`}{a^}'
define E       'e{e"}{e'}{e`}{e^}'
define I       'i{i"}{i'}{i`}{i^}'
define O       'o{o"}{o'}{o`}{o^}'
define U       'u{u"}{u'}{u`}{u^}'

/* Combined groupings built from the per-letter ones. */
define AIOU    A + I + O + U
define AEIOU   A + E + I + O + U
define v       AEIOU + 'y'
define v_WX    v + 'wx'

backwardmode (

    /* Region tests: succeed when the cursor is at or beyond mark p1 / p2. */
    define R1 as ($p1 <= cursor)
    define R2 as ($p2 <= cursor)

    /* V:  the text at the cursor is a `v` character or 'ij' (cursor kept). */
    define V as test (v or 'ij')

    /* VX: as V, but checked after skipping one character (cursor kept). */
    define VX as test (next v or 'ij')

    /* C:  the text at the cursor is not 'ij' and is a non-`v` character
           (cursor kept). */
    define C as test (not 'ij' non-v)

    /*
     * lengthen_V: after one character outside v_WX, look at the vowel before
     * it and, when the listed context tests pass, copy that vowel into `ch`
     * and insert it (doubling the vowel).  Wrapped in `do`, so it never
     * signals failure to its caller.
     */
    define lengthen_V as do (
        non-v_WX
        [substring] among (
            'a' '{a"}' '{a'}' '{a`}' '{a^}'
            'o' '{o"}' '{o'}' '{o`}' '{o^}'
            'u' '{u"}' '{u'}' '{u`}' '{u^}'
                (test (non-AEIOU or atlimit) ->ch insert ch)
            'e' '{e'}' '{e`}' '{e^}'
                (test (non-AEIOU or atlimit
                       not (AIOU or (E atlimit))
                       not (next AIOU non-AEIOU))
                 ->ch insert ch)
            'e{e"}' (<-'e{e"}e')
            'i{e"}' (<-'iee')
        )
    )

    /* Step_1: handles the endings 's, s, ies, es, és, aus, en and nde. */
    define Step_1 as (
        [substring] among (
            '{'}s' (delete)
            's' (R1 not ('t' R1) C delete)
            'ies' (R1 <-'ie')
            'es' (
                (test ('ar' R1 C) delete lengthen_V) or
                (test ('er' R1 C) delete) or
                (R1 C <-'e')
            )
            '{e'}s' (R1 <-'{e'}')
            'aus' (R1 V <-'au')
            'en' (
                ('hed' R1 ] <-'heid') or
                ('nd' delete) or
                ('d' R1 C ] delete) or
                ('i' or 'j' V delete) or
                (R1 C delete lengthen_V)
            )
            'nde' (<-'nd')
        )
    )

    /* Step_2: handles endings that finish in 'e' (je, ge, lijke, ische, de,
       te, se, re, le, ene, ieve).  For 'je' the alternatives extend the
       slice leftwards (via `]`) over the preceding letters before editing. */
    define Step_2 as (
        [substring] among (
            'je' (
                ('{'}t' ] delete) or
                ('et' ] R1 C delete) or
                ('rnt' ] <-'rn') or
                ('t' ] R1 VX delete) or
                ('ink' ] <-'ing') or
                ('mp' ] <-'m') or
                ('{'}' ] R1 delete) or
                (] R1 C delete)
            )
            'ge' (R1 <-'g')
            'lijke'(R1 <-'lijk')
            'ische'(R1 <-'isch')
            'de' (R1 C delete)
            'te' (R1 <-'t')
            'se' (R1 <-'s')
            're' (R1 <-'r')
            'le' (R1 delete attach 'l' lengthen_V)
            'ene' (R1 C delete attach 'en' lengthen_V)
            'ieve' (R1 C <-'ief')
        )
    )

    /* Step_3: handles the endings atie, iteit, heid, sel, ster, rder, ing,
       isme, erij, arij, fie, gie, tst and dst. */
    define Step_3 as (
        [substring] among (
            'atie' (R1 <-'eer')
            'iteit' (R1 delete lengthen_V)
            'heid' 'sel' 'ster' (R1 delete)
            'rder' (<-'r')
            'ing' 'isme' 'erij' (
                /* Exception added to avoid conflating
                   `schilderij` (painting) and `schild` (shield). */
                ('ild' <- 'er') or
                (R1 delete lengthen_V)
            )
            'arij' (R1 C <-'aar')
            'fie' (R2 delete attach 'f' lengthen_V)
            'gie' (R2 delete attach 'g' lengthen_V)
            'tst' (R1 C <-'t')
            'dst' (R1 C <-'d')
        )
    )

    /* Step_4: tries the first ending table; only if that fails does it try
       the iger / igst / ig table. */
    define Step_4 as (
        (
            [substring] among (
                'ioneel' (R1 <-'ie')
                'atief' (R1 <-'eer')
                'baar' (R1 delete)
                'naar' (R1 V <-'n')
                'laar' (R1 V <-'l')
                'raar' (R1 V <-'r')
                'tant' (R1 <-'teer')
                'lijker' 'lijkst' (R1 <-'lijk')
                'achtig' 'achtiger' 'achtigst'(R1 delete)
                'eriger' 'erigst' 'erig' 'end' (R1 C delete lengthen_V)
            )
        )
        or
        (
            [substring] among (
                'iger' 'igst' 'ig' (
                    R1
                    /* Exception added to avoid conflating
                       `innig` (intimate) and `in` (in). */
                    not ('inn' atlimit)
                    C delete lengthen_V
                )
            )
        )
    )

    /* Step_7: rewrites final kt, ft, pt as k, f, p. */
    define Step_7 as (
        [substring] among (
            'kt' (<-'k')
            'ft' (<-'f')
            'pt' (<-'p')
        )
    )

    /* Step_6: reduces a final doubled consonant to a single one, and
       rewrites final v as f and final z as s. */
    define Step_6 as (
        [substring] among (
            'bb' (<-'b')
            'cc' (<-'c')
            'dd' (<-'d')
            'ff' (<-'f')
            'gg' (<-'g')
            'hh' (<-'h')
            'jj' (<-'j')
            'kk' (<-'k')
            'll' (<-'l')
            'mm' (<-'m')
            'nn' (
                /* Exception added to avoid conflating
                   `innen` (to collect/cash) and `in` (in). */
                not ('i' atlimit)
                <-'n'
            )
            'pp' (<-'p')
            'qq' (<-'q')
            'rr' (<-'r')
            'ss' (<-'s')
            'tt' (<-'t')
            'vv' (<-'v')
            'ww' (<-'w')
            'xx' (<-'x')
            'zz' (<-'z')
            'v' (<-'f')
            'z' (<-'s')
        )
    )

    /* Step_1c: handles a final d or t, subject to R1 and C; `stem` only
       invokes it when GE_removed is set. */
    define Step_1c as (
        [substring] R1 C among (
            'd' (
                not ('n' R1)
                /* Exception added to avoid conflating
                   `geïnd` (collected/cashed) and `in` (in).
                   Instead we conflate `geïnd` with `innen`. */
                ('in' atlimit <-'n') or
                delete
            )
            't' (
                not ('h' R1)
                /* Exception added to avoid conflating
                   `geënt` (grafted) and `en` (and). */
                not ('en' atlimit)
                delete
            )
        )
    )
)

/*
 * Lose_prefix: removes a leading 'ge' when at least three characters follow,
 * the vowel-context test passes and the word is not one of the listed
 * exceptions.  Sets GE_removed, then replaces an e/i with diaeresis matched
 * at the cursor by the plain letter.
 */
define Lose_prefix as (
    ['ge'] test hop 3
    test (gopast ('ij' or v) repeat ('ij' or v) not atlimit)
    /* Exceptions added: */
    among (
        /* Avoid conflating `geeft` and `effen`/`effende`/`geeffende`. */
        'eft' (false)
        /* Avoid conflating `gevallen`/`geval` and `vallen`. */
        'val' (false)
        'vali' (true)
        /* Avoid conflating `gevaren`/`gevaar` (danger), `gevaarten` (huge
           objects) and `varen` (to sail) */
        'vaa' 'vare' (false)
        '' (true)
    )
    set GE_removed
    delete
    do (
        [substring] among (
            '{e"}' (<-'e')
            '{i"}' (<-'i')
        )
    )
)

/*
 * Lose_infix: as Lose_prefix, but skips one character first and then
 * searches forward for a 'ge' to remove; it has no exception list.
 */
define Lose_infix as (
    next
    gopast (['ge']) test hop 3
    test (gopast ('ij' or v) repeat ('ij' or v) not atlimit)
    set GE_removed
    delete
    do (
        [substring] among (
            '{e"}' (<-'e')
            '{i"}' (<-'i')
        )
    )
)

/*
 * measure: initialises p1 and p2 to the limit, then tries to mark p1 after
 * the first run of (optional non-vowels, one or more vowels or 'ij', one
 * non-vowel) and p2 after the next such run.  If a run cannot be found the
 * corresponding mark stays at the limit.
 */
define measure as (
    $p1 = limit
    $p2 = limit
    do (
        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p1
        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p2
    )
)

/*
 * stem: entry point.
 *   1. Compute p1/p2, then run Step_1..Step_4 backwards; each step that
 *      succeeds sets `stemmed`.
 *   2. Try Lose_prefix; on success re-measure and run Step_1c.
 *   3. Try Lose_infix; on success re-measure and run Step_1c.
 *   4. Run Step_7, then Step_6 only if `stemmed` is set.
 */
define stem as (
    unset stemmed
    measure
    backwards (
        do (Step_1 set stemmed )
        do (Step_2 set stemmed )
        do (Step_3 set stemmed )
        do (Step_4 set stemmed )
    )
    unset GE_removed
    do (Lose_prefix and measure)
    backwards (
        do (GE_removed set stemmed Step_1c)
    )
    unset GE_removed
    do (Lose_infix and measure)
    backwards (
        do (GE_removed set stemmed Step_1c)
    )
    backwards (
        do (Step_7 set stemmed )
        do (stemmed Step_6)
    )
)