# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ---------------------------------------------------------------------------
# VISL CG-3 disambiguation grammar. The lemmas it refers to ("the", "to",
# "not", "please", "do", ...) are English.
#
# Layout: delimiters, then set definitions, then a single SECTION of
# SELECT/REMOVE rules. Rule order is significant and must be kept: several
# groups end with an unconditional or near-unconditional fallback rule that
# only makes sense after the more specific rules above it have run.
#
# '#' starts a comment that runs to the end of the physical line, so every
# statement must stay on its own line.
# ---------------------------------------------------------------------------

# Sentence (window) delimiters.
# NOTE(review): "<!>" and "<?>" were restored from two empty strings and "<¶>"
# from a mis-encoded character sequence - confirm against the upstream file.
DELIMITERS = "<.>" "<..>" "<...>" "<!>" "<?>" "<¶>" sent ;
SOFT-DELIMITERS = "<,>" ;
SUBREADINGS = LTR ;

# Window boundary markers.
LIST BOS = (>>>) ;
LIST EOS = (<<<) ;

SETS

# --- Parts of speech --------------------------------------------------------
LIST N = n ;
LIST Np = np ;
LIST Prn = prn ;
# Composite tags: a reading must carry BOTH tags. Without the parentheses the
# list is a union (any prn OR any obj), which matches every pronoun.
LIST PrnObj = (prn obj) ;
LIST PrnSubj = (prn subj) ;
LIST Det = det ;
LIST Have = vbhaver ;
LIST Be = vbser ;
LIST Adj = adj ;
LIST Predet = predet ;
LIST Preadv = preadv ;
LIST Adv = adv ;
LIST Pr = pr ;
LIST Ger = ger ;
LIST Pprs = pprs ;
LIST Subs = subs ;
LIST Num = num ;
LIST Rel = (prn rel) ;
LIST Ij = ij ;
LIST Cnjadv = cnjadv ;
LIST Cnjcoo = cnjcoo ;

# --- Verb classes -----------------------------------------------------------
LIST V = vblex vbser vaux vbmod vbhaver vbdo ;
LIST Vblex = vblex ;
LIST Vbser = vbser ;
LIST Vbhaver = vbhaver ;
LIST Vbdo = vbdo ;
LIST Vbaux = vaux ;
LIST Vbmod = vbmod ;

LIST Gen = gen ;
LIST Apos = apos ;

# --- Lemma-based sets -------------------------------------------------------
LIST Not = "not" ;
SET Please = ("please" adv) | ("please" ij) ;
SET Do = ("do") ;

# --- Verb forms -------------------------------------------------------------
SET Imp = (vblex imp) | (vbser imp) | (vbdo imp) ;
SET Inf = (vblex inf) | (vbmod inf) | (vaux inf) | (vbser inf) | (vbhaver inf) | (vbdo inf) ;
SET Pp = (vblex pp) | (vbmod pp) | (vaux pp) | (vbser pp) | (vbhaver pp) | (vbdo pp) ;

# --- Number and gender ------------------------------------------------------
LIST Sg = sg ;
LIST Pl = pl ;
LIST Sp = sp ;
LIST M = m ;
LIST F = f ;
LIST Nt = nt ;
LIST Mf = mf ;

LIST The = "the" ;
# Finite verb forms (used to discard finite readings after a determiner / "to").
LIST Fin = pres past imp ;
LIST To = "to" ;

# --- Person -----------------------------------------------------------------
SET Pers = (p1) | (p2) | (p3) ;
LIST P1 = p1 ;
LIST P2 = p2 ;
LIST P3 = p3 ;

# Subject pronouns by person/number, used for verb agreement below.
LIST PrnSubjP1Sg = (prn subj p1 mf sg) ;
LIST PrnSubjP1Pl = (prn subj p1 mf pl) ;
LIST PrnSubjP2Sp = (prn subj p2 mf sp) ;

# Single-letter lemmas.
LIST Letters = "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" ;
# Word forms ending in "ing" (regex, case-insensitive).
LIST Ing = "<.*ing>"ri ;

SECTION

# No finite verb directly after an unambiguous determiner.
REMOVE Fin IF (-1C Det) ;
# Single letters are not plural.
REMOVE Pl IF (0 Letters) ;
# No lexical verb directly after a genitive.
REMOVE (vblex) IF (-1 (gen)) ;
# No pre-adverb directly before a cohort tagged cm.
REMOVE (preadv) IF (1 (cm)) ;

# Lemmas that are N or Inf: Inf if "to" precedes it (optionally with one
# adverb in between)
SELECT Inf IF (0 Inf) (0 N) (-1 To) ;
SELECT Inf IF (0 Inf) (0 N) (-1 Adv) (-2 To) ;

# Lemmas that are N or V: N if DET/ADJ/PR precedes it
SELECT N IF (0 N) (0 V) (-1 Pr) ;
SELECT N IF (0 N) (0 V) (-1 Det) ;
SELECT N IF (0 N) (0 V) (-1 Adj) ;

# Lemmas that are N or V: V if a subject-like context precedes it
# (N/PRN/NP, possibly coordinated or followed by an adverb), if a
# modal/auxiliary/"do" verb precedes it, or if an object pronoun follows
SELECT V IF (0 N) (0 V) (-2 N) (-1 Adv) ;
SELECT V IF (0 N) (0 V) (-2 Det) (-1 N) ;
SELECT V IF (0 N) (0 V) (-1 Prn) ;
SELECT V IF (0 N) (0 V) (-3 Prn) (-2 Cnjcoo) (-1 Prn) ;
SELECT V IF (0 N) (0 V) (-3 Prn) (-2 Cnjcoo) (-1 Np) ;
SELECT V IF (0 N) (0 V) (-2 Prn) (-1 Adv) ;
SELECT V IF (0 N) (0 V) (-1 Np) ;
SELECT V IF (0 N) (0 V) (-3 Np) (-2 Cnjcoo) (-1 Np) ;
SELECT V IF (0 N) (0 V) (-3 Np) (-2 Cnjcoo) (-1 Prn) ;
SELECT V IF (0 N) (0 V) (-2 Np) (-1 Adv) ;
SELECT V IF (0 N) (0 V) (1 PrnObj) ;
SELECT V IF (0 N) (0 V) (-1 Vbmod | Vbaux) ;
SELECT V IF (0 N) (0 V) (-2 Vbmod | Vbaux) (-1 Adv) ;
SELECT V IF (0 N) (0 V) (-1 Vbdo) ;
SELECT V IF (0 N) (0 V) (-2 Vbdo) (-1 Adv) ;
SELECT V IF (0 N) (0 V) (-3 Vbdo) (-2 Adv) (-1 Adv) ;

# Lemmas that are N or VBMOD (will/can/may): VBMOD if an infinitive follows,
# allowing up to two adverbs in between
SELECT Vbmod IF (0 Vbmod) (0 N) (1 Inf) ;
SELECT Vbmod IF (0 Vbmod) (0 N) (1 Adv) (2 Inf) ;
SELECT Vbmod IF (0 Vbmod) (0 N) (1 Adv) (2 Adv) (3 Inf) ;

# Otherwise, select N (fallback for any N/V ambiguity still unresolved)
SELECT N IF (0 N) (0 V) ;

# Lemmas that are N or ADJ: N if ADJ precedes it or V follows
SELECT N IF (0 N) (0 Adj) (-1 Adj) ;
SELECT N IF (0 N) (0 Adj) (1 V) ;

# Lemmas that are N or ADJ: ADJ if N follows it
SELECT Adj IF (0 N) (0 Adj) (1 N) ;

# Lemmas that are DET or PRN: DET if N/ADJ follows
# (but never a determiner directly after another determiner)
REMOVE Det IF (-1 Det) ;
SELECT Det IF (0 Det) (0 Prn) (1 N) ;
SELECT Det IF (0 Det) (0 Prn) (1 Adj) (2 N) ;
SELECT Det IF (0 Det) (0 Prn) (1 Preadv) (2 Adj) (3 N) ;

# Otherwise, select PRN
SELECT Prn IF (0 Det) (0 Prn) ;

# one : eins (num) / man (prn)
SELECT (num) IF (1 (n)) ;

# Lemmas that are ADJ or DET ('little'): ADJ if DET precedes it or N+PL follows
SELECT Adj IF (0 Adj) (0 Det) (-1 Det) ;
SELECT Adj IF (0 Adj) (0 Det) (1 N + Pl) ;

# Lemmas that are ADV or DET ('no'): ADV unless N (or ADJ + N) follows
SELECT Det IF (0 Adv) (0 Det) (1 N) ;
SELECT Det IF (0 Adv) (0 Det) (1 Adj) (2 N) ;
SELECT Adv IF (0 Adv) (0 Det) ;

# Lemmas that are ADV or PREP ('around'): ADV unless a noun phrase follows
SELECT Pr IF (0 Adv) (0 Pr) (1 Np) ;
SELECT Pr IF (0 Adv) (0 Pr) (1 N) ;
SELECT Pr IF (0 Adv) (0 Pr) (1 Det) (2 N) ;
SELECT Pr IF (0 Adv) (0 Pr) (1 Det) (2 Adj) (3 N) ;
SELECT Pr IF (0 Adv) (0 Pr) (1 Det) (2 Preadv) (3 Adj) (4 N) ;
SELECT Adv IF (0 Adv) (0 Pr) ;

# Lemmas that are PREADV or CNJADV ('so'): CNJADV unless ADV/ADJ follows
SELECT Preadv IF (0 Preadv) (0 Cnjadv) (1 Adv) ;
SELECT Preadv IF (0 Preadv) (0 Cnjadv) (1 Adj) ;
SELECT Cnjadv IF (0 Preadv) (0 Cnjadv) ;

# Please (adv/ij) and please (vblex): verb after "to" or before an object
# (object pronoun, proper noun, or determiner-headed noun phrase);
# otherwise drop the verb reading
SELECT Vblex IF (0 Please) (0 Vblex) (-1 To) ;
SELECT Vblex IF (0 Please) (0 Vblex) (1 PrnObj) ;
SELECT Vblex IF (0 Please) (0 Vblex) (1 Np) ;
SELECT Vblex IF (0 Please) (0 Vblex) (1 Det) (2 N) ;
SELECT Vblex IF (0 Please) (0 Vblex) (1 Det) (2 Adj) (3 N) ;
SELECT Vblex IF (0 Please) (0 Vblex) (1 Det) (2 Preadv) (3 Adj) (4 N) ;
REMOVE Vblex IF (0 Please) (0 Vblex) ;

# Keep imperatives only at BOS, after a soft delimiter, or after a
# sentence-initial "please"; remove them everywhere else.
# (An unconditional REMOVE is a fallback: CG-3 does not delete a cohort's
# last remaining reading.)
SELECT Imp IF (-1 BOS | _S_SOFT_DELIMITERS_) ;
SELECT Imp IF (-2 BOS) (-1 Please) ;
REMOVE Imp ;

# Readings ruled out at the start or end of the window
REMOVE Inf IF (-1 BOS) ;
REMOVE (vblex pres) IF (-1 BOS) ;
REMOVE (vblex pp) IF (-1 BOS) ;
REMOVE (cnjadv) IF (-1 BOS) ;
REMOVE (preadv) IF (1 EOS) ;

# No past participle directly after a proper noun or subject pronoun
REMOVE (vblex pp) IF (-1 (np)) ;
REMOVE (vblex pp) IF (-1 (prn subj)) ;

# A modal followed by an infinitive is not itself an infinitive
REMOVE (vbmod inf) IF (1 Inf) ;
REMOVE (vbmod inf) IF (1 Adv) (2 Inf) ;

# "second" is not a noun when directly followed by a noun
REMOVE ("second"i n) IF (1 (n)) ;

# Disabled rule, kept for reference:
#REMOVE (prn obj) IF (-1 V) (-2 BOS) ;
REMOVE (prn obj) IF (1 (vblex inf)) ;

# Interrogative adverb at BOS
SELECT (adv itg) IF (-1 BOS) ;
# Verb directly after a personal pronoun
SELECT V IF (-1 (prn pers)) ;

# Rules involving sub-readings (see SUBREADINGS above).
# NOTE(review): SUB:1 targets sub-reading 1 and the '/*' position suffix is
# understood to inspect sub-readings as well - confirm against the CG-3 manual.
SELECT SUB:1 Be IF (1 Ing) (0/* Have) ;
SELECT Ger IF (-1/* Be) (1 To) ;

# Predeterminer directly before an unambiguous "the"
SELECT Predet IF (1C The) ;

# Saxon genitive ('s)
# - only after a noun or proper noun
# - the bare-apostrophe form only after a word form ending in "s"
# - after a plural noun, prefer genitive over the "is"/"has" readings
REMOVE Gen IF (NOT -1 N | Np) ;
REMOVE Gen IF (0 Gen) (0 Apos) (NOT -1 ("<.*s>"r)) ;
SELECT Gen IF (0 Gen) (0 Vbser) (0 Vbhaver) (-1 N + Pl) ;

# Have (vblex) and have (vbhaver): auxiliary if a past participle follows
# (allowing up to two adverbs in between); otherwise drop the auxiliary reading
REMOVE Vblex IF (0 Vblex) (0 Vbhaver) (1 Pp) ;
REMOVE Vblex IF (0 Vblex) (0 Vbhaver) (1 Adv) (2 Pp) ;
REMOVE Vblex IF (0 Vblex) (0 Vbhaver) (1 Adv) (2 Adv) (3 Pp) ;
REMOVE Vbhaver ;

# Verbs preceded by to: no finite readings
REMOVE Fin IF (-1C To) ;

# Do (vbdo) and do (vblex): auxiliary if an infinitive follows
# (allowing up to two adverbs in between); otherwise drop the auxiliary reading
SELECT Vbdo IF (1 Inf) ;
SELECT Vbdo IF (1 Adv) (2 Inf) ;
SELECT Vbdo IF (1 Adv) (2 Adv) (3 Inf) ;
REMOVE Vbdo ;

# Verbs with person/number: P1 (subject pronoun up to two adverbs back)
SELECT P1 IF (0 V) (-1 PrnSubjP1Sg | PrnSubjP1Pl) ;
SELECT P1 IF (0 V) (-1 Adv) (-2 PrnSubjP1Sg | PrnSubjP1Pl) ;
SELECT P1 IF (0 V) (-1 Adv) (-2 Adv) (-3 PrnSubjP1Sg | PrnSubjP1Pl) ;

SELECT Sg IF (0 V) (-1 PrnSubjP1Sg) ;
SELECT Sg IF (0 V) (-1 Adv) (-2 PrnSubjP1Sg) ;
SELECT Sg IF (0 V) (-1 Adv) (-2 Adv) (-3 PrnSubjP1Sg) ;

SELECT Pl IF (0 V) (-1 PrnSubjP1Pl) ;
SELECT Pl IF (0 V) (-1 Adv) (-2 PrnSubjP1Pl) ;
SELECT Pl IF (0 V) (-1 Adv) (-2 Adv) (-3 PrnSubjP1Pl) ;

# Verbs with person/number: P2
SELECT P2 IF (0 V) (-1 PrnSubjP2Sp) ;
SELECT P2 IF (0 V) (-1 Adv) (-2 PrnSubjP2Sp) ;
SELECT P2 IF (0 V) (-1 Adv) (-2 Adv) (-3 PrnSubjP2Sp) ;

# Verbs with person/number: P3 (default) (if not genitive)
SELECT P3 IF (0 V) (NOT 0 ("'s")) ;

# Verbs ending in -ing: GER if VBSER precedes it
SELECT Ger IF (0 Ger) (0 Pprs) (0 Subs) (-1 Vbser) ;

# a job / Steve Jobs: no proper noun after an unambiguous indefinite determiner
REMOVE Np IF (-1C (det ind)) ;