DELIMITERS = "<.>" "" "" "<...>" "<¶>" ; SOFT-DELIMITERS = "<,>" ; SETS LIST N = n ; LIST Det = det ; LIST A = adj ; LIST V = vblex vbser vaux vbhaver vbdo ; LIST Adv = adv ; LIST Pron = prn ; LIST CS = cnjsub ; LIST CC = cnjcoo ; LIST Rel = rel ; LIST Preadv = preadv ; LIST Pr = pr ; LIST NUM = num ; LIST Cop = vbser ; LIST Inf = inf ; LIST Pres = pres pri; LIST Past = past ; LIST Ger = ger ; LIST VerbalNoun = (vblex subs) ; LIST PPRS = (vblex pprs); LIST PrcPast = pp ; LIST AdvItg = (adv itg) ; LIST Unknown = ("\*.*"r); LIST Interr = itg ; LIST Subj = subj ; LIST Obj = obj ; LIST Pos = pos ; LIST Months = "January" "February" "March" "April" "May" "June" "July" "August" "September" "October" "November" "December" ; LIST Seasons = "winter" "spring" "summer" "autumn" ; LIST Year = year ; LIST 1p = p1 ; LIST 2p = p2 ; LIST 3p = p3 ; LIST EOS = (<<<) ; LIST FIN = pres past pri ; LIST SENT = sent ; LIST To = ("to") ; SET V-FIN = V + FIN ; SET V-DO = ("do"i) ; SET English-sg = ("English"i) ; SET VAUX-CAN = ("can"i) ; SET VAUX-SHOULD = ("should"i) ; SET VAUX-WILL = ("will"i) ; SET VAUX-WOULD = ("would"i) ; SET Quest = (""?) ; SET V-DO-AUX = (vbdo) ; SET ADV-NOT = ("not"i) ; SET V-HAVE = (vbhaver) ; SET V-PRES = (vblex pres) ; SET V-MOD = ("must"i) | ("should"i) | ("will"i) | ("can"i) | ("need# to"i) | ("want# to"i) | ("may"i) ; LIST PRN-SUBJ = (prn subj) ; SET CLB = SENT | V-FIN | Subj ; # 'Subj' is risky, e.g. coordination SET PERSON = 1p | 2p | 3p ; SECTION SELECT (vblex inf) if (0 ("be"i)) (-1 Pr) ; # z_z # Если # до слова стоит "to", # после слова стоит артикль, или начечие, # то слово - инфинитив SELECT Inf IF (-1 ("to")) (1 Det OR Adv) ; #should not np unk SELECT (vaux inf) IF (0 VAUX-SHOULD) (1* ("?")) ; #remove np unk if word has another tag REMOVE (np unk) IF (0 Det OR A OR Adv) ; # Почему-то не работает, если поставить это правило в конец. #five time a week SELECT N IF (-1 NUM) (0 ("time")) (1 Det) ; #at the beginning SELECT N IF (-2 Pr) (0 ("beginning")) ; #at the beginning SELECT N IF (-1 (vblex pres)) (0 ("beginning")) ; #Ability to sing WELL(ADV) SELECT Adv IF (-1 V) (-2 ("to")) ; # z_z # Если # после слова стоит артикль, или начечие, # то слово - глагол SELECT V IF (1 Det OR Adv) (NOT -1 Det) ; #either(cnjcoo) 1 or 2 SELECT CC IF (1 NUM) (0 ("either") OR ("neither")) ; REMOVE V-FIN IF (-1C V-FIN); # Careful: would kill "The man who _came_ _wrote_ a book" SELECT (vblex pp) IF (-1 ("have"i)) ; REMOVE N IF (0 (""i)) ; SELECT N IF (0 (""i))(1 (""i))(2 (""i)) ; SELECT N IF (-1 (""i)) ; #for selecting "when" as cnjadv, if before come "adv - once" SELECT (cnjadv) IF (-1 Adv) ; # #for selecting name if it come before cog SELECT (np ant) IF (1 (np cog)) ; ## SELECT NUM IF (0 (""i))(1 EOS) ; SELECT (prn tn mf) IF (0 (""i))(1 N) ; SELECT (cnjcoo) IF (1 (pr)) ; SELECT A IF (0 (""i))(1 Pr)(-1 (vbser)) ; SELECT V IF (0 (""i))(1 (vblex ger)) ; SELECT V IF (0 (""i))(1 Adv)(-1 ("that"i)) ; SELECT V IF (0 (""i))(-2 PRN-SUBJ) ; #AS - 11/08/2016: DUA1 - number caused Segmentation fault, fixed SELECT PRN-SUBJ IF (0 (""i))(-1 V-DO-AUX OR V-HAVE) (1 V) ; SELECT Pr IF (0 (""i))(1 Unknown) ; SELECT (vblex) IF (-1 Adv)(0 (""i)) ; SELECT Inf IF (-2* (""i))(-1 (""i)) ; SELECT N IF (0 (""i))(-1 Pr) ; REMOVE (np unk) IF (0 A OR N); SELECT Inf IF (-1 (""i) OR (""i) OR (""i)) # (-1/0 V-DO-AUX) ; SELECT (vbmod past) IF (0 (vbmod past)) (1 (vblex inf)) ; SELECT (vblex pp) IF (0 (""i)) (-1 (""i)) ; SELECT (vblex sep pp) IF (-1 (vbser past)) ; REMOVE N IF (0 (""))(1 ADV-NOT); SELECT (vbhaver inf) if (0 ("have")) (-1 ("not")); SELECT (n sg) if (0 ("English")) ; SELECT (n sg) if (0 ("Kazakh")) ; SELECT (vblex pp) if (-1 (vbser past)) ; SELECT (vblex ger) if (0 ("beginning"i))(NOT 1 V) ; SELECT Rel if (-1 N) ; SELECT Inf IF (-1 ADV-NOT)(-2 VAUX-CAN); SELECT V if (0 ("cost"i)) (-1 Adv); SELECT A if (0 ("pay"i))(1 N) ; SELECT N if (-1 Pr) ; SELECT (det qnt) if (1 A) ; SELECT (det itg) if (1 ("be"i)) (2 ("call"i)) ; SELECT A if (1C ("Party"i)) ; #to choose np top instead of np unk SELECT (np top) if (0 (np top)) ; SELECT (det itg) if (-1 Pr)(0 ("which"i)) ; SELECT (det itg) if (1 V OR Adv)(2 Det OR N OR V) ; SELECT AdvItg if (1C V) ; SELECT (det dem) if (1 N) ; SELECT AdvItg if (2C V) ; SELECT (vblex) if (-1 ("was"i)) ; SELECT N if (-1 AdvItg) ; SELECT A if (1 ("Autonomous"i)) ; SELECT N if (-1 Det) (NOT 0 A) ; SELECT V-HAVE if (2C V) ; SELECT Inf if (-2 V-MOD) (-1 ADV-NOT); SELECT Inf if (-1 VAUX-WOULD) ; SELECT Pr if (-1 VAUX-WOULD) ; SELECT Inf IF (-2 V-FIN) (-1 ("")); SELECT (vblex inf) IF (-2 ("what"i)) (-1 ("")); SELECT (vblex inf) IF (0 ("do"i)) (-1 ("")); SELECT Inf IF (0 ("")) (1 ADV-NOT); SELECT Inf IF (-2 ("")) (-1 Adv OR (preadv)); SELECT Inf IF (-1 ("")); SELECT V-HAVE IF (1 PrcPast); SELECT V IF (1 Ger)(NOT 0 (vblex pres p3 sg)); SELECT V-FIN IF (0 ("show"i)) (1 Det); REMOVE V-FIN IF (-1 A) (-2 Det); REMOVE V-FIN IF (-1 Det); REMOVE Ger IF (-1 V-FIN) (NOT -1 Cop) (1 N); #MLF not sure about this rule here REMOVE A IF (-1 Det) (0 N OR A) (1 ("")) (2 A); REMOVE Inf IF (0 N OR Inf) (1 ("to"i)) (2 Inf); SELECT V IF (-1 Subj OR N)(1 ("to"i)) (2 Inf); SELECT N IF (-1C NUM); SELECT N IF (-1 ("Baltic"i)); SELECT N IF (-1 ("Asian"i)); SELECT N IF (-1 A); SELECT Adv IF (-1 V)(0 ("forward"i)); SELECT Inf IF (-1 V-MOD); SELECT N IF (1 Pr)(2 N); SELECT N IF (1 (vbser pres)); REMOVE N IF (-2 N)(-1 (cnjcoo))(1 N); SELECT N IF (-2 N)(-1 (cnjcoo)); REMOVE N IF (0 V-FIN) (0 N) (0 N OR V-FIN) ((NOT 1* V-FIN BARRIER CLB) OR (NOT -1C* V-FIN BARRIER CLB)) (NOT -1* Det BARRIER (*) - Unknown); REMOVE A IF (0 A OR Adv) (NOT 1 N) (NOT -1* Cop BARRIER CLB) ; REMOVE N IF (0 N OR V-FIN) (1 (""i)) (2 Inf); SELECT V-FIN + $$PERSON IF (-1 Subj + $$PERSON); SELECT CS IF (-1 V)(1 N OR PRN-SUBJ); SELECT Inf IF (-1* V-DO BARRIER SENT) (NOT 1 Det); REMOVE Inf IF (NOT 0 ("will"i) ) (1C ADV-NOT) (2 Inf); SELECT (det qnt) IF (-1 Pr)(1 N)(2 FIN); SELECT Adv IF (1 N)(2 FIN); SELECT Pr IF (1 N); SELECT Pr IF (1 NUM); SELECT Inf IF (1 Inf) (2 Ger); SELECT Inf IF (1 Inf) (2 PrcPast); SELECT Inf IF (-1/0 ADV-NOT)(-1/1 VAUX-CAN); SELECT V-PRES IF (-1 N OR Pron)(1 N OR Pron); SELECT V-DO-AUX IF (1C ADV-NOT)(2 Inf); SELECT V-DO-AUX IF (NOT 1C ADV-NOT); SELECT V-DO-AUX IF (-1 PRN-SUBJ) (1C ADV-NOT); REMOVE V-DO-AUX IF (-1 PRN-SUBJ) (NOT 1C ADV-NOT); SELECT (gen) IF (0 ("'s"i)); SELECT V IF (-1 PRN-SUBJ OR N); SELECT V-HAVE IF (1C ADV-NOT) (2 PrcPast); SELECT Inf IF (0 Inf) (1 ADV-NOT)(2 Ger); SELECT Pres IF (1 ADV-NOT) (2 Inf); SELECT ("put"i vblex pp) IF (-1 V-HAVE); SELECT ("forward"i adv) IF (-1 V); REMOVE Inf IF (-1 PRN-SUBJ) (NOT 0 ("will"i)); REMOVE N IF (-1 PRN-SUBJ) (0 ("will"i) ); REMOVE N IF (0 ("will"i)) (1C ADV-NOT); SELECT Inf IF (0 Inf) (1 ADV-NOT)(2 Ger); REMOVE Inf IF (0 Inf OR V-FIN) (NOT 1* V-FIN BARRIER CLB) (NOT -1* V-FIN BARRIER CLB) (NOT -1 To); REMOVE PrcPast IF (-1 VAUX-WILL); REMOVE V-FIN IF (0 Inf) (0 Inf OR V-FIN) ((1C* V-FIN BARRIER CLB) OR (-1C* V-FIN BARRIER CLB)); REMOVE CS IF (NOT 1* V-FIN BARRIER CLB); REMOVE Det IF (NOT 1 N OR A OR NUM); SELECT Adv + Interr IF (0 Adv OR Rel) (1* ("?")) ; SELECT Subj IF (0 Subj OR Obj) (1 V-FIN); SELECT A IF (-1 Preadv) (0 A OR V); #REMOVE Pr IF (NOT 1* N BARRIER CLB); REMOVE Pr IF (0 Pr OR CC) (-1 SENT); SELECT V-FIN + $$PERSON (-1C Subj + $$PERSON); SELECT Inf IF (-2 V-FIN) (-1 Adv) (0 V-FIN OR Inf); SELECT Det + Pos IF (0 Det OR Pron) (1 N); SELECT N IF (0 ("television"i)) (-1 Det) (-2 ("or"i)); SELECT A IF (0 Det OR A) (-1 Det) (1 N); REMOVE Ger IF (-1 Det OR A); SELECT Ger IF (-1 Cop); # Would not work with "We are sleeping beauties", but it is hard... SELECT PPRS IF (-1 Det OR A) (1 N); SELECT Pr IF (0 Pr OR Adv) (1 Det); REMOVE Rel IF (-1 CC); SELECT Det IF (0 ("") OR ("")) (1C N); REMOVE A IF (0C A OR N) (-1 Det) (1 SENT); SELECT Det IF (0C Pron OR Det) (1C N); SELECT V-FIN IF (-1C Subj) (1 Inf); REMOVE Subj IF (0C Subj OR Obj) (-1C* Subj BARRIER SENT); SELECT V-HAVE IF (1C PrcPast); SELECT Pr IF (0 Pr OR Adv) (1 Pron + Obj) ; REMOVE N IF (0 N OR V) (-1 VAUX-CAN) ; REMOVE Pron IF (0 Det OR Pron) (1 NUM) ; SELECT Year IF (-1 ("in") OR ("after") OR ("since") OR ("of") OR ("throughout")) (NOT 1 N); SELECT Year IF (-1 Seasons OR Months); AFTER-SECTIONS REMOVE Year ;