### Constraint Grammar for Kyrgyz ###

# NOTE: line structure restored. '#' comments run to end of line in CG-3, so
# every definition and rule has to sit on its own line to be seen by the parser.

# Sentence delimiters.
# NOTE(review): the source had two empty strings here ("" ""); they are restored
# as "<!>" and "<?>" on the assumption that the angle-bracket tokens were
# stripped somewhere upstream — confirm against the upstream grammar.
DELIMITERS = "<.>" "<!>" "<?>" ;
SOFT-DELIMITERS = "<,>" ;
SUBREADINGS = LTR ; # Alternate, left-to-right (main reading on the left)

LIST BOS = (>>>) ; # Beginning of sentence
LIST EOS = (<<<) ; # End of sentence

LIST Cm = cm ;
SET MARK = Cm | ("\\") | ("\;") | ("\(") ;

# Morphological tags
# ==================
# Each name is defined exactly once; the repeated definitions of Vaux, Cop,
# Pl, Nom, Gen, Acc and Subst (identical contents) have been dropped.
LIST Nom = nom ;
LIST Acc = acc ;
LIST @acc-ind = acc-ind ;
LIST Gen = gen ;
LIST Adj = adj ;
LIST Adv = adv ;
LIST Pron = prn ;
LIST Pron-Pers = (prn pers) ;
LIST Noun = n ;
LIST Past = past ;
LIST Vadj = vadj vadj_past ;
LIST Vaux = vaux ;
LIST Verb = v ;
LIST V = v ;
LIST Cop = cop ;
LIST Num = num ;
LIST Interj = ij ;
LIST Subst = subst ;
LIST Vb-or-Aux = v vaux ;

LIST PrcIrre = prc_irre ;
LIST Prc = prc_plan prc_past prc_hab prc_fut prc_cond prc_perf prc_impf prc_vol prc_irre prc_real prc_pcond ;
# Prc forms that occur with copula.
# (was "pre_pcond", a tag that appears nowhere else; Prc above has prc_pcond)
LIST PrcCop = prc_past prc_hab prc_fut prc_irre prc_pcond ;
LIST Ger = ger ger_fut ger_past ger_pres ;
LIST Gpr = gpr_fut gpr_pot2 gpr_perf gpr_past gpr gpr_pot gpr_impf gpr_hab gpr_notyet gpr_ppot ;
LIST Finite = past ifi fut aor pih ;
LIST Gerund = ger ger_ppot ger_past ger_perf ger_impf ger_abs ;
LIST FiniteVerb = pres aor past ifi ifi_evid fut fut_plan imp opt pih ;
SET V-NotGerund = V - Gerund ;
SET V-NonFiniteCanGetCop = Ger | Gpr ;

LIST Det = det ;
LIST CC = cnjcoo ;
LIST CS = cnjsub ;
LIST Sent = sent ;
LIST Pl = pl ;
LIST Tv = tv ;
LIST Iv = iv ;
LIST Aor = aor ;
LIST Px3pl = px3pl ;
LIST Px3sg = px3sg ;
LIST Px3sp = px3sp ;
LIST Post = post ;
LIST Coop = coop ;
LIST IJ = ij ;

LIST Sg = sg ;
LIST Dat = dat ;
LIST Abl = abl ;
LIST Loc = loc ;
LIST Ins = ins ;

LIST P1 = p1 ;
LIST P2 = p2 ;
LIST P3 = p3 ;

LIST Vb-Pl = (v pl) ;
LIST Vb-Sg = (v sg) ;
LIST Nom-Pl = (nom pl) ;
SET Nom-Sg = (nom) - (pl) ;
# (was "n np pron"; the pronoun tag used everywhere else in this file is prn)
LIST Nouns = n np prn ;
LIST Prop = np ;
LIST V-IV = (v iv) ;
LIST V-TV = (v tv) ;
SET Sing = (*) - (pl) ;
SET Acc-Or-Nom = Acc | Nom ;

# "Syntactic" tags
# ================
LIST Advl = advl ;
LIST Attr = attr ;
LIST N-LIKE = (adj subst) n np prn ; # Actually, see NOMINAL
SET ADV-NOUNS = ("саат") | ("жыл") | ("күн") | ("ай") ;
SET NOMINAL = Noun | Prop | Pron | Subst | Gerund ;
SET NOMINAL-HEAD = Noun | Ger | Subst ;
SET PRE-N = Det | Num | Attr | Adj | Gen | ("-") ;
SET N-LIKE-NO-ADV = N-LIKE - ADV-NOUNS ;
SET Not-Prc = (*) - Prc ;
SET Not-Sent = (*) - Sent ;
SET Tv-Or-Iv = Tv | Iv ;
SET PrcIrre-Or-Aor = PrcIrre | Aor ;
SET Vb-Sg-Or-Pl = Vb-Pl | Vb-Sg ;
SET Nom-Or-Gen = Nom OR Gen ;
SET Np-Nom-Or-Gen = Nouns + Nom-Or-Gen ;
SET Np-Nom-Or-Gen-Sg = Np-Nom-Or-Gen + Sing ;
SET Px3-Sg-Or-Pl = Px3sg | Px3pl ;
SET Px3 = Px3sg | Px3pl | Px3sp ;
LIST Adj-Subst = (adj subst) ;
SET Ger-Acc = Ger + (acc) ;
LIST P3-Pl = (p3 pl) ;
LIST P3-Sg = (p3 sg) ;
LIST Vb-Imp = (v imp) ;
LIST N-Acc = (n acc) ;
LIST Vb-Ifi-3 = (v ifi p3 sg) (v ifi p3 pl) ;
LIST P2Pron = (prn pers p2) ;


SECTION

# No vaux if participle doesn't precede
REMOVE Vaux IF
    (0C Vb-or-Aux)  # the current word can be verb also
    (-1C Not-Prc) ; # previous word is not a participle

## V if participle doesn't precede
#SELECT Verb IF
#    (0C Vb-or-Aux) # the current word can be verb also
#    (-1C Not-Part); # previous word is not a participle

# adj if next word is noun
SELECT Adj IF (1C Noun) ;

# no past forms if next word is noun
REMOVE Past IF (0C Vb-or-Aux) (1C Noun) ;

# get rid of copula reading if not end of sentence
REMOVE Cop IF (1C Not-Sent) ;

# select plural verb reading if subject is plural
SELECT Pl IF
    (0C Vb-Sg-Or-Pl)
    (*-1C Nom-Pl BARRIER Sent) ;

# get rid of plural verb reading if subject not plural
REMOVE Pl IF
    (0C Vb-Sg-Or-Pl)
    (*-1C Nom-Sg BARRIER Sent) ;

# drop the iv reading (leaving tv) if acc immediately precedes
REMOVE Iv IF
    (0C Tv-Or-Iv)
    (-1C Acc) ;

# Сен канча саат күттүң?
# NOTE: line structure restored; '#' comments run to end of line in CG-3, so
# each rule and each contextual test needs its own line.

# Prefer iv when no unambiguous accusative precedes within the sentence.
# NOTE(review): `0C Tv` requires every reading of the cohort to be tv, which
# would leave no iv reading to select — confirm whether `0C Tv-Or-Iv` was meant.
SELECT Iv IF
    (0C Tv) # e.g., бар is otherwise a problem
    (NOT *-1C Acc BARRIER Sent) ;

# get rid of prc_irre if also can be aor if following word sent
REMOVE PrcIrre IF
    (0C PrcIrre-Or-Aor)
#    (1C Sent);
    (1 EOS OR MARK) ;

SELECT Aor IF
    (0 PrcIrre-Or-Aor)
    (NOT 1 Cop)
    (NOT 1 ("<экен>")) ;

# get rid of plural possessive reading if antecedent is sg
REMOVE Px3pl IF
    (0C Px3-Sg-Or-Pl)
    (*-1C Np-Nom-Or-Gen-Sg BARRIER Sent) ;

# жер
REMOVE Verb IF (-1 Det) ;

# Some rules for participle forms that are followed by copulas
# E.g., түшкөн эле
REMOVE Ger IF (0 PrcCop) (1 Cop) ;
REMOVE Gpr IF (0 PrcCop) (1 Cop) ;
REMOVE Finite IF (0 PrcCop) (1 Cop) ;
SELECT Cop IF (-1 PrcCop) ;

# If an adjective is right before a numeral + noun it is an adjective for sure
SELECT Adj IF (1C Num) (2C Noun) ;

## select Adj if in between nouns
SELECT Adj IF (-1 Noun) (1 Noun) ;

## select Adj if before copula (idi, iken)
SELECT Adj IF (1 Cop) ;

# interjections
REMOVE Interj IF (1 Verb) ;
REMOVE Interj IF (NOT -1 BOS) (NOT 1 EOS) ;

# бат
REMOVE Vb-Imp IF (1 Verb) ;

# ambiguous n.acc/v.*.ifi forms, e.g. түштү, etc.
SELECT Vb-Ifi-3 IF (0 N-Acc) (1C Sent) ;

# Drop attr unless the next cohort (or its first sub-reading, 1/1) can be
# part of a noun phrase.
REMOVE Attr IF
    (NOT 1 PRE-N)
    (NOT 1 NOMINAL-HEAD)
    (NOT 1/1 NOMINAL-HEAD) ; ## Bu araba 11,5 milyar a

# Deciding about the number of a verb or copula in the 3 person
# Basic idea is to remove plural reading if subject is not in plural,
# but there are a lot of corner cases, which lead to lots of mispredictions.
# So let's just delete plural reading for now.
#
# Selecting plural reading might be a more productive approach.
# NOTE: line structure restored; '#' comments run to end of line in CG-3, so
# each rule and each contextual test needs its own line.

# Remove the plural reading of a third-person verb, auxiliary or copula
# that is not also a pronoun.
REMOVE Pl IF
    (0 V OR Vaux OR Cop)
    (NOT 0 Pron)
    (0 P3)
    ((0 P3-Sg) LINK (NOT 0 Coop)) ; ##

# Thought that the above rule should cover copulas, seems that sometimes it doesn't
# (SUB:1 and 0/1 address the first sub-reading of the current cohort.)
REMOVE SUB:1 Pl IF (0/1 Cop) (0/1 P3) ;

# select adverbial reading of adjectives if any verbal form except gerund follows
# FIXME CHECK it might be a gerund as well
SELECT Advl OR Adv IF
    (1C V-NotGerund)
    #(NOT 1C Participle)
    (NOT 1 ("бол"))
    (NOT 0 ("балама"))
    (NOT 1 ("де")) ; ##
# example: in 'балама үйретейін' the 'балама' shouldn't be treated as adj.advl (equivalent), it should be n.px1sq.dat (to my son)

# Word-form-specific rules
# ------------------------
"<атам>" SELECT Vaux IF (-1 Prc) ;

# "<өтө>" SELECT Adv IF (1 Adj) ; ## Бірақ кеше _өте_ суық еді!
"<өтө>" REMOVE Prc IF (NOT 1 Vaux) ;

"<бардык>" SELECT Det IF (1 Noun) ;

"<алдым>" SELECT Verb IF (1 EOS OR MARK) ;
"<алдың>" SELECT Verb IF (1 EOS OR MARK) ;
"<алды>" SELECT Verb IF (1 EOS OR MARK) ;

"<алар>" REMOVE Verb IF (NOT 1 EOS OR MARK) ;
"<алар>" REMOVE Vaux IF (NOT 1 EOS OR MARK) ;

"<карай>" REMOVE Verb IF (NOT 1 Vaux) ;

"<бир>" SELECT Det IF (1 Noun) (-1 Adj) ;

"<бирге>" SELECT Adv IF (-1 ("<менен>")) ;

"<кандай>" SELECT Det IF (1 Noun) ;

"<канча>" SELECT Num IF (1 Noun) ;
"<канча>" REMOVE Subst IF (1 Noun) ;

"<ат>" REMOVE Vaux ;

# "<келишим>" SELECT Noun ;
SELECT Noun IF (0 ("келишим"i)) ;

# After a participle, drop the main-verb reading of a verb/auxiliary cohort.
REMOVE V IF (0 V OR Vaux) (-1 Prc) ;

REMOVE Ger-Acc IF (0 P3-Pl) (1 EOS OR MARK) ;

# Determiner vs. pronoun
# ----------------------
SELECT Pron IF (0C Det OR Pron) (1C Adv) ;

SELECT Pron IF
    (0C Det OR Pron)
    (1/1 Cop)
    (2 EOS OR MARK) ; # example: Бул оюн.

# FIXME removes determiner even when there is a noun 1 to the right
# FIXED JNW 2017-08-12
REMOVE Det IF
    (0 Det OR Pron)
    # ADDED JNW 2017-08-11
    (NOT 1 Noun OR Subst)
    (NEGATE 1 Adj LINK 1 Noun OR Subst) ;
# 44 . Ол енді ол дыбысты анығырақ ести бастады.
# (!) 34 . Ол Азаматтың қайда екенін білсе де айтқысы келген жоқ.

# Same rule text as the first Det/Pron rule above; kept in place so rule order
# within the section is unchanged.
SELECT Pron IF (0C Det OR Pron) (1C Adv) ; ## 44 . [0]Ол енді ол дыбысты анығырақ ести бастады.
# NOTE: line structure restored; '#' comments run to end of line in CG-3, so
# each rule and each contextual test needs its own line.
# NOTE(review): where a commented-out test was followed by further tests on the
# same collapsed line, the later tests were taken to be active — confirm against
# the upstream grammar.

SELECT Pron IF (1 Noun) (2 ("бол")) ; ## Бул мектеп болуш керек

# бар
# Copula sub-reading rules (SUB:1 addresses the first sub-reading).
SELECT SUB:1 Cop IF
    (1 MARK OR EOS)
    (NOT 0 FiniteVerb OR Vaux)
    (NOT *-1 P2Pron)
    (NOT 0 Interj)     ## Дұрыс, оның мысығы бар.
    (NOT 0 FiniteVerb) ## 74 ... барлығы 53 ел [0]қатысты.
; ## Жоқ, Айгүлдің күшігі [0]жоқ, оның мысығы [0]бар.

SELECT SUB:1 Cop IF
    (1 MARK OR EOS)
#    (2*/1 Cop BARRIER EOS)
#    (NOT 0 Interj) ## Дұрыс, оның мысығы бар.
    (NOT 0 FiniteVerb) ## 74 ... барлығы 53 ел [0]қатысты.
#    (NOT 2 Noun)
;

REMOVE SUB:1 Cop IF (NOT 1 EOS OR MARK OR ("де")) ;

REMOVE SUB:1 Cop IF
    (-1 BOS OR MARK) ## Headings or enumerations
    (NOT 1 EOS) ;

"<бар>" SELECT V IF (-1 Dat) ;
"<бар>" SELECT SUB:1 Cop IF (-1 (px3sp)) ;

REMOVE IJ IF (0 Adj) (1 Noun) ;
# SELECT Interj OR (cnjadv) IF (-1 BOS) (1 Cm) ; ## "Мысалы, ежелгі заманның өзінде Арал теңізі көп елдерге мәлім болған."
# e.g., дұрыс! () | дұрыс жерден ()
# NOTE(review): the tags inside the empty parentheses above appear to have been
# lost (presumably ij vs. adj) — confirm.

# Transitivity of бас and күт: tv if an accusative precedes
# (scanning left, stopping at the BARRIER set).
SELECT V-TV IF (0 ("бас")) (*-1 Acc BARRIER N-LIKE) ;
SELECT V-IV IF (0 ("бас")) (NOT -1 NOMINAL) ;

SELECT V-TV IF (0 ("күт")) (*-1 Acc BARRIER N-LIKE-NO-ADV) ;
REMOVE V-TV IF (0 ("күт")) (NOT *-1 Acc BARRIER N-LIKE-NO-ADV) ;

## [0]Азамат пен Айгүл бақшада.
### : for "Алма мен Аян"-cnjcoo
SELECT CC IF (-1 N-LIKE) (0 Post) (1 N-LIKE) ;
REMOVE CC IF (0 Post) (NOT 1 N-LIKE) ;

REMOVE Attr IF (0 Noun OR Prop) (NOT 0 Loc) (NOT 1 Noun OR Prop) ;
REMOVE Attr IF (0 Noun OR Prop) (NOT 0 Loc) (1 (px3sp)) ;

REMOVE Adj-Subst IF (0 Noun) (0/1 Cop) (1 MARK OR EOS) ;

SELECT (gpr_past) IF (0 (ger_past) + Nom) (1C Noun) (NOT 0 Det) ;

SELECT FiniteVerb IF
    (1 MARK OR EOS OR ("де"i)) # FIXME s/.*/SentenceBoundary/
#    (NOT 0 ("шығар"i) OR ("бар"i)) # FIXME a better way?
#    (NOT 0/1 Cop)
    (0 V-NonFiniteCanGetCop) ;


SECTION

# If Nom and Attr are left, just go with Nom...
# NOTE: line structure restored; '#' comments run to end of line in CG-3, so
# each rule and each contextual test needs its own line.

# Drop attr from a nominative, non-locative noun or proper noun.
REMOVE Attr IF
    (0 Noun OR Prop)
    (0 Nom)
    (NOT 0 Loc) ;

# Before a noun, drop the n.loc.subst.nom reading when a loc.attr reading exists.
REMOVE Noun + Loc + Subst + Nom IF
    (0 Loc + Attr)
    (1 Noun) ;


SECTION

# Replace nom with acc-ind on a nominative noun that follows an unambiguous
# nominative in the same sentence and is not directly followed by a verb.
SUBSTITUTE Nom @acc-ind TARGET Noun IF
    (0 Nom)
    (*-1C Nom BARRIER Sent)     # C so that we don't get det n sequences always being marked acc-ind
    (NOT 1 Verb)
    (NEGATE 0 Px3 LINK -1 Nom)  # Don't apply to N.nom N.px3 compounds
;