### Constraint Grammar for Kyrgyz ###
#
# VISL CG-3 disambiguation grammar. Layout:
#   1. delimiters and parser options
#   2. LIST / SET definitions (morphological tags, then "syntactic" tags and helpers)
#   3. SECTION 1: main disambiguation rules
#   4. SECTION 2: Attr/Nom clean-up
#   5. SECTION 3: marking of unmarked (indefinite) accusatives

# The second and third delimiters were empty strings ("") in the received copy;
# "<!>" and "<?>" are assumed to be the intended sentence-final wordforms.
# NOTE(review): confirm against the upstream grammar.
DELIMITERS = "<.>" "<!>" "<?>" ;
SOFT-DELIMITERS = "<,>" ;
SUBREADINGS = LTR ; # Alternate, left-to-right (main reading on the left)

LIST BOS = (>>>) ; # Beginning of sentence
LIST EOS = (<<<) ; # End of sentence

# Punctuation
# ===========

LIST Cm = cm ;
LIST Guio = guio ;
SET MARK = Cm | ("\\") | ("\;") | ("\(") ; # | ("-") | ("—") ;
SET AdvclSep = Cm | Guio | ("\;") ;

# Morphological tags
# ==================
# Each name is defined exactly once; rules below refer to these names.

LIST Nom = nom ;
LIST Acc = acc ;
LIST @acc-ind = acc-ind ; # replacement tag used by the SUBSTITUTE rule in the last section
LIST Gen = gen ;
LIST Adj = adj ;
LIST Adv = adv ;
LIST Pron = prn ;
LIST Pron-Pers = (prn pers) ;
LIST Noun = n ;
LIST Past = past ;
LIST Vadj = vadj vadj_past ;
LIST Vaux = vaux ;
LIST Verb = v ;
LIST V = v ; # same contents as Verb; both names are used by rules
LIST Cop = cop ;
LIST Num = num ;
LIST Interj = ij ;
LIST Subst = subst ;
LIST Vb-or-Aux = v vaux ;
LIST Cond = gna_cond prc_cond ;
LIST PrcIrre = prc_irre ;
LIST Prc = prc_plan prc_past prc_hab prc_fut prc_cond prc_perf prc_impf prc_vol prc_irre prc_real prc_pcond ;
LIST PrcCop = prc_past prc_hab prc_fut prc_irre prc_pcond ; # Prc forms that occur with copula
LIST Gna = gna_perf gna_impf gna_cond gna_ala gna_until gna_after ;
LIST Ger = ger ger_fut ger_past ger_pres ;
LIST Ger-Plain = ger ;
LIST Ger-Fut = ger_fut ;
LIST Gpr = gpr_fut gpr_pot2 gpr_perf gpr_past gpr gpr_pot gpr_impf gpr_hab gpr_notyet gpr_ppot ;
LIST Finite = past ifi fut aor pih ;
LIST Gerund = ger ger_ppot ger_past ger_perf ger_impf ger_abs ger_pres ;
LIST FiniteVerb = pres aor past ifi ifi_evid fut fut_plan imp opt pih ;

SET V-NotGerund = V - Gerund ;
SET V-NonFiniteCanGetCop = Ger | Gpr ;

LIST Det = det ;
LIST CC = cnjcoo ;
LIST CS = cnjsub ;
LIST Sent = sent ;
LIST Pl = pl ;
LIST Tv = tv ;
LIST Iv = iv ;
LIST Aor = aor ;

LIST Px3pl = px3pl ;
LIST Px3sg = px3sg ;
LIST Px3sp = px3sp ;
LIST Px1sg = px1sg ;
LIST Px1pl = px1pl ;
LIST Px2sg = px2sg ;
LIST Px2pl = px2pl ;

LIST Post = post ;
LIST Coop = coop ;
LIST IJ = ij ; # same contents as Interj; both names are used by rules

LIST Sg = sg ;

LIST Dat = dat ;
LIST Abl = abl ;
LIST Loc = loc ;
#LIST Ins = ins ;

SET Case = Nom | Gen | Dat | Acc | Abl | Loc ;

LIST P1 = p1 ;
LIST P2 = p2 ;
LIST P3 = p3 ;

LIST Vb-Pl = (v pl) ;
LIST Vb-Sg = (v sg) ;

LIST Nom-Pl = (nom pl) ;
SET Nom-Sg = (nom) - (pl) ;

LIST Nouns = n np prn ; # prn is the pronoun tag used throughout this file (cf. Pron, N-LIKE)
LIST Prop = np ;
LIST Np-Top = (np top) ;
LIST N-Pl = (n pl) ;
SET N-Sg = Noun - (pl) ;

LIST V-IV = (v iv) ;
SET V-TV = (v tv) | (v caus) ;

SET Sing = (*) - (pl) ; # any reading without a pl tag
SET Acc-Or-Nom = Acc | Nom ;

# "Syntactic" tags
# ================

LIST Advl = advl ;
LIST Attr = attr ;

LIST N-LIKE = (adj subst) n np prn ; # Actually, see NOMINAL

SET ADV-NOUNS = ("саат") | ("жыл") | ("күн") | ("ай") ;
SET PLACE-NOUNS = ("шаар") | ("айыл") | ("район") | ("микрорайон") | ("аймак") | ("көл") | ("суу") ;

SET NOMINAL = Noun | Prop | Pron | Subst | Gerund ;
SET NOMINAL-HEAD = Noun | Ger | Subst ;
SET PRE-N = Det | Num | Attr | Adj | Gen | ("-") ;
SET N-LIKE-NO-ADV = N-LIKE - ADV-NOUNS ;

SET Not-Prc = (*) - Prc ;
SET Not-Sent = (*) - Sent ;
SET Tv-Or-Iv = Tv | Iv ;
SET PrcIrre-Or-Aor = PrcIrre | Aor ;
SET Vb-Sg-Or-Pl = Vb-Pl | Vb-Sg ;
SET Nom-Or-Gen = Nom OR Gen ;
SET Np-Nom-Or-Gen = Nouns + Nom-Or-Gen ;
SET Np-Nom-Or-Gen-Sg = Np-Nom-Or-Gen + Sing ;

SET Px3-Sg-Or-Pl = Px3sg | Px3pl ;
SET Px3 = Px3sg | Px3pl | Px3sp ;
SET Px1 = Px1sg | Px1pl ;
SET Px2 = Px2sg | Px2pl ;
SET Possessive = Px3 | Px1 | Px2 ;

LIST Adj-Subst = (adj subst) ;
SET Ger-Acc = Ger + (acc);

LIST P3-Pl = (p3 pl) ;
LIST P3-Sg = (p3 sg) ;
LIST Vb-Imp = (v imp) ;
LIST N-Acc = (n acc) ;
LIST Subst-Acc = (n acc) (adj subst acc) ;
LIST Vb-Ifi-3 = (v ifi p3 sg) (v ifi p3 pl) ;
LIST Vb-Iv = (v iv) (v tv pass) ;
LIST Vb-Tv-Caus = (v tv) (v caus) ;
SET Vb-Tv = Vb-Tv-Caus - (pass);
LIST P2Pron = (prn pers p2) ;


SECTION

# No vaux if participle doesn't precede
REMOVE Vaux IF
    (0 Vb-or-Aux)       # the current word can be verb also
    (-1C Not-Prc);      # previous word is not a participle

## V if participle doesn't precede
#SELECT Verb IF
#    (0C Vb-or-Aux)     # the current word can be verb also
#    (-1C Not-Part);    # previous word is not a participle

# adj if next word is noun
SELECT Adj IF (1C Noun);

# no past forms if next word is noun
REMOVE Past IF (0C Vb-or-Aux) (1C Noun);

# get rid of copula reading if not end of sentence
REMOVE Cop IF (1C Not-Sent);

# select plural verb reading if subject is plural
SELECT Pl IF (0C Vb-Sg-Or-Pl) (*-1C Nom-Pl BARRIER Sent);

# get rid of plural verb reading if subject not plural
REMOVE Pl IF (0C Vb-Sg-Or-Pl) (*-1C Nom-Sg BARRIER Sent);

# remove iv (i.e. keep tv) if an unambiguous accusative immediately precedes
REMOVE Iv IF (0C Tv-Or-Iv) (-1C Acc);

# select iv if no noun or pronoun stands between the clause start and this word
SELECT Iv IF (0C Tv-Or-Iv) (-1* BOS OR MARK BARRIER Noun OR Pron) ; # Сен канча саат күттүң?

# NOTE(review): (0C Tv) requires every reading to carry tv, so an iv reading can
# only be selected here if it is also tagged tv — was (0C Tv-Or-Iv) intended? confirm
SELECT Iv IF
    (0C Tv)             # e.g., бар is otherwise a problem
    (NOT -1C* Acc BARRIER Sent) ;

# get rid of prc_irre if also can be aor if following word sent
REMOVE PrcIrre IF
    (0C PrcIrre-Or-Aor)
#    (1C Sent);
    (1 EOS OR MARK) ;

SELECT Aor IF (0 PrcIrre-Or-Aor) (NOT 1 Cop) (NOT 1 ("<экен>")) ;

# get rid of plural possessive reading if antecedent is sg
REMOVE Px3pl IF (0C Px3-Sg-Or-Pl) (*-1C Np-Nom-Or-Gen-Sg BARRIER Sent);

# жер
REMOVE Verb IF (-1 Det);

# барган соң (etc.)
#"<соң>" SELECT Post IF (-1 Ger) ;
SELECT Post IF (-1 Ger) ;

SELECT Gerund IF (0 Gpr) (1 Post) ;

REMOVE Ger-Plain IF (0 Noun) (0 Ger-Plain LINK 0 Case) ;

# Some rules for participle forms that are followed by copulas
# E.g., түшкөн эле
REMOVE Ger IF (0 PrcCop) (1 Cop) ;
REMOVE Gpr IF (0 PrcCop) (1 Cop) ;
REMOVE Finite IF (0 PrcCop) (1 Cop) ;
SELECT Cop IF (-1 PrcCop) ;

# If an adjective is right before a numeral + noun it is an adjective for sure
SELECT Adj IF (1C Num) (2C Noun) ;

## select Adj if inbetween nouns
SELECT Adj IF (-1 Noun) (1 Noun) ;

## select Adj if before copula (idi, iken)
SELECT Adj IF (1 Cop) ;

# interjections
REMOVE Interj IF (1 Verb) ;
REMOVE Interj IF (NOT -1 BOS) (NOT 1 EOS) ;

# бат
REMOVE Vb-Imp IF (1 Verb) ;

# ambiguous n.acc/v.*.ifi forms, e.g. түштү, etc.
SELECT Vb-Ifi-3 IF (0 Subst-Acc) (1C Sent) ;

REMOVE Attr IF (NOT 1 PRE-N) (NOT 1 NOMINAL-HEAD) (NOT 1/1 NOMINAL-HEAD) ; ## Bu araba 11,5 milyar a

REMOVE Noun IF (0 Noun) (0 Np-Top) (1 PLACE-NOUNS) ;

# Deciding about the number of a verb or copula in the 3 person
# Basic idea is to remove plural reading if subject is not in plural,
# but there a lot of corner cases, which lead to lots of mispredictions.
# So let's just delete plural reading for now.
#
# Selecting plural reading might be a more productive approach.
REMOVE Pl IF (0 V OR Vaux OR Cop) (NOT 0 Pron) (0 P3) ((0 P3-Sg) LINK (NOT 0 Coop)) ;

##
## THIS ISN'T ALWAYS THE CASE
## Thought that the above rule should cover copulas, seems that sometimes it doesn't
# REMOVE SUB:1 Pl IF
#    (0/1 Cop)
#    (0/1 P3)
#;

# Number of a copula sub-reading follows the nearest preceding noun
REMOVE SUB:1 Sg IF (0/1 Cop) (0/1 P3) (-1* N-Pl BARRIER Noun) ;
REMOVE SUB:1 Pl IF (0/1 Cop) (0/1 P3) (-1* N-Sg BARRIER Noun) ;

# select adverbial reading of adjectives if any verbal form except gerund follows
# FIXME CHECK it might be a gerund as well
SELECT Advl OR Adv IF
    (1C V-NotGerund)
    #(NOT 1C Participle)
    (NOT 1 ("бол"))
    (NOT 0 ("балама"))
    (NOT 1 ("де")) ;
##
# example: in 'балама үйретейін' the 'балама' shouldn't be treated as adj.advl (equivalent), it should be n.px1sq.dat (to my son)

# Wordform-specific rules
# =======================

"<атам>" SELECT Vaux IF (-1 Prc) ;

# "<өтө>" SELECT Adv IF (1 Adj) ; ## Бірақ кеше _өте_ суық еді!
"<өтө>" REMOVE Prc IF (NOT 1 Vaux) ;

"<бардык>" SELECT Det IF (1 Noun) ;

"<алдым>" SELECT Verb IF (1 EOS OR MARK) ;
"<алдың>" SELECT Verb IF (1 EOS OR MARK) ;
"<алды>" SELECT Verb IF (1 EOS OR MARK) ;
"<алар>" REMOVE Verb IF (NOT 1 EOS OR MARK) ;
"<алар>" REMOVE Vaux IF (NOT 1 EOS OR MARK) ;

"<карай>" REMOVE Verb IF (NOT 1 Vaux);

"<бир>" SELECT Det IF (1 Noun) (-1 Adj) ;

"<бирге>" SELECT Adv IF (-1 ("<менен>"));

"<кандай>" SELECT Det IF (1 Noun) ;

"<канча>" SELECT Num IF (1 Noun) ;
"<канча>" REMOVE Subst IF (1 Noun) ;

"<ат>" REMOVE Vaux ;

"<жашар>" SELECT Adj IF (-1 Num) ;

"<боюнча>" SELECT Post ;

# "<келишим>" SELECT Noun ;
SELECT Noun IF (0 ("келишим"i)) ;

#SELECT Noun IF
#    (0 ("конок"i) LINK 0 Noun LINK 0 Pl OR ("конокто"i)) # FIXME: probably not what I intended -JNW 2023-11-01
#;

#SELECT Noun IF
#    (0 ("белги"i) LINK 0 Noun LINK 0 Pl OR ("белгиле"i)) # FIXME: probably not what I intended -JNW 2023-11-01
#;

SELECT Noun IF (0 N-Pl OR Ger-Fut) ;

REMOVE Gerund IF (0 Gerund OR Noun LINK 0 Pl) ;

REMOVE V IF (0 V OR Vaux) (-1 Prc) ;

REMOVE Gna IF (0 Gna OR Prc) (1 Vaux) ;

REMOVE Ger-Acc IF (0 P3-Pl) (1 EOS OR MARK) ;

REMOVE SUB:1 P3-Pl IF (NOT -1* Pl) ;

# Азамат алты жашар кичинекей бала.

# Determiner vs. pronoun
# ======================

SELECT Pron IF (0C Det OR Pron) (1C Adv) ;

SELECT Pron IF (0C Det OR Pron) (1/1 Cop) (2 EOS OR MARK) ; # example: Бул оюн.

# FIXME removes determiner even when there is a noun 1 to the right
# FIXED JNW 2017-08-12
REMOVE Det IF
    (0 Det OR Pron)     # ADDED JNW 2017-08-11
    (NOT 1 Noun OR Subst)
    (NEGATE 1 Adj LINK 1 Noun OR Subst) ;
# 44 . Ол енді ол дыбысты анығырақ ести бастады.
# (!) 34 . Ол Азаматтың қайда екенін білсе де айтқысы келген жоқ.

SELECT Pron IF (0C Det OR Pron) (1C Adv) ; ## 44 . [0]Ол енді ол дыбысты анығырақ ести бастады.

SELECT Pron IF (1 Noun) (2 ("бол")) ; ## Бул мектеп болуш керек

# Copula sub-readings (бар etc.)
# ==============================

# бар
SELECT SUB:1 Cop IF
    (1 MARK OR EOS)
    (NOT 0 FiniteVerb OR Vaux)
    (NOT -1* P2Pron)
    (NOT 0 Interj)      ## Дұрыс, оның мысығы бар.
    (NOT 0 FiniteVerb)  ## 74 ... барлығы 53 ел [0]қатысты.
; ## Жоқ, Айгүлдің күшігі [0]жоқ, оның мысығы [0]бар.

SELECT SUB:1 Cop IF
    (1 MARK OR EOS)
#    (2*/1 Cop BARRIER EOS)
#    (NOT 0 Interj)     ## Дұрыс, оның мысығы бар.
    (NOT 0 FiniteVerb)  ## 74 ... барлығы 53 ел [0]қатысты.
#    (NOT 2 Noun)
;

REMOVE SUB:1 Cop IF (NOT 1 EOS OR MARK OR ("де")) ;

REMOVE SUB:1 Cop IF
    (-1 BOS OR MARK)    ## Headings or enumerations
    (NOT 1 EOS) ;

#REMOVE SUB:1 Not Cop IF
#    (1 EOS OR MARK OR ("де"))
#;

"<бар>" SELECT V IF (-1 Dat) ;
"<бар>" SELECT SUB:1 Cop IF (-1 (px3sp)) ;
"<бар>" SELECT SUB:1 Cop IF (1 MARK OR EOS) ;
"<бар>" REMOVE Noun IF (1 MARK OR EOS OR Cop);
#"<бар>" REMOVE Noun IF
#    (1 MARK OR EOS OR Cop);
"<бар>" REMOVE Adj-Subst ;

REMOVE IJ IF (0 Adj) (1 Noun) ;
# SELECT Interj OR (cnjadv) IF (-1 BOS) (1 Cm) ; ## "Мысалы, ежелгі заманның өзінде Арал теңізі көп елдерге мәлім болған."
# e.g., дұрыс! () | дұрыс жерден ()

# Transitivity of specific verbs
# ==============================

SELECT V-TV IF (0 ("бас")) (-1* Acc BARRIER N-LIKE) ;
SELECT V-IV IF (0 ("бас")) (NOT -1 NOMINAL) ;

SELECT V-TV IF (0 ("күт")) (-1* Acc BARRIER N-LIKE-NO-ADV) ;
REMOVE V-TV IF (0 ("күт")) (NOT -1* Acc BARRIER N-LIKE-NO-ADV) ;

## [0]Азамат пен Айгүл бақшада.
### : for "Алма мен Аян"-cnjcoo
SELECT CC IF (-1 N-LIKE) (0 Post) (1 N-LIKE) ;
REMOVE CC IF (0 Post) (NOT 1 N-LIKE);

REMOVE Attr IF (0 Noun OR Prop) (NOT 0 Loc) (NOT 1 Noun OR Prop) ;
REMOVE Attr IF (0 Noun OR Prop) (NOT 0 Loc) (1 (px3sp)) ;

REMOVE Adj-Subst IF (0 Noun) (0/1 Cop) (1 MARK OR EOS) ;

SELECT (gpr_past) IF (0 (ger_past) + Nom) (1C Noun) (NOT 0 Det) ;

SELECT FiniteVerb IF
    (1 MARK OR EOS OR ("де"i)) # FIXME s/.*/SentenceBoundary/
#    (NOT 0 ("шығар"i) OR ("бар"i)) # FIXME a better way?
#    (NOT 0/1 Cop)
    (0 V-NonFiniteCanGetCop) ;

# gna_cond vs. prc_cond: keep prc_cond only when a form of бол/э follows
REMOVE (prc_cond) IF
    (0 (gna_cond))
    (NEGATE 1* ("бол"i) OR ("э"i) BARRIER Verb OR Noun) ;

REMOVE (gna_cond) IF
    (0 (prc_cond))
    (1* ("бол"i) OR ("э"i) BARRIER Verb) ;


SECTION

# If Nom and Attr are left, just go with Nom...
REMOVE Attr IF (0 Noun OR Prop) (0 Nom) (NOT 0 Loc) ;

REMOVE Noun + Loc + Subst + Nom IF (0 Loc + Attr) (1 Noun) ;


SECTION

# Replace nom with acc-ind on a noun that looks like an unmarked direct object.
# NOTE(review): (Vb-Tv Cond) below is an inline tag list, so Vb-Tv and Cond are
# presumably read as literal tags rather than as the sets defined above; if the
# intersection of those sets was intended it would be written Vb-Tv + Cond.
# Left unchanged pending confirmation of the intended behaviour.
SUBSTITUTE Nom @acc-ind TARGET Noun IF
    (0 Nom)
    (-1C* Nom BARRIER Sent)             # C so that we don't get det n sequences always being marked acc-ind
#    (NOT 1 Verb)                       # commenting this out because could totally be this
    (NEGATE 1 Vb-Iv)
    (1* Vb-Tv BARRIER Sent)             # using this instead of the above
    (1* (Vb-Tv Cond) BARRIER AdvclSep)  # using this instead of the above
    (NEGATE 0 Px3 LINK -1 Nom)          # Don't apply to N.nom N.px3 compounds
    (NEGATE 0 Possessive)               # Possessives can't be indefinite, would need overt acc marking
    (NEGATE 1C Post)                    # not acc if followed by a postposition
    (NEGATE 1C Noun)                    # e.g. "_кайгы_ өмүр кесет"
;