# Constraint Grammar (CG-3) disambiguation rules.
#
# Layout: delimiters and options, then tag LISTs / SETs, then three rule
# SECTIONs (general rules, copula sub-reading rules, word-specific rules).
# Comments in CG-3 run from '#' to the end of the line, so every statement
# must sit on its own line; otherwise it is swallowed by a preceding comment.

# Firstly, we need to define what tags should be considered sentence delimiters.
# For this example, only full stop is set as delimiter.
DELIMITERS = "<.>" ;

# Alternate, left-to-right (main reading on the left)
SUBREADINGS = LTR ;

# ============= #
# Tags and sets #
# ============= #

SETS

LIST BOS = (>>>) ;
LIST EOS = (<<<) ;

# First-level/Parts-of-speech tags
# ================================

LIST A = adj ;
LIST Adv = adv ;
LIST Pron = prn ;
LIST Pron-Pers = (prn pers) ;
LIST N = n ;
LIST IJ = ij ;
LIST Prop = np ;
LIST V = v ;
LIST Vaux = vaux ;
LIST Cop = cop ;
LIST Det = det ;
LIST CC = cnjcoo ;
LIST CS = cnjsub ;
LIST Interj = ij ;
LIST Pres = pres ;
LIST Num = num ;
LIST Post = post ;
LIST Postadv = postadv ;
LIST FinalClitic = mod_ass mod_emo mod qst emph ;
LIST Cm = cm ;
# (A second, identical `LIST Adv = adv ;` used to follow here; it was
#  redundant with the definition above and has been dropped.)
LIST Rquot = rquot ;
LIST Excl = "!" ;

# POS sub-categories
# ==================

LIST Interr = itg ;

# Morphosyntactic properties
# ==========================
# If you find yourself embracing the same tag in brackets over and over
# again, you may want to add it here.

LIST Nom = nom ;
LIST Gen = gen ;
LIST Dat = dat ;
LIST Acc = acc ;
LIST Abl = abl ;
LIST Loc = loc ;
LIST Attr = attr ;
LIST Subst = subst ;
LIST Prc = prc_perf prc_impf ;

# Verb sets
# =========

LIST FiniteVerb = pres aor past ifi ifi_evid fut fut_plan imp opt pih ;
LIST V-P3 = (v p3) (vaux p3) (cop p3) ;
LIST Gerund = ger ger_ppot ger_past ger_perf ger_impf ger_abs ger_aor ;
SET V-NotGerund = V - Gerund ;
LIST Participle = prc_perf prc_impf prc_cond prc_vol prc_plan ;
LIST VerbalAdverb = gna_perf gna_cond gna_until gna_after ;
LIST VerbalAdjective = gpr_past gpr_fut gpr_pot gpr_impf ;
LIST SubstPl = (subst pl) ;

# All possible word categories
# ============================

SET WORD = N | V | A | Post | Postadv | Pron | Det | Adv | CC | CS | Interj | Num | ("\?") ;

SET PRE-N = A | Det | Postadv | Num | (n gen) | (prn gen) | CC | (attr) ;

SET MARK = Cm | ("\\") | ("\;") | ("\(") ;

SET NOMINAL = N | Prop | Pron | Subst | Gerund ;

# Barriers
# ========

SET S-BOUNDARY = CS | Interr | EOS ;


SECTION

# If there is a singular noun to the right, I cannot be a verb or noun.
# NOTE(review): the context requires the tags `n` and `s` together; no other
# rule or list in this file uses an `s` tag (plural is `pl`) -- confirm that
# `s` is a tag the analyser actually emits.
REMOVE (n) OR (v) IF (1 (n s)) ;

## If there is a conjunction followed by a certain cohort of the same CASE as me, choose me.
#SELECT $$Case IF (1 (cnjcoo) LINK 1C $$Case) ;

# If the next form is px3sp, then N is probably gen and not acc
REMOVE Acc IF (0 Gen) (1 (px3sp)) ;

# unlikely to be nominative reading
#   if it can also be attributive reading of bare noun
#   and the thing after it can be a noun
REMOVE Nom IF (0 Attr) (1 N) ;

# unlikely to be an attributive reading of bare noun
#   if it can also be nominative reading
#   and the thing after it can't be a noun
REMOVE Attr IF (0 Nom) (NOT 1 N) ;

# unlikely to be an auxiliary
#   if it can be a lexical verb
#   and the thing before it can't be an infinitive
# NOTE(review): "infinitive" here is tested via the Prc set
# (prc_perf / prc_impf) -- confirm the wording matches the intent.
REMOVE Vaux IF (0 V) (NOT -1 Prc) ;

# probably not a lexical verb
#   if it can be an auxiliary
#   and the thing before it is an infinitive (tested via Prc, as above)
REMOVE V IF (0 Vaux) (-1 Prc) ;

# a couple rules for бла
SELECT Post IF (-1 NOMINAL)(0 CC)(1 V);
SELECT CC IF (-1 NOMINAL)(0 CC)(1 NOMINAL);

REMOVE VerbalAdjective IF (1 Cm) ;

REMOVE VerbalAdjective + Subst IF (-1C Gen) (1C N) ;

SELECT Participle IF (0C Participle OR VerbalAdverb) (NOT 0C (prc_cond) OR (gna_cond)) (1 Vaux) ;

# if ambiguous between prn and adj (e.g. башха) and it has copula, probably adj
# (0/1 addresses sub-reading 1 of the current cohort)
REMOVE Pron IF (0 A) (0/1 Cop) ;

SELECT Gerund IF (1 Post) ;
SELECT Post IF (-1 Gerund) ;


# ======= #
SECTION   # Copulas
# ======= #

# Drop the copula sub-reading unless the next cohort is end-of-sentence,
# a MARK, or "де".
REMOVE SUB:1 Cop IF (NOT 1 EOS OR MARK OR ("де")) ;

# Headings or enumerations: drop the copula sub-reading right after
# beginning-of-sentence or a MARK when the sentence does not end next.
REMOVE SUB:1 Cop IF (-1 BOS OR MARK)
                    (NOT 1 EOS) ;

# get rid of ger+cop readings
#   if it can also be a finite verb form
#   and is at the end of the sentence
#   (not strictly correct, but probably okay most of the time)
REMOVE SUB:1 Cop IF (0 Gerund) (0 FiniteVerb) (1 EOS) ;


SECTION   # Word-specific stuff

# If башха is pl it's probably better to have the pronoun reading
# and not the subst adj reading for MT
REMOVE A IF (0 ("башха"i)) (0 Pron) (0 SubstPl) ;


# Resources:
# http://visl.sdu.dk/cg3.html
# http://groups.google.com/group/constraint-grammar
# http://kevindonnelly.org.uk/2010/05/constraint-grammar-tutorial/
# https://wiki.apertium.org/wiki/Constraint_Grammar
# https://wiki.apertium.org/wiki/Apertium_and_Constraint_Grammar