#!/usr/bin/gawk -f

# lw2xml: Conversion of Lexware band label format to xhtml
# (c) Campbell Webb 2020
# License: GPLv3
# (See tail of file for documentation)

# TODO: see (the target of this note is missing from the reviewed copy)

# NOTE(review): the copy of this script that was reviewed had been passed
#   through an HTML-to-text conversion: every string literal containing
#   markup had lost its tags and the line structure was collapsed. The
#   markup literals below were reconstructed from the surrounding logic and
#   are each marked "NOTE(review)"; confirm them against a pristine copy.

BEGIN{

  # ===================== Initialization ==========================

  read_args()
  init()

  # Initialize Path and Prevbc (previous band class)
  Path = ""
  Prevbc = "0"

  # Start the xhtml output
  if (!XML)
    head("Dictionary")
  o("doc")

  # ===================== Line-by-line cycle ==========================

  # Read each line from the input file, until there are no more.
  # getline returns 0 at end of file and -1 on error; the status is kept
  # so that an unreadable file can be reported after the loop.
  while ((Readstatus = (getline < LEX)) > 0) {
    Line++

    # Ignore comments, and read from start line to finish line (if given)
    if (($0) && ($0 !~ /^[ #]/) && \
        (Line >= STARTLINE) && \
        (Line <= FINISHLINE)) {

      # Pre-process the line. See functions below
      indexmarks()
      xmlencode()
      dollar2sup()
      splitline() # sets Bl, Bc, Word2, Rest1, Rest2

      # Skip some classes
      if (Bc ~ /^(cf|caus|xr|xgl|grp|smf|sc)$/)
        continue
      if (XML && (Bc ~ /^(file|par|par3|com)$/))
        continue

      # ===================== Validation ==========================

      if (VALIDATE && (Bc != "com") && (Bc != "par") && (Bc != "par3"))
        validate()

      # ==================== Conversion to XHTML ==================

      # For each line, check the band label

      if (Bc == "rt") {
        # Close any open elements back up to the top-level doc element
        c("doc")
        # Open a new root entry
        o("rt")
        # These comments (#> ) following an o() or oc() can be extracted from
        # the program to form a comprehensive outline of the XML structure
        # with: grep -E ' +#>' lw2xml | sed -E 's/^ +#>//g' | sort > \
        #         lw_xml_elements
        #> rt:   doc/rt
        o("rtattr")
        #> rt:   doc/rt/rtattr
        oc("rttype", substr(Bl,2))
        #> rt:   doc/rt/rtattr/rttype
        # NOTE(review): anchor markup and the Rootn increment reconstructed;
        #   makeindex() links to these anchors by root number.
        if (!XML)
          print wedge(0) "<a id=\"" ++Rootn "\"></a>"
        # Needed for the indexing:
        Rootlabel[Rootn] = Rest1
        oc("rtword", Rest1)
        #> rt:   doc/rt/rtattr/rtword
      }
      else if (Bc == "pd") {
        c("rtattr")
        oc("pd", ("/" Rest1 "/"))
        #> pd:   doc/rt/rtattr/pd
      }
      else if (Bc == "tag") {
        c("rtattr")
        oc("tag", Rest1)
        #> tag:  doc/rt/rtattr/tag
      }
      else if (Bc == "rtyp") {
        c("rtattr")
        oc("rtyp", Rest1)
        #> rtyp: doc/rt/rtattr/rtyp
      }
      else if (Bc == "nav") {
        c("rtattr")
        oc("nav", Rest1)
        #> nav:  doc/rt/rtattr/nav
      }
      else if (Bc == "df") {
        c("rtattr")
        oc("df", Rest1)
        #> df:   doc/rt/rtattr/df
      }
      else if (Bc == "sets") {
        c("rt")
        o("sets")
        #> sets: doc/rt/sets
      }
      else if (Bc == "set") {
        c("sets")
        o("set")
        #> set:  doc/rt/sets/set
        if (VALIDATE && !Aspcat[Word2])
          error(Word2 " not a known aspectual category")
        oc("setasp", Word2)
        #> set:  doc/rt/sets/set/setasp
        oc("setwords", Rest2)
        #> set:  doc/rt/sets/set/setwords
        c("sets")
      }
      else if (Bc == "th") {
        c("rt")
        o("th")
        #> th:   doc/rt/th
        o("thattr")
        #> th:   doc/rt/th/thattr
        oc("thword", Rest1)
        #> th:   doc/rt/th/thattr/thword
      }
      else if (Bc == "tc") {
        # tc may occur in several places. The following logic is needed to
        # correctly balance the divs:
        if (Path ~ "thattr")
          c("thattr")
        else if (Path ~ "gc2attr")
          c("gc2attr")
        else if (Path ~ "th3")
          c("th3attr")
        oc("tc", Rest1)
        # These are the three places that tc can occur in the XML
        #> tc:   doc/rt/th/thattr/tc
        #> tc:   doc/rt/gc2/gc2attr/tc
        #> tc:   doc/rt/gc2/th3/th3attr/tc
      }
      else if (Bc == "cnj") {
        if (Path ~ "thattr")
          c("thattr")
        else if (Path ~ "gc2attr")
          c("gc2attr")
        else if (Path ~ "th3")
          c("th3attr")
        oc("cnj", Rest1)
        #> cnj:  doc/rt/th/thattr/cnj
        #> cnj:  doc/rt/gc2/gc2attr/cnj
        #> cnj:  doc/rt/gc2/th3/th3attr/cnj
      }
      else if (Bc == "gl") {
        if (Path ~ "thattr")
          c("thattr")
        else if (Path ~ "gc2attr")
          c("gc2attr")
        else if (Path ~ "th3")
          c("th3attr")
        else if (Path ~ "gc3attr")
          c("gc3attr")
        oc("gl", Rest1)
        #> gl:   doc/rt/th/thattr/gl
        #> gl:   doc/rt/gc2/gc2attr/gl
        #> gl:   doc/rt/gc2/th3/th3attr/gl
        #> gl:   doc/rt/gc2/gc3/gc3attr/gl
        #> gl:   doc/rt/th/gc3/gc3attr/gl
      }
      else if (Bc == "quo") {
        if (Path ~ "thattr")
          c("thattr")
        else if (Path ~ "th3attr")
          c("th3attr")
        else if (Path ~ "exengs")
          c("exeng")
        oc("quo", Rest1)
        #> quo:  doc/rt/th/thattr/quo
        #> quo:  doc/rt/gc2/th3/th3attr/quo
        #> quo:  doc/rt/th/thexengs/exeng/quo
        #> quo:  doc/rt/gc2/gc2exengs/exeng/quo
        #> quo:  doc/rt/gc2/th3/th3exengs/exeng/quo
        #> quo:  doc/rt/gc2/gc3/gc3exengs/exeng/quo
        #> quo:  doc/rt/th/gc3/gc3exengs/exeng/quo
      }
      else if (Bc == "cit") {
        if (Path ~ "thattr")
          c("thattr")
        else if (Path ~ "th3attr")
          c("th3attr")
        else if (Path ~ "exengs")
          c("exeng")
        oc("cit", Rest1)
        #> cit:  doc/rt/th/thattr/cit
        #> cit:  doc/rt/gc2/th3/th3attr/cit
        #> cit:  doc/rt/th/thexengs/exeng/cit
        #> cit:  doc/rt/gc2/gc2exengs/exeng/cit
        #> cit:  doc/rt/gc2/th3/th3exengs/exeng/cit
        #> cit:  doc/rt/gc2/gc3/gc3exengs/exeng/cit
        #> cit:  doc/rt/th/gc3/gc3exengs/exeng/cit
      }
      else if (Bc == "ex") {
        # first one
        if (Path !~ "exengs") {
          if (Path ~ "rt/th")
            c("th")
          else if (Path ~ "gc2/th3")
            c("th3")
          else if (Path ~ "gc2/gc3")
            c("gc3")
          else if (Path ~ "gc2")
            c("gc2")
          o("exengs")
          o("exeng")
          oc("ex", Rest1)
          #> ex:   doc/rt/th/exengs/exeng/ex
          #> ex:   doc/rt/gc2/exengs/exeng/ex
          #> ex:   doc/rt/gc2/th3/exengs/exeng/ex
          #> ex:   doc/rt/gc2/gc3/gc3exengs/exeng/ex
          #> ex:   doc/rt/th/gc3/gc3exengs/exeng/ex
          # oc("excolon", ": ")
        }
        # second or later
        else {
          c("exengs")
          o("exeng")
          oc("ex", Rest1)
          # oc("excolon", ": ")
        }
      }
      else if (Bc == "eng") {
        c("exeng")
        oc("eng", Rest1)
        #> eng:  doc/rt/th/exengs/exeng/eng
        #> eng:  doc/rt/gc2/exengs/exeng/eng
        #> eng:  doc/rt/gc2/th3/exengs/exeng/eng
        #> eng:  doc/rt/gc2/gc3/exengs/exeng/eng
        #> eng:  doc/rt/th/gc3/exengs/exeng/eng
        if (!XML)
          oc("exsemicolon", "; ")
      }
      else if (Bc == "lit") {
        if (Path ~ "exengs")
          c("exeng")
        else if (Path ~ "gc3attr")
          c("gc3attr")
        else if (Path ~ "gc2attr")
          c("gc2attr")
        oc("lit", "lit. " Rest1)
        #> lit:  doc/rt/th/exengs/exeng/lit
        #> lit:  doc/rt/gc2/exengs/exeng/lit
        #> lit:  doc/rt/gc2/th3/exengs/exeng/lit
        #> lit:  doc/rt/gc2/gc3/exengs/exeng/lit
        #> lit:  doc/rt/th/gc3/exengs/exeng/lit
        #> lit:  doc/rt/gc2/gc2attr/lit
        #> lit:  doc/rt/gc2/gc3/gc3attr/lit
        #> lit:  doc/rt/th/gc3/gc3attr/lit
        if (!XML)
          oc("exsemicolon", "; ")
      }
      else if (Bc == "prds") {
        c("th")
        o("prds")
        oc("prdstype", Word2)
        #> prds: doc/rt/th/prds/prdstype
        oc("prdsdef", Rest2)
        #> prds: doc/rt/th/prds/prdsdef
      }
      else if (Bc == "prd") {
        c("prds")
        o("prd")
        oc("prdpers", Word2)
        #> prd:  doc/rt/th/prds/prd/prdpers
        oc("prdwords", Rest2)
        #> prd:  doc/rt/th/prds/prd/prdwords
      }
      else if (Bc == "prdgl") {
        c("prd")
        oc("prdgl", Rest1)
        #> prdgl:doc/rt/th/prds/prd/prdgl
      }
      else if (Bc == "gc2") {
        if (Path ~ "doc/rt")
          c("rt")
        if (Path ~ "doc/lw")
          c("lw")
        o("gc2")
        o("gc2attr")
        oc("gc2word", Rest1)
        #> gc2:  doc/rt/gc2/gc2attr/gc2word
        oc("gc2type", "(" abbrev(Bl) ")")
        #> gc2:  doc/rt/gc2/gc2attr/gc2type
      }
      else if (Bc == "dial") {
        # first one
        if (Path !~ /dials/) {
          if (Path ~ /gc2attr/)
            c("gc2attr")
          if (Path ~ /gc3attr/)
            c("gc3attr")
          if (Path ~ /exeng/)
            c("exeng")
          o("dials")
          oc("dial", Word2)
          #> dial: doc/rt/gc2/gc2attr/dials/dial
          #> dial: doc/rt/gc2/gc3/gc3attr/dials/dial
          #> dial: doc/rt/th/gc3/gc3attr/dials/dial
          #> dial: ... exeng/dials/dial
          if (VALIDATE && !Dial[Word2])
            error(Word2 " not a known dialect")
        }
        # not first dial
        else {
          c("dials")
          o("dialx")
          if (!XML)
            oc("dialcomma", ",")
          oc("dial", Word2)
          #> dial: doc/rt/gc2/gc2attr/dials/dialx/dial
          #> dial: doc/rt/gc2/gc3/gc3attr/dials/dialx/dial
          #> dial: doc/rt/th/gc3/gc3attr/dials/dialx/dial
          #> dial: ... exeng/dials/dialx/dial
          if (!XML)
            oc("dialcolon", ":")
          oc("dialword", Rest2)
          #> dial: doc/rt/gc2/gc2attr/dials/dialx/dialword
          #> dial: doc/rt/gc2/gc3/gc3attr/dials/dialx/dialword
          #> dial: doc/rt/th/gc3/gc3attr/dials/dialx/dialword
          #> dial: ... exeng/dials/dialx/dialword
          c("dials")
        }
      }
      else if (Bc == "asp") {
        if (Path !~ "gc3")
          c("gc2attr")
        else
          c("gc3attr")
        oc("asp", Rest1)
        #> asp:  doc/rt/gc2/gc2attr/asp
        #> asp:  doc/rt/gc2/gc3/gc3attr/asp
        #> asp:  doc/rt/th/gc3/gc3attr/asp
      }
      else if (Bc == "th3") {
        c("gc2")
        o("th3")
        o("th3attr")
        oc("thword", Rest1)
        #> th3:  doc/rt/gc2/th3/th3attr/thword
      }
      # GC3
      else if (Bc == "gc3") {
        if (Path !~ "/th/")
          c("gc2")
        else {
          c("th")
          if (VALIDATE && (Bl !~ /^\.\.\.(mpn|in|pn|n)$/))
            error("Under ..th, the only gc3 allowed are n, in, pn, mpn")
        }
        o("gc3")
        o("gc3attr")
        oc("gc3word", Rest1)
        #> gc3:  doc/rt/gc2/gc3/gc3attr/gc3word
        #> gc3:  doc/rt/th/gc3/gc3attr/gc3word
        oc("gc3type", "(" abbrev(Bl) ")")
        #> gc3:  doc/rt/gc2/gc3/gc3attr/gc3type
        #> gc3:  doc/rt/th/gc3/gc3attr/gc3type
      }
      else if (Bc == "lw") {
        c("doc")
        o("lw")
        o("lwattr")
        oc("lwword", Rest1)
        #> lw:   doc/lw/lwattr/lwword
      }
      else if (Bc == "src") {
        c("lwattr")
        oc("src", Rest1 )
        #> src:  doc/lw/lwattr/src
      }
      else if (Bc == "com") {
        oc("com", Rest1 )
        #> com:  .../com
      }
      else if (Bc == "par") {
        oc("par", Rest1 )
        #> par:  .../par
      }
      else if (Bc == "par3") {
        oc("par3", Rest1 )
        #> par3: .../par3
      }
      else if (Bc == "file") {
        c("doc")
        oc("file", Rest1 )
        #> file: doc/file
      }
      # Band categories and labels not known
      else if (Bc)
        error("Band category '" Bc "' not handled")
      else
        error("Band label '" Bl "' not known")

      # Move Bc to Prevbc, if not a com
      if ((Bc != "com") && (Bc != "par") && (Bc != "par3"))
        Prevbc = Bc
    }
  }

  # A negative getline status means the input could not be read at all;
  # previously this was silent and produced an empty document.
  if (Readstatus < 0)
    print "lw2xml: cannot read input file '" LEX "' (" ERRNO ")" \
      > "/dev/stderr"

  c("doc")

  # Make the index
  if (MAKEINDEX)
    makeindex()

  # Close doc itself
  # NOTE(review): closing tags reconstructed (o("doc") opened the element
  #   at an indent of four spaces).
  if (XML)
    print "    </doc>"
  else
    print "    </div>"

  # XHTML footer
  if (!XML)
    foot()

  # Program termination: status 0 on success, 1 if the input was unreadable
  if (Readstatus < 0)
    exit 1
}

function init(   load, x, y, i, j, k, z, fol) {

  # Initialize the global lookup tables:
  #   F[context][previous class][class]  allowed band sequences (validation)
  #   Bl2bc[band label]                  band label -> band class
  #   Abbrev[band label]                 band label -> printed abbreviation
  #   Aspcat[category]                   allowed aspectual categories
  #   Dial[code]                         allowed dialect codes

  # Set default sort order
  PROCINFO["sorted_in"] = "@ind_str_asc"

  # ** Load the syntax for validation **

  # fol[A][B] are bands allowed to follow band B, in the context of A
  # A is needed, because whether a band can follow A depends on the context
  # of B. E.g., nav (as a member of attr1) can be followed by df, sets
  # th, gc2, or a new root. A is the exit Path of B.

  # (In the original layout the band names in these strings are aligned in
  #  columns, with occurrence markers such as ? 1 * <+ above each column.)

  fol["doc"]["0"]       = "rt lw file"
  fol["doc"]["file"]    = "rt lw file"

  fol["rtattr"]["rt"]   = " pd tag rtyp nav df sets th gc2 rt lw file"
  fol["rtattr"]["pd"]   = " tag rtyp nav df sets th gc2 rt lw file"
  fol["rtattr"]["tag"]  = " rtyp nav df sets th gc2 rt lw file"
  fol["rtattr"]["rtyp"] = " nav df sets th gc2 rt lw file"
  fol["rtattr"]["nav"]  = " df sets th gc2 rt lw file"
  fol["rtattr"]["df"]   = " sets th gc2 rt lw file"
  fol["sets"]["sets"]   = " set "
  fol["sets"]["set"]    = " set th gc2 rt lw file"

  fol["thattr"]["th"]   = "dial tc "
  fol["thattr"]["tc"]   = " cnj gl "
  fol["thattr"]["cnj"]  = " gl "
  fol["thattr"]["gl"]   = " quo cit ex gc3 th prds gc2 th3 rt lw file"
  fol["thattr"]["quo"]  = " cit ex gc3 th prds gc2 th3 rt lw file"
  fol["thattr"]["cit"]  = " ex gc3 th prds gc2 th3 rt lw file"
  fol["exeng"]["ex"]    = " dial eng "
  fol["exeng"]["dial"]  = " eng "
  fol["exeng"]["eng"]   = " ex lit quo cit gc3 th prds gc2 th3 rt lw file"
  fol["exeng"]["lit"]   = " ex quo cit gc3 th prds gc2 th3 rt lw file"
  fol["exeng"]["quo"]   = " ex cit gc3 th prds gc2 th3 rt lw file"
  fol["exeng"]["cit"]   = " ex gc3 th prds gc2 th3 rt lw file"
  fol["prds"]["prds"]   = " prd "
  fol["prd"]["prd"]     = " gc3 th prds prd prdgl gc2 th3 rt lw file"
  fol["prd"]["prdgl"]   = " gc3 th prds prd gc2 th3 rt lw file"

  fol["gc2attr"]["gc2"] = "dial tc asp cnj gl "
  fol["dials"]["dial"]  = "dial eng tc asp cnj gl "
  fol["dialx"]["dial"]  = "dial tc asp cnj gl "
  fol["gc2attr"]["tc"]  = " asp cnj gl "
  fol["gc2attr"]["asp"] = " cnj gl "
  fol["gc2attr"]["cnj"] = " gl "
  fol["gc2attr"]["gl"]  = " quo cit smf sc lit ex th3 gc3 gc2 rt lw file"
  fol["gc2attr"]["quo"] = " cit smf sc lit ex th3 gc3 gc2 rt lw file"
  fol["gc2attr"]["cit"] = " smf sc lit ex th3 gc3 gc2 rt lw file"
  fol["gc2attr"]["smf"] = " sc lit ex th3 gc3 gc2 rt lw file"
  fol["gc2attr"]["sc"]  = " lit ex th3 gc3 gc2 rt lw file"
  fol["gc2attr"]["lit"] = " ex th3 gc3 gc2 rt lw file"

  fol["th3attr"]["th3"] = "tc "
  fol["th3attr"]["tc"]  = " cnj gl "
  fol["th3attr"]["cnj"] = " gl "
  fol["th3attr"]["gl"]  = " quo cit ex gc3 gc2 rt lw file"
  fol["th3attr"]["quo"] = " cit ex gc3 gc2 rt lw file"
  fol["th3attr"]["cit"] = " ex gc3 gc2 rt lw file"

  fol["gc3attr"]["gc3"] = "dial tc asp cnj gl "
  fol["gc3attr"]["tc"]  = " asp cnj gl "
  fol["gc3attr"]["asp"] = " cnj gl "
  fol["gc3attr"]["cnj"] = " gl "
  fol["gc3attr"]["gl"]  = " quo cit smf sc lit ex th th3 gc3 gc2 rt lw file"
  fol["gc3attr"]["quo"] = " cit smf sc lit ex th th3 gc3 gc2 rt lw file"
  fol["gc3attr"]["cit"] = " smf sc lit ex th th3 gc3 gc2 rt lw file"
  fol["gc3attr"]["smf"] = " sc lit ex th th3 gc3 gc2 rt lw file"
  fol["gc3attr"]["sc"]  = " lit ex th th3 gc3 gc2 rt lw file"
  fol["gc3attr"]["lit"] = " ex th th3 gc3 gc2 rt lw file"

  fol["lwattr"]["lw"]   = "src "
  fol["lwattr"]["src"]  = "gc2 rt lw file"

  # make F array from fol
  for (i in fol)
    for (j in fol[i]) {
      # Trim both ends of the entry being split. (The trailing trim used to
      # address fol[i][i], so trailing blanks survived and split() yielded
      # an empty band name, making F[i][j][""] true.)
      gsub(/^ +/, "", fol[i][j])
      gsub(/ +$/, "", fol[i][j])
      split(fol[i][j], z, / +/)
      for (k in z)
        F[i][j][z[k]] = 1
    }

  # Band labels to band classes
  #   each ';'-separated item is: <band label> <band class> [<abbreviation>]
  load = \
    ".file file; ..par par; ...par par3; com com; ..grp grp; " \
    ".rt rt; .af rt; .ra rt; .lw lw; " \
    "pd pd; tag tag; rtyp rtyp; nav nav; df df; " \
    "..sets sets; set set; " \
    "..th th; ...th th3; tc tc; cnj cnj; gl gl; quo quo; cit cit; caus caus; " \
    "ex ex; eng eng; lit lit; cf cf; " \
    "...prds prds; prd prd; prdgl prdgl; " \
    "dial dial; asp asp; smf smf; sc sc; " \
    ".xr xr; xgl xgl; " \
    "src src; " \
    "..adj gc2 adj.; " \
    "..ads gc2 a.d.s.; " \
    "..adv gc2 adv.; " \
    "..an gc2 a.n.; " \
    "..c gc2 c.; " \
    "..cnj gc2 cnj.; " \
    "..coll gc2 coll.; " \
    "..dem gc2 dem.; " \
    "..dir gc2 dir.; " \
    "..enc gc2 enc.; " \
    "..exc gc2 exc.; " \
    "..i gc2 i.; " \
    "..ic gc2 i.c.; " \
    "..in gc2 ins.n.; " \
    "..int gc2 int.; " \
    "..mpn gc2 m.p.n.; " \
    "..n gc2 n.; " \
    "..nc gc2 n.c.; " \
    "..nds gc2 n.d.s; " \
    "..nenc gc2 n.enc.; " \
    "..nfaf gc2 n.f.af.;" \
    "..nfsf gc2 n.f.sf.;" \
    "..ni gc2 n.i.; " \
    "..nic gc2 n.i.c.; " \
    "..nsf gc2 n.sf.; " \
    "..padj gc2 p.adj.; " \
    "..pf gc2 pf.; " \
    "..pn gc2 p.n.; " \
    "..pp gc2 pp.; " \
    "..pro gc2 pro.; " \
    "..psn gc2 ps.n.; " \
    "..scnj gc2 s.cnj.; " \
    "..sf gc2 sf.; " \
    "..tfs gc2 t.f.s.; " \
    "..venc gc2 v.enc.; " \
    "..vfaf gc2 v.f.af.;" \
    "..vfsf gc2 v.f.sf.;" \
    "..voc gc2 voc.; " \
    "..vpf gc2 v.pf.; " \
    "..vsf gc2 v.sf.; " \
    "..vsf1 gc2 v.sf.1; " \
    "...adj gc3 adj.; " \
    "...ads gc3 a.d.s.; " \
    "...adv gc3 adv.; " \
    "...an gc3 a.n.; " \
    "...c gc3 c.; " \
    "...cnj gc3 cnj.; " \
    "...coll gc3 coll.; " \
    "...dem gc3 dem.; " \
    "...dir gc3 dir.; " \
    "...enc gc3 enc.; " \
    "...exc gc3 exc.; " \
    "...i gc3 i.; " \
    "...ic gc3 i.c.; " \
    "...ifs gc3 i.f.s.; " \
    "...in gc3 ins.n.; " \
    "...int gc3 int.; " \
    "...mpn gc3 m.p.n.; " \
    "...n gc3 n.; " \
    "...nc gc3 n.c.; " \
    "...nds gc3 n.d.s; " \
    "...nenc gc3 n.enc.; " \
    "...ni gc3 n.i.; " \
    "...nic gc3 n.i.c.; " \
    "...padj gc3 p.adj.; " \
    "...pf gc3 pf.; " \
    "...pn gc3 p.n.; " \
    "...pp gc3 pp.; " \
    "...pro gc3 pro.; " \
    "...psn gc3 ps.n.; " \
    "...scnj gc3 s.cnj.; " \
    "...sf gc3 sf.; " \
    "...venc gc3 v.enc.; " \
    "...voc gc3 voc."

  split(load, x, ";")
  for (i in x) {
    gsub(/^ +/, "", x[i])
    split(x[i], y)
    Bl2bc[y[1]] = y[2]
    if (y[3])
      Abbrev[y[1]] = y[3]
  }

  # Allowed aspectual cats
  # NOTE(review): "durØ" was mis-encoded in the reviewed copy and has been
  #   repaired here; confirm against the lexicon's own spelling.
  load = "bisec|conc|cns|cona|cont|cust|dist|dur|durØ|durD|mom|mult|neu|per|" \
    "pers|prog|rep|rev|sem|tran"
  split(load, x, "|")
  for (i in x)
    Aspcat[x[i]] = 1

  # Allowed dialect codes
  # Minto-Nenana|Chena|M-N&Chena|Toklat|UpperS|Salcha|Goodpaster||Gwichin
  load = "MN|C|MC|T|Us|S|G|TG|OC|GG|all|all+Sl|E|ETy|I|IIl|IIlO|IIlSl|" \
    "IIlU-O|I-IO|IL|Il|Il-I|IlI|IlI-OU|IlN|IlO|IlO-UI|IlOU-NL|IlT|IlU|" \
    "Il-UIO|IlU-O|IO|I-O|I-OU|IO-U|IOU|IOUSl|ISl|IT|I-U|IU|IUIl|IUO|I-UO|" \
    "IU-O|L|L-ki|ki|LN|L-N|L-NO|L-NOU|L-NUO|LO|L-O|L-OIl|L-ONU|LOU|L-OUI|" \
    "L-UO|L-UOIl|L-UON|N|NIl|NIlO|NL|NL-IlOU|NLU|NL-UO|NLU-O|NO|NOIl|N-OU|" \
    "N-OULIl|O|OE|OEIl|OETy|OI|O-I|OII|OIIl|OIIl-U|OIL|OIl|O-IlI|OIlSl|" \
    "OIl-U|OIlU|OIU|O-IU|OI-U|OIUSl|OL|O-N|ON|ONIl|ON-L|ONL|O-NU|OSl|" \
    "OSlIl|OSl-IU|OU|O-U|OU-I|O-UI|OUI|OUIl|OUIlN|OUL|OUN|O-UN|OUNIl-L|" \
    "O-UO|S|Sl|Sl-IlN|SlO|SlOTyIl|SlTy|Su|T|T-UIO|Ty|U|UE|UI|U-I|UIIl|" \
    "UIIl-O|UIl|UIl-IO|U-IO|UI-O|UIO|U-L|UL|UN|UNIl|UNL|UO|U-O|U-OI|UO-I|" \
    "U-OIl|UOIl|UONIl-L|USl|UU"
  split(load, x, "|")
  for (i in x)
    Dial[x[i]] = 1
}

function o (class) {
  # open a div (or, with --xml, an element named after the class) and
  # append the class to the global Path
  # NOTE(review): the div markup was reconstructed.
  if (!XML)
    print wedge(0) "<div class=\"" class "\">"
  else
    print wedge(0) "<" class ">"
  if (Path)
    Path = Path "/" class
  else
    Path = class
}

function wedge (reduce,   p) {
  # Return the indentation for the current depth of Path: two spaces per
  # path element, less 2*reduce, plus a base indent of four.
  # print "[ " reduce " " (2*split(Path,p,"/") - 2*reduce +4) " " Path " ]"
  return sprintf("%*s", (2*split(Path,p,"/") - 2*reduce +4) ,"")
}

function oc (class, text) {
  # open and close a div (or element) around text; Path is not changed
  # NOTE(review): the div markup and XML closing tag were reconstructed.
  if (!XML)
    print wedge(0) "<div class=\"" class "\">" text "</div>"
  else
    print wedge(0) "<" class ">" text "</" class ">"
}

function c(p,   oldlevel, newlevel, z, i) {
  # "close the hierarchy n levels to the new path indicated in p":
  # truncate Path so that p is its last element and print one closing tag
  # for every element removed. Exits with status 1 if p is not in Path.
  oldlevel = split(Path, z, "/")
  Path = ""
  for (i = 1; i <= oldlevel ; i++) {
    # Rebuild the path to the new level
    Path = Path "/" z[i]
    # stop at the new terminal level
    # note: this only works if all the possible path elements are unique
    if (p == z[i]) {
      newlevel = i
      break
    }
  }
  # check
  if (!newlevel) {
    error("c(" p ") not found")
    exit 1
  }
  # fix "/doc/..." to "doc/..."
  gsub(/^\//,"",Path)

  # Close the divs, innermost first. The wedge() argument is negative so
  # that each closing tag is indented like the tag that opened it.
  # NOTE(review): closing markup reconstructed; in XML mode the i-th tag
  #   closed is the element at level (oldlevel - i + 1) of the old path.
  if (!XML)
    for (i = 1; i <= (oldlevel - newlevel); i++)
      print wedge(i-(oldlevel-newlevel)) "</div>"
  else
    for (i = 1; i <= (oldlevel - newlevel); i++)
      print wedge(i-(oldlevel-newlevel)) "</" z[oldlevel - i + 1] ">"
}

function splitline (   n, z, i) {
  # Split $0 on whitespace and set the globals
  #   Bl    band label (first word)     Bc    its band class, from Bl2bc
  #   Word2 second word
  #   Rest1 everything after word 1     Rest2 everything after word 2
  # Explicit reset (previously chained from the stale value of Rest2)
  Bl = Bc = Word2 = Rest1 = Rest2 = ""
  gsub(/ +$/,"",$0)
  n = split($0, z)
  Bl = z[1]
  Bc = Bl2bc[Bl]
  Word2 = z[2]
  Rest1 = z[2]
  for (i = 3; i <= n; i++)
    Rest1 = Rest1 " " z[i]
  Rest2 = z[3]
  for (i = 4; i <= n; i++)
    Rest2 = Rest2 " " z[i]
}

function head(title) {
  # Print the opening of the html document
  # Use html5
  # NOTE(review): every tag in this header was reconstructed; in particular
  #   the stylesheet name is a guess - confirm against a pristine copy.
  print "<!DOCTYPE html>"
  print "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
  print "  <head>\n    <title>" title "</title>"
  print "    <meta charset=\"utf-8\"/>"
  print "    <link rel=\"stylesheet\" href=\"lw2xml.css\"/>"
  # (two further, commented-out print statements followed here; their
  #  markup is missing from the reviewed copy)
  print "  </head>\n  <body>"
}

function foot() {
  # Print the end of the html document
  # NOTE(review): closing tags reconstructed.
  print "  </body>\n</html>";
}

function abbrev(bl) {
  # Return the abbreviation for band label bl; if there is none, warn on
  # stderr and return bl itself
  if (!Abbrev[bl]) {
    printf "%5d: '%s' has no abbreviation\n", Line, bl > "/dev/stderr"
    return bl
  }
  else
    return Abbrev[bl]
}

function validate(   z, n) {
  # find the context from the tail element of Path
  n = split(Path, z, "/")
  # if the current band class cannot follow the previous band class in the
  # current context, write an error
  if (!F[z[n]][Prevbc][Bc])
    error(sprintf("%-5s (in context %-9s) cannot follow %-7s", ("'" Bc "'"), \
                  ("'" z[n] "'"), ("'" Prevbc "'")))
}

function error(msg) {
  # Report msg on stderr with the current input line number; with --d also
  # show the start of the offending line
  printf "ERR  %5d : %s\n", Line, msg > "/dev/stderr"
  if (DEBUG) {
    printf "LINE       : %-65s\n", substr($0,1,65) > "/dev/stderr"
    # printf "PATH       : %-65s\n", substr(Path,1,65) > "/dev/stderr"
  }
}

function xmlencode() {
  # Escape the XML special characters in $0. "\\&" in a gsub replacement
  # is a literal ampersand.
  # NOTE(review): the entity names were decoded in the reviewed copy and
  #   have been reconstructed.
  gsub(/&/,"\\&amp;",$0)
  gsub(/</,"\\&lt;",$0)
  gsub(/>/,"\\&gt;", $0)
}

function indexmarks(   i, j, ind) {
  # Record each word that directly follows a '*' in $0 in
  # Index[word][Rootn], then remove the marks from the line
  if ($0 ~ /\*/) {
    # (can't use a single regex, so char by char)
    ind = 0; j = ""
    for (i = 1; i <= length($0); i++) {
      if (ind) {
        # a non-letter, or the end of the line, terminates the word
        if ((substr($0, i, 1) !~ /[[:alpha:]]/) || \
            (i == length($0))) {
          if (i == length($0))
            j = j substr($0, i, 1)
          Index[j][Rootn]++
          ind = 0
          j = ""
        }
        else
          j = j substr($0, i, 1)
      }
      else if (substr($0, i, 1) == "*")
        ind = 1
    }
  }
  gsub(/\*/,"",$0)
  # NOTE(review): reproduced as found; it is unclear whether a matching
  #   substitution for "[" was lost from the reviewed copy.
  gsub(/\]/,"", $0)
}

function dollar2sup() {
  # Turn "$x" into a superscript x
  # NOTE(review): the <sup> markup was reconstructed.
  $0 = gensub(/\$(.)/,"<sup>\\1</sup>","G",$0)
}

function case_fold_compare(i1, v1, i2, v2,    l, r) {
  # Case-insensitive index comparison, usable as a PROCINFO["sorted_in"]
  # or asorti() comparison function (not called in this file)
  l = tolower(i1)
  r = tolower(i2)

  if (l < r)
    return -1
  else if (l == r)
    return 0
  else
    return 1
}

function makeindex(   si, i, j) {
  # Write the index of marked words, each followed by links to the roots
  # under which it was marked
  PROCINFO["sorted_in"] = "@ind_num_asc"
  IGNORECASE = 1
  # note IGNORECASE does not affect array subscripting, so asorti() needed
  asorti(Index, si)
  o("index")
  oc("indexlabel", "Index")
  o("indexitems")
  for (i in si) {
    o("indexitem")
    oc("indexword", (si[i] ":"))
    o("indexrefs")
    # numerical index. Ordered the same as the occurrence of the entry in
    # the lexware file. Not necessarily in alphabetical order
    # NOTE(review): link markup reconstructed; it targets the anchor
    #   printed for each root in the main loop.
    for (j in Index[si[i]])
      oc("indexref", ("<a href=\"#" j "\">" Rootlabel[j] "</a>, "))
    c("indexitems")
  }
  c("doc")
}

function read_args(   arg) {
  # Parse the command line into the globals DEBUG, VALIDATE, STARTLINE,
  # FINISHLINE, XML, MAKEINDEX and LEX (the input file, always the last
  # argument). Exits with status 1 if no arguments were given.

  # removed to make it portable to Windows
  # ("test -f " LEX " ; echo $?") | getline exitcode
  # if (exitcode) {
  if (ARGC == 1) {
    # NOTE(review): the placeholders were missing from the reviewed copy
    #   and have been reconstructed; --d, --nv and --xml were not listed.
    print "Usage: lw2xml [ --index --xml --nv --d --s <start line> " \
      "--f <finish line> ] <lexware file>"
    exit 1
  }

  # Defaults
  DEBUG = 0
  VALIDATE = 1
  STARTLINE = 1
  FINISHLINE = 1000000000

  for (arg = 1; arg < ARGC; arg++) {
    if      (ARGV[arg] == "--s")     STARTLINE = ARGV[arg+1]
    else if (ARGV[arg] == "--f")     FINISHLINE = ARGV[arg+1]
    else if (ARGV[arg] == "--d")     DEBUG = 1
    else if (ARGV[arg] == "--nv")    VALIDATE = 0
    else if (ARGV[arg] == "--xml")   XML = 1
    else if (ARGV[arg] == "--index") MAKEINDEX = 1
  }
  LEX = ARGV[ARGC-1]

  # The index is only produced for xhtml output
  if (XML)
    MAKEINDEX = 0
}

# =================== DOCUMENTATION ======================================

# Variable naming convention: Global variables begin with a Capital letter,
#   local variables with a lowercase letter

# XHTML Document structure
# ------------------------
#
# * XML schema choices made to reflect blocking choice in html document.
# * If a block may need to be formatted independently, it needs a unique id
#   e.g., prdsexengs, but if it will always be the same, the id can be
#   reused in different contexts (e.g., exeng)
# * Regenerate with: grep -E ' +#>' lw2xml | sed -E 's/^ +#>/# /g'
#