//!\file k.c \brief bare minimum atw-style k interpreter for learning purposes \copyright (c) 2024 arthur whitney and the regents of kparc \license mit #include"a.h" //fF[+-!#,@|] atom/vector 1byteint 1bytetoken no(parser tokens ..) cc -Os -oa a.c -w //! above is a brief description of k/simple system by atw: //! he says: he means: //! fF[+-!#,@] we have 6 verbs [+ - ! # , @ |] in both monadic and dyadic contexts, total of 14 //! (since monadic + and dyadic | are nyi, we actually have 12, feel free to implement f[+]/F[|]) //! atom/vector k/simple sports atoms and vectors! //! 1byteint the only supported atom/vector type is 8bit integer, so beware of overflows //! 1bytetoken input tokenizer is spartan: a token can't be longer than one char //! no(...) no parser and multichar tokenizer are implemented //! cc -w .. minimal build instructions (which are much more stringent in provided makefile) //!debug f(wu,O("%lu\n",x)) //!< (w)rite (u)ll: print unsigned long long (e.g. total memory allocation), useful for refcount debugging. void wg(){i(26,x(U[i],$(!ax,O("%c[%d] %d\n",i+97,nx,rx))))} //!< dump global namespace: varname, refcount, length (also useful for refcount debugging). //!printing facilities f(w,write(1,ax?(c*)&x:sx,ax?1:strlen(sx))) //!< (w)rite to stdout: if x is an atom, print its decimal value, otherwise print x as ascii string. static c pb[12]; //!< temporary string (b)uffer for formatting vector items. it's ok to declare it globally, since we only have one thread. f(si,sprintf(pb,"%d ",(int)(128>x?x:x-256));pb) //!< (s)tring from (i)nteger: format a given atom x as decimal in range (-128..127) into buffer b using sprintf(3). f(wi,w(si(x))) //!< (w)rite (i)nteger: format x and (w)rite it to stdout. f(W,Q(x)$(ax,wi(x))i(nx,wi(xi))w(10)) //!< pretty print x: if x is an atom, format and print it, otherwise print all items of vector x, //!< separated by space. terminate output by a newline aka ascii 10. G(err,w(f);w(58);wi(x);w(y);w(10);Q) //!< (err)or: print name of the c (f)unction where error occured, line number and error msg, return Q. //!malloc f(a,y(x+2,WS+=x;c*s=malloc(y);*s++=0;*s++=x;s)) //!< (a)llocate x bytes of memory for a vector of length x plus two extra bytes for preamble, set refcount to 0 //!< and vector length to x in the preamble, and return pointer to the 0'th element of a new vector \see a.h type system f(_a,WS-=nx;free(sx-2);0) //!< release memory allocated for vector x. G(m,(u)memcpy((c*)x,(c*)y,f)) //!< (m)ove: x and y are pointers to source and destination, f is number of bytes to be copied from x to y. //!< \note memcpy(3) assumes that x/y don't overlap in ram, which in k/simple they can't, but \see memmove(3) //!memory management f(r_,ax?x:(++rx,x)) //!< increment refcount: if x is an atom, return x. if x is a vector, increment its refcount and return x. f(_r,ax?x //!< decrement refcount: if x is an atom, return x. :rx?(--rx,x) //!< if x is a vector and its refcount is greater than 0, decrement it and return x. :_a(x)) //!< if refcount is 0, release memory occupied by x and return 0. //!monadic verbs f(foo,_r(x);Qz(1);Q)F(Foo,_r(x);Qz(1);Q) //!< (foo)bar is a dummy monadic verb: for any x, throw nyi error and return error code Q. f(sub,ax?(c)-x:_x(N(nx,-xi))) //!< monadic (sub)tract is also known as (neg)ation, or -x: if x is atom, return its additive inverse. //!< if x is a vector, return a new vector same as x only with sign of its every element flipped. f(til,Qr(!ax)(N(x,i))) //!< monadic til is !x aka her majesty apl iota. for a given atom x, it returns a vector //!< of x integers from 0 to x-1. if x is not an atom, til throws a rank error. f(cnt,Qr(ax)nx) //!< monadic (c)ou(nt) is #x. it returns the length of a given vector and throws rank error for atoms. f(cat,Qr(!ax)N(1,x)) //!< monadic (cat)enate is (enl)ist, or comma-x: wraps a given atom x into a new vector of length 1 whose //!< only item holds the value of that atom. if x is a vector, enlist will throw a rank error. f(rev,Qr(ax)_x(N(nx,sx[nx-i-1]))) //!< monadic (rev)erse is |x and simply returns a mirror copy of vector x. //!dyadic verbs F(Add, //!< dyadic f+y is add. operands can be both atoms and verbs, ie. a+a, a+v, v+a, v+v are all valid. ax?af?(c)(f+x) //!< case a+a: if (f,x) are atoms, compute their sum and handle possible overflows by downcasting it to c. :Add(x,f) //!< case v+a: if f is a vector and x is an atom, make a recursive call with operands swapped, i.e. a+v. :af?_x(N(nx,f+xi)) //!< case a+v: if f is an atom, return a new vector constructed by adding f to every element of x. :nx-nf?(_x(_f(Ql()))) //!< case v+v: if (f,x) are vectors, first make sure they are of the same length, throw length error if not. :_f(_x(N(nx,xi+fi)))) //!< if lengths are the same, return a new vector holding their pairwise sum. //!< \note by convention, atwc uses x-y for inequality test, which has the same effect as nx!=nf. F(Sub,Add(f,sub(x))) //!< dyadic f-x is subtract. since we already have Add() and sub(), we get Sub() for free by negating x. F(Mod,Qr(!f||!af)ax?x%f:_x(N(nx,xi%f))) //!< dyadic f!x is x (mod)ulo f, aka remainder operation. f must be an non-zero atom, x can be anything. F(Tak,Qr(!af)_f(N(f,ax?x:sx[i%nx]))) //!< dyadic f#x is (tak)e, which has two variants based on the type of right operand (left must be atom): //!< if x is a vector, return first f items of x. if f exceeds the size of x, wrap around from the start. //!< if x is an atom, return a vector of length f filled with x. F(Cat, //!< dyadic f,x is (cat)enate: a) join two vectors b) join an atom to vector c) make a vector from two atoms. f=af?cat(f):f; //!< if f is an atom, enlist it \see cat() x=ax?cat(x):x; //!< ditto for x u r=a(nf+nx); //!< (a)llocate array r long enough to hold f and x. m(nx,r+nf,x); //!< (m)ove contents of x to the end of r. m(nf,r,f);_r(x);_r(f);r) //!< (m)ove contents of f to the beginning of r, try to release f and x, and return pointer to r. F(At,Qr(af) //!< dyadic f@x is "needle at x in the haystack f" and has two modes based on the type of x (f must be a vector): ax?x>nf?Ql():sf[x] //!< if x is an atom, return the x'th item of f. :_x(_f(N(nx,sf[xi])))) //!< if x is a vector, return a vector containg items from f at indices listed in x. //!< \note that the second mode currently doesn't perform the boundary check, fell free to implement it! f(at,At(x,0)) //!< monadic @x is simply (f)ir(st): return the head element of x, or throw a rank error if x is an atom. //! note how Sub() and at() are implemented in terms of other verbs, and especially how Add() cuts corners by calling itself with operands swapped. //! in fact, we can use Add() as a template to implement of a whole bunch of additional dyadic verbs, provided that they also hold commutative property. //! so let's generalize Add(), first in pseudocode: //! function fn(f,x) implementing a commutative OP: //! 1. if both operands f and x are atoms, return (f) OP (x) //! 2. if f is an atom and x is a vector, return fn(x,f) //! 3. if both operands are vectors, ensure they are the same length. //! 4. allocate a (r)esult vector of the same length as x, then: //! 5. depending on type of x, each i'th element of r becomes either: //! 5.1 (atom x) OP (i'th element of f) //! 5.2 (i'th element of x) OP (i'th element of f) //! 6. finally, attempt to release memory of f and x, and return r. #define op(fn,OP) F(fn,ax?af?(c)(f OP x):fn(x,f):af?_x(N(nx,f OP xi)):_f(_x(nx-nf?Ql():N(nx,sx[i] OP sf[i])))) //!< above pseudocode expressed as a C macro. op(Eql,==)op(Not,!=)op(And,&)op(Or,|)op(Prd,*) //!< et voila, we have definitions of dyadic equal, not equal, and, or and product for free. //!verb dispatch char*V=" +-!#,@=~&|*"; //!< V is an array of tokens of all supported k verbs. 0'th item (space) stands for "not a verb". u(*f[])(u )={0,foo,sub,til,cnt,cat,at,foo,foo,foo,rev,foo}, //!< f[] is an array of pointers to c functions which implement monadic versions of k verbs listed in V. (*F[])(u,u)={0,Add,Sub,Mod,Tak,Cat,At,Eql,Not,And,Or, Prd}; //!< F[] is ditto for dyadic versions of verbs listed in V. // V: + - ! # , @ = ~ & | * //!adverbs F(Ovr,ax?x:_x(r(*sx,i(nx-1,r=F[f](r,sx[i+1]))))) //!< adverb over: recursively fold all elements of vector x using dyadic verb f going left to right. F(Scn,ax?x:_x(r(a(nx),*sr=*sx;i(nx-1,sr[i+1]=F[f](sr[i],sx[i+1]))))) //!< adverb scan: same as over, but produces a vector of intermediate results. //!adverb dispatch char*AV=" /\\";u(*D[])(u,u)={0,Ovr,Scn}; //!< AV[]/D[] is the same as V[]/F[], only for adverbs. //!globals, verbs, nouns, adverbs f(g,x>='a'&&x<='z') //!< is x a valid (g)lobal variable identifier? F(ag,y(U[f],!ay?_a(y):x;r_(U[f]=x))) //!< (a)ssign (g)lobal: release no longer referenced global object at U[f], and replace it with object x. f(v,(strchr(V,x)?:V)-V) //!< is x a valid (v)erb from V? if so, return its index, otherwise return 0. //!< \note rarely seen ternary form x?:y, which is just a shortcut for x?x:y in c. f(d,(strchr(AV,x)?:AV)-AV) //!< same as v() for a(d)verbs. f(n,10>x-48 //!< is x a (n)oun? valid nouns are digits 0..9 and lowercase varnames a..z. ?x-48 //!< if x is a digit, e.g. '7', return its decimal value. :g(x)?r_(U[x-97]) //!< if x is a varname, e.g. 'a', return its value from U[26] and increment its refcount. :Q) //!< ..anything else is an error. //!fio static char*l;u mx=99;FILE*t; //!< l is a line buffer, mx is its max length, t is input stream handle. us(rl,l=l?:malloc(mx); //!< (r)ead(l)ine: reset mx to max line length, allocate buffer l of size mx if not yet allocated. P(!s,l[read(0,l,mx)-1]=0) //!< (r)ead: if no filename s is given, read line from stdin up to mx bytes, clamp trailing \n and return 0. t=t?:fopen(s,"r");Qs(!t,s) //!< open file s for reading if not yet open, throw error in case of problems. r(getline(&l,&mx,t), //!< read next line from stream t into l up to mx bytes. r=r