// (inc clojure-brush) ;; an improved SyntaxHighlighter brush for clojure
//
// Copyright (C) 2011 Andrew Brehaut
//
// Distributed under the Eclipse Public License, the same as Clojure.
//
// https://github.com/brehaut/inc-clojure-brush
//
// Written by Andrew Brehaut
// V0.9.1, November 2011

// Namespace bootstrap. Declared with `var` so the namespace is never created
// through an implicit global assignment (which throws in strict/module code);
// an already existing `net` object is reused untouched.
var net = net || {};
if (!net.brehaut) net.brehaut = {};

/* ClojureTools exposes the tokenizer and the tree builder:
 *
 *   net.brehaut.ClojureTools.tokenize(code)     -> array of Token
 *   net.brehaut.ClojureTools.build_tree(tokens) -> toplevel node
 *
 * When SyntaxHighlighter is loaded, a `Clojure` brush is also registered on it.
 * When it is not loaded, brush registration is skipped and only the two
 * functions above are available.
 */
net.brehaut.ClojureTools = (function (SH) {
    "use strict";

    // utilities

    // Fallback for engines without Object.create; scopes are chained through
    // prototypes, so only the single-argument form is required.
    if (!Object.create) Object.create = function object(o) {
        function F() {}
        F.prototype = o;
        return new F();
    };

    // data

    /* A Token is one lexical unit of the source.
     *
     *   value  - the source text of the token
     *   index  - offset of the token in the source
     *   tag    - the kind of token; rewritten by the annotation passes
     *   length - number of source characters covered (defaults to value.length)
     *
     * secondary_tags collects extra flags (for example `quoted`, `meta`) that
     * are appended to the css class of the token.
     */
    function Token(value, index, tag, length) {
        this.value = value;
        this.index = index;
        this.length = length || value.length;
        this.tag = tag;
        this.secondary_tags = {};
    }

    // null_token exists so that LispNodes that have not had a closing tag attached
    // can have a dummy token to simplify annotation
    var null_token = new Token("", -1, "null", -1);

    /* LispNodes are aggregate nodes for s-expressions.
     */
    function LispNode(tag, children, opening) {
        this.tag = tag;            // current metadata for syntax inference
        this.parent = null;        // the parent expression
        this.list = children;      // all the child forms in order
        this.opening = opening;    // the token that opens this form.
        this.closing = null_token; // the token that closes this form.
        this.meta = null;          // metadata nodes will be attached here if they are found
    }

    var null_lispnode = new LispNode("null", [], null_token);

    /* PrefixNodes pair a reader prefix token (quote, deref, unquote, ...) with
     * the form it is attached to.
     */
    function PrefixNode(tag, token, attached_node) {
        this.tag = tag;
        this.token = token;
        this.attached_node = attached_node;
        this.parent = null;
    }

    // tokenize

    /* tokenize splits `code` into an array of Tokens. Whitespace and commas
     * are dropped; every other character ends up in exactly one token.
     */
    function tokenize(code) {
        var tokens = [];
        var tn = 0;

        var zero = "0".charCodeAt(0);
        var nine = "9".charCodeAt(0);
        var lower_a = "a".charCodeAt(0);
        var lower_f = "f".charCodeAt(0);
        var upper_a = "A".charCodeAt(0);
        var upper_f = "F".charCodeAt(0);

        // Both predicates are false for NaN, which is what charCodeAt returns
        // past the end of the input.
        function is_digit(ch) {
            return ch >= zero && ch <= nine;
        }

        function is_hex_digit(ch) {
            return is_digit(ch)
                || (ch >= lower_a && ch <= lower_f)
                || (ch >= upper_a && ch <= upper_f);
        }

        // does the character at offset `at` terminate a symbol?
        function ends_symbol(at) {
            switch (code[at]) {
            case " ": case "\t": case "\n": case "\r": case "\\": case ",":
            case "{": case "}": case "(": case ")": case "[": case "]":
            case "^": case "`": case "@":
                return true;
            case ";":
                // theres a weird bug via syntax highlighter that gives us escaped
                // entities. a ";" that closes one of them belongs to the symbol.
                return !(code.slice(Math.max(0, at - 3), at + 1) === "&lt;"
                      || code.slice(Math.max(0, at - 3), at + 1) === "&gt;"
                      || code.slice(Math.max(0, at - 4), at + 1) === "&amp;");
            default:
                return false;
            }
        }

        var dispatch = false; // have we just seen a # character?

        // i tracks the start of the current window
        // extent is the window for slicing
        for (var i = 0, extent = i, j = code.length; i < j && extent <= j;) {
            var c = code[i];

            // we care about capturing the whole token when dispatch is used, so back up the
            // starting index by 1
            if (dispatch) i--;

            switch (c) {
            // dispatch alters the value of the next thing read
            case "#":
                dispatch = true;
                i++;
                extent++;
                continue;

            case " ": // ignore whitespace
            case "\t":
            case "\n":
            case "\r":
            case ",":
                extent++;
                break;

            // simple terms
            case "^":
            case "`":
            case ")":
            case "[":
            case "]":
            case "}":
            case "@":
                tokens[tn++] = new Token(c, i, c, ++extent - i);
                break;

            case "'":
                tokens[tn++] = new Token(code.slice(i, ++extent), i, dispatch ? "#'" : "'", extent - i);
                break;

            case "(":
                tokens[tn++] = new Token(code.slice(i, ++extent), i, dispatch ? "#(" : "(", extent - i);
                break;

            case "{":
                tokens[tn++] = new Token(code.slice(i, ++extent), i, dispatch ? "#{" : "{", extent - i);
                break;

            case "\\":
                // The window must end exactly after the literal, otherwise the
                // character following it (often a closing paren) is lost.
                if (code.slice(i + 1, i + 8) === "newline") {
                    tokens[tn++] = new Token("\\newline", i, "value", 8);
                    extent = i + 8;
                }
                else if (code.slice(i + 1, i + 6) === "space") {
                    tokens[tn++] = new Token("\\space", i, "value", 6);
                    extent = i + 6;
                }
                else if (code.slice(i + 1, i + 4) === "tab") {
                    tokens[tn++] = new Token("\\tab", i, "value", 4);
                    extent = i + 4;
                }
                // work around fun bug with &,>,< in character literals: they
                // arrive as escaped entities, and the whole entity is the character
                else if (code.slice(i + 1, i + 6) === "&amp;") {
                    tokens[tn++] = new Token("\\&amp;", i, "value", 6);
                    extent = i + 6;
                }
                else if (code.slice(i + 1, i + 5) === "&lt;") {
                    tokens[tn++] = new Token("\\&lt;", i, "value", 5);
                    extent = i + 5;
                }
                else if (code.slice(i + 1, i + 5) === "&gt;") {
                    tokens[tn++] = new Token("\\&gt;", i, "value", 5);
                    extent = i + 5;
                }
                else {
                    extent += 2;
                    tokens[tn++] = new Token(code.slice(i, extent), i, "value", 2);
                }
                break;

            case "~": // splice or unquote
                if (code[i + 1] === "@") {
                    extent += 2;
                    tokens[tn++] = new Token(code.slice(i, extent), i, "splice", 2);
                }
                else {
                    tokens[tn++] = new Token(code.slice(i, ++extent), i, "unquote", extent - i);
                }
                break;

            // complicated terms
            case "\"": // strings and regexps
                for (extent++; extent < j; extent++) {
                    if (code[extent] === "\\") extent++;
                    else if (code[extent] === "\"") break;
                }
                // include the closing quote, but never run past the input when
                // the string is unterminated
                extent = Math.min(extent + 1, j);
                tokens[tn++] = new Token(code.slice(i, extent), i, dispatch ? "regexp" : "string", extent - i);
                break;

            case ";":
                for (; extent < j && code[extent] !== "\n" && code[extent] !== "\r"; extent++);
                // include the line terminator when there is one
                extent = Math.min(extent + 1, j);
                tokens[tn++] = new Token(code.slice(i, extent), i, "comments", extent - i);
                break;

            case "+": // numbers; fall through to symbol for + and - not prefixing a number
            case "-":
            case "0":
            case "1":
            case "2":
            case "3":
            case "4":
            case "5":
            case "6":
            case "7":
            case "8":
            case "9":
                // todo: signed exponents
                var signed = (c === "+" || c === "-");
                var c2 = code.charCodeAt(i + 1);

                if (!signed || is_digit(c2)) {
                    var next_char;

                    if (signed) extent++;
                    while (extent < j && is_digit(code.charCodeAt(extent))) extent++;

                    // interstitial characters: radix, ratio, decimal point
                    next_char = code[extent];
                    c2 = code.charCodeAt(extent + 1);
                    if ((next_char === "r" || next_char === "R" || next_char === "/" || next_char === ".")
                        && is_digit(c2)) {
                        extent++;
                        while (extent < j && is_digit(code.charCodeAt(extent))) extent++;
                    }

                    // hex
                    next_char = code[extent];
                    c2 = code.charCodeAt(extent + 1);
                    if ((next_char === "x" || next_char === "X") && is_hex_digit(c2)) {
                        extent++;
                        while (extent < j && is_hex_digit(code.charCodeAt(extent))) extent++;
                    }

                    // exponent
                    next_char = code[extent];
                    c2 = code.charCodeAt(extent + 1);
                    if ((next_char === "e" || next_char === "E") && is_digit(c2)) {
                        extent++;
                        while (extent < j && is_digit(code.charCodeAt(extent))) extent++;
                    }

                    // trailing N / M suffix
                    next_char = code[extent];
                    if (next_char === "N" || next_char === "M") extent++;

                    tokens[tn++] = new Token(code.slice(i, extent), i, "value", extent - i);
                    break;
                }
                // a lone + or - is a symbol: falls through

            case "_":
                if (dispatch && c === "_") {
                    tokens[tn++] = new Token(code.slice(i, ++extent), i, "skip", extent - i);
                    break;
                }
                // if not a skip, fall through to symbols

            // Allow just about any other symbol as a symbol. This is far more permissive than
            // clojure actually allows, but should catch any weirdo crap that accidentally gets
            // into the code.
            default:
                for (extent++; extent < j && !ends_symbol(extent); extent++);

                var value = code.slice(i, extent);
                var tag = "symbol";
                if (value[0] == ":") {
                    tag = "keyword";
                }
                else if (value === "true" || value === "false" || value === "nil") {
                    tag = "value";
                }
                tokens[tn++] = new Token(value, i, tag, extent - i);
            }

            dispatch = false;
            i = extent;
        }

        return tokens;
    }

    /* build_tree turns the flat token array into a tree of LispNodes,
     * PrefixNodes and Tokens. The returned toplevel node holds every top level
     * form in its `list`.
     *
     * Comments and forms skipped with #_ are left out of the tree. Where an
     * operand is missing (end of input, or a closing delimiter directly after a
     * prefix) an empty object is used as a placeholder node.
     */
    function build_tree(tokens) {
        var toplevel = {
            list: [],
            tag: "toplevel",
            parent: null,
            opening: null,
            closing: null,
            depth: -1
        };

        // loop variables hoisted out as semi globals to track position in token stream
        var i = -1;
        var j = tokens.length;

        var short_fn = false; // are we already inside a #( ... ) function form?

        function is_closing(t) {
            return t.tag === "}" || t.tag === ")" || t.tag === "]";
        }

        // skip_ignored steps over special tokens and forms that dont belong in the
        // tree, starting at `t` (which must be tokens[i]). It returns the first
        // token that does belong, or undefined at the end of the input.
        function skip_ignored(t) {
            while (t && (t.tag === "comments" || t.tag === "invalid" || t.tag === "skip") && i < j) {
                if (t.tag === "skip") {
                    t.tag = "preprocessor";
                    annotate_comment(parse_one(tokens[++i]));
                }
                t = tokens[++i];
            }
            return t;
        }

        // parse_one reads exactly one form starting at `t` (which must be tokens[i]).
        function parse_one(t) {
            t = skip_ignored(t);

            if (!t) return {}; // hackity hack

            // A closing delimiter is never a form of its own. Hand it back to the
            // enclosing build_aggregate (whose loop increment will read it again)
            // so the aggregate is still closed correctly.
            if (is_closing(t)) {
                i--;
                return {};
            }

            switch (t.tag) {
            case "{":
                return build_aggregate(new LispNode("map", [], t), "}");
            case "(":
                return build_aggregate(new LispNode("list", [], t), ")");
            case "#{":
                return build_aggregate(new LispNode("set", [], t), "}");
            case "[":
                return build_aggregate(new LispNode("vector", [], t), "]");
            case "#(":
                // this is a bit hairy, but it annotates nested #( ... ) forms as invalid
                var prev_short_fn = short_fn;
                try {
                    short_fn = true;
                    var aggregate = build_aggregate(new LispNode("list", [], t), ")");
                    if (prev_short_fn) {
                        aggregate.opening.tag = "invalid";
                        aggregate.closing.tag = "invalid";
                    }
                    return aggregate;
                }
                finally {
                    short_fn = prev_short_fn;
                }
            case "'":
                return new PrefixNode("quote", t, parse_one(tokens[++i]));
            case "#'":
                return new PrefixNode("varquote", t, parse_one(tokens[++i]));
            case "@":
                return new PrefixNode("deref", t, parse_one(tokens[++i]));
            case "`":
                return new PrefixNode("syntaxquote", t, parse_one(tokens[++i]));
            case "unquote":
                return new PrefixNode("unquote", t, parse_one(tokens[++i]));
            case "splice":
                return new PrefixNode("splice", t, parse_one(tokens[++i]));
            case "^":
                t.tag = "meta";
                var meta = parse_one(tokens[++i]);
                var next = parse_one(tokens[++i]);
                next.meta = meta;
                return next;
            }

            return t;
        }

        // build_aggregate collects together sub forms for one aggregate form.
        // `expected_closing` is the tag of the closing token, or null for the
        // toplevel, which is only closed by the end of the input.
        function build_aggregate(current, expected_closing) {
            for (i++; i < j; i++) {
                // Skip comments and #_ forms first, so that a closing delimiter
                // directly behind them is still recognised as closing this form.
                var t = skip_ignored(tokens[i]);
                if (!t) break;

                if (is_closing(t)) {
                    if (t.tag !== expected_closing) t.tag = "invalid";
                    current.closing = t;
                    if (expected_closing) return current;
                    continue; // stray closer at the toplevel: marked invalid, move on
                }

                var node = parse_one(t);
                node.parent = current;
                current.list[current.list.length] = node;
            }

            return current;
        }

        build_aggregate(toplevel, null);

        return toplevel;
    }

    // annotation rules to apply to a form based on its head

    var show_locals = true; // HACK. would rather not use a (semi)-global.

    /* annotate_comment is a special case annotation.
     * in addition to its role in styling specific forms, it is called by parse_one to
     * ignore any forms skipped with #_
     *
     * It tags the node, its delimiters or prefix token, and everything nested
     * inside it as "comments".
     */
    function annotate_comment(exp) {
        if (!exp) return;

        exp.tag = "comments";

        if (exp.list) {
            exp.opening.tag = "comments";
            exp.closing.tag = "comments";

            for (var i = 0; i < exp.list.length; i++) {
                annotate_comment(exp.list[i]);
            }
        }
        else if (exp.attached_node) {
            // prefix nodes: comment out the prefix token as well as its operand
            exp.token.tag = "comments";
            annotate_comment(exp.attached_node);
        }
    }

    /* custom annotation rules are stored here */
    var annotation_rules = {};

    // this function is exposed to allow ad hoc extension of the customisation rules.
    // `names` is an array of head symbols; `rule` is called as rule(exp, scope)
    // for every list whose head is one of them.
    function register_annotation_rule(names, rule) {
        for (var i = 0; i < names.length; i++) {
            annotation_rules[names[i]] = rule;
        }
    }

    /* annotate_destructuring tags every symbol bound by the binding form `exp`
     * as "variable" and records its name in `scope`.
     */
    function annotate_destructuring(exp, scope) {
        if (!exp) return;

        if (exp.list) {
            if (exp.tag === "vector") {
                for (var i = 0; i < exp.list.length; i++) {
                    annotate_destructuring(exp.list[i], scope);
                }
            }
            else if (exp.tag === "map") {
                for (var k = 0; k < exp.list.length; k += 2) {
                    var key = exp.list[k];
                    var val = exp.list[k + 1]; // undefined when the map has an odd number of forms

                    if (val && key.tag === "keyword" && val.tag === "vector") {
                        // keyword followed by a vector of names, e.g. :keys [a b]
                        for (var ii = 0, jj = val.list.length; ii < jj; ii++) {
                            if (val.list[ii].tag !== "symbol") continue;
                            val.list[ii].tag = "variable";
                            scope[val.list[ii].value] = true;
                        }
                    }
                    else {
                        annotate_destructuring(key, scope);
                        annotate_expressions(val, scope);
                    }
                }
            }
        }
        // the rest marker may arrive as an escaped entity
        else if (exp.tag === "symbol" && (exp.value !== "&" && exp.value !== "&amp;")) {
            exp.tag = "variable";
            scope[exp.value] = true;
        }
    }

    // annotates the name/value pairs of a binding vector; vectors with an odd
    // number of forms are left alone
    function _annotate_binding_vector(exp, scope) {
        if (exp.tag !== "vector") return;

        var bindings = exp.list;

        if (bindings.length % 2 === 1) return;

        for (var i = 0; i < bindings.length; i += 2) {
            annotate_destructuring(bindings[i], scope);
            annotate_expressions(bindings[i + 1], scope);
        }
    }

    /* rule for let-like forms: (head [bindings] body...) */
    function annotate_binding(exp, scope) {
        var bindings = exp.list[1];
        var body_start = 2;

        if (!show_locals) { // HACK
            // locals are not highlighted: annotate the binding vector like any
            // other data so the rest of the form still gets highlighted
            body_start = 1;
        }
        else if (bindings) {
            scope = Object.create(scope);
            _annotate_binding_vector(bindings, scope);
        }

        for (var i = body_start; i < exp.list.length; i++) {
            annotate_expressions(exp.list[i], scope);
        }
    }

    // annotates one function arity: exp.list[start_idx] is the argument vector,
    // everything after it is the body
    function _annotate_function_body(exp, scope, start_idx) {
        var argvec = exp.list[start_idx];
        if (!argvec || argvec.tag !== "vector") return;

        scope = Object.create(scope);

        for (var i = 0, j = argvec.list.length; i < j; i++) {
            annotate_destructuring(argvec.list[i], scope);
        }

        for (var k = start_idx, n = exp.list.length; k < n; k++) {
            annotate_expressions(exp.list[k], scope);
        }
    }

    /* rule for function-like forms, both single arity (a vector among the
     * children) and multiple arity (each list child is tried as an arity).
     */
    function annotate_function(exp, scope) {
        for (var i = 1, j = exp.list.length; i < j; i++) {
            var child = exp.list[i];

            if (child.tag === "vector") {
                _annotate_function_body(exp, scope, i);
                return;
            }
            else if (child.tag === "list") {
                _annotate_function_body(child, scope, 0);
            }
        }
    }

    /* rule for (letfn [(name [args] body...) ...] body...) */
    function annotate_letfn(exp, scope) {
        scope = Object.create(scope);
        var bindings = exp.list[1];
        var fns = (bindings && bindings.list) ? bindings.list : [];
        var i;

        // every name is registered before any body is annotated, as the
        // functions may refer to each other
        for (i = 0; i < fns.length; i++) {
            var fn = fns[i];
            if (!fn.list || !fn.list[0]) continue;
            fn.list[0].tag = "variable";
            scope[fn.list[0].value] = true;
        }

        for (i = 0; i < fns.length; i++) {
            if (fns[i].list) annotate_function(fns[i], scope);
        }

        for (i = 2; i < exp.list.length; i++) {
            annotate_expressions(exp.list[i], scope);
        }
    }

    register_annotation_rule(
        ["comment"],
        annotate_comment
    );

    register_annotation_rule(
        ["let", "when-let", "if-let", "binding", "doseq", "for", "dotimes", "let*"],
        annotate_binding
    );

    register_annotation_rule(
        ["defn", "defn-", "fn", "bound-fn", "defmacro", "fn*", "defmethod"],
        annotate_function
    );

    register_annotation_rule(
        ["letfn"],
        annotate_letfn
    );

    // standard annotations

    // flags every token inside a metadata form with the secondary tag `meta`
    function _annotate_metadata_recursive(meta, scope) {
        if (!meta) return;

        if (meta.list !== undefined && meta.list !== null) {
            // delimiters are flagged once, and also for empty collections
            meta.opening.secondary_tags.meta = true;
            meta.closing.secondary_tags.meta = true;

            for (var i = 0, j = meta.list.length; i < j; i++) {
                _annotate_metadata_recursive(meta.list[i], scope);
            }
        }
        else if (meta.attached_node) {
            meta.token.secondary_tags.meta = true;
            _annotate_metadata_recursive(meta.attached_node, scope);
        }
        else if (meta.secondary_tags) { // placeholder nodes have no tags
            meta.secondary_tags.meta = true;
        }
    }

    // annotates the metadata attached to `exp`, if there is any
    function annotate_metadata(exp) {
        if (!(exp && exp.meta)) return;

        var meta = exp.meta;

        annotate_expressions(meta, {});
        _annotate_metadata_recursive(meta, {});
    }

    // flags every token inside a quoted form with the secondary tag `quoted`;
    // unquote and splice forms are left untouched
    function annotate_quoted(exp, scope) {
        if (!exp) return;

        if (exp.list !== undefined && exp.list !== null) {
            // delimiters are flagged once, and also for empty collections
            exp.opening.secondary_tags.quoted = true;
            exp.closing.secondary_tags.quoted = true;

            for (var i = 0, j = exp.list.length; i < j; i++) {
                annotate_quoted(exp.list[i], scope);
            }
        }
        else if (exp.attached_node) {
            if (exp.tag === "unquote" || exp.tag === "splice") return;
            exp.token.secondary_tags.quoted = true;
            annotate_quoted(exp.attached_node, scope);
        }
        else if (exp.secondary_tags) { // placeholder nodes have no tags
            exp.secondary_tags.quoted = true;
        }
    }

    /* annotate_expressions walks the tree below `exp` and rewrites the tags of
     * its tokens (function, method, variable, constant, type, ...). `scope`
     * maps the names of the locals visible at this point to true.
     */
    function annotate_expressions(exp, scope) {
        if (!exp) return;

        annotate_metadata(exp);

        switch (exp.tag) {
        case "toplevel":
            for (var i = 0; i < exp.list.length; i++) {
                annotate_expressions(exp.list[i], scope);
            }
            break;

        case "list": // functions, macros, special forms, comments
            var head = exp.list[0];

            if (head) {
                if (head.tag === "list" || head.tag === "vector"
                 || head.tag === "map" || head.tag === "set") {
                    annotate_expressions(head, scope);
                }
                else if (head.attached_node) {
                    annotate_expressions(head.attached_node, scope);
                }
                else if (typeof head.value === "string") { // placeholder nodes have no value
                    head.tag = (head.value.match(/(^\.)|(\.$)|[A-Z].*\//)
                                ? "method"
                                : "function");
                }

                // apply specific rules
                if (annotation_rules.hasOwnProperty(head.value)) {
                    annotation_rules[head.value](exp, scope);
                }
                else {
                    for (var k = 1; k < exp.list.length; k++) {
                        annotate_expressions(exp.list[k], scope);
                    }
                }
            }
            else { // empty list
                exp.opening.tag = "value";
                exp.closing.tag = "value";
            }

            break;

        case "vector": // data
        case "map":
        case "set":
            for (var n = 0; n < exp.list.length; n++) {
                annotate_expressions(exp.list[n], scope);
            }
            break;

        case "symbol":
            if (exp.value.match(/[A-Z].*\/[A-Z_]+/)) {
                exp.tag = "constant";
            }
            // compared against true so that names inherited from Object.prototype
            // (toString, constructor, ...) are not mistaken for locals
            else if (show_locals && scope[exp.value] === true) {
                exp.tag = "variable";
            }
            else if (exp.value.match(/([A-Z].*\/)?[A-Z_]+/)) {
                exp.tag = "type";
            }
            break;

        case "quote":
        case "syntaxquote":
            annotate_quoted(exp.attached_node, scope);
            // falls through: the quoted form is annotated as well

        default:
            if (exp.attached_node) annotate_expressions(exp.attached_node, scope);
        }
    }

    // translation of tag to css:
    var css_translation = {
        "constant":     "constants",
        "keyword":      "constants",
        "method":       "color1",
        "type":         "color3",
        "function":     "functions",
        "string":       "string",
        "regexp":       "string",
        "value":        "value",
        "comments":     "comments",
        "symbol":       "symbol",
        "variable":     "variable",
        "splice":       "preprocessor",
        "unquote":      "preprocessor",
        "preprocessor": "preprocessor",
        "meta":         "preprocessor",
        "'":            "preprocessor",
        "#'":           "preprocessor",
        "`":            "preprocessor",
        "(":            "plain",
        ")":            "plain",
        "#(":           "plain",
        "{":            "keyword",
        "}":            "keyword",
        "#{":           "keyword",
        "[":            "keyword",
        "]":            "keyword",
        "invalid":      "invalid",
        "@":            "plain"
    };

    /* translate_tags_to_css sets `css` on every token: the class for its tag
     * ("plain" when the tag has no entry in the table) followed by the names of
     * its secondary tags.
     */
    function translate_tags_to_css(tokens) {
        for (var i = 0, j = tokens.length; i < j; i++) {
            var token = tokens[i];
            token.css = css_translation.hasOwnProperty(token.tag)
                ? css_translation[token.tag]
                : "plain";
            for (var k in token.secondary_tags) {
                if (token.secondary_tags.hasOwnProperty(k)) token.css += " " + k;
            }
        }
    }

    // create the new brush; skipped when SyntaxHighlighter is not loaded

    if (SH) {
        SH.brushes.Clojure = function () {};
        SH.brushes.Clojure.prototype = new SH.Highlighter();

        SH.brushes.Clojure.prototype.findMatches = function find_matches(regexpList, code) {
            // this is a nasty global hack. need to resolve this
            var locals = this.params ? this.params.locals : undefined;

            if (locals === false) {
                // a boolean false must switch locals off rather than count as "not set"
                show_locals = false;
            }
            else if (locals) {
                show_locals = locals === true || locals === "true";
            }
            else {
                show_locals = true;
            }

            var tokens = tokenize(code);
            annotate_expressions(build_tree(tokens), {});
            translate_tags_to_css(tokens);

            return tokens;
        };

        SH.brushes.Clojure.aliases = ['clojure', 'Clojure', 'clj'];
        SH.brushes.Clojure.register_annotation_rule = register_annotation_rule;
    }

    return {
        tokenize: tokenize,
        build_tree: build_tree
    };
})(typeof SyntaxHighlighter !== "undefined" ? SyntaxHighlighter : null);