#!/usr/bin/env python3 # pylint: disable=too-many-lines """ ======= pySELL ================================================================= A Python based Simple E-Learning Language for the simple creation of interactive courses LICENSE GPLv3 AUTHOR Andreas Schwenk DOCS Refer to https://github.com/andreas-schwenk/pysell and read the descriptions at the end of the page USAGE Only file 'sell.py' is required to compile question files COMMAND python3 [-J] sell.py PATH ARGUMENTS -J is optional and generates a JSON output file for debugging EXAMPLE python3 sell.py examples/ex1.txt OUTPUT examples/ex1.html, examples/ex1_DEBUG.html FAQ Q: Why is this file so long? A: The intention is to provide pySelL as ONE file, that can easily be shared and modified. Q: You could also package and publish pySELL as a package! A: Sure. Maybe this will happen in the future.. """ import base64 import datetime import io import json import os import re import sys from typing import Self class SellError(Exception): """exception""" # pylint: disable-next=too-few-public-methods class Lexer: """Scanner that takes a string input and returns a sequence of tokens; one at a time.""" def __init__(self, src: str) -> None: """sets the source to be scanned""" # the source code self.src: str = src # the current token self.token: str = "" # the current input position self.pos: int = 0 # set the first token to self.token self.next() def next(self) -> None: """gets the next token""" # start with a fresh token self.token = "" # loop up to the next special character stop = False while not stop and self.pos < len(self.src): # get the next character from the input ch = self.src[self.pos] # in case that we get a special character (a.k.a delimiter), # we stop if ch in "`^'\"%#*$()[]{}\\,.:;+-*/_!<>\t\n =?|&": # if the current token is not empty, return it for now and # keep the delimiter to the next call of next() if len(self.token) > 0: return # a delimiter stops further advancing in the input stop = True # keep quotes as a single token. Supported quote types are # double quotes ("...") and accent grave quotes (`...`) if ch in '"`': kind = ch # " or ` self.token += ch self.pos += 1 # advance to the quotation end while self.pos < len(self.src): if self.src[self.pos] == kind: break self.token += self.src[self.pos] self.pos += 1 # add the current character to the token self.token += ch self.pos += 1 # # lexer tests # lex = Lexer('a"x"bc 123 *blub* $`hello, world!`123$') # while len(lex.token) > 0: # print(lex.token) # lex.next() # exit(0) # For drawing random variables and to calculate the sample solution, we will # be executing Python code that is embedded in the quiz descriptions. # The evaluation of code will populate local variables. Its data types also # depend on the used libraries. # The following lists cluster some of these types. boolean_types = ["", ""] int_types = [ "", "", "", "", "", ] float_types = [""] # The following list contains all of Pythons basic keywords. These are used # in syntax highlighting in "*_DEBUG.html" files. python_kws = [ "and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "False", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "None", "nonlocal", "not", "or", "pass", "raise", "return", "True", "try", "while", "with", "yield", ] # The following list of identifiers may be in locals of Python source that # uses "sympy". These identifiers must be skipped in the JSON output. skipVariables = [ "acos", "acosh", "acoth", "asin", "asinh", "atan", "atan2", "atanh", "ceil", "ceiling", "cos", "cosh", "cot", "coth", "exp", "floor", "ln", "log", "pi", "round", "sin", "sinc", "sinh", "tan", "transpose", ] # The following function rangeZ is provided as pseudo-intrinsic # function in Python scripts, embedded into the question descriptions. # It is an alternative version for "range", that excludes the zero. # This is beneficial for drawing random numbers of questions for math classes. # (the next line disables a warning, about camel-case function names) # pylint: disable-next=invalid-name def rangeZ(*a): """implements 'range', but excludes the zero""" r = [] if len(a) == 1: r = list(range(a[0])) elif len(a) == 2: r = list(range(a[0], a[1])) elif len(a) == 3: r = list(range(a[0], a[1], a[2])) if 0 in r: r.remove(0) return r # TODO: add comments starting from here class TextNode: """Tree structure for the question text""" def __init__(self, type_: str, data: str = "") -> None: self.type: str = type_ self.data: str = data self.children: list[TextNode] = [] # pylint: disable-next=too-many-branches,too-many-statements def parse(self) -> None: """parses text recursively""" if self.type == "root": self.children = [TextNode(" ", "")] lines = self.data.split("\n") self.data = "" for line in lines: line = line.strip() if len(line) == 0: continue type_ = line[0] # refer to "types" below if type_ not in "[(-!": type_ = " " if type_ != self.children[-1].type: self.children.append(TextNode(type_, "")) self.children[-1].type = type_ self.children[-1].data += line + "\n" if line.endswith("\\\\"): # line break # TODO: this is NOT allowed, if we are within math mode!! self.children[-1].data = self.children[-1].data[:-3] + "\n" self.children.append(TextNode(" ", "")) types = { " ": "paragraph", "(": "single-choice", "[": "multi-choice", "-": "itemize", "!": "command", } for child in self.children: child.type = types[child.type] child.parse() elif self.type in ("multi-choice", "single-choice"): options = self.data.strip().split("\n") self.data = "" for option in options: node = TextNode("answer") self.children.append(node) text = "" if self.type == "multi-choice": text = "]".join(option.split("]")[1:]).strip() else: text = ")".join(option.split(")")[1:]).strip() if option.startswith("[!"): # conditionally set option # TODO: check, if variable exists and is of type bool var_id = option[2:].split("]")[0] node.children.append(TextNode("var", var_id)) else: # statically set option correct = option.startswith("[x]") or option.startswith("(x)") node.children.append( TextNode("bool", "true" if correct else "false") ) node.children.append(TextNode("paragraph", text)) node.children[1].parse() elif self.type == "itemize": items = self.data.strip().split("\n") self.data = "" for child in items: node = TextNode("paragraph", child[1:].strip()) self.children.append(node) node.parse() elif self.type == "paragraph": lex = Lexer(self.data.strip()) self.data = "" self.children.append(self.parse_span(lex)) elif self.type == "command": if ( ".svg" in self.data or ".png" in self.data or ".jpg" in self.data or ".jpeg" in self.data ): self.parse_image() else: # TODO: report error pass else: raise SellError("unimplemented") def parse_image(self) -> Self: """parses an image inclusion""" img_path = self.data[1:].strip() img_width = 100 # percentage if ":" in img_path: tokens = img_path.split(":") img_path = tokens[0].strip() img_width = tokens[1].strip() self.type = "image" self.data = img_path self.children.append(TextNode("width", img_width)) def parse_span(self, lex: Lexer) -> Self: """parses a span element""" # grammar: span = { item }; # item = bold | math | input | string_var | plus_minus | text; # bold = "*" { item } "*"; # math = "$" { item } "$"; # input = "%" ["!"] var; # string_var = "&" var; # plus_minus = "+" "-"; # text = "\\" | otherwise; span = TextNode("span") while lex.token != "": span.children.append(self.parse_item(lex)) return span # pylint: disable-next=too-many-return-statements def parse_item(self, lex: Lexer, math_mode=False) -> Self: """parses a single item of a span/paragraph""" if not math_mode and lex.token == "*": return self.parse_bold_italic(lex) if lex.token == "$": return self.parse_math(lex) if not math_mode and lex.token == "%": return self.parse_input(lex) if not math_mode and lex.token == "&": return self.parse_string_var(lex) if math_mode and lex.token == "+": n = TextNode("text", lex.token) lex.next() if lex.token == "-": # "+-" automatically chooses "+" or "-", # depending on the sign or the following variable. # For the variable itself, only its absolute value is used. n.data += lex.token n.type = "plus_minus" lex.next() return n if not math_mode and lex.token == "\\": lex.next() if lex.token == "\\": lex.next() return TextNode("text", "
") n = TextNode("text", lex.token) lex.next() return n def parse_bold_italic(self, lex: Lexer) -> Self: """parses bold or italic text""" node = TextNode("italic") if lex.token == "*": lex.next() if lex.token == "*": node.type = "bold" lex.next() while lex.token not in ("", "*"): node.children.append(self.parse_item(lex)) if lex.token == "*": lex.next() if lex.token == "*": lex.next() return node def parse_math(self, lex: Lexer) -> Self: """parses inline math or display style math""" math = TextNode("math") if lex.token == "$": lex.next() if lex.token == "$": math.type = "display-math" lex.next() while lex.token not in ("", "$"): math.children.append(self.parse_item(lex, True)) if lex.token == "$": lex.next() if math.type == "display-math" and lex.token == "$": lex.next() return math def parse_input(self, lex: Lexer) -> Self: """parses an input element field""" input_ = TextNode("input") if lex.token == "%": lex.next() if lex.token == "!": input_.type = "input2" lex.next() input_.data = lex.token.strip() lex.next() return input_ def parse_string_var(self, lex: Lexer) -> Self: """parses a string variable""" sv = TextNode("string_var") if lex.token == "&": lex.next() sv.data = lex.token.strip() lex.next() return sv def optimize(self) -> Self: """optimizes the current text node recursively. E.g. multiple pure text items are concatenated into a single text node.""" children_opt = [] for c in self.children: opt = c.optimize() if ( opt.type == "text" and opt.data.startswith('"') is False and opt.data.startswith("`") is False and len(children_opt) > 0 and children_opt[-1].type == "text" and children_opt[-1].data.startswith('"') is False and children_opt[-1].data.startswith("`") is False ): children_opt[-1].data += opt.data else: children_opt.append(opt) self.children = children_opt return self def to_dict(self) -> dict: """recursively exports the text node instance to a dictionary""" # t := type, d := data, c := children return { "t": self.type, "d": self.data, "c": list(map(lambda o: o.to_dict(), self.children)), } # pylint: disable-next=too-many-instance-attributes class Question: """Question of the quiz""" def __init__(self, input_dirname: str, src_line_no: int) -> None: self.input_dirname: str = input_dirname self.src_line_no: int = src_line_no self.title: str = "" self.python_src: str = "" self.variables: set[str] = set() self.instances: list[dict] = [] self.text_src: str = "" self.text: TextNode = None self.error: str = "" self.python_src_tokens: set[str] = set() def build(self) -> None: """builds a question from text and Python sources""" if len(self.python_src) > 0: self.analyze_python_code() instances_str = [] if len(self.error) == 0: for _ in range(0, 5): # try to generate instances distinct to prior once # TODO: give up and keep less than 5, if applicable! instance = {} instance_str = "" for _ in range(0, 10): self.error = "" instance = self.run_python_code() instance_str = str(instance) if instance_str not in instances_str: break instances_str.append(instance_str) self.instances.append(instance) # if there is no randomization in the input, then one instance is enough if "rand" not in self.python_src: break if "No module named" in self.error: print("!!! " + self.error) self.text = TextNode("root", self.text_src) self.text.parse() var_occurrences: set[str] = set() self.post_process_text(self.text, False, var_occurrences) self.text.optimize() # pylint: disable-next=too-many-branches def post_process_text( self, node: TextNode, math, var_occurrences: set[str] ) -> None: """post processes the textual part. For example, a semantical check for the existing of referenced variables is applied. Also images are loaded and stringified.""" for c in node.children: self.post_process_text( c, math or node.type == "math" or node.type == "display-math", var_occurrences, ) if node.type == "input": if node.data.startswith('"'): # gap question node.type = "gap" node.data = node.data.replace('"', "") elif node.data in self.variables: var_id = node.data if var_id in var_occurrences: self.error += "It is not allowed to refer to a variable " self.error += "twice or more. Hint: Create a copy of " self.error += f"variable '{var_id}' in Python and ask for " self.error += "the new variable name. " self.error += f"Example code: '{var_id}2 = {var_id}'." self.error += f"Then ask for '%{var_id}2'." else: var_occurrences.add(var_id) elif node.data not in self.variables: # ask for numerical/term variable var_id = node.data self.error += f"Unknown input variable '{var_id}'. " elif node.type == "string_var": var_id = node.data if var_id not in self.variables: self.error += f"Unknown string variable '{var_id}'. " elif node.type == "text": if ( math and len(node.data) >= 2 and node.data.startswith('"') and node.data.endswith('"') ): node.data = node.data[1:-1] elif math and (node.data in self.variables): node.type = "var" elif ( not math and len(node.data) >= 2 and node.data.startswith("`") and node.data.endswith("`") ): node.type = "code" node.data = node.data[1:-1] elif node.type == "image": # TODO: warning, if file size is (too) large path = os.path.join(self.input_dirname, node.data) img_type = os.path.splitext(path)[1][1:] supported_img_types = ["svg", "png", "jpg", "jpeg"] if img_type not in supported_img_types: self.error += f"ERROR: image type '{img_type}' is not supported. " self.error += f"Use one of {', '.join(supported_img_types)}" elif os.path.isfile(path) is False: self.error += "ERROR: cannot find image at path '" + path + '"' else: # load image f = open(path, "rb") data = f.read() f.close() b64 = base64.b64encode(data) node.children.append(TextNode("data", b64.decode("utf-8"))) def float_to_str(self, v: float) -> str: """Converts float to string and cuts '.0' if applicable""" s = str(v) if s.endswith(".0"): return s[:-2] return s def analyze_python_code(self) -> None: """Get all tokens from Python source code. This is required to filter out all locals from libraries (refer to method run_python_code). Since relevant tokens are only those in the left-hand side of an assignment, we filter out non-assignment statements, as well as the right-hand side of statements. As a side effect, irrelevant symbols of packages are also filtered out (e.g. 'mod', is populated to the locals, when using 'sage.all.power_mod')""" lines = self.python_src.split("\n") for line in lines: if "=" not in line: continue lhs = line.split("=")[0] lex = Lexer(lhs) while len(lex.token) > 0: self.python_src_tokens.add(lex.token) lex.next() # check for forbidden code if "matplotlib" in self.python_src and "show(" in self.python_src: self.error += "Remove the call show(), " self.error += "since this would result in MANY open windows :-)" # pylint: disable-next=too-many-locals,too-many-branches,too-many-statements def run_python_code(self) -> dict: """Runs the questions python code and gathers all local variables.""" local_variables = {} res = {} src = self.python_src try: # pylint: disable-next=exec-used exec(src, globals(), local_variables) # pylint: disable-next=broad-exception-caught except Exception as e: # print(e) self.error += str(e) + ". " return res for local_id, value in local_variables.items(): if local_id in skipVariables or (local_id not in self.python_src_tokens): continue type_str = str(type(value)) if type_str in ("", ""): continue self.variables.add(local_id) t = "" # type v = "" # value if type_str in boolean_types: t = "bool" v = str(value).lower() elif type_str in int_types: t = "int" v = str(value) elif type_str in float_types: t = "float" v = self.float_to_str(value) elif type_str == "": t = "complex" # convert "-0" to "0" real = 0 if value.real == 0 else value.real imag = 0 if value.imag == 0 else value.imag v = self.float_to_str(real) + "," + self.float_to_str(imag) elif type_str == "": t = "vector" v = str(value).replace("[", "").replace("]", "").replace(" ", "") elif type_str == "": t = "set" v = ( str(value) .replace("{", "") .replace("}", "") .replace(" ", "") .replace("j", "i") ) elif type_str == "": # e.g. 'Matrix([[-1, 0, -2], [-1, 5*sin(x)*cos(x)/7, 2], [-1, 2, 0]])' t = "matrix" v = str(value)[7:-1] elif ( type_str == "" or type_str == "" ): # e.g. '[[ -6 -13 -12]\n [-17 -3 -20]\n [-14 -8 -16]\n [ -7 -15 -8]]' t = "matrix" v = re.sub(" +", " ", str(value)) # remove double spaces v = re.sub(r"\[ ", "[", v) # remove space(s) after "[" v = re.sub(r" \]", "]", v) # remove space(s) before "]" v = v.replace(" ", ",").replace("\n", "") elif type_str == "": t = "string" v = value else: t = "term" v = str(value).replace("**", "^") # in case that an ODE is contained in the question # and only one constant ("C1") is present, then substitute # "C1" by "C" if "dsolve" in self.python_src: if "C2" not in v: v = v.replace("C1", "C") # t := type, v := value v = v.replace("I", "i") # reformat sympy imaginary part res[local_id] = {"t": t, "v": v} if len(self.variables) > 50: self.error += "ERROR: Wrong usage of Python imports. Refer to pySELL docs!" # TODO: write the docs... if "matplotlib" in self.python_src and "plt" in local_variables: plt = local_variables["plt"] buf = io.BytesIO() plt.savefig(buf, format="svg", transparent=True) buf.seek(0) svg = buf.read() b64 = base64.b64encode(svg) res["__svg_image"] = {"t": "svg", "v": b64.decode("utf-8")} plt.clf() return res def to_dict(self) -> dict: """recursively exports the question to a dictionary""" return { "title": self.title, "error": self.error, "is_ode": "dsolve" # contains an Ordinary Differential Equation in self.python_src, "variables": list(self.variables), "instances": self.instances, "text": self.text.to_dict(), # the following is only relevant for debugging purposes, # i.e. only present in _DEBUG.html "src_line": self.src_line_no, "text_src_html": self.syntax_highlight_text(self.text_src), "python_src_html": self.syntax_highlight_python(self.python_src), "python_src_tokens": list(self.python_src_tokens), } # pylint: disable-next=too-many-branches,too-many-statements def syntax_highlight_text_line(self, src: str) -> str: """syntax highlights a single questions text line and returns the formatted code in HTML format""" html = "" math = False code = False bold = False italic = False n = len(src) i = 0 while i < n: ch = src[i] if ch == " ": html += " " elif not math and ch == "%": html += '' html += ch if i + 1 < n and src[i + 1] == "!": html += src[i + 1] i += 1 html += "" elif ch == "*" and i + 1 < n and src[i + 1] == "*": i += 1 bold = not bold if bold: html += '' html += "**" else: html += "**" html += "" elif ch == "*": italic = not italic if italic: html += '' html += "*" else: html += "*" html += "" elif ch == "$": display_style = False if i + 1 < n and src[i + 1] == "$": display_style = True i += 1 math = not math if math: html += '' html += ch if display_style: html += ch else: html += ch if display_style: html += ch html += "" elif ch == "`": code = not code if code: html += '' html += ch else: html += ch html += "" else: html += ch i += 1 if math: html += "" if code: html += "" if italic: html += "" if bold: html += "" return html def red_colored_span(self, inner_html: str) -> str: """embeds HTML code into a red colored span""" return '' + inner_html + "" def syntax_highlight_text(self, src: str) -> str: """syntax highlights a questions text and returns the formatted code in HTML format""" html = "" lines = src.split("\n") for line in lines: if len(line.strip()) == 0: continue if line.startswith("-"): html += self.red_colored_span("-") line = line[1:].replace(" ", " ") elif line.startswith("["): l1 = line.split("]")[0] + "]".replace(" ", " ") html += self.red_colored_span(l1) line = "]".join(line.split("]")[1:]).replace(" ", " ") elif line.startswith("("): l1 = line.split(")")[0] + ")".replace(" ", " ") html += self.red_colored_span(l1) line = ")".join(line.split(")")[1:]).replace(" ", " ") html += self.syntax_highlight_text_line(line) html += "
" return html def syntax_highlight_python(self, src: str) -> str: """syntax highlights a questions python code and returns the formatted code in HTML format""" lines = src.split("\n") html = "" for line in lines: if len(line.strip()) == 0: continue lex = Lexer(line) while len(lex.token) > 0: if len(lex.token) > 0 and lex.token[0] >= "0" and lex.token[0] <= "9": html += '' html += lex.token + "" elif lex.token in python_kws: html += '' html += lex.token + "" else: html += lex.token.replace(" ", " ") lex.next() html += "
" return html def compile_input_file(input_dirname: str, src: str) -> dict: """compiles a SELL input file to JSON""" lang = "en" title = "" author = "" info = "" questions = [] question = None parsing_python = False lines = src.split("\n") for line_no, line in enumerate(lines): line = line.split("#")[0] # remove comments line_not_stripped = line line = line.strip() if len(line) == 0: continue if line.startswith("LANG"): lang = line[4:].strip() elif line.startswith("TITLE"): title = line[5:].strip() elif line.startswith("AUTHOR"): author = line[6:].strip() elif line.startswith("INFO"): info = line[4:].strip() elif line.startswith("QUESTION"): question = Question(input_dirname, line_no + 1) questions.append(question) question.title = line[8:].strip() parsing_python = False elif question is not None: if line.startswith('"""'): parsing_python = not parsing_python else: if parsing_python: question.python_src += ( line_not_stripped.replace("\t", " ") + "\n" ) else: question.text_src += line + "\n" for question in questions: question.build() return { "lang": lang, "title": title, "author": author, "date": datetime.datetime.today().strftime("%Y-%m-%d"), "info": info, "questions": list(map(lambda o: o.to_dict(), questions)), } # the following code is automatically generated and updated by file "build.py" # @begin(html) HTML: str = b'' HTML += b' pySELL Quiz ' HTML += b'

DEBUG VERSION

This quiz was created using pySELL, the Python-based Simpl' HTML += b'e E-Learning Language, written by Andreas Schwenk, GPLv3' HTML += b'
last update on

' HTML = HTML.decode('utf-8') # @end(html) def main(): """the main function""" # get input and output path if len(sys.argv) < 2: print("usage: python sell.py [-J] INPUT_PATH.txt") print(" option -J enables to output a JSON file for debugging purposes") sys.exit(-1) write_explicit_json_file = "-J" in sys.argv input_path = sys.argv[-1] input_dirname = os.path.dirname(input_path) output_path = input_path.replace(".txt", ".html") output_debug_path = input_path.replace(".txt", "_DEBUG.html") output_json_path = input_path.replace(".txt", ".json") if os.path.isfile(input_path) is False: print("error: input file path does not exist") sys.exit(-1) # read input input_src: str = "" with open(input_path, mode="r", encoding="utf-8") as f: input_src = f.read() # compile out = compile_input_file(input_dirname, input_src) output_debug_json = json.dumps(out) output_debug_json_formatted = json.dumps(out, indent=2) for question in out["questions"]: del question["src_line"] del question["text_src_html"] del question["python_src_html"] del question["python_src_tokens"] output_json = json.dumps(out) # write test output if write_explicit_json_file: with open(output_json_path, "w", encoding="utf-8") as f: f.write(output_debug_json_formatted) # write html # (a) debug version (*_DEBUG.html) with open(output_debug_path, "w", encoding="utf-8") as f: f.write( HTML.replace( "let quizSrc = {};", "let quizSrc = " + output_debug_json + ";" ).replace("let debug = false;", "let debug = true;") ) # (b) release version (*.html) with open(output_path, "w", encoding="utf-8") as f: f.write(HTML.replace("let quizSrc = {};", "let quizSrc = " + output_json + ";")) # exit normally sys.exit(0) if __name__ == "__main__": main()