#!/usr/bin/env python3 # pylint: disable=too-many-lines """ ======= pySELL ================================================================= A Python based Simple E-Learning Language for the simple creation of interactive courses https://pysell.org LICENSE GPLv3 AUTHOR Andreas Schwenk DOCS Refer to https://github.com/andreas-schwenk/pysell and read the descriptions at the end of the page INSTALL Run 'pip install pysell', or use the stand-alone implementation sell.py CMD pysell [-J] [-S] PATH -J is optional and generates a JSON output file for debugging -S silent mode (no info prints) EXAMPLE pysell examples/ex1.txt outputs files examples/ex1.html and examples/ex1_DEBUG.html FAQ Q: Why is this file so large? A: The goal is to offer pySELL as a single file for easy sharing. Q: Why not package and publish pySELL as a module? A: That's already available! Simply run "pip install pysell" to install it as a package. """ from __future__ import annotations import base64 import datetime import io import json import os import re import sys from typing import Self class SellError(Exception): """exception""" # pylint: disable-next=too-few-public-methods class Lexer: """Scanner that takes a string input and returns a sequence of tokens; one at a time.""" def __init__(self, src: str) -> None: """sets the source to be scanned""" # the source code self.src: str = src # the current token self.token: str = "" # the current input position self.pos: int = 0 # set the first token to self.token self.next() def next(self) -> None: """gets the next token""" # start with a fresh token self.token = "" # loop up to the next special character stop = False while not stop and self.pos < len(self.src): # get the next character from the input ch = self.src[self.pos] # in case that we get a special character (a.k.a delimiter), # we stop if ch in "`^'\"%#*$()[]{}\\,.:;+-*/_!<>\t\n =?|&": # if the current token is not empty, return it for now and # keep the delimiter to the next call of next() if len(self.token) > 0: return # a delimiter stops further advancing in the input stop = True # keep quotes as a single token. Supported quote types are # double quotes ("...") and accent grave quotes (`...`) if ch in '"`': kind = ch # " or ` self.token += ch self.pos += 1 # advance to the quotation end while self.pos < len(self.src): if self.src[self.pos] == kind: break self.token += self.src[self.pos] self.pos += 1 # add the current character to the token self.token += ch self.pos += 1 # # lexer tests # lex = Lexer('a"x"bc 123 *blub* $`hello, world!`123$') # while len(lex.token) > 0: # print(lex.token) # lex.next() # exit(0) # For drawing random variables and to calculate the sample solution, we will # be executing Python code that is embedded in the quiz descriptions. # The evaluation of code will populate local variables. Its data types also # depend on the used libraries. # The following lists cluster some of these types. boolean_types = ["", ""] int_types = [ "", "", "", "", "", ] float_types = [""] # The following list contains all of Pythons basic keywords. These are used # in syntax highlighting in "*_DEBUG.html" files. python_kws = [ "and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "False", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "None", "nonlocal", "not", "or", "pass", "raise", "return", "True", "try", "while", "with", "yield", ] # The following list of identifiers may be in locals of Python source that # uses "sympy". These identifiers must be skipped in the JSON output. skipVariables = [ "acos", "acosh", "acoth", "asin", "asinh", "atan", "atan2", "atanh", "ceil", "ceiling", "cos", "cosh", "cot", "coth", "exp", "floor", "ln", "log", "pi", "round", "sin", "sinc", "sinh", "tan", "transpose", ] # The following function rangeZ is provided as pseudo-intrinsic # function in Python scripts, embedded into the question descriptions. # It is an alternative version for "range", that excludes the zero. # This is beneficial for drawing random numbers of questions for math classes. # (the next line disables a warning, about camel-case function names) # pylint: disable-next=invalid-name def rangeZ(*a): """implements 'range', but excludes the zero""" r = [] if len(a) == 1: r = list(range(a[0])) elif len(a) == 2: r = list(range(a[0], a[1])) elif len(a) == 3: r = list(range(a[0], a[1], a[2])) if 0 in r: r.remove(0) return r # TODO: add comments starting from here class TextNode: """Tree structure for the question text""" def __init__(self, question: Question, type_: str, data: str = "") -> None: self.question: Question = question self.type: str = type_ self.data: str = data self.children: list[TextNode] = [] # pylint: disable-next=too-many-branches,too-many-statements,too-many-locals def parse(self) -> None: """parses text recursively""" if self.type == "root": # Split text by "\n" and remove empty lines. # Keep the trailing "\n" in each line, also keep preceding white spaces text = self.data lines = [line + "\n" for line in text.split("\n") if line.strip() != ""] # as first character, add the type of the line tmp = [] is_code = False for line in lines: if line.startswith("```"): is_code = not is_code continue t = "" if is_code: t = "c" else: line = line.lstrip() t = line[0] if line[0] in "[(-!" else "p" tmp.append(t + line) lines = tmp # join lines that have the same type (except for trailing "\\") tmp = [] last = "" for line in lines: if ( len(last) > 0 and line[0] == last[0] and (not last.endswith("\\\\\n")) ): tmp[-1] += line[1:] else: tmp.append(line) last = line lines = tmp # replace trailing "\\\\\n" by "\n" tmp = [] for line in lines: if line.endswith("\\\\\n"): tmp.append(line[:-3].rstrip() + "\n") else: tmp.append(line) lines = tmp # create children self.children = [] types = { "p": "paragraph", "c": "code-block", "(": "single-choice", "[": "multi-choice", "-": "itemize", "!": "command", } for line in lines: t = types[line[0]] txt = line[1:] self.children.append(TextNode(self.question, t, txt)) # parse children for child in self.children: child.parse() # remove redundant children (marked by type "trash") self.children = [c for c in self.children if c.type != "trash"] elif self.type in ("multi-choice", "single-choice"): options = self.data.strip().split("\n") self.data = "" for option in options: node = TextNode(self.question, "answer") self.children.append(node) text = "" if self.type == "multi-choice": text = "]".join(option.split("]")[1:]).strip() else: text = ")".join(option.split(")")[1:]).strip() if option.startswith("[!"): # conditionally set option # TODO: check, if variable exists and is of type bool var_id = option[2:].split("]")[0] node.children.append(TextNode(self.question, "var", var_id)) else: # statically set option correct = option.startswith("[x]") or option.startswith("(x)") node.children.append( TextNode(self.question, "bool", "true" if correct else "false") ) node.children.append(TextNode(self.question, "paragraph", text)) node.children[1].parse() elif self.type == "itemize": items = self.data.strip().split("\n") self.data = "" for child in items: node = TextNode(self.question, "paragraph", child[1:].strip()) self.children.append(node) node.parse() elif self.type == "paragraph": lex = Lexer(self.data.strip()) self.data = "" self.children.append(self.parse_span(lex)) elif self.type == "command": if ( ".svg" in self.data or ".png" in self.data or ".jpg" in self.data or ".jpeg" in self.data ): self.parse_image() elif "!TETRIS" in self.data: self.parse_game() else: self.question.error += f"Unknown command '{self.data[:5]}'... " elif self.type == "code-block": # do nothing pass else: raise SellError("unimplemented") def parse_image(self) -> Self: """parses an image inclusion, e.g. '!myImage.svg:25'""" # grammar: image = "!" path [ ":" width ]; # width = INT; img_path = self.data[1:].strip() img_width = 100 # percentage if ":" in img_path: tokens = img_path.split(":") img_path = tokens[0].strip() img_width = tokens[1].strip() self.type = "image" self.data = img_path self.children.append(TextNode(self.question, "width", img_width)) def parse_game(self) -> Self: """parses game info, e.g. '!TETRIS x y z'""" # grammar: game = "!TETRIS" ID ID { ID }; # TODO: switch game (yet !TETRIS is not parsed!) tokens = self.data[1:].replace("\n", "").split(" ") variable_ids = tokens[1:] if len(variable_ids) < 2: self.question.error += ( "For TETRIS, at least two variables must be provided. " ) return self.question.game = ["TETRIS", *variable_ids] for var_id in variable_ids: if var_id not in self.question.variables: self.question.error += ( f"Unknown variable '{var_id}' for the TETRIS game. " ) return # draw more randomized question instances num_instances = 20 # TODO! self.question.build_random_instances(num_instances) # filter out instances, where one of the false solutions is equal to # the correct solution valid_instances = [] correct_variable_id = self.question.game[1] incorrect_variable_ids = self.question.game[2:] for instance in self.question.instances: correct_value = instance[correct_variable_id]["v"] is_valid = True for v in incorrect_variable_ids: incorrect_value = instance[v]["v"] if incorrect_value == correct_value: is_valid = False break if is_valid: valid_instances.append(instance) self.question.instances = valid_instances # mark the present node for deletion (we extracted everything needed) self.type = "trash" def parse_span(self, lex: Lexer) -> Self: """parses a span element""" # grammar: span = { item }; # item = bold | math | input | string_var | plus_minus | text; # bold = "*" { item } "*"; # math = "$" { item } "$"; # input = "%" ["!"] var; # string_var = "&" var; # plus_minus = "+" "-"; # text = "\\" | otherwise; span = TextNode(self.question, "span") while lex.token != "": span.children.append(self.parse_item(lex)) return span # pylint: disable-next=too-many-return-statements def parse_item(self, lex: Lexer, math_mode=False) -> Self: """parses a single item of a span/paragraph""" if not math_mode and lex.token == "*": return self.parse_bold_italic(lex) if lex.token == "$": return self.parse_math(lex) if not math_mode and lex.token == "%": return self.parse_input(lex) if not math_mode and lex.token == "&": return self.parse_string_var(lex) if math_mode and lex.token == "+": n = TextNode(self.question, "text", lex.token) lex.next() if lex.token == "-": # "+-" automatically chooses "+" or "-", # depending on the sign or the following variable. # For the variable itself, only its absolute value is used. n.data += lex.token n.type = "plus_minus" lex.next() return n if not math_mode and lex.token == "\\": lex.next() if lex.token == "\\": lex.next() return TextNode(self.question, "text", "
") n = TextNode(self.question, "text", lex.token) lex.next() return n def parse_bold_italic(self, lex: Lexer) -> Self: """parses bold or italic text""" node = TextNode(self.question, "italic") if lex.token == "*": lex.next() if lex.token == "*": node.type = "bold" lex.next() while lex.token not in ("", "*"): node.children.append(self.parse_item(lex)) if lex.token == "*": lex.next() if lex.token == "*": lex.next() return node def parse_math(self, lex: Lexer) -> Self: """parses inline math or display style math""" math = TextNode(self.question, "math") if lex.token == "$": lex.next() if lex.token == "$": math.type = "display-math" lex.next() while lex.token not in ("", "$"): math.children.append(self.parse_item(lex, True)) if lex.token == "$": lex.next() if math.type == "display-math" and lex.token == "$": lex.next() return math def parse_input(self, lex: Lexer) -> Self: """parses an input element field""" input_ = TextNode(self.question, "input") if lex.token == "%": lex.next() if lex.token == "!": input_.type = "input2" lex.next() input_.data = lex.token.strip() lex.next() return input_ def parse_string_var(self, lex: Lexer) -> Self: """parses a string variable""" sv = TextNode(self.question, "string_var") if lex.token == "&": lex.next() sv.data = lex.token.strip() lex.next() return sv def optimize(self) -> Self: """optimizes the current text node recursively. E.g. multiple pure text items are concatenated into a single text node.""" children_opt = [] for c in self.children: opt = c.optimize() if ( opt.type == "text" and opt.data.startswith('"') is False and opt.data.startswith("`") is False and len(children_opt) > 0 and children_opt[-1].type == "text" and children_opt[-1].data.startswith('"') is False and children_opt[-1].data.startswith("`") is False ): children_opt[-1].data += opt.data else: children_opt.append(opt) self.children = children_opt return self def to_dict(self) -> dict: """recursively exports the text node instance to a dictionary""" # t := type, d := data, c := children return { "t": self.type, "d": self.data, "c": list(map(lambda o: o.to_dict(), self.children)), } # pylint: disable-next=too-many-instance-attributes class Question: """Question of the quiz""" def __init__(self, input_dirname: str, src_line_no: int) -> None: self.input_dirname: str = input_dirname self.src_line_no: int = src_line_no self.title: str = "" self.points: int = 1 self.python_src: str = "" self.variables: set[str] = set() self.instances: list[dict] = [] self.text_src: str = "" self.text: TextNode = None self.error: str = "" self.python_src_tokens: set[str] = set() self.game: list[str] = [] # e.g. ["TETRIS", "x", "y"] def build(self) -> None: """builds a question from text and Python sources""" self.build_random_instances() self.text = TextNode(self, "root", self.text_src) self.text.parse() var_occurrences: set[str] = set() self.post_process_text(self.text, False, var_occurrences) self.text.optimize() # pylint: disable-next=too-many-branches def post_process_text( self, node: TextNode, math, var_occurrences: set[str] ) -> None: """post processes the textual part. For example, a semantical check for the existing of referenced variables is applied. Also images are loaded and stringified.""" for c in node.children: self.post_process_text( c, math or node.type == "math" or node.type == "display-math", var_occurrences, ) if node.type == "input": if node.data.startswith('"'): # gap question node.type = "gap" node.data = node.data.replace('"', "") elif node.data in self.variables: var_id = node.data if var_id in var_occurrences: self.error += "It is not allowed to refer to a variable " self.error += "twice or more. Hint: Create a copy of " self.error += f"variable '{var_id}' in Python and ask for " self.error += "the new variable name. " self.error += f"Example code: '{var_id}2 = {var_id}'." self.error += f"Then ask for '%{var_id}2'." else: var_occurrences.add(var_id) elif node.data not in self.variables: # ask for numerical/term variable var_id = node.data self.error += f"Unknown input variable '{var_id}'. " elif node.type == "string_var": var_id = node.data if var_id not in self.variables: self.error += f"Unknown string variable '{var_id}'. " elif node.type == "text": if ( math and len(node.data) >= 2 and node.data.startswith('"') and node.data.endswith('"') ): node.data = node.data[1:-1] elif math and (node.data in self.variables): node.type = "var" elif ( not math and len(node.data) >= 2 and node.data.startswith("`") and node.data.endswith("`") ): node.type = "code" node.data = node.data[1:-1] elif node.type == "image": # TODO: warning, if file size is (too) large path = os.path.join(self.input_dirname, node.data) img_type = os.path.splitext(path)[1][1:] supported_img_types = ["svg", "png", "jpg", "jpeg"] if img_type not in supported_img_types: self.error += f"ERROR: image type '{img_type}' is not supported. " self.error += f"Use one of {', '.join(supported_img_types)}" elif os.path.isfile(path) is False: self.error += "ERROR: cannot find image at path '" + path + '"' else: # load image f = open(path, "rb") data = f.read() f.close() b64 = base64.b64encode(data) node.children.append(TextNode(self, "data", b64.decode("utf-8"))) def float_to_str(self, v: float) -> str: """Converts float to string and cuts '.0' if applicable""" s = str(v) if s.endswith(".0"): return s[:-2] return s def build_random_instances(self, num_instances: int = 5) -> None: """Runs the questions python code to build randomized instances""" self.instances = [] if len(self.python_src) > 0: self.analyze_python_code() instances_str = [] if len(self.error) == 0: for _ in range(0, num_instances): # try to generate instances distinct to prior once # TODO: give up and keep less than 5, if applicable! instance = {} instance_str = "" for _ in range(0, 10): self.error = "" instance = self.run_python_code() instance_str = str(instance) if instance_str not in instances_str: break instances_str.append(instance_str) self.instances.append(instance) # if there is no randomization in the input, then one instance is enough if "rand" not in self.python_src: break if "No module named" in self.error: print("!!! " + self.error) def analyze_python_code(self) -> None: """Get all tokens from Python source code. This is required to filter out all locals from libraries (refer to method run_python_code). Since relevant tokens are only those in the left-hand side of an assignment, we filter out non-assignment statements, as well as the right-hand side of statements. As a side effect, irrelevant symbols of packages are also filtered out (e.g. 'mod', is populated to the locals, when using 'sage.all.power_mod')""" lines = self.python_src.split("\n") for line in lines: if "=" not in line: continue lhs = line.split("=")[0] lex = Lexer(lhs) while len(lex.token) > 0: self.python_src_tokens.add(lex.token) lex.next() # check for forbidden code if "matplotlib" in self.python_src and "show(" in self.python_src: self.error += "Remove the call show(), " self.error += "since this would result in MANY open windows :-)" # pylint: disable-next=too-many-locals,too-many-branches,too-many-statements def run_python_code(self) -> dict: """Runs the questions python code and gathers all local variables.""" local_variables = {} res = {} src = self.python_src try: # pylint: disable-next=exec-used exec(src, globals(), local_variables) # pylint: disable-next=broad-exception-caught except Exception as e: # print(e) self.error += str(e) + ". " return res for local_id, value in local_variables.items(): if local_id in skipVariables or (local_id not in self.python_src_tokens): continue type_str = str(type(value)) if type_str in ("", ""): continue self.variables.add(local_id) t = "" # type v = "" # value if type_str in boolean_types: t = "bool" v = str(value).lower() elif type_str in int_types: t = "int" v = str(value) elif type_str in float_types: t = "float" v = self.float_to_str(value) elif type_str == "": t = "complex" # convert "-0" to "0" real = 0 if value.real == 0 else value.real imag = 0 if value.imag == 0 else value.imag v = self.float_to_str(real) + "," + self.float_to_str(imag) elif type_str == "": t = "vector" v = str(value).replace("[", "").replace("]", "").replace(" ", "") elif type_str == "": t = "set" v = ( str(value) .replace("{", "") .replace("}", "") .replace(" ", "") .replace("j", "i") ) elif type_str == "": # e.g. 'Matrix([[-1, 0, -2], [-1, 5*sin(x)*cos(x)/7, 2], [-1, 2, 0]])' t = "matrix" v = str(value)[7:-1] v = v.replace("**", "^") elif ( type_str == "" or type_str == "" ): # e.g. '[[ -6 -13 -12]\n [-17 -3 -20]\n [-14 -8 -16]\n [ -7 -15 -8]]' t = "matrix" v = re.sub(" +", " ", str(value)) # remove double spaces v = re.sub(r"\[ ", "[", v) # remove space(s) after "[" v = re.sub(r" \]", "]", v) # remove space(s) before "]" v = v.replace(" ", ",").replace("\n", "") elif type_str == "": t = "string" v = value else: t = "term" v = str(value).replace("**", "^") # in case that an ODE is contained in the question # and only one constant ("C1") is present, then substitute # "C1" by "C" if "dsolve" in self.python_src: if "C2" not in v: v = v.replace("C1", "C") # t := type, v := value v = v.replace("I", "i") # reformat sympy imaginary part res[local_id] = {"t": t, "v": v} if len(self.variables) > 50: self.error += "ERROR: Wrong usage of Python imports. Refer to pySELL docs!" # TODO: write the docs... if "matplotlib" in self.python_src and "plt" in local_variables: plt = local_variables["plt"] buf = io.BytesIO() plt.savefig(buf, format="svg", transparent=True) buf.seek(0) svg = buf.read() b64 = base64.b64encode(svg) res["__svg_image"] = {"t": "svg", "v": b64.decode("utf-8")} plt.clf() return res def to_dict(self) -> dict: """recursively exports the question to a dictionary""" return { "title": self.title, "points": self.points, "error": self.error, "is_ode": "dsolve" in self.python_src, # contains an Ordinary Differential Equation? "game": ",".join(self.game), "variables": list(self.variables), "instances": self.instances, "text": self.text.to_dict(), # the following is only relevant for debugging purposes, # i.e. only present in _DEBUG.html "src_line": self.src_line_no, "text_src_html": self.syntax_highlight_text(self.text_src), "python_src_html": self.syntax_highlight_python(self.python_src), "python_src_tokens": list(self.python_src_tokens), } # pylint: disable-next=too-many-branches,too-many-statements def syntax_highlight_text_line(self, src: str) -> str: """syntax highlights a single questions text line and returns the formatted code in HTML format""" html = "" math = False code = False bold = False italic = False n = len(src) i = 0 while i < n: ch = src[i] if ch == " ": html += " " elif not math and ch == "%": html += '' html += ch if i + 1 < n and src[i + 1] == "!": html += src[i + 1] i += 1 html += "" elif ch == "*" and i + 1 < n and src[i + 1] == "*": i += 1 bold = not bold if bold: html += '' html += "**" else: html += "**" html += "" elif ch == "*": italic = not italic if italic: html += '' html += "*" else: html += "*" html += "" elif ch == "$": display_style = False if i + 1 < n and src[i + 1] == "$": display_style = True i += 1 math = not math if math: html += '' html += ch if display_style: html += ch else: html += ch if display_style: html += ch html += "" elif ch == "`": code = not code if code: html += '' html += ch else: html += ch html += "" else: html += ch i += 1 if math: html += "" if code: html += "" if italic: html += "" if bold: html += "" return html def red_colored_span(self, inner_html: str) -> str: """embeds HTML code into a red colored span""" return '' + inner_html + "" def syntax_highlight_text(self, src: str) -> str: """syntax highlights a questions text and returns the formatted code in HTML format""" html = "" lines = src.split("\n") for line in lines: if len(line.strip()) == 0: continue if line.startswith("-"): html += self.red_colored_span("-") line = line[1:].replace(" ", " ") elif line.startswith("["): l1 = line.split("]")[0] + "]".replace(" ", " ") html += self.red_colored_span(l1) line = "]".join(line.split("]")[1:]).replace(" ", " ") elif line.startswith("("): l1 = line.split(")")[0] + ")".replace(" ", " ") html += self.red_colored_span(l1) line = ")".join(line.split(")")[1:]).replace(" ", " ") html += self.syntax_highlight_text_line(line) html += "
" return html def syntax_highlight_python(self, src: str) -> str: """syntax highlights a questions python code and returns the formatted code in HTML format""" lines = src.split("\n") html = "" for line in lines: if len(line.strip()) == 0: continue lex = Lexer(line) while len(lex.token) > 0: if len(lex.token) > 0 and lex.token[0] >= "0" and lex.token[0] <= "9": html += '' html += lex.token + "" elif lex.token in python_kws: html += '' html += lex.token + "" else: html += lex.token.replace(" ", " ") lex.next() html += "
" return html # pylint: disable-next=too-many-branches,too-many-locals def compile_input_file(input_dirname: str, src: str) -> dict: """compiles a SELL input file to JSON""" lang = "en" # language quiz_titel = "" author = "" topic = "" # TODO: not yet processed! info = "" timer = -1 # time limit for the worksheet (default: off) questions = [] question = None parsing_python = False lines = src.split("\n") for line_no, line in enumerate(lines): line = line.split("##")[0] # remove comments line_not_stripped = line line = line.strip() if len(line) == 0: continue if line.startswith("LANG"): lang = line[4:].strip() elif line.startswith("TITLE"): quiz_titel = line[5:].strip() elif line.startswith("AUTHOR"): author = line[6:].strip() elif line.startswith("TOPIC"): topic = line[5:].strip() elif line.startswith("INFO"): info = line[4:].strip() elif line.startswith("TIMER"): timer = int(line[5:].strip()) # TODO: handle parse integer errors elif line.startswith("QUESTION"): question = Question(input_dirname, line_no + 1) questions.append(question) # extract title and points # pattern = TITLE [ "(" INT "pts)" ]; pattern = r"(?P.+?)(?:\s\((?P<num>\d+)\spts\))?$" match = re.match(pattern, line[8:].strip()) title = "" num = None if match: title = match.group("title").strip() num = match.group("num") # This will be None if not present # print(f"Title: {title}, Points: {num}") question.title = title question.points = 1 if num is None else int(num) parsing_python = False elif question is not None: if line.startswith('"""'): parsing_python = not parsing_python else: if parsing_python: question.python_src += ( line_not_stripped.replace("\t", " ") + "\n" ) else: question.text_src += line_not_stripped.rstrip() + "\n" for question in questions: question.build() return { "lang": lang, "title": quiz_titel, "author": author, "date": datetime.datetime.today().strftime("%Y-%m-%d"), "info": info, "timer": timer, "questions": list(map(lambda o: o.to_dict(), questions)), } # the following code is automatically generated and updated by file "build.py" # @begin(html) HTML: str = b"" HTML += b'<!DOCTYPE html> <html> <head> <meta charset="UTF-8" /> <titl' HTML += b'e>pySELL Quiz ' HTML += b'
' HTML += b'