{ "cells": [
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#default_exp asciidoc" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# fastdoc.asciidoc\n", "> API for the fastdoc convertor" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from fastdoc.imports import *\n", "from fastcore.script import *\n", "from warnings import warn" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def markdown_cell(md):\n",
  "    \"Build a markdown `NotebookNode` cell whose source is `md`, with empty metadata\"\n",
  "    fields = {'cell_type': 'markdown', 'source': md, 'metadata': {}}\n",
  "    return nbformat.notebooknode.NotebookNode(fields)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def code_cell(code, metadata=None, outputs=None):\n",
  "    \"Build an unexecuted code `NotebookNode` cell from `code`; `metadata` and `outputs` default to empty\"\n",
  "    if metadata is None: metadata = {}\n",
  "    if outputs is None: outputs = []\n",
  "    fields = {'cell_type': 'code', 'execution_count': None, 'source': code, 'metadata': metadata, 'outputs': outputs}\n",
  "    return nbformat.notebooknode.NotebookNode(fields)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocessing on the list of all cells" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Removing cells with the flag `# hide`" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_hidden = re.compile(r'^\\s*#\\s*(hide|clean)\\s*$', re.MULTILINE)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def remove_hidden_cells(cells):\n",
  "    \"Keep only the `cells` whose source has no `#hide` or `#clean` flag line\"\n",
  "    def _is_visible(cell): return _re_hidden.search(cell['source']) is None\n",
  "    return list(filter(_is_visible, cells))" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cells = [code_cell('# hide'), code_cell('lalala'), markdown_cell('lalala\\n# 
hide')]\n", "test_eq(remove_hidden_cells(cells), [code_cell('lalala')])" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Isolating the bits in triple quotes annotated with asciidoc in code cells without outputs so that they are not interpreted by the converter, with adding `##clear##` so that the post-processing removes the `[python]` flag. " ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def isolate_adoc_blocks(cells):\n",
  "    \"Split markdown cells around fenced `asciidoc` blocks, turning each block into a `##clear##` code cell\"\n",
  "    out = []\n",
  "    for cell in cells:\n",
  "        has_adoc = cell['cell_type'] == 'markdown' and re.search(r'```\\s*asciidoc', cell['source']) is not None\n",
  "        if not has_adoc:\n",
  "            out.append(cell)\n",
  "            continue\n",
  "        lines = cell['source'].split('\\n')\n",
  "        in_block,start = False,0\n",
  "        for i,line in enumerate(lines):\n",
  "            if not in_block and re.search(r'^```\\s*asciidoc\\s*$', line) is not None:\n",
  "                # Opening fence: flush the markdown gathered so far\n",
  "                out.append(markdown_cell('\\n'.join(lines[start:i])))\n",
  "                in_block,start = True,i+1\n",
  "            elif in_block and re.search(r'^```\\s*$', line) is not None:\n",
  "                # Closing fence: the enclosed lines become a code cell flagged for post-processing\n",
  "                out.append(code_cell('##clear##' + '\\n'.join(lines[start:i])))\n",
  "                in_block,start = False,i+1\n",
  "        assert not in_block, f\"Triple-quote asciidoc block not ended in {cell['source']}\"\n",
  "        out.append(markdown_cell('\\n'.join(lines[start:])))\n",
  "    return out" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = \"\"\"This is some text\n", "```asciidoc\n", "This should be isolated\n", "```\n", "Some other text\n", "```asciidoc\n", "This should also be isolated\n", "```\n", "end\"\"\"\n", "test_eq(isolate_adoc_blocks([markdown_cell(test)]), [\n", " markdown_cell(\"This is some text\"),\n", " code_cell(\"##clear##This should be isolated\"),\n", " markdown_cell(\"Some other text\"),\n", " code_cell(\"##clear##This should also be isolated\"),\n", " markdown_cell(\"end\")\n", "])" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocessing individual code cells" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ 
"Old way of putting `[WARNING]`, `[NOTE]` or `[TIP]`" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "#TODO: remove when all notebooks have been ported to v2\n",
  "# `\\w+` (instead of a greedy `.*`) stops the type capture at the function name,\n",
  "# even when the message itself contains `(\"` or `('`\n",
  "_re_jekyll_type = re.compile(r\"\"\"jekyll_(\\w+)\\(['\"]\"\"\")\n",
  "_re_jekyll_msg = re.compile(r\"\"\"jekyll_.*\\(['\"]+([\\s\\S]*[^'\"])['\"]+\\)$\"\"\")\n",
  "_jekyll_admonitions = {'warn':'WARNING', 'note':'NOTE', 'important':'TIP'}\n",
  "\n",
  "def replace_old_jekylls(cell):\n",
  "    \"Convert a legacy `jekyll_warn/note/important(...)` call in `cell` to an asciidoc admonition block\"\n",
  "    src = cell['source']\n",
  "    if not src.startswith('jekyll'): return cell\n",
  "    kind,msg = _re_jekyll_type.match(src),_re_jekyll_msg.match(src)\n",
  "    # A source that merely starts with `jekyll` (a variable name, an unknown helper) is not\n",
  "    # a legacy admonition: return it untouched instead of failing on a missing match or key\n",
  "    if kind is None or msg is None or kind.group(1) not in _jekyll_admonitions: return cell\n",
  "    cell['metadata'] = {}\n",
  "    cell['source'] = f'##clear##[{_jekyll_admonitions[kind.group(1)]}]\\n====\\n{msg.group(1)}\\n===='\n",
  "    cell['outputs'] = []\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(replace_old_jekylls(code_cell('jekyll_warn(\"\"\"Try to convert me!\"\"\")')), \n", " code_cell('##clear##[WARNING]\\n====\\nTry to convert me!\\n===='))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Hide input of cells with `hide_input=True` in metadata (extension hide input) or a flag `#hide_input`. 
Put `##remove##` instead of the code that will be removed during post-processing" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_hide_input = re.compile(r'^\\s*#\\s*hide_input\\s*$', re.MULTILINE)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def hide_input(cell):\n",
  "    \"Replace the source of `cell` by `##remove##` when `hide_input` is set in its metadata or flagged in its source\"\n",
  "    flagged = cell['metadata'].get('hide_input', False) or _re_hide_input.search(cell[\"source\"]) is not None\n",
  "    if flagged: cell['source'] = '##remove##'\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(hide_input(code_cell('some code', metadata={'hide_input': True}, outputs=[1])), \n", " code_cell('##remove##', metadata={'hide_input': True}, outputs=[1]))\n", "test_eq(hide_input(code_cell('# hide_input\\nsome code', outputs=[1])), \n", " code_cell('##remove##', outputs=[1]))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Hide outputs of cells with `collapsed=True` in their metadata or a flag #hide_output" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_hide_output = re.compile(r'^\\s*#\\s*hide_output\\s*$', re.MULTILINE)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def hide_output(cell):\n",
  "    \"Clear the outputs of `cell` when it is `collapsed` in its metadata or flagged with `#hide_output`, and strip the flag\"\n",
  "    if cell['metadata'].get('collapsed', False) or _re_hide_output.search(cell[\"source\"]) is not None:\n",
  "        cell['outputs'] = []\n",
  "        # Only strip flags that sit on their own line (as `_re_hide_output` detects them), and accept\n",
  "        # a flag on the very last line, which has no trailing newline to consume\n",
  "        cell['source'] = re.sub(r'^[ \\t]*#\\s*hide_output\\s*(?:\\n|$)', '', cell['source'], flags=re.MULTILINE)\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(hide_output(code_cell('some code', metadata={'collapsed': True}, outputs=[1])), \n", " code_cell('some code', metadata={'collapsed': True}))\n", "test_eq(hide_output(code_cell('# hide_output\\nsome code', outputs=[1])), \n", " code_cell('some code'))" ] },
 { 
"cell_type": "markdown", "metadata": {}, "source": [ "Replace outputs as `text_html` by `text_plain` (otherwise they are not kept)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def extract_html(cell):\n",
  "    \"Move the `text/html` payload of every output of `cell` to `text/plain`\"\n",
  "    for out in cell['outputs']:\n",
  "        if 'data' not in out or 'text/html' not in out['data']: continue\n",
  "        data = out['data']\n",
  "        data['text/plain'] = data['text/html']\n",
  "        del data['text/html']\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(extract_html(code_cell('some code', outputs=[{'data': {'text/html': 'some_html'}}])),\n", " code_cell('some code', outputs=[{'data': {'text/plain': 'some_html'}}]))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Deal with errors by putting them in plain text" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def split_max_len(text, l):\n",
  "    \"Wrap `text` on spaces into lines of at most `l` characters; every word is preceded by a space\"\n",
  "    line,lines = \"\",[]\n",
  "    for word in text.split(' '):\n",
  "        if len(line) + len(word) + 1 <= l: line += f' {word}'\n",
  "        else:\n",
  "            # Flush the current line and start the next one with the word that did not fit\n",
  "            # (it used to be dropped); skip the flush when the very first word is too long\n",
  "            if len(line) > 0: lines.append(line)\n",
  "            line = f' {word}'\n",
  "    if len(line) > 0: lines.append(line)\n",
  "    return \"\\n\".join(lines)" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def deal_error(cell):\n",
  "    \"Replace every error output of `cell` by an `execute_result` holding `ename: evalue` as wrapped plain text\"\n",
  "    outputs = cell['outputs']\n",
  "    for i,out in enumerate(outputs):\n",
  "        if out['output_type'] != 'error': continue\n",
  "        msg = f\"{out['ename']}: {out['evalue']}\"\n",
  "        outputs[i] = nbformat.notebooknode.NotebookNode({\n",
  "            'data': {'text/plain': split_max_len(msg, 81) },\n",
  "            'execution_count': None,\n",
  "            'metadata': {},\n",
  "            'output_type': 'execute_result'})\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(deal_error(code_cell('some code', outputs=[{'output_type': 'error', 'ename': 'Error name', 'evalue': 'This is an error.'}])), \n", " 
code_cell('some code', outputs = [\n", " {'data': {'text/plain': ' Error name: This is an error.'},\n", " 'execution_count': None,\n", " 'metadata': {},\n", " 'output_type': 'execute_result'}\n", " ]))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Remove interrupted progress bars from the outputs" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def remove_interrupted_pbars(cell):\n",
  "    \"Drop the outputs of `cell` whose plain text contains `progress-bar-interrupted`\"\n",
  "    def _interrupted(out):\n",
  "        return 'data' in out and 'text/plain' in out['data'] and 'progress-bar-interrupted' in out['data']['text/plain']\n",
  "    cell['outputs'] = [out for out in cell['outputs'] if not _interrupted(out)]\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(remove_interrupted_pbars(\n", " code_cell(\"some code\", outputs = [{'a': 1}, {'data': {'text/plain': 'progress-bar-interrupted'}}, {'b': 2}])),\n", " code_cell(\"some code\", outputs = [{'a': 1}, {'b': 2}]))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Get metadata for outputs." 
] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "#export\n",
  "def get_cell_meta(cell):\n",
  "    \"Move `#id`, `#caption`, `#alt` and `#width` flag lines from the source of `cell` to its metadata\"\n",
  "    for attr in [\"id\", \"caption\", \"alt\", \"width\"]:\n",
  "        # `[ \\t]` (not `\\s`) after the name keeps a bare `#id` line from swallowing the next line\n",
  "        flag = r'^[ \\t]*#\\s*' + attr + r'[ \\t]'\n",
  "        found = re.search(flag + r'(.*)$', cell[\"source\"], re.MULTILINE)\n",
  "        if found is None: continue\n",
  "        cell[\"metadata\"][attr] = found.groups()[0]\n",
  "        # Remove whole flag lines only: anchoring on line starts leaves inline comments such as\n",
  "        # `x = 1 #id note` (and the line break after them) untouched\n",
  "        cell[\"source\"] = re.sub(flag + r'.*(?:\\n|$)', '', cell[\"source\"], flags=re.MULTILINE)\n",
  "    return cell" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(get_cell_meta(code_cell(\"#id 123\\n#caption This is a bear\\nsome code\")), \n", " code_cell(\"some code\", metadata = {'id': '123', 'caption': 'This is a bear'}))" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Deal with table captions and refs" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def caption_tables(cell):\n", " if 'outputs' not in cell or len(cell['outputs']) == 0: return cell\n", " output = cell['outputs'][0]\n", " if 'data' not in output or 'text/plain' not in output['data']: return cell\n", " text = output['data']['text/plain']\n", " if re.search(r'^<\\s*table\\s+([^>]*>)', text) is None: return cell\n", " table_id = cell['metadata'].get('id', None)\n", " caption = cell['metadata'].get('caption', None)\n", " text_id = '' if table_id is None else f'id=\"{table_id}\" '\n", " text_caption = '' if caption is None else f'\\n