{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#default_exp asciidoc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# fastdoc.asciidoc\n", "> API for the fastdoc convertor" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from fastdoc.imports import *\n", "from fastcore.script import *\n", "from warnings import warn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def markdown_cell(md):\n", " return nbformat.notebooknode.NotebookNode({'cell_type': 'markdown', 'source': md, 'metadata': {}})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def code_cell(code, metadata=None, outputs=None):\n", " return nbformat.notebooknode.NotebookNode(\n", " {'cell_type': 'code',\n", " 'execution_count': None,\n", " 'source': code,\n", " 'metadata': {} if metadata is None else metadata,\n", " 'outputs': [] if outputs is None else outputs})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocessing on the list of all cells" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Removing cells with the flag `# hide`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_hidden = re.compile(r'^\\s*#\\s*(hide|clean)\\s*$', re.MULTILINE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def remove_hidden_cells(cells):\n", " \"Remove cells marked with #hide\"\n", " return [c for c in cells if _re_hidden.search(c['source']) is None]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cells = [code_cell('# hide'), code_cell('lalala'), markdown_cell('lalala\\n# hide')]\n", "test_eq(remove_hidden_cells(cells), [code_cell('lalala')])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Isolating the bits in triple quotes annotated with asciidoc in code cells without outputs so that they are not interpreted by the converter, with adding `##clear##` so that the post-processing removes the `[python]` flag. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def isolate_adoc_blocks(cells):\n", " res = []\n", " for cell in cells:\n", " if cell['cell_type'] == 'markdown' and re.search(r'```\\s*asciidoc', cell['source']) is not None:\n", " lines = cell['source'].split('\\n')\n", " adoc,s,idx = False,0,0\n", " for line in lines:\n", " if re.search(r'^```\\s*asciidoc\\s*$', line) is not None and not adoc:\n", " res.append(markdown_cell('\\n'.join(lines[s:idx])))\n", " adoc,s = True,idx+1\n", " elif re.search(r'^```\\s*$', line) is not None and adoc:\n", " res.append(code_cell('##clear##' + '\\n'.join(lines[s:idx])))\n", " adoc,s = False,idx+1\n", " idx+=1\n", " assert not adoc, f\"Triple-quote asciidoc block not ended in {cell['source']}\"\n", " res.append(markdown_cell('\\n'.join(lines[s:])))\n", " else: res.append(cell)\n", " return res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test = \"\"\"This is some text\n", "```asciidoc\n", "This should be isolated\n", "```\n", "Some other text\n", "```asciidoc\n", "This should also be isolated\n", "```\n", "end\"\"\"\n", "test_eq(isolate_adoc_blocks([markdown_cell(test)]), [\n", " markdown_cell(\"This is some text\"),\n", " code_cell(\"##clear##This should be isolated\"),\n", " markdown_cell(\"Some other text\"),\n", " code_cell(\"##clear##This should also be isolated\"),\n", " markdown_cell(\"end\")\n", "])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocessing individual code cells" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Old way of putting `[WARNING]`, `[NOTE]` or `[TIP]`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "#TODO: remove when all notebooks have been ported to v2\n", "def replace_old_jekylls(cell):\n", " if cell['source'].startswith('jekyll'):\n", " pat1 = re.compile(r\"\"\"jekyll_(.*)\\(['\"].*\"\"\")\n", " pat2 = re.compile(r\"\"\"jekyll_.*\\(['\"]+([\\s\\S]*[^'\"])['\"]+\\)$\"\"\")\n", " jekyll_type = re.match(pat1, cell['source']).groups()[0]\n", " message = re.match(pat2, cell['source']).groups()[0]\n", " inst = {'warn':'WARNING', 'note':'NOTE', 'important':'TIP'}\n", " cell['metadata'] = {}\n", " cell['source'] = f'##clear##[{inst[jekyll_type]}]\\n====\\n{message}\\n===='\n", " cell['outputs'] = []\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(replace_old_jekylls(code_cell('jekyll_warn(\"\"\"Try to convert me!\"\"\")')), \n", " code_cell('##clear##[WARNING]\\n====\\nTry to convert me!\\n===='))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hide input of cells with `hide_input=True` in metadata (extension hide input) or a flag `#hide_input`. Put `##remove##` instead of the code that will be removed during post-processing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_hide_input = re.compile(r'^\\s*#\\s*hide_input\\s*$', re.MULTILINE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def hide_input(cell):\n", " if cell['metadata'].get('hide_input', False) or _re_hide_input.search(cell[\"source\"]) is not None: cell['source'] = '##remove##'\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(hide_input(code_cell('some code', metadata={'hide_input': True}, outputs=[1])), \n", " code_cell('##remove##', metadata={'hide_input': True}, outputs=[1]))\n", "test_eq(hide_input(code_cell('# hide_input\\nsome code', outputs=[1])), \n", " code_cell('##remove##', outputs=[1]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hide outputs of cells with `collapsed=True` in their metadata or a flag #hide_output" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_hide_output = re.compile(r'^\\s*#\\s*hide_output\\s*$', re.MULTILINE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def hide_output(cell):\n", " if cell['metadata'].get('collapsed', False) or _re_hide_output.search(cell[\"source\"]) is not None:\n", " cell['outputs'] = []\n", " cell['source'] = re.sub(r'#\\s*hide_output\\s*\\n', '', cell['source'])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(hide_output(code_cell('some code', metadata={'collapsed': True}, outputs=[1])), \n", " code_cell('some code', metadata={'collapsed': True}))\n", "test_eq(hide_output(code_cell('# hide_output\\nsome code', outputs=[1])), \n", " code_cell('some code'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Replace outputs as `text_html` by `text_plain` (otherwise they are not kept)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def extract_html(cell):\n", " for o in cell['outputs']:\n", " if 'data' in o and 'text/html' in o['data']:\n", " o['data']['text/plain'] = o['data']['text/html']\n", " del o['data']['text/html']\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(extract_html(code_cell('some code', outputs=[{'data': {'text/html': 'some_html'}}])),\n", " code_cell('some code', outputs=[{'data': {'text/plain': 'some_html'}}]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Deal with errors by putting them in plain text" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def split_max_len(text, l):\n", " words = text.split(' ')\n", " line,lines = \"\",[]\n", " for word in words:\n", " if len(line) + len(word) + 1 <= l: line += f' {word}'\n", " else:\n", " lines.append(line)\n", " line = \"\"\n", " if len(line) > 0: lines.append(line)\n", " return \"\\n\".join(lines)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def deal_error(cell):\n", " for i,out in enumerate(cell['outputs']):\n", " if out['output_type'] == 'error':\n", " msg = f\"{out['ename']}: {out['evalue']}\"\n", " cell['outputs'][i] = nbformat.notebooknode.NotebookNode({\n", " 'data': {'text/plain': split_max_len(msg, 81) },\n", " 'execution_count': None,\n", " 'metadata': {},\n", " 'output_type': 'execute_result'})\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(deal_error(code_cell('some code', outputs=[{'output_type': 'error', 'ename': 'Error name', 'evalue': 'This is an error.'}])), \n", " code_cell('some code', outputs = [\n", " {'data': {'text/plain': ' Error name: This is an error.'},\n", " 'execution_count': None,\n", " 'metadata': {},\n", " 'output_type': 'execute_result'}\n", " ]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove interrupted progress bars from the outputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def remove_interrupted_pbars(cell):\n", " outs = []\n", " for out in cell['outputs']:\n", " if 'data' not in out or 'text/plain' not in out['data'] or 'progress-bar-interrupted' not in out['data']['text/plain']:\n", " outs.append(out)\n", " cell['outputs'] = outs\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(remove_interrupted_pbars(\n", " code_cell(\"some code\", outputs = [{'a': 1}, {'data': {'text/plain': 'progress-bar-interrupted'}}, {'b': 2}])),\n", " code_cell(\"some code\", outputs = [{'a': 1}, {'b': 2}]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get metadata for outputs." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def get_cell_meta(cell):\n", " for attr in [\"id\", \"caption\", \"alt\", \"width\"]:\n", " if re.search(r'^\\s*#\\s*' + attr + r'\\s(.*)$', cell[\"source\"], re.MULTILINE) is not None:\n", " cell[\"metadata\"][attr] = re.search(r'^\\s*#\\s*' + attr + r'\\s(.*)$', cell[\"source\"], re.MULTILINE).groups()[0]\n", " cell[\"source\"] = re.sub(r'#\\s*' + attr + r'\\s.*?($|\\n)', '', cell[\"source\"])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(get_cell_meta(code_cell(\"#id 123\\n#caption This is a bear\\nsome code\")), \n", " code_cell(\"some code\", metadata = {'id': '123', 'caption': 'This is a bear'}))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Deal with table captions and refs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def caption_tables(cell):\n", " if 'outputs' not in cell or len(cell['outputs']) == 0: return cell\n", " output = cell['outputs'][0]\n", " if 'data' not in output or 'text/plain' not in output['data']: return cell\n", " text = output['data']['text/plain']\n", " if re.search(r'^<\\s*table\\s+([^>]*>)', text) is None: return cell\n", " table_id = cell['metadata'].get('id', None)\n", " caption = cell['metadata'].get('caption', None)\n", " text_id = '' if table_id is None else f'id=\"{table_id}\" '\n", " text_caption = '' if caption is None else f'\\n {caption}'\n", " output['data']['text/plain'] = re.sub(r'^<\\s*table\\s+([^>]*>)', '\\nTable code'}}])\n", "cell2 = code_cell(\"some code\", \n", " metadata={'id': '123', 'caption': 'a caption'},\n", " outputs=[{'data': {'text/plain': '
\\n \\nTable code'}}])\n", "test_eq(caption_tables(cell), cell2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cell = code_cell(\"#hide_input\\n#id 123\\n#caption a caption\", \n", " metadata={},\n", " outputs=[{'data': {'text/plain': '
a caption
\\nTable code'}, 'output_type':''}])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Wrap text in outputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "TEXT_MAX_WIDTH = 80" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def _wrap_output(output):\n", " if 'text' in output:\n", " lines = ['\\n'.join(textwrap.wrap(l, width=TEXT_MAX_WIDTH, subsequent_indent = ' > ')) for l in output['text'].split('\\n')]\n", " output['text'] = '\\n'.join(lines)\n", " return output\n", " if ('data' not in output or 'text/plain' not in output['data']): return output\n", " text = output['data']['text/plain']\n", " if re.search(r'^<\\s*table\\s*([^>]*>)', text) is not None: return output\n", " lines = ['\\n'.join(textwrap.wrap(l, width=TEXT_MAX_WIDTH, subsequent_indent = ' > ')) for l in text.split('\\n')]\n", " output['data']['text/plain'] = '\\n'.join(lines)\n", " return output" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def wrap_text_outputs(cell):\n", " if 'outputs' not in cell or len(cell['outputs']) == 0: return cell\n", " cell['outputs'] = [_wrap_output(o) for o in cell['outputs']]\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cell = code_cell(\"some code\", \n", " metadata={},\n", " outputs=[{'data': {'text/plain': 'This is a long output'*5}, 'output_type':''},\n", " {'text': 'This is a long output'*5}])\n", "wrapped = 'This is a long outputThis is a long outputThis is a long outputThis is a long\\n > outputThis is a long output'\n", "test_eq(wrap_text_outputs(cell), code_cell(\"some code\", \n", " metadata={},\n", " outputs=[{'data': {'text/plain': wrapped}, 'output_type':''},\n", " {'text': wrapped}]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Test code length" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "CODE_MAX_LEN = 80" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def check_code_len(cell):\n", " lines = cell['source'].split('\\n')\n", " for l in lines:\n", " if len(l) > CODE_MAX_LEN: warn(f\"Found code too long in a cell:\\n{cell['source']}\")\n", " return cell" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Preprocessing individual markdown cells" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Replace \"\\` \\`\" by \\`\\`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def deal_quotes(cell):\n", " cell['source'] = re.sub(r'\"`([^`]*)`\"', r'`\\1`', cell['source'])\n", " cell['source'] = re.sub(r\"'\", r'xxsinglequote', cell['source'])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(deal_quotes(markdown_cell('\"`code`\"')), markdown_cell('`code`'))\n", "test_eq(deal_quotes(markdown_cell('a\"b\"c')), markdown_cell('a\"b\"c'))\n", "test_eq(deal_quotes(markdown_cell(\"a'b'c\")), markdown_cell('axxsinglequotebxxsinglequotec'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Add one title level to every Markdown cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def add_title_level(cell):\n", " if cell['source'].startswith('#'): cell['source'] = '#' + cell['source']\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(add_title_level(markdown_cell('# title')), markdown_cell('## title'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove digits from numbered lists and format labeled lists" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def deal_with_lists(cell):\n", " lines = cell['source'].split('\\n')\n", " for i in range(len(lines)):\n", " lines[i] = re.sub(r'(^\\s*)\\d*\\.(.*)$', r'\\1.\\2xxnewl', lines[i])\n", " lines[i] = re.sub(r'(^\\s*)-\\s(.*::)\\s(.*)$', r'\\2xxnewls\\3xxnewl', lines[i])\n", " cell['source'] = '\\n'.join(lines)\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(deal_with_lists(markdown_cell(\" 1. Item\\n 2. Item\")),\n", " markdown_cell(\" . Itemxxnewl\\n . Itemxxnewl\"))\n", "test_eq(deal_with_lists(markdown_cell(\"- lbl1:: item1\\n- lbl2:: item2\")),\n", " markdown_cell(\"lbl1::xxnewlsitem1xxnewl\\nlbl2::xxnewlsitem2xxnewl\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Catch block quotes and put them in asciidoc blocks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_block_notes = re.compile(r\"\"\"\n", "# Catches any pattern > Title: content with title in group 1 and content in group 2\n", "^\\s*>\\s* # > followed by any number of whitespace\n", "([^:]*) # Catching group for any character but :\n", ":\\s* # : then any number of whitespace\n", "([^\\n]*) # Catching group for anything but a new line character\n", "(?:\\n|$) # Non-catching group for either a new line or the end of the text\n", "\"\"\", re.VERBOSE | re.MULTILINE)\n", "\n", "_re_forgot_column = re.compile(\"^\\s*>[^:]*$\", re.MULTILINE)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Catch Markdown URLs of the form\n", "\n", "```\n", "[link](https://github.com/fastai)\n", "```\n", "\n", "inside asciidoc blocks. Asciidoc expects URLs to be in the following format:\n", "\n", "```\n", "[BLOCK_NAME]\n", "====\n", "This is a block with some https://github.com/fastai[link]\n", "====\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_urls = re.compile(\"\\[(.*?)\\]\\((.*?)\\)\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def replace_jekylls(cell):\n", " block_names = {'warning':'WARNING', 'note':'NOTE', 'important':'TIP', 'tip': 'TIP', 'stop': 'WARNING',\n", " 'jargon':'JARGON', 'question':'QUESTION', 'a': 'ALEXIS', 'j': 'JEREMY', 's': 'SYLVAIN'}\n", " def _rep(m):\n", " typ,text = m.groups()\n", " text = re.sub(_re_urls, r\"\\2[\\1]\", text)\n", " name = block_names.get(typ.lower(), typ.upper())\n", " if name in ['ALEXIS', 'JEREMY', 'SYLVAIN', 'JARGON', 'QUESTION']:\n", " title = name[0]+name[1:].lower()\n", " surro = 'NOTE'\n", " if name=='JARGON':\n", " splits = text.split(': ')\n", " title = f'{title}: {splits[0]}'\n", " text = re.sub(_re_urls, r\"\\2[\\1]\", ': '.join(splits[1:]))\n", " if name in ['ALEXIS', 'JEREMY', 'SYLVAIN']:\n", " title = f\"{title} says\"\n", " surro = 'TIP'\n", " return f'```asciidoc\\n.{title}\\n[{surro}]\\n====\\n{text}\\n====\\n```\\n'\n", " elif len(name) != 0: return f\"```asciidoc\\n[{name}]\\n====\\n{text}\\n====\\n```\\n\"\n", " else: return f\"```asciidoc\\n____\\n{text}\\n____\\n```\\n\"\n", " if _re_forgot_column.search(cell[\"source\"]): warn(\"Found a non-processed block quote, please fix\")\n", " cell[\"source\"] = _re_block_notes.sub(_rep, cell[\"source\"])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(replace_jekylls(markdown_cell(\"text\\n> : This is a block quote\")),\n", " markdown_cell(\"text\\n```asciidoc\\n____\\nThis is a block quote\\n____\\n```\\n\"))\n", "test_eq(replace_jekylls(markdown_cell(\"text\\n> : This is a block quote with a [link](https://github.com/fastai)\")),\n", " markdown_cell(\"text\\n```asciidoc\\n____\\nThis is a block quote with a https://github.com/fastai[link]\\n____\\n```\\n\"))\n", "test_eq(replace_jekylls(markdown_cell(\"text\\n> jargon: term: Some new term\")),\n", " markdown_cell('text\\n```asciidoc\\n.Jargon: term\\n[NOTE]\\n====\\nSome new term\\n====\\n```\\n'))\n", "test_eq(replace_jekylls(markdown_cell(\"text\\n> jargon: term: Some new term with a [link](https://github.com/fastai)\")),\n", " markdown_cell('text\\n```asciidoc\\n.Jargon: term\\n[NOTE]\\n====\\nSome new term with a https://github.com/fastai[link]\\n====\\n```\\n'))\n", "test_warns(lambda: replace_jekylls(markdown_cell(\"text\\n> This is a block quote\")))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_sidebar = re.compile(r'^\\s*#\\s*sidebar\\s(.*)$', re.MULTILINE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def interpret_sidebar(cell):\n", " lines = cell[\"source\"].split(\"\\n\")\n", " if _re_sidebar.search(lines[0]) is not None:\n", " title = _re_sidebar.search(lines[0]).groups()[0]\n", " body = \"\\n\".join(lines[1:])\n", " cell[\"source\"] = f\"```asciidoc\\n.{title}\\n****\\n{body}\\n****\\n```\\n\"\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'cell_type': 'markdown',\n", " 'source': '```asciidoc\\n.My intervention\\n****\\n\\nThis will be changed to a sidebar when converted in Asciidoc.\\n\\nIt can have several lines, contrary to a block quote.\\n****\\n```\\n',\n", " 'metadata': {}}" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test = \"\"\"#sidebar My intervention\n", "\n", "This will be changed to a sidebar when converted in Asciidoc.\n", "\n", "It can have several lines, contrary to a block quote.\"\"\"\n", "interpret_sidebar(markdown_cell(test))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_md_image = re.compile(r\"^(]*>)\", re.MULTILINE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "IMAGE_CONV_MULT = 0.6" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def process_images(cell):\n", " h = HTMLParseAttrs()\n", " def _rep(m):\n", " d = h(m.groups()[0])\n", " attrs = ['\"' + d.get('alt', '') + '\"']\n", " if 'width' in d: attrs.append(str(int(IMAGE_CONV_MULT * int(d['width']))))\n", " if 'width' in d and 'height' in d: attrs.append(str((int(IMAGE_CONV_MULT * int(d['height'])))))\n", " suff = f\"[{', '.join(attrs)}]\"\n", " pid = f\"[[{d['id']}]]\\n\" if 'id' in d else \"\"\n", " caption = f\".{d['caption']}\\n\" if 'caption' in d else \"\"\n", " return f\"```asciidoc\\n{pid}{caption}image::{d['src']}{suff}\\n```\"\n", " cell[\"source\"] = _re_md_image.sub(_rep, cell[\"source\"])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "txt = 'text\\n\"Alternative\\nother text'\n", "test_eq(process_images(markdown_cell(txt)), \n", " markdown_cell('text\\n```asciidoc\\n[[123]]\\n.This is an image\\nimage::puppy.jpg[\"Alternative text\", 420]\\n```\\nother text'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_reference = re.compile(r'<<([^>]*)>>')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def wrap_references(cell):\n", " cell[\"source\"] = _re_reference.sub(r'xxref\\1xxeref', cell[\"source\"])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(wrap_references(markdown_cell(\"There is a reference <> here.\")),\n", " markdown_cell(\"There is a reference xxrefrefxxeref here.\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def extract_attachments(cell, dest):\n", " if not 'attachments' in cell: return cell\n", " mime,img = first(first(cell['attachments'].values()).items())\n", " ext = mime.split('/')[1]\n", " for i in range(99999):\n", " p = dest/(f'att_{i:05d}.{ext}')\n", " if not p.exists(): break\n", " p.write_bytes(b64decode(img))\n", " del(cell['attachments'])\n", " cell['source'] = re.sub('attachment:image.png', str(p), cell['source'])\n", " return cell" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Catch sidebars: sidebars are delimited by header cells like `### Sidebar title` then `### End sidebar`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_sidebar_title = re.compile(r'#+\\s+Sidebar:\\s+(.*)$', re.IGNORECASE)\n", "_re_end_sidebar = re.compile(r'#+\\s+End sidebar', re.IGNORECASE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('Tenacity in deep learning',)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_re_sidebar_title.search('### Sidebar: Tenacity in deep learning').groups()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def sidebar_headers(cell):\n", " cell['source'] = _re_sidebar_title.sub(r'```asciidoc\\n.\\1\\n****\\n```', cell['source'])\n", " cell['source'] = _re_end_sidebar.sub(r'```asciidoc\\n****\\n```', cell['source'])\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(sidebar_headers(markdown_cell(\"### Sidebar: My intervention\")),\n", " markdown_cell(\"```asciidoc\\n.My intervention\\n****\\n```\"))\n", "test_eq(sidebar_headers(markdown_cell(\"### End sidebar\")), markdown_cell(\"```asciidoc\\n****\\n```\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### All preprocessing together" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "code_cell_tfms = [get_cell_meta, replace_old_jekylls, hide_input, hide_output, extract_html, deal_error,\n", " remove_interrupted_pbars, wrap_text_outputs, caption_tables, check_code_len]\n", "md_cell_tfms = [deal_quotes, wrap_references, interpret_sidebar, sidebar_headers, add_title_level, deal_with_lists,\n", " process_images, replace_jekylls]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Raw cells just need to have a new line added at the beginning" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def add_new_line(cell):\n", " cell['source'] = '\\n' + cell['source']\n", " return cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def treat_notebook(nb, dest):\n", " nb['cells'] = remove_hidden_cells(nb['cells'])\n", " tfm_func = {'code': compose(*code_cell_tfms), 'markdown': compose(partial(extract_attachments, dest=dest), *md_cell_tfms),\n", " 'raw': add_new_line}\n", " nb['cells'] = [tfm_func[c['cell_type']](c) for c in nb['cells']]\n", " nb['cells'] = isolate_adoc_blocks(nb['cells'])\n", " return nb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Post-processing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Replace special tokens by their values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def rep_spec_tok(adoc, metadata=None):\n", " adoc = re.sub('xxsinglequote', \"'\", adoc)\n", " adoc = re.sub('xxnewls', '\\n ', adoc)\n", " return re.sub('xxnewl\\s', '\\n', adoc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "nbconvert will flag the code cells with `[ipython3]`, we replace this by `[python]`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def ipython2python(adoc, metadata=None):\n", " return re.sub(r'\\[source, ipython3\\]','[source, python]', adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(ipython2python(\"[source, ipython3]\\n----\\nsome code\\n----\\n\"), \"[source, python]\\n----\\nsome code\\n----\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove empty cells or cells flagged for removal (because of hide_input)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def remove_cells(adoc, metadata=None):\n", " adoc = re.sub(r'\\n\\[source, python\\]\\n----(\\n)*----\\n','', adoc)\n", " return re.sub(r'\\n\\[source, python\\]\\n----\\n##remove##\\n----\\n','', adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(remove_cells(\"lalala\\n[source, python]\\n----\\n\\n----\\n\"), \"lalala\")\n", "test_eq(remove_cells(\"lalala\\n[source, python]\\n----\\n##remove##\\n----\\n\"), \"lalala\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Clear code cells from the code flag when there is a `##clear##` tag." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_clear = re.compile(r'\\[source, python\\]\\n----\\n##clear##(.*?)----\\n', re.DOTALL)\n", "def clear_cells(adoc, metadata=None): return _re_clear.sub(r'\\1', adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(clear_cells(\n", " \"lalala\\n[source, python]\\n----\\n##clear##pure adoc\\n----\\nfoo\\nbla\\n[source, python]\\n----\\n##clear##pure adoc again\\n----\\nbli\"),\n", " \"lalala\\npure adoc\\nfoo\\nbla\\npure adoc again\\nbli\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Format LaTeX equations properly: they arrive either as `latexmath:[$equation$]` or `latexmath:[\\[equation\\]]`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def format_latex(adoc, metadata=None):\n", " #LaTeX equations\n", " adoc = re.sub(r\"latexmath:\\[\\$([^\\$]*)\\$\\]\", r\"latexmath:[\\\\(\\1\\\\)]\", adoc)\n", " return re.sub(r\"latexmath:\\[\\\\\\[(.*)\\\\\\]\\]\", r\"\\n[latexmath]\\n++++\\n\\\\begin{equation}\\n\\1\\n\\\\end{equation}\\n++++\\n\", adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(format_latex(r\"latexmath:[$equation$]\"), r\"latexmath:[\\(equation\\)]\")\n", "test_eq(format_latex(r\"latexmath:[\\[equation\\]]\"), \n", " \"\\n[latexmath]\\n++++\\n\\\\begin{equation}\\nequation\\n\\\\end{equation}\\n++++\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Format image outputs and make sure they point to the right folder." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "_re_image_output = re.compile(r'----\\n!\\[(?:svg|png|jpg)\\]\\((.+)\\)\\n----')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def format_outputs(adoc, metadata=None):\n", " folder = ({} if metadata is None else metadata).get('folder', '.')\n", " def _rep(m):\n", " name = m.groups()[0]\n", " d = metadata[name] if metadata is not None and name in metadata else {}\n", " attrs = ['\"' + d.get('alt', '') + '\"']\n", " if 'width' in d: attrs.append(str(d['width']))\n", " if 'width' in d and 'height' in d: attrs.append(str(d['height']))\n", " suff = f\"[{', '.join(attrs)}]\"\n", " pid = f\"[[{d['id']}]]\\n\" if 'id' in d else \"\"\n", " caption = f\".{d['caption']}\\n\" if 'caption' in d else \"\"\n", " return f\"{pid}{caption}image::{str(folder)}/{name}{suff}\"\n", " return _re_image_output.sub(_rep, adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(format_outputs('----\\n![svg](output.svg)\\n----', {'folder':'path', 'output.svg': {'alt': 'alt'}}),\n", " 'image::path/output.svg[\"alt\"]')\n", "test_eq(format_outputs('----\\n![svg](output.svg)\\n----', {'folder':'path', 'output.svg': {'alt': 'alt', 'width': 100}}),\n", " 'image::path/output.svg[\"alt\", 100]')\n", "test_eq(format_outputs('----\\n![png](output1.png)\\n----'),\n", " 'image::./output1.png[\"\"]')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Deal with quotes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def fix_quotes(adoc, metadata=None):\n", " return re.sub(r\"``([^'`]*)''\", r'\"\\1\"', adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(fix_quotes(\"``double quotes''\"), '\"double quotes\"')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Put back << >> around refs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def fix_references(adoc, metadata=None): return re.sub(r\"xxref(.*)xxeref\", r\"<<\\1>>\", adoc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_eq(fix_references(\"There is a reference xxrefrefxxeref here.\"), \"There is a reference <> here.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Format tables" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def format_tables(adoc, metadata=None):\n", " splits = adoc.split('----')\n", " seps = [''] + ['----' for _ in range(len(splits)-1)] + ['']\n", " for i,s in enumerate(splits):\n", " s = re.sub(r'
[\\s\\S]*', '', s)\n", " s = re.sub('

', '', s)\n", " s = re.sub('

', '', s)\n", " if len(s) > 0 and not s.startswith('\\n'): s = '\\n' + s\n", " if len(s) > 0 and not s.endswith('\\n'): s = s + '\\n'\n", " if s.startswith('\\n