{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#default_exp asciidoc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fastdoc.asciidoc\n",
    "> API for the fastdoc convertor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "from fastdoc.imports import *\n",
    "from fastcore.script import *\n",
    "from warnings import warn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def markdown_cell(md):\n",
    "    return nbformat.notebooknode.NotebookNode({'cell_type': 'markdown', 'source': md, 'metadata': {}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def code_cell(code, metadata=None, outputs=None):\n",
    "    return nbformat.notebooknode.NotebookNode(\n",
    "        {'cell_type': 'code',\n",
    "         'execution_count': None,\n",
    "         'source': code,\n",
    "         'metadata': {} if metadata is None else metadata,\n",
    "         'outputs': [] if outputs is None else outputs})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocessing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preprocessing on the list of all cells"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Removing cells with the flag `# hide`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_hidden = re.compile(r'^\\s*#\\s*(hide|clean)\\s*$', re.MULTILINE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def remove_hidden_cells(cells):\n",
    "    \"Remove cells marked with #hide\"\n",
    "    return [c for c in cells if _re_hidden.search(c['source']) is None]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cells = [code_cell('# hide'), code_cell('lalala'), markdown_cell('lalala\\n# hide')]\n",
    "test_eq(remove_hidden_cells(cells), [code_cell('lalala')])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Isolating the bits in triple quotes annotated with asciidoc in code cells without outputs so that they are not interpreted by the converter, with adding `##clear##` so that the post-processing removes the `[python]` flag. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def isolate_adoc_blocks(cells):\n",
    "    res = []\n",
    "    for cell in cells:\n",
    "        if cell['cell_type'] == 'markdown' and re.search(r'```\\s*asciidoc', cell['source']) is not None:\n",
    "            lines = cell['source'].split('\\n')\n",
    "            adoc,s,idx = False,0,0\n",
    "            for line in lines:\n",
    "                if re.search(r'^```\\s*asciidoc\\s*$', line) is not None and not adoc:\n",
    "                    res.append(markdown_cell('\\n'.join(lines[s:idx])))\n",
    "                    adoc,s = True,idx+1\n",
    "                elif re.search(r'^```\\s*$', line) is not None and adoc:\n",
    "                    res.append(code_cell('##clear##' + '\\n'.join(lines[s:idx])))\n",
    "                    adoc,s = False,idx+1\n",
    "                idx+=1\n",
    "            assert not adoc, f\"Triple-quote asciidoc block not ended in {cell['source']}\"\n",
    "            res.append(markdown_cell('\\n'.join(lines[s:])))\n",
    "        else: res.append(cell)\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = \"\"\"This is some text\n",
    "```asciidoc\n",
    "This should be isolated\n",
    "```\n",
    "Some other text\n",
    "```asciidoc\n",
    "This should also be isolated\n",
    "```\n",
    "end\"\"\"\n",
    "test_eq(isolate_adoc_blocks([markdown_cell(test)]), [\n",
    "    markdown_cell(\"This is some text\"),\n",
    "    code_cell(\"##clear##This should be isolated\"),\n",
    "    markdown_cell(\"Some other text\"),\n",
    "    code_cell(\"##clear##This should also be isolated\"),\n",
    "    markdown_cell(\"end\")\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preprocessing individual code cells"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Old way of putting `[WARNING]`, `[NOTE]` or `[TIP]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "#TODO: remove when all notebooks have been ported to v2\n",
    "def replace_old_jekylls(cell):\n",
    "    if cell['source'].startswith('jekyll'):\n",
    "        pat1 = re.compile(r\"\"\"jekyll_(.*)\\(['\"].*\"\"\")\n",
    "        pat2 = re.compile(r\"\"\"jekyll_.*\\(['\"]+([\\s\\S]*[^'\"])['\"]+\\)$\"\"\")\n",
    "        jekyll_type = re.match(pat1, cell['source']).groups()[0]\n",
    "        message = re.match(pat2, cell['source']).groups()[0]\n",
    "        inst = {'warn':'WARNING', 'note':'NOTE', 'important':'TIP'}\n",
    "        cell['metadata'] = {}\n",
    "        cell['source'] = f'##clear##[{inst[jekyll_type]}]\\n====\\n{message}\\n===='\n",
    "        cell['outputs'] = []\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(replace_old_jekylls(code_cell('jekyll_warn(\"\"\"Try to convert me!\"\"\")')), \n",
    "        code_cell('##clear##[WARNING]\\n====\\nTry to convert me!\\n===='))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hide input of cells with `hide_input=True` in metadata (extension hide input) or a flag `#hide_input`. Put `##remove##` instead of the code that will be removed during post-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_hide_input = re.compile(r'^\\s*#\\s*hide_input\\s*$', re.MULTILINE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def hide_input(cell):\n",
    "    if cell['metadata'].get('hide_input', False) or _re_hide_input.search(cell[\"source\"]) is not None: cell['source'] = '##remove##'\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(hide_input(code_cell('some code', metadata={'hide_input': True}, outputs=[1])), \n",
    "        code_cell('##remove##', metadata={'hide_input': True}, outputs=[1]))\n",
    "test_eq(hide_input(code_cell('# hide_input\\nsome code', outputs=[1])), \n",
    "        code_cell('##remove##', outputs=[1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hide outputs of cells with `collapsed=True` in their metadata or a flag #hide_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_hide_output = re.compile(r'^\\s*#\\s*hide_output\\s*$', re.MULTILINE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def hide_output(cell):\n",
    "    if cell['metadata'].get('collapsed', False) or _re_hide_output.search(cell[\"source\"]) is not None:\n",
    "        cell['outputs'] = []\n",
    "        cell['source'] = re.sub(r'#\\s*hide_output\\s*\\n', '', cell['source'])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(hide_output(code_cell('some code', metadata={'collapsed': True}, outputs=[1])), \n",
    "        code_cell('some code', metadata={'collapsed': True}))\n",
    "test_eq(hide_output(code_cell('# hide_output\\nsome code', outputs=[1])), \n",
    "        code_cell('some code'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Replace outputs as `text_html` by `text_plain` (otherwise they are not kept)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def extract_html(cell):\n",
    "    for o in cell['outputs']:\n",
    "        if 'data' in o and 'text/html' in o['data']:\n",
    "            o['data']['text/plain'] = o['data']['text/html']\n",
    "            del o['data']['text/html']\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(extract_html(code_cell('some code', outputs=[{'data': {'text/html': 'some_html'}}])),\n",
    "        code_cell('some code', outputs=[{'data': {'text/plain': 'some_html'}}]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Deal with errors by putting them in plain text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def split_max_len(text, l):\n",
    "    words = text.split(' ')\n",
    "    line,lines = \"\",[]\n",
    "    for word in words:\n",
    "        if len(line) + len(word) + 1 <= l: line += f' {word}'\n",
    "        else:\n",
    "            lines.append(line)\n",
    "            line = \"\"\n",
    "    if len(line) > 0: lines.append(line)\n",
    "    return \"\\n\".join(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def deal_error(cell):\n",
    "    for i,out in enumerate(cell['outputs']):\n",
    "        if out['output_type'] == 'error':\n",
    "            msg = f\"{out['ename']}: {out['evalue']}\"\n",
    "            cell['outputs'][i] = nbformat.notebooknode.NotebookNode({\n",
    "                'data': {'text/plain': split_max_len(msg, 81) },\n",
    "                'execution_count': None,\n",
    "                'metadata': {},\n",
    "                'output_type': 'execute_result'})\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(deal_error(code_cell('some code', outputs=[{'output_type': 'error', 'ename': 'Error name', 'evalue': 'This is an error.'}])), \n",
    "        code_cell('some code', outputs = [\n",
    "            {'data': {'text/plain': ' Error name: This is an error.'},\n",
    "             'execution_count': None,\n",
    "             'metadata': {},\n",
    "             'output_type': 'execute_result'}\n",
    "        ]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove interrupted progress bars from the outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def remove_interrupted_pbars(cell):\n",
    "    outs = []\n",
    "    for out in cell['outputs']:\n",
    "        if 'data' not in out or 'text/plain' not in out['data'] or 'progress-bar-interrupted' not in out['data']['text/plain']:\n",
    "            outs.append(out)\n",
    "    cell['outputs'] = outs\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(remove_interrupted_pbars(\n",
    "    code_cell(\"some code\", outputs = [{'a': 1}, {'data': {'text/plain': 'progress-bar-interrupted'}}, {'b': 2}])),\n",
    "        code_cell(\"some code\", outputs = [{'a': 1}, {'b': 2}]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get metadata for outputs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def get_cell_meta(cell):\n",
    "    for attr in [\"id\", \"caption\", \"alt\", \"width\"]:\n",
    "        if re.search(r'^\\s*#\\s*' + attr + r'\\s(.*)$', cell[\"source\"], re.MULTILINE) is not None:\n",
    "            cell[\"metadata\"][attr] = re.search(r'^\\s*#\\s*' + attr + r'\\s(.*)$', cell[\"source\"], re.MULTILINE).groups()[0]\n",
    "            cell[\"source\"] = re.sub(r'#\\s*' + attr + r'\\s.*?($|\\n)', '', cell[\"source\"])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(get_cell_meta(code_cell(\"#id 123\\n#caption This is a bear\\nsome code\")), \n",
    "        code_cell(\"some code\", metadata = {'id': '123', 'caption': 'This is a bear'}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Deal with table captions and refs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def caption_tables(cell):\n",
    "    if 'outputs' not in cell or len(cell['outputs']) == 0: return cell\n",
    "    output = cell['outputs'][0]\n",
    "    if 'data' not in output or 'text/plain' not in output['data']: return cell\n",
    "    text = output['data']['text/plain']\n",
    "    if re.search(r'^<\\s*table\\s+([^>]*>)', text) is None: return cell\n",
    "    table_id = cell['metadata'].get('id', None)\n",
    "    caption = cell['metadata'].get('caption', None)\n",
    "    text_id = '' if table_id is None else f'id=\"{table_id}\" '\n",
    "    text_caption = '' if caption is None else f'\\n  <caption>{caption}</caption>'\n",
    "    output['data']['text/plain'] = re.sub(r'^<\\s*table\\s+([^>]*>)', '<table '+text_id+r'\\1'+text_caption, text)\n",
    "    cell['outputs'][0] = output\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = code_cell(\"some code\", \n",
    "    metadata={'id': '123', 'caption': 'a caption'},\n",
    "    outputs=[{'data': {'text/plain': '<table border=\"1\">\\nTable code'}}])\n",
    "cell2 = code_cell(\"some code\", \n",
    "    metadata={'id': '123', 'caption': 'a caption'},\n",
    "    outputs=[{'data': {'text/plain': '<table id=\"123\" border=\"1\">\\n  <caption>a caption</caption>\\nTable code'}}])\n",
    "test_eq(caption_tables(cell), cell2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = code_cell(\"#hide_input\\n#id 123\\n#caption a caption\", \n",
    "    metadata={},\n",
    "    outputs=[{'data': {'text/plain': '<table border=\"1\">\\nTable code'}, 'output_type':''}])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Wrap text in outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "TEXT_MAX_WIDTH = 80"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _wrap_output(output):\n",
    "    if 'text' in output:\n",
    "        lines = ['\\n'.join(textwrap.wrap(l, width=TEXT_MAX_WIDTH, subsequent_indent = ' > ')) for l in output['text'].split('\\n')]\n",
    "        output['text'] = '\\n'.join(lines)\n",
    "        return output\n",
    "    if ('data' not in output or 'text/plain' not in output['data']): return output\n",
    "    text = output['data']['text/plain']\n",
    "    if re.search(r'^<\\s*table\\s*([^>]*>)', text) is not None: return output\n",
    "    lines = ['\\n'.join(textwrap.wrap(l, width=TEXT_MAX_WIDTH, subsequent_indent = ' > ')) for l in text.split('\\n')]\n",
    "    output['data']['text/plain'] = '\\n'.join(lines)\n",
    "    return output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def wrap_text_outputs(cell):\n",
    "    if 'outputs' not in cell or len(cell['outputs']) == 0: return cell\n",
    "    cell['outputs'] = [_wrap_output(o) for o in cell['outputs']]\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell = code_cell(\"some code\", \n",
    "    metadata={},\n",
    "    outputs=[{'data': {'text/plain': 'This is a long output'*5}, 'output_type':''},\n",
    "             {'text': 'This is a long output'*5}])\n",
    "wrapped = 'This is a long outputThis is a long outputThis is a long outputThis is a long\\n > outputThis is a long output'\n",
    "test_eq(wrap_text_outputs(cell), code_cell(\"some code\", \n",
    "    metadata={},\n",
    "    outputs=[{'data': {'text/plain': wrapped}, 'output_type':''},\n",
    "             {'text': wrapped}]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Test code length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "CODE_MAX_LEN = 80"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def check_code_len(cell):\n",
    "    lines = cell['source'].split('\\n')\n",
    "    for l in lines:\n",
    "        if len(l) > CODE_MAX_LEN: warn(f\"Found code too long in a cell:\\n{cell['source']}\")\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Preprocessing individual markdown cells"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Replace \"\\` \\`\" by \\`\\`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def deal_quotes(cell):\n",
    "    cell['source'] = re.sub(r'\"`([^`]*)`\"', r'`\\1`', cell['source'])\n",
    "    cell['source'] = re.sub(r\"'\", r'xxsinglequote', cell['source'])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(deal_quotes(markdown_cell('\"`code`\"')), markdown_cell('`code`'))\n",
    "test_eq(deal_quotes(markdown_cell('a\"b\"c')), markdown_cell('a\"b\"c'))\n",
    "test_eq(deal_quotes(markdown_cell(\"a'b'c\")), markdown_cell('axxsinglequotebxxsinglequotec'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add one title level to every Markdown cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def add_title_level(cell):\n",
    "    if cell['source'].startswith('#'): cell['source'] = '#' + cell['source']\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(add_title_level(markdown_cell('# title')), markdown_cell('## title'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove digits from numbered lists and format labeled lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def deal_with_lists(cell):\n",
    "    lines = cell['source'].split('\\n')\n",
    "    for i in range(len(lines)):\n",
    "        lines[i] = re.sub(r'(^\\s*)\\d*\\.(.*)$', r'\\1.\\2xxnewl', lines[i])\n",
    "        lines[i] = re.sub(r'(^\\s*)-\\s(.*::)\\s(.*)$', r'\\2xxnewls\\3xxnewl', lines[i])\n",
    "    cell['source'] = '\\n'.join(lines)\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(deal_with_lists(markdown_cell(\"  1. Item\\n  2. Item\")),\n",
    "        markdown_cell(\"  . Itemxxnewl\\n  . Itemxxnewl\"))\n",
    "test_eq(deal_with_lists(markdown_cell(\"- lbl1:: item1\\n- lbl2:: item2\")),\n",
    "        markdown_cell(\"lbl1::xxnewlsitem1xxnewl\\nlbl2::xxnewlsitem2xxnewl\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Catch block quotes and put them in asciidoc blocks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_block_notes = re.compile(r\"\"\"\n",
    "# Catches any pattern > Title: content with title in group 1 and content in group 2\n",
    "^\\s*>\\s*     # > followed by any number of whitespace\n",
    "([^:]*)      # Catching group for any character but :\n",
    ":\\s*         # : then any number of whitespace\n",
    "([^\\n]*)     # Catching group for anything but a new line character\n",
    "(?:\\n|$)     # Non-catching group for either a new line or the end of the text\n",
    "\"\"\", re.VERBOSE | re.MULTILINE)\n",
    "\n",
    "_re_forgot_column = re.compile(\"^\\s*>[^:]*$\", re.MULTILINE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Catch Markdown URLs of the form\n",
    "\n",
    "```\n",
    "[link](https://github.com/fastai)\n",
    "```\n",
    "\n",
    "inside asciidoc blocks. Asciidoc expects URLs to be in the following format:\n",
    "\n",
    "```\n",
    "[BLOCK_NAME]\n",
    "====\n",
    "This is a block with some https://github.com/fastai[link]\n",
    "====\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_urls = re.compile(\"\\[(.*?)\\]\\((.*?)\\)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def replace_jekylls(cell):\n",
    "    block_names = {'warning':'WARNING', 'note':'NOTE', 'important':'TIP', 'tip': 'TIP', 'stop': 'WARNING',\n",
    "                   'jargon':'JARGON', 'question':'QUESTION', 'a': 'ALEXIS', 'j': 'JEREMY', 's': 'SYLVAIN'}\n",
    "    def _rep(m):\n",
    "        typ,text = m.groups()\n",
    "        text = re.sub(_re_urls, r\"\\2[\\1]\", text)\n",
    "        name = block_names.get(typ.lower(), typ.upper())\n",
    "        if name in ['ALEXIS', 'JEREMY', 'SYLVAIN', 'JARGON', 'QUESTION']:\n",
    "            title = name[0]+name[1:].lower()\n",
    "            surro = 'NOTE'\n",
    "            if name=='JARGON':\n",
    "                splits = text.split(': ')\n",
    "                title = f'{title}: {splits[0]}'\n",
    "                text = re.sub(_re_urls, r\"\\2[\\1]\", ': '.join(splits[1:]))\n",
    "            if name in ['ALEXIS', 'JEREMY', 'SYLVAIN']:\n",
    "                title = f\"{title} says\"\n",
    "                surro = 'TIP'\n",
    "            return f'```asciidoc\\n.{title}\\n[{surro}]\\n====\\n{text}\\n====\\n```\\n'\n",
    "        elif len(name) != 0: return f\"```asciidoc\\n[{name}]\\n====\\n{text}\\n====\\n```\\n\"\n",
    "        else:              return f\"```asciidoc\\n____\\n{text}\\n____\\n```\\n\"\n",
    "    if _re_forgot_column.search(cell[\"source\"]): warn(\"Found a non-processed block quote, please fix\")\n",
    "    cell[\"source\"] = _re_block_notes.sub(_rep, cell[\"source\"])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(replace_jekylls(markdown_cell(\"text\\n> : This is a block quote\")),\n",
    "    markdown_cell(\"text\\n```asciidoc\\n____\\nThis is a block quote\\n____\\n```\\n\"))\n",
    "test_eq(replace_jekylls(markdown_cell(\"text\\n> : This is a block quote with a [link](https://github.com/fastai)\")),\n",
    "    markdown_cell(\"text\\n```asciidoc\\n____\\nThis is a block quote with a https://github.com/fastai[link]\\n____\\n```\\n\"))\n",
    "test_eq(replace_jekylls(markdown_cell(\"text\\n> jargon: term: Some new term\")),\n",
    "    markdown_cell('text\\n```asciidoc\\n.Jargon: term\\n[NOTE]\\n====\\nSome new term\\n====\\n```\\n'))\n",
    "test_eq(replace_jekylls(markdown_cell(\"text\\n> jargon: term: Some new term with a [link](https://github.com/fastai)\")),\n",
    "    markdown_cell('text\\n```asciidoc\\n.Jargon: term\\n[NOTE]\\n====\\nSome new term with a https://github.com/fastai[link]\\n====\\n```\\n'))\n",
    "test_warns(lambda: replace_jekylls(markdown_cell(\"text\\n> This is a block quote\")))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_sidebar = re.compile(r'^\\s*#\\s*sidebar\\s(.*)$', re.MULTILINE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def interpret_sidebar(cell):\n",
    "    lines = cell[\"source\"].split(\"\\n\")\n",
    "    if _re_sidebar.search(lines[0]) is not None:\n",
    "        title = _re_sidebar.search(lines[0]).groups()[0]\n",
    "        body = \"\\n\".join(lines[1:])\n",
    "        cell[\"source\"] = f\"```asciidoc\\n.{title}\\n****\\n{body}\\n****\\n```\\n\"\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'cell_type': 'markdown',\n",
       " 'source': '```asciidoc\\n.My intervention\\n****\\n\\nThis will be changed to a sidebar when converted in Asciidoc.\\n\\nIt can have several lines, contrary to a block quote.\\n****\\n```\\n',\n",
       " 'metadata': {}}"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test = \"\"\"#sidebar My intervention\n",
    "\n",
    "This will be changed to a sidebar when converted in Asciidoc.\n",
    "\n",
    "It can have several lines, contrary to a block quote.\"\"\"\n",
    "interpret_sidebar(markdown_cell(test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_md_image = re.compile(r\"^(<img\\ [^>]*>)\", re.MULTILINE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "IMAGE_CONV_MULT = 0.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def process_images(cell):\n",
    "    h = HTMLParseAttrs()\n",
    "    def _rep(m):\n",
    "        d = h(m.groups()[0])\n",
    "        attrs = ['\"' + d.get('alt', '') + '\"']\n",
    "        if 'width' in d: attrs.append(str(int(IMAGE_CONV_MULT * int(d['width']))))\n",
    "        if 'width' in d and 'height' in d: attrs.append(str((int(IMAGE_CONV_MULT * int(d['height'])))))\n",
    "        suff = f\"[{', '.join(attrs)}]\"\n",
    "        pid = f\"[[{d['id']}]]\\n\" if 'id' in d else \"\"\n",
    "        caption = f\".{d['caption']}\\n\" if 'caption' in d else \"\"\n",
    "        return f\"```asciidoc\\n{pid}{caption}image::{d['src']}{suff}\\n```\"\n",
    "    cell[\"source\"] = _re_md_image.sub(_rep, cell[\"source\"])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "txt = 'text\\n<img alt=\"Alternative text\" width=\"700\" caption=\"This is an image\" src=\"puppy.jpg\" id=\"123\"/>\\nother text'\n",
    "test_eq(process_images(markdown_cell(txt)), \n",
    "        markdown_cell('text\\n```asciidoc\\n[[123]]\\n.This is an image\\nimage::puppy.jpg[\"Alternative text\", 420]\\n```\\nother text'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_reference = re.compile(r'<<([^>]*)>>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def wrap_references(cell):\n",
    "    cell[\"source\"] = _re_reference.sub(r'xxref\\1xxeref', cell[\"source\"])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(wrap_references(markdown_cell(\"There is a reference <<ref>> here.\")),\n",
    "        markdown_cell(\"There is a reference xxrefrefxxeref here.\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def extract_attachments(cell, dest):\n",
    "    if not 'attachments' in cell: return cell\n",
    "    mime,img = first(first(cell['attachments'].values()).items())\n",
    "    ext = mime.split('/')[1]\n",
    "    for i in range(99999):\n",
    "        p = dest/(f'att_{i:05d}.{ext}')\n",
    "        if not p.exists(): break\n",
    "    p.write_bytes(b64decode(img))\n",
    "    del(cell['attachments'])\n",
    "    cell['source'] = re.sub('attachment:image.png', str(p), cell['source'])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Catch sidebars: sidebars are delimited by header cells like `### Sidebar title` then `### End sidebar`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_sidebar_title = re.compile(r'#+\\s+Sidebar:\\s+(.*)$', re.IGNORECASE)\n",
    "_re_end_sidebar = re.compile(r'#+\\s+End sidebar', re.IGNORECASE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Tenacity in deep learning',)"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "_re_sidebar_title.search('### Sidebar: Tenacity in deep learning').groups()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def sidebar_headers(cell):\n",
    "    cell['source'] = _re_sidebar_title.sub(r'```asciidoc\\n.\\1\\n****\\n```', cell['source'])\n",
    "    cell['source'] = _re_end_sidebar.sub(r'```asciidoc\\n****\\n```', cell['source'])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(sidebar_headers(markdown_cell(\"### Sidebar: My intervention\")),\n",
    "        markdown_cell(\"```asciidoc\\n.My intervention\\n****\\n```\"))\n",
    "test_eq(sidebar_headers(markdown_cell(\"### End sidebar\")), markdown_cell(\"```asciidoc\\n****\\n```\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### All preprocessing together"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "code_cell_tfms = [get_cell_meta, replace_old_jekylls, hide_input, hide_output, extract_html, deal_error,\n",
    "                  remove_interrupted_pbars, wrap_text_outputs, caption_tables, check_code_len]\n",
    "md_cell_tfms = [deal_quotes, wrap_references, interpret_sidebar, sidebar_headers, add_title_level, deal_with_lists,\n",
    "                process_images, replace_jekylls]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Raw cells just need to have a new line added at the beginning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def add_new_line(cell):\n",
    "    cell['source'] = '\\n' + cell['source']\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def treat_notebook(nb, dest):\n",
    "    nb['cells'] = remove_hidden_cells(nb['cells'])\n",
    "    tfm_func = {'code': compose(*code_cell_tfms), 'markdown': compose(partial(extract_attachments, dest=dest), *md_cell_tfms),\n",
    "                'raw': add_new_line}\n",
    "    nb['cells'] = [tfm_func[c['cell_type']](c) for c in nb['cells']]\n",
    "    nb['cells'] = isolate_adoc_blocks(nb['cells'])\n",
    "    return nb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Post-processing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Replace special tokens by their values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def rep_spec_tok(adoc, metadata=None):\n",
    "    adoc = re.sub('xxsinglequote', \"'\", adoc)\n",
    "    adoc = re.sub('xxnewls', '\\n  ', adoc)\n",
    "    return re.sub('xxnewl\\s', '\\n', adoc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "nbconvert will flag the code cells with `[ipython3]`, we replace this by `[python]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def ipython2python(adoc, metadata=None):\n",
    "    return re.sub(r'\\[source, ipython3\\]','[source, python]', adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(ipython2python(\"[source, ipython3]\\n----\\nsome code\\n----\\n\"), \"[source, python]\\n----\\nsome code\\n----\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Remove empty cells or cells flagged for removal (because of hide_input)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def remove_cells(adoc, metadata=None):\n",
    "    adoc = re.sub(r'\\n\\[source, python\\]\\n----(\\n)*----\\n','', adoc)\n",
    "    return re.sub(r'\\n\\[source, python\\]\\n----\\n##remove##\\n----\\n','', adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(remove_cells(\"lalala\\n[source, python]\\n----\\n\\n----\\n\"), \"lalala\")\n",
    "test_eq(remove_cells(\"lalala\\n[source, python]\\n----\\n##remove##\\n----\\n\"), \"lalala\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Clear code cells from the code flag when there is a `##clear##` tag."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_clear = re.compile(r'\\[source, python\\]\\n----\\n##clear##(.*?)----\\n', re.DOTALL)\n",
    "def clear_cells(adoc, metadata=None): return _re_clear.sub(r'\\1', adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(clear_cells(\n",
    "    \"lalala\\n[source, python]\\n----\\n##clear##pure adoc\\n----\\nfoo\\nbla\\n[source, python]\\n----\\n##clear##pure adoc again\\n----\\nbli\"),\n",
    "        \"lalala\\npure adoc\\nfoo\\nbla\\npure adoc again\\nbli\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Format LaTeX equations properly: they arrive either as `latexmath:[$equation$]` or `latexmath:[\\[equation\\]]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def format_latex(adoc, metadata=None):\n",
    "    #LaTeX equations\n",
    "    adoc = re.sub(r\"latexmath:\\[\\$([^\\$]*)\\$\\]\", r\"latexmath:[\\\\(\\1\\\\)]\", adoc)\n",
    "    return re.sub(r\"latexmath:\\[\\\\\\[(.*)\\\\\\]\\]\", r\"\\n[latexmath]\\n++++\\n\\\\begin{equation}\\n\\1\\n\\\\end{equation}\\n++++\\n\", adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(format_latex(r\"latexmath:[$equation$]\"), r\"latexmath:[\\(equation\\)]\")\n",
    "test_eq(format_latex(r\"latexmath:[\\[equation\\]]\"), \n",
    "        \"\\n[latexmath]\\n++++\\n\\\\begin{equation}\\nequation\\n\\\\end{equation}\\n++++\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Format image outputs and make sure they point to the right folder."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_image_output = re.compile(r'----\\n!\\[(?:svg|png|jpg)\\]\\((.+)\\)\\n----')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def format_outputs(adoc, metadata=None):\n",
    "    folder = ({} if metadata is None else metadata).get('folder', '.')\n",
    "    def _rep(m):\n",
    "        name = m.groups()[0]\n",
    "        d = metadata[name] if metadata is not None and name in metadata else {}\n",
    "        attrs = ['\"' + d.get('alt', '') + '\"']\n",
    "        if 'width' in d: attrs.append(str(d['width']))\n",
    "        if 'width' in d and 'height' in d: attrs.append(str(d['height']))\n",
    "        suff = f\"[{', '.join(attrs)}]\"\n",
    "        pid = f\"[[{d['id']}]]\\n\" if 'id' in d else \"\"\n",
    "        caption = f\".{d['caption']}\\n\" if 'caption' in d else \"\"\n",
    "        return f\"{pid}{caption}image::{str(folder)}/{name}{suff}\"\n",
    "    return _re_image_output.sub(_rep, adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(format_outputs('----\\n![svg](output.svg)\\n----', {'folder':'path', 'output.svg': {'alt': 'alt'}}),\n",
    "        'image::path/output.svg[\"alt\"]')\n",
    "test_eq(format_outputs('----\\n![svg](output.svg)\\n----', {'folder':'path', 'output.svg': {'alt': 'alt', 'width': 100}}),\n",
    "        'image::path/output.svg[\"alt\", 100]')\n",
    "test_eq(format_outputs('----\\n![png](output1.png)\\n----'),\n",
    "        'image::./output1.png[\"\"]')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Deal with quotes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def fix_quotes(adoc, metadata=None):\n",
    "    return re.sub(r\"``([^'`]*)''\", r'\"\\1\"', adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(fix_quotes(\"``double quotes''\"), '\"double quotes\"')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Put back << >> around refs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def fix_references(adoc, metadata=None): return re.sub(r\"xxref(.*)xxeref\", r\"<<\\1>>\", adoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(fix_references(\"There is a reference xxrefrefxxeref here.\"), \"There is a reference <<ref>> here.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Format tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def format_tables(adoc, metadata=None):\n",
    "    splits = adoc.split('----')\n",
    "    seps = [''] + ['----' for _ in range(len(splits)-1)] + ['']\n",
    "    for i,s in enumerate(splits):\n",
    "        s = re.sub(r'<div>[\\s\\S]*<table', '<table', s)\n",
    "        s = re.sub('</div>', '', s)\n",
    "        s = re.sub('<p>', '', s)\n",
    "        s = re.sub('</p>', '', s)\n",
    "        if len(s) > 0 and not s.startswith('\\n'): s = '\\n' + s\n",
    "        if len(s) > 0 and not s.endswith('\\n'):   s = s + '\\n'\n",
    "        if s.startswith('\\n<table'): seps[i],seps[i+1] = '++++','++++'\n",
    "        elif '<table' in s:\n",
    "            res = re.search('<table', s)\n",
    "            begin,end = res.span()\n",
    "            s = s[:begin] + '\\n----\\n\\n++++\\n' + s[begin:]\n",
    "            seps[i+1] = '++++'\n",
    "        splits[i] = s\n",
    "    res = ''\n",
    "    for s,c in zip(seps,splits): res = res + s + c\n",
    "    return res.replace('\\n\\n--------', '')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Just as a personal preference, replace all blocks of three new lines or more by \\n\\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def remove_lines(text, metadata=None):\n",
    "    return re.sub(r'\\n\\n\\n\\n+([^\\n])', r'\\n\\n\\n\\1', text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(remove_lines('a\\n\\n\\n\\n\\n\\nb'), 'a\\n\\n\\nb')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "All together"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "post_process_tfms = [fix_quotes, rep_spec_tok, ipython2python, remove_cells, clear_cells, format_latex,\n",
    "                     format_outputs, fix_references, format_tables, remove_lines]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def post_process(adoc, metadata=None):\n",
    "    if not adoc.startswith('\\n'): adoc = '\\n' + adoc\n",
    "    adoc = re.sub('xxnewl\\s', '\\n', adoc)\n",
    "    adoc = compose(*post_process_tfms)(adoc, metadata=metadata)\n",
    "    return adoc.strip()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exporting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "c = ExportConfig()\n",
    "exporter = ASCIIDocExporter(c)\n",
    "exporter.exclude_input_prompt=True\n",
    "exporter.exclude_output_prompt=True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def add_metadata(nb):\n",
    "    \"Stripping removes metadata used in the conversion.\"\n",
    "    if 'language_info' not in nb['metadata']:\n",
    "        nb['metadata']['language_info'] = {\n",
    "            'codemirror_mode': {'name': 'ipython', 'version': 3},\n",
    "            'file_extension': '.py',\n",
    "            'mimetype': 'text/x-python',\n",
    "            'name': 'python',\n",
    "            'nbconvert_exporter': 'python',\n",
    "            'pygments_lexer': 'ipython3',\n",
    "            'version': '3.7.1'}\n",
    "    return nb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def output_num(n):\n",
    "    m = re.search(r'^output_(\\d*)_', n)\n",
    "    if m is None: return\n",
    "    return int(m.groups()[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(output_num('output_31_0.png'), 31)\n",
    "test_eq(output_num('output_12_0.svg'), 12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "import PIL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "IMAGE_OUT_MULT = 0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "import xml.etree.ElementTree as ET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def get_output_width(name, raw, folder):\n",
    "    if name.endswith('.svg'): return ET.fromstring(raw).attrib['width'].split('.')[0].replace('pt', '')\n",
    "    try: return PIL.Image.open(Path(folder)/name).size[0]\n",
    "    except: return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def convert_nb(fname, dest_path='.', folder=None):\n",
    "    \"Convert a notebook `fname` to html file in `dest_path`.\"\n",
    "    print(f\"Converting {fname}\")\n",
    "    fname = Path(fname)\n",
    "    dest_name = fname.with_suffix('.asciidoc').name\n",
    "    if folder is None: folder = Path(dest_path)/f'{fname.stem}_files'\n",
    "    #folder for images. Clear if exists\n",
    "    if folder.exists(): shutil.rmtree(folder)\n",
    "    os.makedirs(folder, exist_ok=True)\n",
    "\n",
    "    nb = add_metadata(treat_notebook(read_nb(fname), folder))\n",
    "    export = exporter.from_notebook_node(nb)\n",
    "    metadata = {'folder': folder.relative_to(dest_path)}\n",
    "    metadata.update({n: nb[\"cells\"][output_num(n)]['metadata'] for n in export[1]['outputs'].keys() if output_num(n) is not None})\n",
    "    for n,o in export[1]['outputs'].items():\n",
    "        with open(Path(folder)/n, 'wb') as f: f.write(o)\n",
    "        w = metadata[n]['width'] if 'width' in metadata[n] else get_output_width(n, o, folder)\n",
    "        if w is not None: metadata[n]['width'] = str(int(IMAGE_OUT_MULT * int(w)))\n",
    "    with open(f'{dest_path}/{dest_name}','w', encoding=\"utf8\") as f:\n",
    "        f.write(post_process(export[0], metadata))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converting test/_test.ipynb\n"
     ]
    }
   ],
   "source": [
    "dest = Path('test')\n",
    "convert_nb('test/_test.ipynb', dest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#convert_nb('test/_test.ipynb', Path('test'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _copy_images(path, dest_path):\n",
    "    os.makedirs(dest_path, exist_ok=True)\n",
    "    for f in path.iterdir():\n",
    "        if f.is_file(): shutil.copy(f, dest_path/f.name)\n",
    "        if f.is_dir(): _copy_images(f, dest_path/f.name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def copy_images(path, dest_path):\n",
    "    img_folder = dest_path/\"images\"\n",
    "    if img_folder.exists(): shutil.rmtree(img_folder)\n",
    "    _copy_images(path/\"images\", img_folder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dest = Path('..')/'convert_book'\n",
    "# copy_images(Path('book'), dest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _convert1(fname, dest_path='.'):\n",
    "    try: convert_nb(fname, dest_path=dest_path)\n",
    "    except Exception as e:\n",
    "        print(f\"Error in notebook {fname}\")\n",
    "        print(e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@call_parse\n",
    "def fastdoc_convert_all(\n",
    "    path:str='book',  # Path to notebooks\n",
    "    dest_path:str='../convert_book'  # Path to generated asciidoc files\n",
    "):\n",
    "    path,dest_path = Path(path),Path(dest_path)\n",
    "    dest_path.mkdir(parents=True,exist_ok=True)\n",
    "    (path/'images').mkdir(parents=True,exist_ok=True)\n",
    "    nbs = [f for f in path.iterdir() if f.suffix == '.ipynb' and not f.name.startswith('_')]\n",
    "    parallel(_convert1, nbs, dest_path=dest_path)\n",
    "    for f in path.iterdir():\n",
    "        if f.suffix in ['.adoc', '.asciidoc']: shutil.copy(f, dest_path/f.name)\n",
    "    copy_images(path, dest_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#convert_all()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export -"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converted 00_asciidoc.ipynb.\n",
      "Converted 01_clean.ipynb.\n",
      "Converted index.ipynb.\n"
     ]
    }
   ],
   "source": [
    "from nbdev.export import *\n",
    "notebook2script()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "jupytext": {
   "split_at_heading": true
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}