# processors
> Some processors for NBProcessor

import ast

from nbdev.read import *
from nbdev.imports import *
from nbdev.process import *
from nbdev.showdoc import *
from nbdev.doclinks import *

from execnb.nbio import *
from execnb.shell import *
from fastcore.imports import *
from fastcore.xtras import *
import sys "316fccd8", "metadata": {}, "source": [ "## Helpers" ] }, { "cell_type": "code", "execution_count": null, "id": "9d36f27f", "metadata": {}, "outputs": [], "source": [ "#|hide\n", "_test_file = '../tests/docs_test.ipynb'" ] }, { "cell_type": "markdown", "id": "9f23622a", "metadata": {}, "source": [ "On this page we'll be using this private helper to process a notebook and return the results, to simplify testing:" ] }, { "cell_type": "code", "execution_count": null, "id": "dfdcb1c3", "metadata": {}, "outputs": [], "source": [ "def _run_procs(procs=None, preprocs=None, postprocs=None, return_nb=False, path=_test_file):\n", " nbp = NBProcessor(path, procs, preprocs=preprocs, postprocs=postprocs)\n", " nbp.process()\n", " if return_nb: return nbp.nb\n", " return '\\n'.join([str(cell) for cell in nbp.nb.cells])" ] }, { "cell_type": "markdown", "id": "6ee65c5d", "metadata": {}, "source": [ "## Cell processors" ] }, { "cell_type": "code", "execution_count": null, "id": "9dd2bef0", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def nbflags_(nbp, cell, *args):\n", " \"Store the flags in `args` on the notebook as `_nbflags`\"\n", " nbp.nb._nbflags = args" ] }, { "cell_type": "code", "execution_count": null, "id": "2e9c18bb", "metadata": {}, "outputs": [], "source": [ "nbp = NBProcessor('../tests/01_everything.ipynb', nbflags_)\n", "nbp.process()\n", "test_eq(nbp.nb._nbflags, ('skip_showdoc', 'foobar'))" ] }, { "cell_type": "code", "execution_count": null, "id": "03e8fece-bf92-4a1d-9f8b-ef209107ff95", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def cell_lang(cell): return nested_attr(cell, 'metadata.language', 'python')\n", "\n", "def add_links(cell):\n", " \"Add links to markdown cells\"\n", " nl = NbdevLookup()\n", " if cell.cell_type == 'markdown': cell.source = nl.linkify(cell.source)\n", " for o in cell.get('outputs', []):\n", " if hasattr(o, 'data') and hasattr(o['data'], 'text/markdown'):\n", " o.data['text/markdown'] = [nl.link_line(s) for s in 
o.data['text/markdown']]" ] }, { "cell_type": "code", "execution_count": null, "id": "8ec86ad0-b020-4c5c-9154-cc34b95d58d4", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(add_links)\n", "assert \"[numpy.array](https://numpy.org/doc/stable/reference/generated/numpy.array.html#numpy.array)\" in res\n", "assert \"[ModuleMaker](https://nbdev.fast.ai/maker#ModuleMaker) but not a link to `foobar`.\" in res\n", "assert \"A link in a docstring: [ModuleMaker](https://nbdev.fast.ai/maker#ModuleMaker)\" in res\n", "assert \"And not a link to dict2nb.\" in res" ] }, { "cell_type": "markdown", "id": "463b9def-91ad-4b05-92c3-e074954e4faf", "metadata": {}, "source": [ "Gets rid of colors that are streamed from standard out, which can interfere with static site generators:" ] }, { "cell_type": "code", "execution_count": null, "id": "84073fa6-5907-41f2-b8b8-568a96112fbd", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_re_ansi_escape = re.compile(r'\\x1B(?:[@-Z\\\\-_]|\\[[0-?]*[ -/]*[@-~])')\n", "\n", "def strip_ansi(cell):\n", " \"Strip Ansi Characters.\"\n", " for outp in cell.get('outputs', []):\n", " if outp.get('name')=='stdout': outp['text'] = [_re_ansi_escape.sub('', o) for o in outp.text]" ] }, { "cell_type": "code", "execution_count": null, "id": "fc2bed97", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(strip_ansi)\n", "assert not _re_ansi_escape.findall(res)" ] }, { "cell_type": "code", "execution_count": null, "id": "640b7eca", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def strip_hidden_metadata(cell):\n", " '''Strips \"hidden\" metadata property from code cells so it doesn't interfere with docs rendering'''\n", " if cell.cell_type == 'code' and 'metadata' in cell: cell.metadata.pop('hidden',None)" ] }, { "cell_type": "code", "execution_count": null, "id": "995ebd32", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def hide_(nbp, cell):\n", " \"Hide cell from output\"\n", " del(cell['source'])" ] }, { 
"cell_type": "code", "execution_count": null, "id": "74cd3876", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(hide_)\n", "assert 'you will not be able to see this cell at all either' not in res" ] }, { "cell_type": "code", "execution_count": null, "id": "eb415328-044f-44ca-ac77-4f5ae65c2235", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def _re_hideline(lang=None): return re.compile(fr'{langs[lang]}\\|\\s*hide_line\\s*$', re.MULTILINE)\n", "\n", "def hide_line(cell):\n", " \"Hide lines of code in code cells with the directive `hide_line` at the end of a line of code\"\n", " lang = cell_lang(cell)\n", " if cell.cell_type == 'code' and _re_hideline(lang).search(cell.source):\n", " cell.source = '\\n'.join([c for c in cell.source.splitlines() if not _re_hideline(lang).search(c)])" ] }, { "cell_type": "code", "execution_count": null, "id": "20322d06-7adb-4d50-b1d5-b10ec5f5f2ae", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(hide_line)\n", "assert r\"def show():\\n a = 2\\n b = 3\" not in res\n", "assert r\"def show():\\n a = 2\" in res" ] }, { "cell_type": "code", "execution_count": null, "id": "f8ccf1d7", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def filter_stream_(nbp, cell, *words):\n", " \"Remove output lines containing any of `words` in `cell` stream output\"\n", " if not words: return\n", " for outp in cell.get('outputs', []):\n", " if outp.output_type == 'stream':\n", " outp['text'] = [l for l in outp.text if not re.search('|'.join(words), l)]" ] }, { "cell_type": "code", "execution_count": null, "id": "96dfdefb-ba07-4ab1-8a23-18e2b27ad707", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(filter_stream_)\n", "exp=r\"'A line\\n', 'Another line.\\n'\"\n", "assert exp in res" ] }, { "cell_type": "code", "execution_count": null, "id": "848fd452-3d63-4c41-aaa6-e14cbeb9fdcd", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_magics_pattern = re.compile(r'^\\s*(%%|%).*', re.MULTILINE)\n", 
"\n", "def clean_magics(cell):\n", " \"A preprocessor to remove cell magic commands\"\n", " if cell.cell_type == 'code': cell.source = _magics_pattern.sub('', cell.source).strip()" ] }, { "cell_type": "code", "execution_count": null, "id": "6acf27ec", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(clean_magics)\n", "assert \"%%\" not in res" ] }, { "cell_type": "code", "execution_count": null, "id": "97249bda-91dd-42ef-9d36-5efc45fad564", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_langs = 'bash|html|javascript|js|latex|markdown|perl|ruby|sh|svg'\n", "_lang_pattern = re.compile(rf'^\\s*%%\\s*({_langs})\\s*$', flags=re.MULTILINE)\n", "\n", "def lang_identify(cell):\n", " \"A preprocessor to identify bash/js/etc cells and mark them appropriately\"\n", " if cell.cell_type == 'code':\n", " lang = _lang_pattern.findall(cell.source)\n", " if lang: cell.metadata.language = lang[0]" ] }, { "cell_type": "markdown", "id": "36ffc28f-1735-48ac-942a-74c692afa99b", "metadata": {}, "source": [ "When we issue a shell command in a notebook with `!`, we need to change the code-fence from `python` to `bash` and remove the `!`:" ] }, { "cell_type": "code", "execution_count": null, "id": "be9ac553", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(lang_identify)\n", "assert \"'language': 'bash'\" in res" ] }, { "cell_type": "code", "execution_count": null, "id": "93e27a52", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_re_hdr_dash = re.compile(r'^#+\\s+.*\\s+-\\s*$', re.MULTILINE)\n", "\n", "def rm_header_dash(cell):\n", " \"Remove headings that end with a dash -\"\n", " if cell.source:\n", " src = cell.source.strip()\n", " if cell.cell_type == 'markdown' and src.startswith('#') and src.endswith(' -'): del(cell['source'])" ] }, { "cell_type": "code", "execution_count": null, "id": "e4a9ac53", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(rm_header_dash)\n", "assert 'some words' in res\n", "assert 'A heading to Hide' 
not in res\n", "assert 'Yet another heading to hide' not in res" ] }, { "cell_type": "code", "execution_count": null, "id": "75faf537", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_hide_dirs = {'export','exporti', 'hide','default_exp'}\n", "\n", "def rm_export(cell):\n", " \"Remove cells that are exported or hidden\"\n", " if cell.directives_:\n", " if cell.directives_.keys() & _hide_dirs: del(cell['source'])" ] }, { "cell_type": "code", "execution_count": null, "id": "fcb05919", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(rm_export)\n", "assert 'dontshow' not in res" ] }, { "cell_type": "code", "execution_count": null, "id": "2d9a0a30", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_re_showdoc = re.compile(r'^show_doc', re.MULTILINE)\n", "def _is_showdoc(cell): return cell['cell_type'] == 'code' and _re_showdoc.search(cell.source)\n", "\n", "def clean_show_doc(cell):\n", " \"Remove ShowDoc input cells\"\n", " if not _is_showdoc(cell): return\n", " cell.source = '#| echo: false\\n' + cell.source" ] }, { "cell_type": "code", "execution_count": null, "id": "0bc09dc4", "metadata": {}, "outputs": [], "source": [ "#|export\n", "_imps = {ast.Import, ast.ImportFrom}\n", "\n", "def _show_docs(trees):\n", " return [t for t in trees if isinstance(t,ast.Expr) and nested_attr(t, 'value.func.id')=='show_doc']\n", "\n", "_show_dirs = {'export','exports'}\n", "\n", "def _do_eval(cell):\n", " if cell_lang(cell) != 'python': return\n", " trees = cell.parsed_()\n", " if cell.cell_type != 'code' or not trees: return\n", " if cell.directives_.get('eval:', [''])[0].lower() == 'false': return\n", " if cell.directives_.keys() & _show_dirs or filter_ex(trees, risinstance(_imps)): return True\n", " if _show_docs(trees): return True" ] }, { "cell_type": "code", "execution_count": null, "id": "867cf721", "metadata": {}, "outputs": [], "source": [ "#|export\n", "class exec_show_docs:\n", " \"Execute cells needed for `show_docs` output, including 
exported cells and imports\"\n", " def __init__(self, nb):\n", " self.k = CaptureShell()\n", " if nb_lang(nb) == 'python': self.k.run_cell('from nbdev.showdoc import show_doc')\n", "\n", " def __call__(self, cell):\n", " flags = getattr(cell.nb, '_nbflags', [])\n", " if 'skip_showdoc' in flags: return\n", " if _do_eval(cell): self.k.cell(cell)\n", " if self.k.exc: raise Exception(f'Error: cell {cell.idx_}:\\n{cell.source}') from self.k.exc[1]" ] }, { "cell_type": "code", "execution_count": null, "id": "ad5df898", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(exec_show_docs)\n", "assert res" ] }, { "cell_type": "markdown", "id": "3e207415", "metadata": {}, "source": [ "## Notebook preprocessors" ] }, { "cell_type": "code", "execution_count": null, "id": "627a002c-83ba-40f7-a9bf-7a2a45e76ea3", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def populate_language(nb):\n", " \"Insert cell language indicator based on notebook metadata. You should use this before `lang_identify`\"\n", " for cell in nb.cells:\n", " if cell.cell_type == 'code': cell.metadata.language = nb_lang(nb)" ] }, { "cell_type": "code", "execution_count": null, "id": "fb9310ca-7d4e-4c46-9382-aeeea188815c", "metadata": {}, "outputs": [], "source": [ "#|hide\n", "res = _run_procs(preprocs=[populate_language], return_nb=True)\n", "assert set(L(res.cells).attrgot('metadata').attrgot('language').filter()) == {'python'}" ] }, { "cell_type": "code", "execution_count": null, "id": "f7911c84-9cad-4c03-977c-7a2087607bf4", "metadata": {}, "outputs": [], "source": [ "#|hide\n", "# integration test with hide_line\n", "_nb = _run_procs(hide_line, preprocs=[populate_language], path='../tests/APL.ipynb')\n", "assert 'hide_line' not in _nb" ] }, { "cell_type": "code", "execution_count": null, "id": "a28611a4", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def insert_warning(nb):\n", " \"Insert Autogenerated Warning Into Notebook after the first cell.\"\n", " content = \"\"\n", 
" nb.cells.insert(1, mk_cell(content, 'markdown'))" ] }, { "cell_type": "markdown", "id": "fba30307", "metadata": {}, "source": [ "This preprocessor inserts a warning in the markdown destination that the file is autogenerated. This warning is inserted in the second cell so we do not interfere with front matter." ] }, { "cell_type": "code", "execution_count": null, "id": "bbcf41eb", "metadata": {}, "outputs": [], "source": [ "res = _run_procs(preprocs=[insert_warning])\n", "assert \"