In [None]:
#|default_exp processors

# processors
> Some processors for NBProcessor

In [None]:
#|export
import ast

from nbdev.read import *
from nbdev.imports import *
from nbdev.process import *
from nbdev.showdoc import *
from nbdev.doclinks import *

from execnb.nbio import *
from execnb.shell import *
from fastcore.imports import *
from fastcore.xtras import *
import sys

In [None]:
#|hide
from fastcore.test import *

## Helpers

In [None]:
#|hide
# Notebook used as the default input for the processor tests on this page
_test_file = '../tests/docs_test.ipynb'

On this page we'll be using this private helper to process a notebook and return the results, to simplify testing:

In [None]:
def _run_procs(procs=None, preprocs=None, postprocs=None, return_nb=False, path=_test_file):
    "Process the notebook at `path` with the given processors; return the notebook if `return_nb`, else its cells joined as one string"
    processor = NBProcessor(path, procs, preprocs=preprocs, postprocs=postprocs)
    processor.process()
    processed = processor.nb
    # The string form is one `str(cell)` per line, which is what the tests on this page search in
    return processed if return_nb else '\n'.join(str(c) for c in processed.cells)

## Cell processors

In [None]:
#|export
def nbflags_(nbp, cell, *args):
    "Store the flags passed as `args` on the notebook as `nbp.nb._nbflags`"
    # `cell` is unused; the flags are kept as a tuple on the notebook object itself
    nbp.nb._nbflags = args

In [None]:
# Processing this test notebook with `nbflags_` should leave both of its flags on the notebook object
nbp = NBProcessor('../tests/01_everything.ipynb', nbflags_)
nbp.process()
test_eq(nbp.nb._nbflags, ('skip_showdoc', 'foobar'))

In [None]:
#|export
def cell_lang(cell):
    "Language stored in `cell.metadata.language`, defaulting to 'python'"
    return nested_attr(cell, 'metadata.language', 'python')

def add_links(cell):
    "Linkify the source of markdown cells and the lines of any `text/markdown` output"
    lookup = NbdevLookup()
    if cell.cell_type == 'markdown': cell.source = lookup.linkify(cell.source)
    for out in cell.get('outputs', []):
        # Only outputs that carry a `data` entry with a `text/markdown` payload are rewritten
        if not hasattr(out, 'data'): continue
        if not hasattr(out['data'], 'text/markdown'): continue
        out.data['text/markdown'] = [lookup.link_line(line) for line in out.data['text/markdown']]

In [None]:
# Expect known names to become markdown links, while `foobar` and `dict2nb` are expected to stay unlinked
res = _run_procs(add_links)
assert "[numpy.array](https://numpy.org/doc/stable/reference/generated/numpy.array.html#numpy.array)" in res
assert "[ModuleMaker](https://nbdev.fast.ai/maker#ModuleMaker) but not a link to `foobar`." in res
assert "A link in a docstring: [ModuleMaker](https://nbdev.fast.ai/maker#ModuleMaker)" in res
assert "And not a link to <code>dict2nb</code>." in res

Gets rid of colors that are streamed from standard out, which can interfere with static site generators:

In [None]:
#|export
_re_ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

def strip_ansi(cell):
    "Remove ANSI escape sequences from the text of the cell's `stdout` outputs."
    for output in cell.get('outputs', []):
        # Outputs not named `stdout` are left untouched
        if output.get('name') != 'stdout': continue
        output['text'] = [_re_ansi_escape.sub('', line) for line in output.text]

In [None]:
# After processing, the ANSI pattern should not match anywhere in the stringified cells
res = _run_procs(strip_ansi)
assert not _re_ansi_escape.findall(res)

In [None]:
#|export
def strip_hidden_metadata(cell):
    '''Drop the "hidden" metadata key from code cells so it doesn't interfere with docs rendering'''
    if cell.cell_type != 'code' or 'metadata' not in cell: return
    # `pop` with a default: a missing "hidden" key is not an error
    cell.metadata.pop('hidden', None)

In [None]:
#|export
def hide_(nbp, cell):
    "Hide cell from output (done by deleting the cell's `source` entry)"
    del cell['source']

In [None]:
# The text of the hidden cell must not appear anywhere in the processed output
res = _run_procs(hide_)
assert 'you will not be able to see this cell at all either' not in res

In [None]:
#|export
def _re_hideline(lang=None):
    "Regex matching a `hide_line` directive at the end of a line, for language `lang`"
    # NOTE(review): `langs` is defined outside this cell; presumably it maps a language to its comment-prefix pattern — confirm
    return re.compile(fr'{langs[lang]}\|\s*hide_line\s*$', re.MULTILINE)

def hide_line(cell):
    "Hide lines of code in code cells with the directive `hide_line` at the end of a line of code"
    if cell.cell_type != 'code': return
    # Build the pattern once per cell and reuse it for the whole-cell check and the per-line filter
    pat = _re_hideline(cell_lang(cell))
    if pat.search(cell.source):
        cell.source = '\n'.join(line for line in cell.source.splitlines() if not pat.search(line))

In [None]:
# Raw strings are used because `res` is built from `str(cell)`, where newlines presumably appear escaped.
# The `b = 3` line is expected to be dropped while the lines before it are kept.
res = _run_procs(hide_line)
assert r"def show():\n    a = 2\n    b = 3" not in res
assert r"def show():\n    a = 2"                in res

In [None]:
#|export
def filter_stream_(nbp, cell, *words):
    "Remove output lines matching any of `words` (used as regex patterns) in `cell` stream output"
    if not words: return
    # The alternation is the same for every line, so build it once instead of once per line
    pattern = '|'.join(words)
    for outp in cell.get('outputs', []):
        if outp.output_type != 'stream': continue
        outp['text'] = [line for line in outp.text if not re.search(pattern, line)]

In [None]:
# The stringified cells should contain these two stream lines back to back (raw string: `res` comes from `str(cell)`)
res = _run_procs(filter_stream_)
exp=r"'A line\n', 'Another line.\n'"
assert exp in res

In [None]:
#|export
_magics_pattern = re.compile(r'^\s*(%%|%).*', re.MULTILINE)

def clean_magics(cell):
    "A preprocessor to remove magic command lines (starting with `%` or `%%`) from code cells"
    if cell.cell_type != 'code': return
    without_magics = _magics_pattern.sub('', cell.source)
    # Strip the leading/trailing whitespace left behind where magic lines used to be
    cell.source = without_magics.strip()

In [None]:
# No `%%` cell magic should remain in any processed cell
res = _run_procs(clean_magics)
assert "%%" not in res

In [None]:
#|export
_langs = 'bash|html|javascript|js|latex|markdown|perl|ruby|sh|svg'
_lang_pattern = re.compile(rf'^\s*%%\s*({_langs})\s*$', flags=re.MULTILINE)

def lang_identify(cell):
    "A preprocessor to identify bash/js/etc cells and mark them appropriately"
    if cell.cell_type != 'code': return
    # The first `%%<lang>` magic line found in the source decides the language
    found = _lang_pattern.search(cell.source)
    if found: cell.metadata.language = found.group(1)

When a code cell starts with a language cell magic such as `%%bash`, `lang_identify` records that language in the cell metadata, so that the code-fence can be changed from `python` to e.g. `bash`:

In [None]:
# At least one cell of the test notebook should end up tagged as bash in its metadata
res = _run_procs(lang_identify)
assert "'language': 'bash'" in res

In [None]:
#|export
_re_hdr_dash = re.compile(r'^#+\s+.*\s+-\s*$', re.MULTILINE)

def rm_header_dash(cell):
    "Remove headings that end with a dash -"
    if not cell.source: return
    text = cell.source.strip()
    # A "dashed heading" here means: the stripped source starts with `#` and ends with ` -`
    dashed_heading = text.startswith('#') and text.endswith(' -')
    if cell.cell_type == 'markdown' and dashed_heading: del cell['source']

In [None]:
# Ordinary text must survive; the two dash-terminated headings must be gone
res = _run_procs(rm_header_dash)
assert 'some words' in res
assert 'A heading to Hide' not in res
assert 'Yet another heading to hide' not in res

In [None]:
#|export
_hide_dirs = {'export','exporti', 'hide','default_exp'}

def rm_export(cell):
    "Remove cells that are exported or hidden"
    directives = cell.directives_
    # Any one directive from `_hide_dirs` is enough to drop the cell's source
    if directives and not _hide_dirs.isdisjoint(directives.keys()): del cell['source']

In [None]:
# The marker text `dontshow` must not appear in the processed output
res = _run_procs(rm_export)
assert 'dontshow' not in res

In [None]:
#|export
_re_showdoc = re.compile(r'^show_doc', re.MULTILINE)
def _is_showdoc(cell):
    "Truthy when `cell` is a code cell with a line that starts with `show_doc`"
    return cell['cell_type'] == 'code' and _re_showdoc.search(cell.source)

def clean_show_doc(cell):
    "Remove ShowDoc input cells"
    # The input is not deleted; an `echo: false` directive line is prepended to the source instead
    if _is_showdoc(cell): cell.source = '#| echo: false\n' + cell.source

In [None]:
#|export
_imps = {ast.Import, ast.ImportFrom}

def _show_docs(trees):
    "Nodes in `trees` that are expression statements calling the bare name `show_doc`"
    def _calls_show_doc(node):
        return isinstance(node,ast.Expr) and nested_attr(node, 'value.func.id')=='show_doc'
    return [node for node in trees if _calls_show_doc(node)]

_show_dirs = {'export','exports'}

def _do_eval(cell):
    "Return True when `cell` should be executed: a python code cell that is exported, has imports, or calls `show_doc`"
    if cell_lang(cell) != 'python': return
    trees = cell.parsed_()
    if cell.cell_type != 'code' or not trees: return
    # An explicit `eval: false` directive always wins
    eval_flag = cell.directives_.get('eval:', [''])[0]
    if eval_flag.lower() == 'false': return
    is_shown_export = cell.directives_.keys() & _show_dirs
    if is_shown_export or filter_ex(trees, risinstance(_imps)) or _show_docs(trees): return True

In [None]:
#|export
class exec_show_docs:
    "Execute cells needed for `show_docs` output, including exported cells and imports"
    def __init__(self, nb):
        self.k = CaptureShell()
        # `show_doc` is only pre-imported into the shell for python notebooks
        if nb_lang(nb) != 'python': return
        self.k.run_cell('from nbdev.showdoc import show_doc')

    def __call__(self, cell):
        # A notebook flagged with `skip_showdoc` is not executed at all
        if 'skip_showdoc' in getattr(cell.nb, '_nbflags', []): return
        if _do_eval(cell): self.k.cell(cell)
        # Surface any exception recorded by the shell, pointing at the current cell
        if self.k.exc: raise Exception(f'Error: cell {cell.idx_}:\n{cell.source}') from self.k.exc[1]

In [None]:
# Smoke test: processing must finish without `exec_show_docs` raising, and produce non-empty output
res = _run_procs(exec_show_docs)
assert res

## Notebook preprocessors

In [None]:
#|export
def populate_language(nb):
    "Insert cell language indicator based on notebook metadata.  You should use this before `lang_identify`"
    # The notebook language is the same for every cell, so look it up once
    lang = nb_lang(nb)
    for cell in nb.cells:
        if cell.cell_type == 'code': cell.metadata.language = lang

In [None]:
#|hide
# Every cell that ends up with a language in its metadata should be tagged `python` for this notebook
res = _run_procs(preprocs=[populate_language], return_nb=True)
assert set(L(res.cells).attrgot('metadata').attrgot('language').filter()) == {'python'}

In [None]:
#|hide
# Integration test: `hide_line` run after `populate_language` on a different test notebook (`APL.ipynb`);
# no `hide_line` directive should remain in the stringified cells
_nb = _run_procs(hide_line, preprocs=[populate_language], path='../tests/APL.ipynb')
assert 'hide_line' not in _nb

In [None]:
#| export
def insert_warning(nb):
    "Insert Autogenerated Warning Into Notebook after the first cell."
    warning_cell = mk_cell("<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->", 'markdown')
    # Index 1 places the warning directly after the first cell
    nb.cells.insert(1, warning_cell)

This preprocessor inserts a warning in the markdown destination that the file is autogenerated.  This warning is inserted in the second cell so we do not interfere with front matter.

In [None]:
# The autogenerated-file warning should be present somewhere in the processed cells
res = _run_procs(preprocs=[insert_warning])
assert "<!-- WARNING: THIS FILE WAS AUTOGENERATED!" in res

In [None]:
# NOTE(review): this looks like a leftover scratch cell — the filter result is discarded and `_tstre`
# is not referenced in the rest of the visible notebook. Confirm it is unused and consider deleting it.
L('foo', None, 'a').filter(lambda x:x == 1)
_tstre = re.compile('a')

In [None]:
#|export
_def_types = (ast.FunctionDef,ast.AsyncFunctionDef,ast.ClassDef)
def _def_names(cell, shown):
    "`showdoc_nm` of each function/class definition in `cell` that is public and whose name is not in `shown`"
    def _needs_doc(node):
        return isinstance(node,_def_types) and node.name not in shown and node.name[0]!='_'
    return [showdoc_nm(node) for node in concat(cell.parsed_()) if _needs_doc(node)]

def _get_nm(tree):
    "Name passed as first positional argument of the call in `tree`: `x` for a plain name, `owner.attr` for an attribute"
    target = tree.value.args[0]
    if hasattr(target, 'id'): owner = target.id
    else: owner = try_attrs(target.value, 'id', 'func', 'attr')
    if isinstance(target, ast.Attribute): return f'{owner}.{target.attr}'
    return target.id

In [None]:
#|export
def add_show_docs(nb):
    "Add show_doc cells after exported cells, unless they are already documented"
    def _is_export(c):
        if not (c.source and c.cell_type=='code'): return False
        return 'export' in c.directives_ or 'exports' in c.directives_

    exported = L(c for c in nb.cells if _is_export(c))
    all_trees = nb.cells.map(NbCell.parsed_).concat()
    # Names that already have an explicit `show_doc(...)` call somewhere in the notebook
    already_shown = {_get_nm(t) for t in _show_docs(all_trees)}
    # Exports are visited last-to-first; presumably so insertions do not shift cells still to be visited
    for cell in reversed(exported):
        if cell_lang(cell) != 'python': 
            raise ValueError(f'{cell.metadata.language} cell attempted export:\n{cell.source}')
        for nm in _def_names(cell, already_shown):
            nb.cells.insert(cell.idx_+1, mk_cell(f'show_doc({nm})'))

In [None]:
# `show_doc` cells are expected for `some_func` and `and_another`, but none should be added for `another_func`
res = _run_procs(preprocs=[populate_language, add_show_docs])
assert "show_doc(some_func)'" in res
assert "show_doc(and_another)'" in res
assert "show_doc(another_func)'" not in res

In [None]:
#|hide
# This test makes sure `@patch` works: the generated cell must use the qualified name `Foo.a_method`
_nb = _run_procs(preprocs=[populate_language, add_show_docs], return_nb=True, path='../tests/showdoc_test.ipynb')
assert r'show_doc(Foo.a_method)' in L(_nb.cells).attrgot('source')

## Notebook postprocessors

In [None]:
#| export
_re_title = re.compile(r'^#\s+(.*)[\n\r]+(?:^>\s+(.*))?', flags=re.MULTILINE)
_re_fm = re.compile(r'^---.*\S+.*---', flags=re.DOTALL)
_re_defaultexp = re.compile(r'^\s*#\|\s*default_exp\s+(\S+)', flags=re.MULTILINE)

def _celltyp(nb, cell_type):
    "Cells of `nb` whose `cell_type` equals `cell_type`"
    return nb.cells.filter(lambda c: c.cell_type == cell_type)

def is_frontmatter(nb):
    "Raw cells of `nb` whose source matches the front matter pattern `_re_fm`"
    def _has_fm(c): return _re_fm.search(c.get('source', ''))
    return _celltyp(nb, 'raw').filter(_has_fm)

def _istitle(cell):
    "True when the cell has a non-empty source in which `_re_title` finds a `# title` line"
    txt = cell.get('source', '')
    return bool(txt and _re_title.search(txt))

In [None]:
#|export
def _default_exp(nb):
    "get the default_exp from a notebook"
    code_src = nb.cells.filter(lambda x: x.cell_type == 'code').attrgot('source')
    # Keep only non-empty sources, search each for the directive, and keep only actual matches
    matches = code_src.filter().map(_re_defaultexp.search).filter()
    found = first(matches)
    return found.group(1) if found else None

In [None]:
# The test notebook is expected to declare `foobar` as its `default_exp`
_testnb = read_nb('../tests/docs_test.ipynb')
test_eq(_default_exp(_testnb), 'foobar')

In [None]:
#|export
def nb_fmdict(nb, remove=True): 
    """Infer the front matter from a notebook's markdown formatting

    Uses the first markdown cell containing a `# title` line (optionally followed by a
    `> description` line) and any `- key: value` lines in that same cell.
    Returns a dict with `title`, optionally `description`, and the key/value pairs, or `{}`
    when no title is found. Values are whatever follows the first `:` verbatim.
    If `remove`, the source of that cell is set to `None`."""
    md_cells = _celltyp(nb, 'markdown').filter(_istitle)
    if not md_cells: return {}
    cell = md_cells[0]
    # `_istitle` selected this cell with `search`, so `search` is used here too: `match` would
    # return None (and fail on `.groups()`) whenever the title is not at the very start of the source
    title,desc = _re_title.search(cell.source).groups()
    if not title: return {}
    # Raw string: `\s` in a normal string literal is an invalid escape sequence
    flags = re.findall(r'^-\s+(.*)', cell.source, flags=re.MULTILINE)
    pairs = [s.split(':', 1) for s in flags if ':' in s]
    fm = merge({k:v for k,v in pairs if k and v}, 
               {'title':title}, {'description':desc} if desc else {})
    if remove: cell['source'] = None
    return fm

In [None]:
# Values are expected with their leading space: everything after the first `:` is kept as-is
_testnb = read_nb('../tests/docs_test.ipynb')
_res = nb_fmdict(_testnb)
test_eq(_res, dict(key1=' value1', key2=' value2', categories=' [c1, c2]', title='a title', description='A description'))

In [None]:
#|hide
# For this notebook no front matter is expected to be inferred, so the result is an empty dict
_testnb2 = read_nb('../tests/directives.ipynb')
test_eq(nb_fmdict(_testnb2), {})

In [None]:
#|export
DEFAULT_FM_KEYS = ['title', 'description', 'author', 'image', 'categories', 'output-file', 'aliases']

def construct_fm(fmdict:dict, keys = DEFAULT_FM_KEYS):
    "construct front matter from a dictionary, but only for `keys`"
    if not fmdict: return None
    # Entries follow the order of `keys`, not the order of `fmdict`
    entries = []
    for k in keys:
        if k in fmdict: entries.append(f"{k}: {fmdict[k]}")
    return '---\n' + '\n'.join(entries) + '\n---'

In [None]:
# Five lines expected: the two `---` fences plus the three entries printed below
_testdict = nb_fmdict(read_nb('../tests/docs_test.ipynb'))
_res = construct_fm(_testdict)
test_eq(len(_res.splitlines()), 5)
print(_res)

---
title: a title
description: A description
categories:  [c1, c2]
---


In [None]:
#|export
def insert_frontmatter(nb, fm_dict:dict, filter_keys:list=DEFAULT_FM_KEYS):
    "Add frontmatter into notebook based on `filter_keys` that exist in `fm_dict`."
    fm = construct_fm(fm_dict, keys=filter_keys)
    if not fm: return
    # The front matter goes in as a new raw cell at the very start of the notebook
    raw_cell = NbCell(0, dict(cell_type='raw', metadata={}, source=fm, directives_={}))
    nb.cells.insert(0, raw_cell)

In [None]:
#|export
def infer_frontmatter(nb):
    "Insert front matter inferred from nbdev styled markdown, unless the notebook already has front matter."
    if is_frontmatter(nb): return
    exp = _default_exp(nb)
    # When a `default_exp` is found, it also determines the `output-file` entry
    extra = {'output-file': exp+'.html'} if exp else {}
    fmdict = merge(nb_fmdict(nb), extra)
    if 'title' in fmdict: insert_frontmatter(nb, fm_dict=fmdict)

In [None]:
# The markdown title must be present before post-processing and gone after it, with the inferred
# entries present instead (raw strings: `_res` is built from `str(cell)`)
_raw_res = _run_procs()
_res = _run_procs(postprocs=infer_frontmatter)
assert '# a title' in _raw_res and '# a title' not in _res
assert r'description: A description\n' in _res
assert r'categories:  [c1, c2]\n' in _res
assert r'output-file: foobar.html\n---' in _res

## Export -

In [None]:
#|eval: false
#|hide
# Run the nbdev export; the directives above mark this cell as hidden and not to be evaluated
from nbdev.doclinks import nbdev_export
nbdev_export()