{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#default_exp clean"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fastdoc.clean\n",
    "> Create \"clean\" version of notebooks with all outputs and non-header markdown removed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "from fastdoc.imports import *\n",
    "from fastdoc.asciidoc import copy_images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def count_tk(fname):\n",
    "    nb = read_nb(Path(fname))\n",
    "    c = 0\n",
    "    for cell in nb['cells']: c += len(re.findall('TK', cell['source']))\n",
    "    return c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def find_tokens(path='book'):\n",
    "    path = Path(path)\n",
    "    tks = [(f,count_tk(f)) for f in path.iterdir() if f.suffix == '.ipynb' and not f.name.startswith('_')]\n",
    "    tks.sort(key=lambda o:o[1], reverse=True)\n",
    "    if tks[0]==0: print(\"No TK remaining!\")\n",
    "    else:\n",
    "        for f,tk in tks: \n",
    "            if tk !=0: print(f'{f} still has {tk} TK.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# find_tokens()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_header = re.compile(r'^#+\\s+\\S+')\n",
    "_re_clean  = re.compile(r'^\\s*#\\s*clean\\s*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def is_header_cell(cell): return _re_header.search(cell['source']) is not None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def is_clean_cell(cell): return _re_clean.search(cell['source']) is not None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_re_questionnaire = re.compile(r'^#+\\s+Questionnaire')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def get_stop_idx(cells):\n",
    "    i = 0\n",
    "    while i < len(cells) and _re_questionnaire.search(cells[i]['source']) is None: i+=1\n",
    "    return i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def clean_tags(cell):\n",
    "    if is_header_cell(cell): return cell\n",
    "    for attr in [\"id\", \"caption\", \"alt\", \"width\", \"hide_input\", \"hide_output\", \"clean\"]:\n",
    "        cell[\"source\"] = re.sub(r'#\\s*' + attr + r'.*?($|\\n)', '', cell[\"source\"])\n",
    "    return cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def clean_nb(fname, dest=None):\n",
    "    fname = Path(fname)\n",
    "    nb = read_nb(fname)\n",
    "    i = get_stop_idx(nb['cells'])\n",
    "    nb['cells'] = [clean_tags(c) for j,c in enumerate(nb['cells']) if \n",
    "                   c['cell_type']=='code' or is_header_cell(c) or is_clean_cell(c) or j >= i]\n",
    "    if dest is None: dest = fname.parent/f'{fname.stem}_clean.ipynb'\n",
    "    with open(dest, 'w') as f: nbformat.write(nb, f, version=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# clean_nb('test/_test.ipynb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def clean_all(path='book', dest_path=None):\n",
    "    path = Path(path)\n",
    "    dest_path = Path('..')/'fastbook' if dest_path is None else Path(dest_path)\n",
    "    nbs = [f for f in path.iterdir() if f.suffix == '.ipynb' and not f.name.startswith('_')]\n",
    "    for nb in nbs: \n",
    "        shutil.copy(nb, dest_path/nb.name)\n",
    "        clean_nb(nb, dest=dest_path/'clean'/nb.name)\n",
    "    shutil.copy(path/'utils.py', dest_path/'utils.py')\n",
    "    copy_images(path, dest_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export -"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converted 01_asciidoc.ipynb.\n",
      "Converted 02_clean.ipynb.\n"
     ]
    }
   ],
   "source": [
    "from nbdev.export import *\n",
    "notebook2script()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "jupytext": {
   "split_at_heading": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}