{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#default_exp xtras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "from fastcore.imports import *\n",
    "from fastcore.foundation import *\n",
    "from fastcore.basics import *\n",
    "from functools import wraps\n",
    "\n",
    "import mimetypes,pickle,random,json,subprocess,shlex,bz2,gzip,zipfile,tarfile\n",
    "import imghdr,struct,distutils.util,tempfile,time,string,collections\n",
    "from contextlib import contextmanager,ExitStack\n",
    "from pdb import set_trace\n",
    "from datetime import datetime, timezone\n",
    "from timeit import default_timer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastcore.test import *\n",
    "from nbdev.showdoc import *\n",
    "from fastcore.nb_imports import *\n",
    "from time import sleep"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Utility functions\n",
    "\n",
    "> Utility functions used in the fastai library"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Collections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def dict2obj(d):\n",
    "    \"Convert (possibly nested) dicts (or lists of dicts) to `AttrDict`\"\n",
    "    if isinstance(d, (L,list)): return L(d).map(dict2obj)\n",
    "    if not isinstance(d, dict): return d\n",
    "    return AttrDict(**{k:dict2obj(v) for k,v in d.items()})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is a convenience to give you \"dotted\" access to (possibly nested) dictionaries, e.g:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "d1 = dict(a=1, b=dict(c=2,d=3))\n",
    "d2 = dict2obj(d1)\n",
    "test_eq(d2.b.c, 2)\n",
    "test_eq(d2.b['c'], 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It can also be used on lists of dicts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_list_of_dicts = [d1, d1]\n",
    "ds = dict2obj(_list_of_dicts)\n",
    "test_eq(ds[0].b.c, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def obj2dict(d):\n",
    "    \"Convert (possibly nested) AttrDicts (or lists of AttrDicts) to `dict`\"\n",
    "    if isinstance(d, (L,list)): return list(L(d).map(obj2dict))\n",
    "    if not isinstance(d, dict): return d\n",
    "    return dict(**{k:obj2dict(v) for k,v in d.items()})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`obj2dict` can be used to reverse what is done by `dict2obj`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(obj2dict(d2), d1)\n",
    "test_eq(obj2dict(ds), _list_of_dicts) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _repr_dict(d, lvl):\n",
    "    if isinstance(d,dict):\n",
    "        its = [f\"{k}: {_repr_dict(v,lvl+1)}\" for k,v in d.items()]\n",
    "    elif isinstance(d,(list,L)): its = [_repr_dict(o,lvl+1) for o in d]\n",
    "    else: return str(d)\n",
    "    return '\\n' + '\\n'.join([\" \"*(lvl*2) + \"- \" + o for o in its])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def repr_dict(d):\n",
    "    \"Print nested dicts and lists, such as returned by `dict2obj`\"\n",
    "    return _repr_dict(d,0).strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "- a: 1\n",
      "- b: \n",
      "  - c: 2\n",
      "  - d: 3\n"
     ]
    }
   ],
   "source": [
    "print(repr_dict(d2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`repr_dict` is used to display `AttrDict` both with `repr` and in Jupyter Notebooks:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def __repr__(self:AttrDict): return repr_dict(self)\n",
    "\n",
    "AttrDict._repr_markdown_ = AttrDict.__repr__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "- a: 1\n",
      "- b: \n",
      "  - c: 2\n",
      "  - d: 3\n"
     ]
    }
   ],
   "source": [
    "print(repr(d2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "- a: 1\n",
       "- b: \n",
       "  - c: 2\n",
       "  - d: 3"
      ],
      "text/plain": [
       "- a: 1\n",
       "- b: \n",
       "  - c: 2\n",
       "  - d: 3"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def is_listy(x):\n",
    "    \"`isinstance(x, (tuple,list,L,slice,Generator))`\"\n",
    "    return isinstance(x, (tuple,list,L,slice,Generator))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert is_listy((1,))\n",
    "assert is_listy([1])\n",
    "assert is_listy(L([1]))\n",
    "assert is_listy(slice(2))\n",
    "assert not is_listy(array([1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def shufflish(x, pct=0.04):\n",
    "    \"Randomly relocate items of `x` up to `pct` of `len(x)` from their starting location\"\n",
    "    n = len(x)\n",
    "    return L(x[i] for i in sorted(range_of(x), key=lambda o: o+n*(1+random.random()*pct)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def mapped(f, it):\n",
    "    \"map `f` over `it`, unless it's not listy, in which case return `f(it)`\"\n",
    "    return L(it).map(f) if is_listy(it) else f(it)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _f(x,a=1): return x-a\n",
    "\n",
    "test_eq(mapped(_f,1),0)\n",
    "test_eq(mapped(_f,[1,2]),[0,1])\n",
    "test_eq(mapped(_f,(1,)),(0,))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reindexing Collections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "#hide\n",
    "class IterLen:\n",
    "    \"Base class to add iteration to anything supporting `__len__` and `__getitem__`\"\n",
    "    def __iter__(self): return (self[i] for i in range_of(self))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@docs\n",
    "class ReindexCollection(GetAttr, IterLen):\n",
    "    \"Reindexes collection `coll` with indices `idxs` and optional LRU cache of size `cache`\"\n",
    "    _default='coll'\n",
    "    def __init__(self, coll, idxs=None, cache=None, tfm=noop):\n",
    "        if idxs is None: idxs = L.range(coll)\n",
    "        store_attr()\n",
    "        if cache is not None: self._get = functools.lru_cache(maxsize=cache)(self._get)\n",
    "\n",
    "    def _get(self, i): return self.tfm(self.coll[i])\n",
    "    def __getitem__(self, i): return self._get(self.idxs[i])\n",
    "    def __len__(self): return len(self.coll)\n",
    "    def reindex(self, idxs): self.idxs = idxs\n",
    "    def shuffle(self): random.shuffle(self.idxs)\n",
    "    def cache_clear(self): self._get.cache_clear()\n",
    "    def __getstate__(self): return {'coll': self.coll, 'idxs': self.idxs, 'cache': self.cache, 'tfm': self.tfm}\n",
    "    def __setstate__(self, s): self.coll,self.idxs,self.cache,self.tfm = s['coll'],s['idxs'],s['cache'],s['tfm']\n",
    "\n",
    "    _docs = dict(reindex=\"Replace `self.idxs` with idxs\",\n",
    "                shuffle=\"Randomly shuffle indices\",\n",
    "                cache_clear=\"Clear LRU cache\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h4 id=\"ReindexCollection\" class=\"doc_header\"><code>class</code> <code>ReindexCollection</code><a href=\"\" class=\"source_link\" style=\"float:right\">[source]</a></h4>\n",
       "\n",
       "> <code>ReindexCollection</code>(**`coll`**, **`idxs`**=*`None`*, **`cache`**=*`None`*, **`tfm`**=*`noop`*) :: [`GetAttr`](/basics.html#GetAttr)\n",
       "\n",
       "Reindexes collection `coll` with indices `idxs` and optional LRU cache of size `cache`"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(ReindexCollection, title_level=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is useful when constructing batches or organizing data in a particular manner (i.e. for deep learning).  This class is primarly used in organizing data for language models in fastai."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can supply a custom index upon instantiation with the `idxs` argument, or you can call the `reindex` method to supply a new index for your collection.\n",
    "\n",
    "Here is how you can reindex a list such that the elements are reversed:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['e', 'd', 'c', 'b', 'a']"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rc=ReindexCollection(['a', 'b', 'c', 'd', 'e'], idxs=[4,3,2,1,0])\n",
    "list(rc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Alternatively, you can use the `reindex` method:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h6 id=\"ReindexCollection.reindex\" class=\"doc_header\"><code>ReindexCollection.reindex</code><a href=\"__main__.py#L14\" class=\"source_link\" style=\"float:right\">[source]</a></h6>\n",
       "\n",
       "> <code>ReindexCollection.reindex</code>(**`idxs`**)\n",
       "\n",
       "Replace `self.idxs` with idxs"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(ReindexCollection.reindex, title_level=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['e', 'd', 'c', 'b', 'a']"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rc=ReindexCollection(['a', 'b', 'c', 'd', 'e'])\n",
    "rc.reindex([4,3,2,1,0])\n",
    "list(rc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can optionally specify a LRU cache, which uses [functools.lru_cache](https://docs.python.org/3/library/functools.html#functools.lru_cache) upon instantiation:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CacheInfo(hits=1, misses=1, maxsize=2, currsize=1)"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sz = 50\n",
    "t = ReindexCollection(L.range(sz), cache=2)\n",
    "\n",
    "#trigger a cache hit by indexing into the same element multiple times\n",
    "t[0], t[0]\n",
    "t._get.cache_info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can optionally clear the LRU cache by calling the `cache_clear` method:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h5 id=\"ReindexCollection.cache_clear\" class=\"doc_header\"><code>ReindexCollection.cache_clear</code><a href=\"__main__.py#L16\" class=\"source_link\" style=\"float:right\">[source]</a></h5>\n",
       "\n",
       "> <code>ReindexCollection.cache_clear</code>()\n",
       "\n",
       "Clear LRU cache"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(ReindexCollection.cache_clear, title_level=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CacheInfo(hits=0, misses=0, maxsize=2, currsize=0)"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sz = 50\n",
    "t = ReindexCollection(L.range(sz), cache=2)\n",
    "\n",
    "#trigger a cache hit by indexing into the same element multiple times\n",
    "t[0], t[0]\n",
    "t.cache_clear()\n",
    "t._get.cache_info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h5 id=\"ReindexCollection.shuffle\" class=\"doc_header\"><code>ReindexCollection.shuffle</code><a href=\"__main__.py#L15\" class=\"source_link\" style=\"float:right\">[source]</a></h5>\n",
       "\n",
       "> <code>ReindexCollection.shuffle</code>()\n",
       "\n",
       "Randomly shuffle indices"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(ReindexCollection.shuffle, title_level=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that an ordered index is automatically constructed for the data structure even if one is not supplied."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['c', 'f', 'e', 'g', 'h', 'b', 'd', 'a']"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rc=ReindexCollection(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])\n",
    "rc.shuffle()\n",
    "list(rc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sz = 50\n",
    "t = ReindexCollection(L.range(sz), cache=2)\n",
    "test_eq(list(t), range(sz))\n",
    "test_eq(t[sz-1], sz-1)\n",
    "test_eq(t._get.cache_info().hits, 1)\n",
    "t.shuffle()\n",
    "test_eq(t._get.cache_info().hits, 1)\n",
    "test_ne(list(t), range(sz))\n",
    "test_eq(set(t), set(range(sz)))\n",
    "t.cache_clear()\n",
    "test_eq(t._get.cache_info().hits, 0)\n",
    "test_eq(t.count(0), 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#hide\n",
    "#Test ReindexCollection pickles\n",
    "t1 = pickle.loads(pickle.dumps(t))\n",
    "test_eq(list(t), list(t1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## File Functions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Utilities (other than extensions to Pathlib.Path) for dealing with IO."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# export\n",
    "@contextmanager\n",
    "def maybe_open(f, mode='r', **kwargs):\n",
    "    \"Context manager: open `f` if it is a path (and close on exit)\"\n",
    "    if isinstance(f, (str,os.PathLike)):\n",
    "        with open(f, mode, **kwargs) as f: yield f\n",
    "    else: yield f"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is useful for functions where you want to accept a path *or* file. `maybe_open` will not close your file handle if you pass one in."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _f(fn):\n",
    "    with maybe_open(fn) as f: return f.encoding\n",
    "\n",
    "fname = '00_test.ipynb'\n",
    "sys_encoding = 'cp1252' if sys.platform == 'win32' else 'UTF-8'\n",
    "test_eq(_f(fname), sys_encoding)\n",
    "with open(fname) as fh: test_eq(_f(fh), sys_encoding)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, we can use this to reimplement [`imghdr.what`](https://docs.python.org/3/library/imghdr.html#imghdr.what) from the Python standard library, which is [written in Python 3.9](https://github.com/python/cpython/blob/3.9/Lib/imghdr.py#L11) as:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def what(file, h=None):\n",
    "    f = None\n",
    "    try:\n",
    "        if h is None:\n",
    "            if isinstance(file, (str,os.PathLike)):\n",
    "                f = open(file, 'rb')\n",
    "                h = f.read(32)\n",
    "            else:\n",
    "                location = file.tell()\n",
    "                h = file.read(32)\n",
    "                file.seek(location)\n",
    "        for tf in imghdr.tests:\n",
    "            res = tf(h, f)\n",
    "            if res: return res\n",
    "    finally:\n",
    "        if f: f.close()\n",
    "    return None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's an example of the use of this function:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'jpeg'"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fname = 'images/puppy.jpg'\n",
    "what(fname)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With `maybe_open`, `Self`, and `L.map_first`, we can rewrite this in a much more concise and (in our opinion) clear way:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def what(file, h=None):\n",
    "    if h is None:\n",
    "        with maybe_open(file, 'rb') as f: h = f.peek(32)\n",
    "    return L(imghdr.tests).map_first(Self(h,file))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "...and we can check that it still works:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(what(fname), 'jpeg')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "...along with the version passing a file handle:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(fname,'rb') as f: test_eq(what(f), 'jpeg')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "...along with the `h` parameter version:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(fname,'rb') as f: test_eq(what(None, h=f.read(32)), 'jpeg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _jpg_size(f):\n",
    "        size,ftype = 2,0\n",
    "        while not 0xc0 <= ftype <= 0xcf:\n",
    "            f.seek(size, 1)\n",
    "            byte = f.read(1)\n",
    "            while ord(byte) == 0xff: byte = f.read(1)\n",
    "            ftype = ord(byte)\n",
    "            size = struct.unpack('>H', f.read(2))[0] - 2\n",
    "        f.seek(1, 1)  # `precision'\n",
    "        h,w = struct.unpack('>HH', f.read(4))\n",
    "        return w,h\n",
    "\n",
    "def _gif_size(f): return struct.unpack('<HH', head[6:10])\n",
    "\n",
    "def _png_size(f):\n",
    "    assert struct.unpack('>i', head[4:8])[0]==0x0d0a1a0a\n",
    "    return struct.unpack('>ii', head[16:24])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def image_size(fn):\n",
    "    \"Tuple of (w,h) for png, gif, or jpg; `None` otherwise\"\n",
    "    d = dict(png=_png_size, gif=_gif_size, jpeg=_jpg_size)\n",
    "    with maybe_open(fn, 'rb') as f: return d[imghdr.what(f)](f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(image_size(fname), (1200,803))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def bunzip(fn):\n",
    "    \"bunzip `fn`, raising exception if output already exists\"\n",
    "    fn = Path(fn)\n",
    "    assert fn.exists(), f\"{fn} doesn't exist\"\n",
    "    out_fn = fn.with_suffix('')\n",
    "    assert not out_fn.exists(), f\"{out_fn} already exists\"\n",
    "    with bz2.BZ2File(fn, 'rb') as src, out_fn.open('wb') as dst:\n",
    "        for d in iter(lambda: src.read(1024*1024), b''): dst.write(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "f = Path('files/test.txt')\n",
    "if f.exists(): f.unlink()\n",
    "bunzip('files/test.txt.bz2')\n",
    "t = f.open().readlines()\n",
    "test_eq(len(t),1)\n",
    "test_eq(t[0], 'test\\n')\n",
    "f.unlink()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def join_path_file(file, path, ext=''):\n",
    "    \"Return `path/file` if file is a string or a `Path`, file otherwise\"\n",
    "    if not isinstance(file, (str, Path)): return file\n",
    "    path.mkdir(parents=True, exist_ok=True)\n",
    "    return path/f'{file}{ext}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = Path.cwd()/'_tmp'/'tst'\n",
    "f = join_path_file('tst.txt', path)\n",
    "assert path.exists()\n",
    "test_eq(f, path/'tst.txt')\n",
    "with open(f, 'w') as f_: assert join_path_file(f_, path) == f_\n",
    "shutil.rmtree(Path.cwd()/'_tmp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def loads(s, cls=None, object_hook=None, parse_float=None,\n",
    "          parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):\n",
    "    \"Same as `json.loads`, but handles `None`\"\n",
    "    if not s: return {}\n",
    "    return json.loads(s, cls=cls, object_hook=object_hook, parse_float=parse_float,\n",
    "          parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def loads_multi(s:str):\n",
    "    \"Generator of >=0 decoded json dicts, possibly with non-json ignored text at start and end\"\n",
    "    _dec = json.JSONDecoder()\n",
    "    while s.find('{')>=0:\n",
    "        s = s[s.find('{'):]\n",
    "        obj,pos = _dec.raw_decode(s)\n",
    "        if not pos: raise ValueError(f'no JSON object found at {pos}')\n",
    "        yield obj\n",
    "        s = s[pos:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tst = \"\"\"\n",
    "# ignored\n",
    "{ \"a\":1 }\n",
    "hello\n",
    "{\n",
    "\"b\":2\n",
    "}\n",
    "\"\"\"\n",
    "\n",
    "test_eq(list(loads_multi(tst)), [{'a': 1}, {'b': 2}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def untar_dir(file, dest):\n",
    "    with tempfile.TemporaryDirectory(dir='.') as d:\n",
    "        d = Path(d)\n",
    "        with tarfile.open(mode='r:gz', fileobj=file) as t: t.extractall(d)\n",
    "        next(d.iterdir()).rename(dest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def repo_details(url):\n",
    "    \"Tuple of `owner,name` from ssh or https git repo `url`\"\n",
    "    res = remove_suffix(url.strip(), '.git')\n",
    "    res = res.split(':')[-1]\n",
    "    return res.split('/')[-2:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(repo_details('https://github.com/fastai/fastai.git'), ['fastai', 'fastai'])\n",
    "test_eq(repo_details('git@github.com:fastai/nbdev.git\\n'), ['fastai', 'nbdev'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def run(cmd, *rest, ignore_ex=False, as_bytes=False, stderr=False):\n",
    "    \"Pass `cmd` (splitting with `shlex` if string) to `subprocess.run`; return `stdout`; raise `IOError` if fails\"\n",
    "    if rest: cmd = (cmd,)+rest\n",
    "    elif isinstance(cmd,str): cmd = shlex.split(cmd)\n",
    "    res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
    "    stdout = res.stdout\n",
    "    if stderr and res.stderr: stdout += b' ;; ' + res.stderr\n",
    "    if not as_bytes: stdout = stdout.decode().strip()\n",
    "    if ignore_ex: return (res.returncode, stdout)\n",
    "    if res.returncode: raise IOError(stdout)\n",
    "    return stdout"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can pass a string (which will be split based on standard shell rules), a list, or pass args directly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if sys.platform == 'win32':\n",
    "    assert 'ipynb' in run('cmd /c dir /p')\n",
    "    assert 'ipynb' in run(['cmd', '/c', 'dir', '/p'])\n",
    "    assert 'ipynb' in run('cmd', '/c', 'dir',  '/p')\n",
    "else:\n",
    "    assert 'ipynb' in run('ls -ls')\n",
    "    assert 'ipynb' in run(['ls', '-l'])\n",
    "    assert 'ipynb' in run('ls', '-l')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some commands fail in non-error situations, like `grep`. Use `ignore_ex` in those cases, which will return a tuple of stdout and returncode:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if sys.platform == 'win32':\n",
    "    test_eq(run('cmd /c findstr asdfds 00_test.ipynb', ignore_ex=True)[0], 1)\n",
    "else:\n",
    "    test_eq(run('grep asdfds 00_test.ipynb', ignore_ex=True)[0], 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`run` automatically decodes returned bytes to a `str`. Use `as_bytes` to skip that:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if sys.platform == 'win32':\n",
    "    # why I ingore as_types, becuase every time nbdev_clean_nbs will update \\n to \\nn\n",
    "    test_eq(run('cmd /c echo hi'), 'hi')\n",
    "else:\n",
    "    test_eq(run('echo hi', as_bytes=True), b'hi\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def open_file(fn, mode='r', **kwargs):\n",
    "    \"Open a file, with optional compression if gz or bz2 suffix\"\n",
    "    if isinstance(fn, io.IOBase): return fn\n",
    "    fn = Path(fn)\n",
    "    if   fn.suffix=='.bz2': return bz2.BZ2File(fn, mode, **kwargs)\n",
    "    elif fn.suffix=='.gz' : return gzip.GzipFile(fn, mode, **kwargs)\n",
    "    elif fn.suffix=='.zip': return zipfile.ZipFile(fn, mode, **kwargs)\n",
    "    else: return open(fn,mode, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def save_pickle(fn, o):\n",
    "    \"Save a pickle file, to a file name or opened file\"\n",
    "    with open_file(fn, 'wb') as f: pickle.dump(o, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def load_pickle(fn):\n",
    "    \"Load a pickle file from a file name or opened file\"\n",
    "    with open_file(fn, 'rb') as f: return pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for suf in '.pkl','.bz2','.gz':\n",
    "    # delete=False is added for Windows. https://stackoverflow.com/questions/23212435/permission-denied-to-write-to-my-temporary-file\n",
    "    with tempfile.NamedTemporaryFile(suffix=suf, delete=False) as f:\n",
    "        fn = Path(f.name)\n",
    "        save_pickle(fn, 't')\n",
    "        t = load_pickle(fn)\n",
    "    f.close()\n",
    "    test_eq(t,'t')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extensions to Pathlib.Path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following methods are added to the standard python libary [Pathlib.Path](https://docs.python.org/3/library/pathlib.html#basic-use)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def readlines(self:Path, hint=-1, encoding='utf8'):\n",
    "    \"Read the content of `self`\"\n",
    "    with self.open(encoding=encoding) as f: return f.readlines(hint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def read_json(self:Path, encoding=None, errors=None):\n",
    "    \"Same as `read_text` followed by `loads`\"\n",
    "    return loads(self.read_text(encoding=encoding, errors=errors))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def mk_write(self:Path, data, encoding=None, errors=None, mode=511):\n",
    "    \"Make all parent dirs of `self`, and write `data`\"\n",
    "    self.parent.mkdir(exist_ok=True, parents=True, mode=mode)\n",
    "    self.write_text(data, encoding=encoding, errors=errors)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def ls(self:Path, n_max=None, file_type=None, file_exts=None):\n",
    "    \"Contents of path as a list\"\n",
    "    extns=L(file_exts)\n",
    "    if file_type: extns += L(k for k,v in mimetypes.types_map.items() if v.startswith(file_type+'/'))\n",
    "    has_extns = len(extns)==0\n",
    "    res = (o for o in self.iterdir() if has_extns or o.suffix in extns)\n",
    "    if n_max is not None: res = itertools.islice(res, n_max)\n",
    "    return L(res)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We add an `ls()` method to `pathlib.Path` which is simply defined as `list(Path.iterdir())`, mainly for convenience in REPL environments such as notebooks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Path('.gitattributes')"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "path = Path()\n",
    "t = path.ls()\n",
    "assert len(t)>0\n",
    "t1 = path.ls(10)\n",
    "test_eq(len(t1), 10)\n",
    "t2 = path.ls(file_exts='.ipynb')\n",
    "assert len(t)>len(t2)\n",
    "t[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can also pass an optional `file_type` MIME prefix and/or a list of file extensions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Path('../fastcore/__init__.py'), Path('01_basics.ipynb'))"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lib_path = (path/'../fastcore')\n",
    "txt_files=lib_path.ls(file_type='text')\n",
    "assert len(txt_files) > 0 and txt_files[0].suffix=='.py'\n",
    "ipy_files=path.ls(file_exts=['.ipynb'])\n",
    "assert len(ipy_files) > 0 and ipy_files[0].suffix=='.ipynb'\n",
    "txt_files[0],ipy_files[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#hide\n",
    "path = Path()\n",
    "pkl = pickle.dumps(path)\n",
    "p2 = pickle.loads(pkl)\n",
    "test_eq(path.ls()[0], p2.ls()[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@patch\n",
    "def __repr__(self:Path):\n",
    "    b = getattr(Path, 'BASE_PATH', None)\n",
    "    if b:\n",
    "        try: self = self.relative_to(b)\n",
    "        except: pass\n",
    "    return f\"Path({self.as_posix()!r})\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "fastai also updates the `repr` of `Path` such that, if `Path.BASE_PATH` is defined, all paths are printed relative to that path (as long as they are contained in `Path.BASE_PATH`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = ipy_files[0].absolute()\n",
    "try:\n",
    "    Path.BASE_PATH = t.parent.parent\n",
    "    test_eq(repr(t), f\"Path('nbs/{t.name}')\")\n",
    "finally: Path.BASE_PATH = None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Other Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def truncstr(s:str, maxlen:int, suf:str='…', space='')->str:\n",
    "    \"Truncate `s` to length `maxlen`, adding suffix `suf` if truncated\"\n",
    "    return s[:maxlen-len(suf)]+suf if len(s)+len(space)>maxlen else s+space"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "w = 'abacadabra'\n",
    "test_eq(truncstr(w, 10), w)\n",
    "test_eq(truncstr(w, 5), 'abac…')\n",
    "test_eq(truncstr(w, 5, suf=''), 'abaca')\n",
    "test_eq(truncstr(w, 11, space='_'), w+\"_\")\n",
    "test_eq(truncstr(w, 10, space='_'), w[:-1]+'…')\n",
    "test_eq(truncstr(w, 5, suf='!!'), 'aba!!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "spark_chars = '▁▂▃▅▆▇'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _ceil(x, lim=None): return x if (not lim or x <= lim) else lim\n",
    "\n",
    "def _sparkchar(x, mn, mx, incr, empty_zero):\n",
    "    if x is None or (empty_zero and not x): return ' '\n",
    "    if incr == 0: return spark_chars[0]\n",
    "    res = int((_ceil(x,mx)-mn)/incr-0.5)\n",
    "    return spark_chars[res]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def sparkline(data, mn=None, mx=None, empty_zero=False):\n",
    "    \"Sparkline for `data`, with `None`s (and zero, if `empty_zero`) shown as empty column\"\n",
    "    valid = [o for o in data if o is not None]\n",
    "    if not valid: return ' '\n",
    "    mn,mx,n = ifnone(mn,min(valid)),ifnone(mx,max(valid)),len(spark_chars)\n",
    "    res = [_sparkchar(x=o, mn=mn, mx=mx, incr=(mx-mn)/n, empty_zero=empty_zero) for o in data]\n",
    "    return ''.join(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "without \"empty_zero\": ▅▂ ▁▂▁▃▇▅\n",
      "   with \"empty_zero\": ▅▂ ▁▂ ▃▇▅\n"
     ]
    }
   ],
   "source": [
    "data = [9,6,None,1,4,0,8,15,10]\n",
    "print(f'without \"empty_zero\": {sparkline(data, empty_zero=False)}')\n",
    "print(f'   with \"empty_zero\": {sparkline(data, empty_zero=True )}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can set a maximum and minimum for the y-axis of the sparkline with the arguments `mn` and `mx` respectively:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'▂▅▇▇'"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sparkline([1,2,3,400], mn=0, mx=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def autostart(g):\n",
    "    \"Decorator that automatically starts a generator\"\n",
    "    @functools.wraps(g)\n",
    "    def f():\n",
    "        r = g()\n",
    "        next(r)\n",
    "        return r\n",
    "    return f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "class EventTimer:\n",
    "    \"An event timer with history of `store` items of time `span`\"\n",
    "    def __init__(self, store=5, span=60):\n",
    "        self.hist,self.span,self.last = collections.deque(maxlen=store),span,default_timer()\n",
    "        self._reset()\n",
    "\n",
    "    def _reset(self): self.start,self.events = self.last,0\n",
    "\n",
    "    def add(self, n=1):\n",
    "        \"Record `n` events\"\n",
    "        if self.duration>self.span:\n",
    "            self.hist.append(self.freq)\n",
    "            self._reset()\n",
    "        self.events +=n\n",
    "        self.last = default_timer()\n",
    "\n",
    "    @property\n",
    "    def duration(self): return default_timer()-self.start\n",
    "    @property\n",
    "    def freq(self): return self.events/self.duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h4 id=\"EventTimer\" class=\"doc_header\"><code>class</code> <code>EventTimer</code><a href=\"\" class=\"source_link\" style=\"float:right\">[source]</a></h4>\n",
       "\n",
       "> <code>EventTimer</code>(**`store`**=*`5`*, **`span`**=*`60`*)\n",
       "\n",
       "An event timer with history of `store` items of time `span`"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(EventTimer, title_level=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add events with `add`, and get number of `events` and their frequency (`freq`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num Events: 8, Freq/sec: 423.0\n",
      "Most recent:  ▂▂▁▁▇ 318.5 319.0 266.9 275.6 427.7\n"
     ]
    }
   ],
   "source": [
    "# Random wait function for testing\n",
    "def _randwait(): yield from (sleep(random.random()/200) for _ in range(100))\n",
    "\n",
    "c = EventTimer(store=5, span=0.03)\n",
    "for o in _randwait(): c.add(1)\n",
    "print(f'Num Events: {c.events}, Freq/sec: {c.freq:.01f}')\n",
    "print('Most recent: ', sparkline(c.hist), *L(c.hist).map('{:.01f}'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "_fmt = string.Formatter()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def stringfmt_names(s:str)->list:\n",
    "    \"Unique brace-delimited names in `s`\"\n",
    "    return uniqueify(o[1] for o in _fmt.parse(s) if o[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s = '/pulls/{pull_number}/reviews/{review_id}'\n",
    "test_eq(stringfmt_names(s), ['pull_number','review_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "class PartialFormatter(string.Formatter):\n",
    "    \"A `string.Formatter` that doesn't error on missing fields, and tracks missing fields and unused args\"\n",
    "    def __init__(self):\n",
    "        self.missing = set()\n",
    "        super().__init__()\n",
    "\n",
    "    def get_field(self, nm, args, kwargs):\n",
    "        try: return super().get_field(nm, args, kwargs)\n",
    "        except KeyError:\n",
    "            self.missing.add(nm)\n",
    "            return '{'+nm+'}',nm\n",
    "\n",
    "    def check_unused_args(self, used, args, kwargs):\n",
    "        self.xtra = filter_keys(kwargs, lambda o: o not in used)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h4 id=\"PartialFormatter\" class=\"doc_header\"><code>class</code> <code>PartialFormatter</code><a href=\"\" class=\"source_link\" style=\"float:right\">[source]</a></h4>\n",
       "\n",
       "> <code>PartialFormatter</code>() :: `Formatter`\n",
       "\n",
       "A `string.Formatter` that doesn't error on missing fields, and tracks missing fields and unused args"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(PartialFormatter, title_level=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def partial_format(s:str, **kwargs):\n",
    "    \"string format `s`, ignoring missing field errors, returning missing and extra fields\"\n",
    "    fmt = PartialFormatter()\n",
    "    res = fmt.format(s, **kwargs)\n",
    "    return res,list(fmt.missing),fmt.xtra"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The result is a tuple of `(formatted_string,missing_fields,extra_fields)`, e.g:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res,missing,xtra = partial_format(s, pull_number=1, foo=2)\n",
    "test_eq(res, '/pulls/1/reviews/{review_id}')\n",
    "test_eq(missing, ['review_id'])\n",
    "test_eq(xtra, {'foo':2})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def utc2local(dt:datetime)->datetime:\n",
    "    \"Convert `dt` from UTC to local time\"\n",
    "    return dt.replace(tzinfo=timezone.utc).astimezone(tz=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2000-01-01 12:00:00 UTC is 2000-01-01 12:00:00+00:00 local time\n"
     ]
    }
   ],
   "source": [
    "dt = datetime(2000,1,1,12)\n",
    "print(f'{dt} UTC is {utc2local(dt)} local time')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def local2utc(dt:datetime)->datetime:\n",
    "    \"Convert `dt` from local to UTC time\"\n",
    "    return dt.replace(tzinfo=None).astimezone(tz=timezone.utc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2000-01-01 12:00:00 local is 2000-01-01 12:00:00+00:00 UTC time\n"
     ]
    }
   ],
   "source": [
    "print(f'{dt} local is {local2utc(dt)} UTC time')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def trace(f):\n",
    "    \"Add `set_trace` to an existing function `f`\"\n",
    "    if getattr(f, '_traced', False): return f\n",
    "    def _inner(*args,**kwargs):\n",
    "        set_trace()\n",
    "        return f(*args,**kwargs)\n",
    "    _inner._traced = True\n",
    "    return _inner"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can add a breakpoint to an existing function, e.g:\n",
    "\n",
    "```python\n",
    "Path.cwd = trace(Path.cwd)\n",
    "Path.cwd()\n",
    "```\n",
    "\n",
    "Now, when the function is called it will drop you into the debugger.  Note, you must issue  the `s` command when you begin to step into the function that is being traced."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def round_multiple(x, mult, round_down=False):\n",
    "    \"Round `x` to nearest multiple of `mult`\"\n",
    "    def _f(x_): return (int if round_down else round)(x_/mult)*mult\n",
    "    res = L(x).map(_f)\n",
    "    return res if is_listy(x) else res[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_eq(round_multiple(63,32), 64)\n",
    "test_eq(round_multiple(50,32), 64)\n",
    "test_eq(round_multiple(40,32), 32)\n",
    "test_eq(round_multiple( 0,32),  0)\n",
    "test_eq(round_multiple(63,32, round_down=True), 32)\n",
    "test_eq(round_multiple((63,40),32), (64,32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "@contextmanager\n",
    "def modified_env(*delete, **replace):\n",
    "    \"Context manager temporarily modifying `os.environ` by deleting `delete` and replacing `replace`\"\n",
    "    prev = dict(os.environ)\n",
    "    try:\n",
    "        os.environ.update(replace)\n",
    "        for k in delete: os.environ.pop(k, None)\n",
    "        yield\n",
    "    finally:\n",
    "        os.environ.clear()\n",
    "        os.environ.update(prev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# USER isn't in Cloud Linux Environments\n",
    "env_test = 'USERNAME' if sys.platform == \"win32\" else 'SHELL'\n",
    "oldusr = os.environ[env_test]\n",
    "\n",
    "replace_param = {env_test: 'a'}\n",
    "with modified_env('PATH', **replace_param):\n",
    "    test_eq(os.environ[env_test], 'a')\n",
    "    assert 'PATH' not in os.environ\n",
    "\n",
    "assert 'PATH' in os.environ\n",
    "test_eq(os.environ[env_test], oldusr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "class ContextManagers(GetAttr):\n",
    "    \"Wrapper for `contextlib.ExitStack` which enters a collection of context managers\"\n",
    "    def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()\n",
    "    def __enter__(self): self.default.map(self.stack.enter_context)\n",
    "    def __exit__(self, *args, **kwargs): self.stack.__exit__(*args, **kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<h4 id=\"ContextManagers\" class=\"doc_header\"><code>class</code> <code>ContextManagers</code><a href=\"\" class=\"source_link\" style=\"float:right\">[source]</a></h4>\n",
       "\n",
       "> <code>ContextManagers</code>(**`mgrs`**) :: [`GetAttr`](/basics.html#GetAttr)\n",
       "\n",
       "Wrapper for `contextlib.ExitStack` which enters a collection of context managers"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_doc(ContextManagers, title_level=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def str2bool(s):\n",
    "    \"Case-insensitive convert string `s` too a bool (`y`,`yes`,`t`,`true`,`on`,`1`->`True`)\"\n",
    "    if not isinstance(s,str): return bool(s)\n",
    "    return bool(distutils.util.strtobool(s)) if s else False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for o in \"y YES t True on 1\".split(): assert str2bool(o)\n",
    "for o in \"n no FALSE off 0\".split(): assert not str2bool(o)\n",
    "for o in 0,None,'',False: assert not str2bool(o)\n",
    "for o in 1,True: assert str2bool(o)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def _is_instance(f, gs):\n",
    "    tst = [g if type(g) in [type, 'function'] else g.__class__ for g in gs]\n",
    "    for g in tst:\n",
    "        if isinstance(f, g) or f==g: return True\n",
    "    return False\n",
    "\n",
    "def _is_first(f, gs):\n",
    "    for o in L(getattr(f, 'run_after', None)):\n",
    "        if _is_instance(o, gs): return False\n",
    "    for g in gs:\n",
    "        if _is_instance(f, L(getattr(g, 'run_before', None))): return False\n",
    "    return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#export\n",
    "def sort_by_run(fs):\n",
    "    end = L(fs).attrgot('toward_end')\n",
    "    inp,res = L(fs)[~end] + L(fs)[end], L()\n",
    "    while len(inp):\n",
    "        for i,o in enumerate(inp):\n",
    "            if _is_first(o, inp):\n",
    "                res.append(inp.pop(i))\n",
    "                break\n",
    "        else: raise Exception(\"Impossible to sort\")\n",
    "    return res"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Export -"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converted 00_test.ipynb.\n",
      "Converted 01_basics.ipynb.\n",
      "Converted 02_foundation.ipynb.\n",
      "Converted 03_xtras.ipynb.\n",
      "Converted 03a_parallel.ipynb.\n",
      "Converted 03b_net.ipynb.\n",
      "Converted 04_dispatch.ipynb.\n",
      "Converted 05_transform.ipynb.\n",
      "Converted 07_meta.ipynb.\n",
      "Converted 08_script.ipynb.\n",
      "Converted index.ipynb.\n"
     ]
    }
   ],
   "source": [
    "#hide\n",
    "from nbdev.export import notebook2script\n",
    "notebook2script()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "jupytext": {
   "split_at_heading": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}