{ "cells": [ { "cell_type": "markdown", "id": "14bcecd2", "metadata": { "toc": true }, "source": [ "

Table of Contents

\n", "
" ] }, { "cell_type": "markdown", "id": "21d626af", "metadata": {}, "source": [ "# What is the hidden action/function of `*` in the `dummy` func below?\n", "\n", "You can have a dummy function below without a code and `*` can take out things from a list and put into a tuple. What exactly is happening here with `*`?" ] }, { "cell_type": "code", "execution_count": 3, "id": "548f4207", "metadata": {}, "outputs": [], "source": [ "from pdb import set_trace" ] }, { "cell_type": "code", "execution_count": 4, "id": "f8b47ffb", "metadata": {}, "outputs": [], "source": [ "def dummy(*sth):\n", " set_trace()\n", " pass" ] }, { "cell_type": "code", "execution_count": 5, "id": "1cefb677", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: unmatched ')'\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** IndentationError: expected an indented block\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "> \u001b[0;32m/var/folders/gz/ch3n2mp51m9386sytqf97s6w0000gn/T/ipykernel_55977/4281242037.py\u001b[0m(3)\u001b[0;36mdummy\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m 1 \u001b[0;31m\u001b[0;32mdef\u001b[0m \u001b[0mdummy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0msth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m 2 \u001b[0;31m \u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m----> 3 \u001b[0;31m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\n", "ipdb> a\n", "sth = 
([{'a': 1, 'b': 2}],)\n", "ipdb> q\n" ] } ], "source": [ "dummy([{'a':1, 'b':2}])" ] }, { "cell_type": "code", "execution_count": 6, "id": "b5d53c35", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: unmatched ')'\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** IndentationError: expected an indented block\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "> \u001b[0;32m/var/folders/gz/ch3n2mp51m9386sytqf97s6w0000gn/T/ipykernel_55977/4281242037.py\u001b[0m(3)\u001b[0;36mdummy\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m 1 \u001b[0;31m\u001b[0;32mdef\u001b[0m \u001b[0mdummy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0msth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m 2 \u001b[0;31m \u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m----> 3 \u001b[0;31m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\n", "ipdb> a\n", "sth = ({'a': 1, 'b': 2},)\n", "ipdb> q\n" ] } ], "source": [ "dummy(*[{'a':1, 'b':2}]) # * removes the list [] bracket" ] }, { "cell_type": "code", "execution_count": 7, "id": "e005e7f1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: unmatched ')'\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** IndentationError: 
expected an indented block\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "> \u001b[0;32m/var/folders/gz/ch3n2mp51m9386sytqf97s6w0000gn/T/ipykernel_55977/4281242037.py\u001b[0m(3)\u001b[0;36mdummy\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m 1 \u001b[0;31m\u001b[0;32mdef\u001b[0m \u001b[0mdummy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0msth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m 2 \u001b[0;31m \u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m----> 3 \u001b[0;31m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\n", "ipdb> a\n", "sth = ([{'a': 1, 'b': 2}], {'c': 3, 'd': 4}, {'e': 5})\n", "ipdb> q\n" ] } ], "source": [ "dummy([{'a':1, 'b':2}], {'c':3, 'd':4}, {'e':5})" ] }, { "cell_type": "code", "execution_count": 8, "id": "abcb2a9a", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: unmatched ')'\n", "*** SyntaxError: invalid syntax\n", "*** SyntaxError: EOF while scanning triple-quoted string literal\n", "*** IndentationError: expected an indented block\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "*** AttributeError: module 'pdb' has no attribute 'Color'\n", "> \u001b[0;32m/var/folders/gz/ch3n2mp51m9386sytqf97s6w0000gn/T/ipykernel_55977/4281242037.py\u001b[0m(3)\u001b[0;36mdummy\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m 1 \u001b[0;31m\u001b[0;32mdef\u001b[0m 
\u001b[0mdummy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0msth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m 2 \u001b[0;31m \u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\u001b[0;32m----> 3 \u001b[0;31m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0m\n", "ipdb> a\n", "sth = ({'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5})\n", "ipdb> q\n" ] } ], "source": [ "dummy(*[{'a':1, 'b':2}], {'c':3, 'd':4}, {'e':5}) # * removes the list [] bracket" ] }, { "cell_type": "markdown", "id": "c5c8c11c", "metadata": {}, "source": [ "# What types of data can be used as `params` for `Optimizer`? " ] }, { "cell_type": "markdown", "id": "fe91b3f9", "metadata": {}, "source": [ "According to official docs (see the source below), `params` \n", "- should be `Tensor` and \n", "- contain both parameters and hyper parameters.\n" ] }, { "cell_type": "markdown", "id": "cf63d742", "metadata": {}, "source": [ "```python\n", "class Optimizer(_BaseOptimizer):\n", " \"Base optimizer class for the fastai library, updating `params` with `cbs`\"\n", " _keep_on_clear = ['force_train', 'do_wd']\n", " def __init__(self,\n", " params:Tensor, # Parameters and hyper parameters\n", " cbs:list, # `Optimizer` callbacks\n", " train_bn:bool=True, # Batch normalization is always trained\n", " **defaults # Default values to set on hyper parameters\n", " ):\n", "```" ] }, { "cell_type": "markdown", "id": "05c52405", "metadata": {}, "source": [ "However, from the actual source code and the tests involving `params`, we can tell that:\n", "- `params` can be almost anything: a number, a list, a tuple, a range, a generator, or a tensor\n", "- there is no example or source code that uses `params` as hyper parameters\n", "\n", "First, let's see what types of data can be used as `params`" ] 
}, { "cell_type": "markdown", "id": "23724e3c", "metadata": {}, "source": [ "```python\n", "# there are two lines of source codes to process `params` \n", "params = L(params)\n", "self.param_lists = L(L(p) for p in params) if isinstance(params[0], (L,list)) else L([params])\n", "```" ] }, { "cell_type": "code", "execution_count": 9, "id": "84caf6c4", "metadata": {}, "outputs": [], "source": [ "from fastai.optimizer import L, listify" ] }, { "cell_type": "markdown", "id": "767064c1", "metadata": {}, "source": [ "By reading the source of `L` from `L??`, `params` can be anything that can be `listify`ed." ] }, { "cell_type": "code", "execution_count": 10, "id": "a0746b62", "metadata": {}, "outputs": [], "source": [ "from fastai.optimizer import _BaseOptimizer, Tensor, Optimizer, noop, test_eq" ] }, { "cell_type": "markdown", "id": "a00bdd25", "metadata": {}, "source": [ "In fact, tests from the source have given us examples of `params` being a list, a range and a generator." ] }, { "cell_type": "code", "execution_count": 11, "id": "82d12434", "metadata": {}, "outputs": [], "source": [ "# The 4 examples provided by the official source code\n", "opt = Optimizer([1,2,3], noop) # param as a list\n", "test_eq(opt.param_lists, [[1,2,3]])\n", "opt = Optimizer(range(3), noop)# param as a range\n", "test_eq(opt.param_lists, [[0,1,2]])\n", "opt = Optimizer([[1,2],[3]], noop) # as list of lists\n", "test_eq(opt.param_lists, [[1,2],[3]])\n", "opt = Optimizer(([o,o+1] for o in range(0,4,2)), noop) # as a generator\n", "test_eq(opt.param_lists, [[0,1],[2,3]])" ] }, { "cell_type": "markdown", "id": "003dd39c", "metadata": {}, "source": [ "I have added examples where `params` is a single number, a tuple, or a tensor." ] }, { "cell_type": "code", "execution_count": 12, "id": "a4009942", "metadata": {}, "outputs": [], "source": [ "# Two added examples: `params` as a tuple and as a single number (the tensor examples are in the cells below)\n", "opt = Optimizer((1,2,3), noop) # param as a tuple\n", "test_eq(opt.param_lists, 
[[1,2,3]])\n", "opt = Optimizer((1), noop) # param as a digit\n", "test_eq(opt.param_lists, [[1]])" ] }, { "cell_type": "code", "execution_count": 13, "id": "1d83dc11", "metadata": {}, "outputs": [], "source": [ "t = Tensor([[1,2],[3,4]])\n", "t1 = Tensor([[1,2],[3,4]])" ] }, { "cell_type": "code", "execution_count": 14, "id": "e131dfba", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(#1) [[tensor([[1., 2.],\n", " [3., 4.]])]]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opt = Optimizer(t, noop) # params as a single tensor\n", "opt.param_lists" ] }, { "cell_type": "code", "execution_count": 15, "id": "ebd0a555", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(#1) [[tensor([[1., 2.],\n", " [3., 4.]]), [tensor([[1., 2.],\n", " [3., 4.]])]]]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opt = Optimizer([t, [t1]], noop) # param as a list of tensors, first item is just a tensor\n", "opt.param_lists" ] }, { "cell_type": "code", "execution_count": 16, "id": "8697609c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(#2) [[tensor([[1., 2.],\n", " [3., 4.]])],[tensor([[1., 2.],\n", " [3., 4.]])]]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opt = Optimizer([[t], t1], noop) # param as a list of tensors, first item is a list of tensor\n", "opt.param_lists" ] }, { "cell_type": "markdown", "id": "ef6c74e4", "metadata": {}, "source": [ "# `defaults` provide hyper parameters not `params`" ] }, { "cell_type": "markdown", "id": "6b1e1414", "metadata": {}, "source": [ "According to the docs above, `params` is said to be both parameters and hyper parameters. But according to the actual source, hyper parameters like `lr`, `mom` are provided by `defaults` from `**defaults` (user input) or from `cbs` (another user input), not `params`. 
\n", "\n", "In fact, all hyper parameters are processed and stored in `self.hypers` for use later. There are tests in the source to demonstrate it. " ] }, { "cell_type": "code", "execution_count": 19, "id": "8dff74d7", "metadata": {}, "outputs": [], "source": [ "def tst_arg(p, lr=0, **kwargs): return p\n", "tst_arg.defaults = dict(lr=1e-2)\n", "\n", "def tst_arg2(p, lr2=0, **kwargs): return p\n", "tst_arg2.defaults = dict(lr2=1e-3)\n", "\n", "def tst_arg3(p, mom=0, **kwargs): return p\n", "tst_arg3.defaults = dict(mom=0.9)\n", "\n", "def tst_arg4(p, **kwargs): return p" ] }, { "cell_type": "code", "execution_count": 21, "id": "2d5218ab", "metadata": {}, "outputs": [], "source": [ "opt = Optimizer([1,2,3], [tst_arg,tst_arg2, tst_arg3]) # hyper params provided by cbs\n", "test_eq(opt.hypers, [{'lr2': 1e-3, 'mom': 0.9, 'lr': 1e-2}])\n", "test_eq(opt.param_lists, [[1, 2, 3]])" ] }, { "cell_type": "code", "execution_count": 22, "id": "c4519510", "metadata": {}, "outputs": [], "source": [ "opt = Optimizer([1,2,3], tst_arg, lr=0.1) # hyper params provided by both cbs and **defaults\n", "test_eq(opt.hypers, [{'lr': 0.1}])\n", "test_eq(opt.param_lists, [[1,2,3]])" ] }, { "cell_type": "code", "execution_count": null, "id": "05f6d025", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "gist": { "data": { "description": "Optimizer.__init__.ipynb", "public": true }, "id": "" }, "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, 
"toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 5 }