{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lua" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Variables and Printing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "--assignment and printing \n", "\n", "a, b = 24, \"tacos\" -- can assign tuple-style, like in python \n", "c = 'please' -- can enclose string literals in single or double quotes\n", "print(a, b, c, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- string concatenation\n", "\n", "d = b .. ', ' .. c\n", "print(d)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scalar Math" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ " -- syntax similar to MATLAB\n", "print(2*a, a^2, a%2, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- note that all numbers are implicitly floats/doubles!\n", "print(a/7, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- if you want to ensure you get something integral, use math.ceil or math.floor\n", "print(math.floor(a/7), math.ceil(a/7), \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- some other useful math functions\n", "print(math.min(1, 22, 44), math.max(1, 22, 44), \"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Control Flow" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- while loops are enclosed in while-do-end blocks\n", "i = 1\n", "while i < 3 do\n", " print(i)\n", " i = i + 1 -- N.B. no 'i += 1' or 'i++' syntax in Lua \n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- for-loops iterate over a range of numbers, INCLUSIVE!\n", "for i = 3, 5 do\n", " print(i)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- like in python, you can specify the step size with a 3rd loop argument\n", "for i = 10, 1, -4 do\n", " print(i)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- conditional statements go in if-then-elseif-else-end blocks\n", "val = 24\n", "\n", "if val == 0 then\n", " print(\"zero!\")\n", "elseif val%2 == 0 then\n", " print(\"even and nonzero!\")\n", "elseif val ~= 13 then -- N.B. 
Lua uses '~=' to mean '!='; also works for strings!\n", " print(\"odd and not 13!\")\n", "else\n", " print(\"everything else!\")\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- lua allows the 'break' keyword\n", "for i = 1, 3 do\n", " if i % 2 == 0 then\n", " break\n", " end\n", " print(i)\n", "end\n", "\n", "-- but it doesn't have 'continue'; \n", "-- see http://lua-users.org/wiki/ContinueProposal and \n", "-- http://stackoverflow.com/questions/3524970/why-does-lua-have-no-continue-statement for some workarounds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Truth and Falsity" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- nil and false evaluate to false\n", "a, b = nil, false\n", "\n", "-- everything else evaluates to true\n", "c, d = \"taco\", 0\n", "\n", "if a or b then\n", " print(\"first!\")\n", "elseif c and d then\n", " print(\"second!\")\n", "else\n", " print(\"third!\")\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- 'and' and 'or' have interesting side effects; allow for 'ternary if' as follows:\n", "val2 = a and 1 or 2 -- a is falsey, so we get 2\n", "print(val2, \"\\n\")\n", "\n", "val3 = c and 3 or 4 -- c is truthy, so we get 3\n", "print(val3, \"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- local vs global variables\n", "\n", "var = 22 -- global\n", "\n", "function f1()\n", " local var = 33 -- N.B. local variables generally lead to faster code! \n", " return var + 1\n", "end\n", "\n", "print(f1(), \"\\n\")\n", "\n", "function f2()\n", " return var + 1\n", "end\n", "\n", "print(f2(), \"\\n\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- default and extra arguments\n", "\n", "function encodeDigits(a, b, c)\n", " local a = a or 0 -- common convention for specifying default args\n", " local b = b or 0\n", " local c = c or 0\n", " assert(a >= 0 and a < 10)\n", " assert(b >= 0 and b < 10)\n", " assert(c >= 0 and c < 10) \n", " return a*1 + b*10 + c*100\n", "end\n", "\n", "print(encodeDigits(1, 2, 3),\"\\n\") -- no defaults used\n", "print(encodeDigits(2),\"\\n\") -- defaults for b and c used\n", "print(encodeDigits(nil, 2),\"\\n\") -- defaults for a and c used\n", "print(encodeDigits(), \"\\n\") -- all defaults used\n", "print(encodeDigits(1, 2, 3, 4),\"\\n\") -- 4th argument ignored" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- returning multiple values\n", "\n", "function divWithRemainder(a, b)\n", " return math.floor(a/b), a%b\n", "end\n", "\n", "d, r = divWithRemainder(10, 3)\n", "print(d, r, \"\\n\")\n", "\n", "-- if you attempt to place multiple values in a single variable, lua just forgets the values after the first\n", "d = divWithRemainder(10 ,3)\n", "print(d) -- N.B. 
you don't get a tuple like in python; just 3\n", "\n", "-- (function stuff outside the scope of this tutorial: functions are first class objects, closures)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tables (more or less the only native data-structure provided by Lua)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Tables as Dictionaries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- tables can be used as hash-based associative arrays (like python dictionaries)\n", "t1 = {} -- construct an empty table\n", "t1[\"one\"] = 1\n", "t1[\"two\"] = 2\n", "t1[3] = \"three\"\n", "print(t1, \"\\n\")\n", "\n", "t2 = {[\"one\"]=1, [\"two\"]=2, [3]=\"three\"} -- constructing a table literal\n", "print(t2, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can access string attributes either with brackets, or with dot notation\n", "print(t2[\"one\"], t2[3], \"\\n\")\n", "print(t2.one)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- iterating over key, value pairs\n", "for k,v in pairs(t1) do\n", " print(k,v)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- remove elements from dictionaries by setting to nil\n", "t1[\"one\"] = nil\n", "print(t1,\"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Tables as (ordered) arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- when a table uses only integer keys 1..n, it can also function as an array!\n", "-- N.B. Tables (and tensors) are 1-indexed!!!\n", "arr = {} -- construct an empty array\n", "arr[1] = \"one\"\n", "arr[2] = \"two\"\n", "arr[3] = \"three\"\n", "print(arr,\"\\n\")\n", "\n", "arr2 = {\"one\", \"two\", \"three\"} -- construct an array literal\n", "print(arr2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can get the length of an array by prepending with '#'\n", "print(#arr,\"\\n\") \n", "\n", "--N.B. '#' only works with array-style tables (and not with dictionary-style tables)\n", " -- If you want to get the size of a dictionary in constant time, you need to store it somewhere; gross!\n", "ugh = {[\"one\"]=1, [\"two\"]=2}\n", "print(#ugh,\"\\n\") -- misleading!" 
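] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- (added sketch) a common workaround, if you don't mind paying linear time, is simply to count the keys\n", "-- with pairs(); 'numKeys' below is a hypothetical helper name, not something provided by Lua or this tutorial\n", "function numKeys(t)\n", "    local count = 0\n", "    for _ in pairs(t) do -- pairs() visits every key/value pair, so this is O(n)\n", "        count = count + 1\n", "    end\n", "    return count\n", "end\n", "\n", "print(numKeys(ugh),\"\\n\") -- 2, which is the number we actually wanted"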
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead to using integer keys to index, can also append to table as follows\n", "arr3 = {}\n", "table.insert(arr3, \"one\") -- equivalent to t[#t+1] = \"one\"\n", "table.insert(arr3, \"two\")\n", "print(arr3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can iterate over an array in order as follows\n", "for i, el in ipairs(arr2) do -- ipairs() is like enumerate() in python\n", " print(i, el)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- to remove elements from array, use table.remove (which is inefficient)\n", "table.remove(arr2,2)\n", "print(arr2,\"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Torch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tensor Basics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "--[[ Tensors are multi-dimensional generalizations of arrays/matrices, and are the primary data-structure provided\n", " by Torch (just as arrays are the primary data-structure providedy by Numpy). Tensors are great, and anytime \n", " you can use them you probably should.\n", "\n", " Also check out https://github.com/torch/torch7/blob/master/doc/tensor.md for documentation on Tensor objects,\n", " and https://github.com/torch/torch7/blob/master/doc/maths.md for documentation on mathematical operations\n", " defined on Tensors\n", "--]]\n", "\n", "-- here are some ways of constructing Tensors (of different sizes and dimensions)\n", "A = torch.Tensor(3, 3) -- an empty 3x3 Tensor (initialized with garbage)\n", "B = torch.zeros(3, 3, 2) -- 3x3x2 Tensor initalized with zeros\n", "C = torch.ones(3, 1, 3) -- 3x1x3 Tensor initialized with ones\n", "D = torch.randn(2) -- 2-vector (still a Tensor) initialized with standard gaussian noise\n", "E = torch.rand(1, 1, 1, 1) -- 1x1x1x1 Tensor initialized with uniform noise\n", "F = torch.Tensor({{1, 1}, {2, 2}}) -- 2x2 literal tensor\n", "\n", "print(A,\"\\n\")\n", "print(B,\"\\n\")\n", "print(C,\"\\n\")\n", "print(D,\"\\n\")\n", "print(E,\"\\n\")\n", "print(F)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- by default Tensor() gives you a \"real\" Tensor, and you can set whether \"real\" defaults to float or double.\n", "-- if you want to explicitly pick one, there are also specialized constructors\n", "A = torch.FloatTensor(3, 3)\n", "print(A,\"\\n\")\n", "B = torch.LongTensor(3, 3) -- N.B. LongTensors hold integers and are very important; we use them to store indices\n", "print(B,\"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- some important ways to get Tensor metadata\n", "A = torch.randn(2,3)\n", "print(A:dim(),\"\\n\") -- number of dimensions\n", "print(A:size(1),\"\\n\") -- size along 1st dimension; can do any (existing) dimension e.g. 
A:size(2)\n", "print(A:size()) -- gives a data structure with sizes of ALL dimensions; not actually that useful\n", "print(A:nElement(),\"\\n\") -- total number of element\n", "print(A:isContiguous()) -- does Tensor address a contiguous block of memory" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Views on Tensors" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can \"view\" a tensor in a different shape without doing any copy\n", "a = torch.range(1,6) -- numbers 1 thru 6\n", "print(a,\"\\n\")\n", "A = a:view(2,3) -- the ':' notation implicitly adds 'self' to a function call (when defined on objects)\n", "print(A)\n", "\n", "-- note view() reshapes along rows (like C and numpy), not along columns (like fortran and R) \n", "B = A:view(3,2)\n", "print(B,\"\\n\")\n", "\n", "-- note a, A, and B address the same memory!\n", "B:zero() -- zeroes out a tensor\n", "print(a)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Accessing Sub-Tensors" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- index notation allows you to index along the first dimension\n", "A = torch.range(1,6):view(2,3)\n", "firstRow = A[1]\n", "print(A,\"\\n\")\n", "print(firstRow)\n", "\n", "-- this does no memory copy!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- select() allows you to index along any dimension\n", "firstCol = A:select(2,1) -- select()'s first argument is the desired dimension\n", "print(firstCol)\n", "-- also does no memory copy!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead of accessing a single index, can narrow a Tensor along a chosen dimension\n", "firstRow = A:narrow(1,1,1) -- arguments are dim, startIdx, length along dim to extract\n", "last2Cols = A:narrow(2,2,2)\n", "print(firstRow,\"\\n\")\n", "print(last2Cols)\n", "\n", "-- this also addresses SAME memory as in original tensor\n", "last2Cols:zero()\n", "print(A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- while narrow() calls can be chained, can also use sub() to narrow along first 4 dimensions\n", "A = torch.range(1,6):view(2,3)\n", "firstRow = A:sub(1,1) -- arguments are start and stop idx (inclusive) for each dimension (up to 4)\n", "last2Cols = A:sub(1,2,2,3) -- using start and stop indices for first two dimensions here\n", "bottomRight = A:sub(2,2,3,3)\n", "\n", "print(firstRow,\"\\n\")\n", "print(last2Cols,\"\\n\")\n", "print(bottomRight,\"\\n\")\n", "\n", "-- as above, this addresses same memory as original" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead of using sub() and narrow(), can also specify ranges by indexing with tables\n", "firstRow = A[{{1,1},{}}] -- expects table of range-tables, 1 for each dimension; empty table means everything\n", "last2Cols = A[{{},{2,3}}] -- note similarity to sub()\n", "bottomRight = A[{{2,2},{3,3}}]\n", "\n", "print(firstRow,\"\\n\")\n", "print(last2Cols,\"\\n\")\n", "print(bottomRight,\"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sparse Indexing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- we can select 
non-contiguous items along the first dimension using index()\n", "A = torch.range(1,9):view(3,3)\n", "idxs = torch.LongTensor({1,3}) -- indices are often required to be stored in LongTensors\n", "firstAndThirdRows = A:index(1,idxs) -- first argument is the dimension\n", "print(A,\"\\n\")\n", "print(firstAndThirdRows,\"\\n\")\n", "\n", "--N.B. index() does a memory copy!\n", "firstAndThirdRows:zero()\n", "print(A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can also update a matrix sparsely with indexAdd()\n", "A = torch.zeros(3,3)\n", "idxs = torch.LongTensor({1,3})\n", "U = torch.randn(2,3)\n", "A:indexAdd(1,idxs,U) -- U must be of dimension idxs:size(1) x A:size(2)\n", "print(A)\n", "-- there's also indexFill() and indexCopy(); see the documentation!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### In-place Operations vs. Copying Operations (Very Important!)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- for most (mathematical) operations defined on tensors, you will have a choice between allocating new memory for\n", "-- the result of the operation, or placing the result in some already-allocated tensor.\n", "-- for example, let's consider taking the element-wise absolute value of a tensor A\n", "A = torch.randn(3,3)\n", "\n", "-- if we want to allocate a NEW tensor B s.t. B = abs(A), we do the following\n", "B = torch.abs(A) -- in general, using torch.f to call a function f on a tensor will allocate new memory\n", "\n", "-- let's make sure A has not changed\n", "print(A,\"\\n\")\n", "print(B,\"\\n\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- suppose instead we have some tensor C lying around that we want to use to store abs(A)\n", "C = torch.Tensor(3,3)\n", "-- we can use C to store abs(A) as follows\n", "C:abs(A) -- recall the ':' notation is short-hand for passing 'self' to a function defined on an object\n", " -- in general, calling X:f(args) will use X's memory to store the result of f(args)\n", "print(C)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- often it is convenient to use a tensor to store its own result\n", "A:abs()\n", "-- now A has changed\n", "print(A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- the in-place/copying distinction is important for 2 reasons:\n", "-- 1) doing in-place operations is generally much faster than allocating memory repeatedly (e.g., in a loop)\n", "-- 2) on the other hand, easy to mess up your data by accidentally doing things in-place\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Element-wise mathematical operations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- many useful elementwise operations are defined, and (as above), can be used in-place or not\n", "torch.sqrt(A)\n", "A:sqrt()\n", "torch.tanh(A)\n", "A:tanh()\n", "-- can add or multiply by constant scalars\n", "A:add(0.5)\n", "A:mul(2.6)\n", "A:div(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Row or Column-wise Operations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- can take sum, mean, stddev of rows or 
columns as follows\n", "A = torch.randn(2,3)\n", "colSums = A:sum(1) -- sum along first dimension; can also do A:mean(1), A:std(1), etc\n", "rowSums = A:sum(2) -- sum along second dimension; can also do A:mean(2), A:std(2), etc" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- torch combines max and argmax() \n", "a = torch.range(2,6)\n", "maxval, argmax = a:max(1) -- argument specifies dimension\n", "print(maxval, argmax)\n", "-- can also take min()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Linear Algebra" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- adding and multiplying tensors\n", "A = torch.randn(2,3)\n", "B = torch.randn(2,3)\n", "A:add(B) -- puts A+B in A; if want new memory, do torch.add(A,B)\n", "B:cmul(A) -- puts ELEMENTWISE multiplication of A and B in B\n", "B:cdiv(A) -- puts ELEMENTWISE division B/A in B" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- matrix multiplication\n", "Ans = torch.Tensor(2,2) -- we'll use this to store an answer\n", "-- computes (A B^T) and puts it in Ans\n", "Ans:mm(A,B:t()) -- N.B. B:t() transposes B\n", "\n", "-- dot products\n", "dotprod = A:dot(B) -- note, A and B don't need to be vectors (that is, they can have dim > 1)\n", "\n", "-- matrix-vector products\n", "mvAns = torch.Tensor(2) -- stores mv-prod answer\n", "v = torch.randn(3) -- 1 dimensional, so a vector\n", "mvAns:mv(A,v) -- note could also have done mvAns:view(2,1):mm(A,v:view(3,1))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \"Add\"-style Linear Algebra" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead of overwriting memory w/ a result, you may want to add it to something already computed\n", "u = torch.ones(3)\n", "v = torch.Tensor(3):fill(2.2)\n", "w = torch.Tensor(3):fill(2)\n", "c = 3\n", "-- compute u = u + c * (v .* w), where .* is elementwise multiplication\n", "u:addcmul(c,v,w)\n", "print(u,\"\\n\")\n", "-- N.B. can also do addcdiv(), which will often be very handy!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- add-style matrix multiplication\n", "Ans = torch.ones(3,3)\n", "A = torch.eye(3) -- torch.eye makes an identity matrix\n", "B = torch.eye(3):mul(2)\n", "Ans:addmm(c,A,B) -- N.B. 
many more options here; see the documentation!\n", "print(Ans)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- possible to use infix notation like in numpy/matlab, but it always does a copy, so try to avoid it!\n", "A = torch.randn(3,3)\n", "B = torch.randn(3,3)\n", "A = A + B\n", "C = A * B\n", "-- etc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### OOP\n", " - We won't cover classes/object-oriented programming here, but you can easily define classes with torch.class, as described here https://github.com/torch/torch7/blob/master/doc/utility.md#torch.class (a small sketch is included in the appendix at the very end of this notebook)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Exercise\n", "### To tie everything together, let's implement finite difference gradient-checking for a simple function\n", "\n", "Let's consider the quadratic function $$f(\\mathbf{x}) = \\frac{1}{2} \\mathbf{x}^{T} \\mathbf{A} \\mathbf{x} + \\mathbf{b}^T \\mathbf{x} \\,$$, where $\\mathbf{A} \\,$ is symmetric and $\\mathbf{x} \\in \\mathbb{R}^D$.\n", "\n", "The gradient in this case is\n", "$$ \\nabla_{\\mathbf{x}}(f) = \\langle \\frac{\\partial f}{\\partial x_1}, \\ldots, \\frac{\\partial f}{\\partial x_D} \\rangle = \\mathbf{A} \\mathbf{x} + \\mathbf{b} \\,$$\n", "\n", "Suppose that we compute the gradient above analytically (in code). We can use finite differences to check that our implementation is bug-free. First, define the masking vector $\\mathbf{m}^j = \\epsilon \\cdot \\mathbf{\\delta}(j)$, where $\\mathbf{\\delta}(j) \\in \\mathbb{R}^D$ is the $j$-th standard basis vector (a 1 in position $j$ and 0s elsewhere) and $\\epsilon$ is some very small scalar value (e.g., 0.00001).\n", "\n", "If our computed gradient is correct, we expect\n", "$$ \\frac{\\partial f}{\\partial x_j} \\approx \\frac{f(\\mathbf{x} + \\mathbf{m}^j) - f(\\mathbf{x} - \\mathbf{m}^j)}{2\\epsilon} \\, $$\n", "The term on the RHS of the equation above is the \"central\" finite difference wrt $x_j$.\n", "\n", "We'll implement $f$, its gradient, and a finite difference checker below!\n", "\n", "(P.S. 
I have no idea how to prevent those bars to the right of the equations from showing up, but ignore them...)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- let's define some global memory we'll update, and some fixed, global parameters\n", "buf = nil\n", "grad = nil\n", "\n", "torch.manualSeed(287)\n", "D = 3 -- dimensionality of x\n", "A = torch.randn(D,D)\n", "-- ensure symmetric (note this does a memory copy!)\n", "A = A + A:t()\n", "b = torch.randn(D)\n", "x = torch.randn(D)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- implementation of f(x) = 0.5 * x^T A x + b^T x\n", "function f(x,A,b) \n", " if buf == nil then\n", " buf = torch.Tensor()\n", " end\n", " buf:resize(A:size(1))\n", " -- first do Ax\n", " buf:mv(A,x)\n", " return buf:dot(x)*0.5 + b:dot(x)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- implementation of \\nabla_x(f) = Ax + b\n", "function dfdx(x,A,b)\n", " if grad == nil then\n", " grad = torch.Tensor()\n", " end\n", " grad:resizeAs(x)\n", " grad:mv(A,x)\n", " grad:add(b)\n", " return grad\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "function finiteDiffChecker(f,dfdx,x,A,b)\n", " -- first let's compute the gradient at our current point\n", " local grad = dfdx(x,A,b)\n", " -- now let's check it with finite differences\n", " local eps = 1e-5 -- use this for computing finite diffs\n", " local xcopy = x:clone()\n", " print(\"grad\", \" finite-difference\")\n", " for j = 1, grad:size(1) do\n", " -- perturb x[j]\n", " xcopy[j] = xcopy[j] + eps\n", " x[j] = x[j] - eps\n", " -- form finite difference: (f(x+eps,A,b) - f(x-eps,A,b))/(2*eps)\n", " local diff = f(xcopy,A,b) - f(x,A,b)\n", " local finiteDiff = diff/(2*eps)\n", " -- now compare to our analytic gradient\n", " print(grad[j], finiteDiff)\n", " assert(torch.abs(grad[j]-finiteDiff) <= 1e-4)\n", " -- reset x and xcopy\n", " xcopy[j] = xcopy[j] - eps\n", " x[j] = x[j] + eps\n", " end\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- let's check our gradients!\n", "finiteDiffChecker(f,dfdx,x,A,b)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# HDF5" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- hdf5 is a format that lets us write and read tensors in both torch and numpy\n", "-- you should install torch-hdf5 by following the instructions at https://github.com/deepmind/torch-hdf5\n", "-- here are some simple examples of using torch-hdf5 to read and write\n", "\n", "require 'hdf5'\n", "\n", "-- writing\n", "myFile = hdf5.open('myh5file.h5', 'w')\n", "myFile:write('dataset1', torch.rand(5, 5))\n", "-- can write multiple 'datasets' to the same .h5 file\n", "myFile:write('dataset2', torch.randn(3))\n", "-- etc\n", "myFile:close()\n", "\n", "-- reading\n", "myFile = hdf5.open('myh5file.h5', 'r')\n", "dataset1 = myFile:read('dataset1'):all()\n", "dataset2 = myFile:read('dataset2'):all()\n", "myFile:close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Final Note\n", "\n", "- Do NOT do your assignments in an iTorch notebook; they're slow and annoying." 
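] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Appendix: classes with torch.class\n", "\n", "- As mentioned in the OOP section above, here is a minimal sketch of defining a class with torch.class, following the pattern described in the linked documentation; the class and method names below (Point, norm) are made up purely for illustration." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- a tiny class wrapping a 2D point; 'Point' and its methods are illustrative names only\n", "do\n", "    local Point = torch.class('Point')\n", "\n", "    function Point:__init(x, y) -- called when we construct a Point\n", "        self.x = x or 0\n", "        self.y = y or 0\n", "    end\n", "\n", "    function Point:norm() -- Euclidean length of the point\n", "        return math.sqrt(self.x^2 + self.y^2)\n", "    end\n", "end\n", "\n", "p = Point(3, 4) -- torch.class also registers the class name globally, so we can construct instances here\n", "print(p:norm(),\"\\n\") -- 5"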
] } ], "metadata": { "kernelspec": { "display_name": "iTorch", "language": "lua", "name": "itorch" }, "language_info": { "name": "lua", "version": "20100" } }, "nbformat": 4, "nbformat_minor": 0 }