{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Lua" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Variables and Printing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "--assignment and printing \n", "\n", "a, b = 24, \"tacos\" -- can assign tuple-style, like in python \n", "c = 'please' -- can enclose string literals in single or double quotes\n", "print(a, b, c, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- string concatenation\n", "\n", "d = b .. ', ' .. c\n", "print(d)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scalar Math" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ " -- syntax similar to MATLAB\n", "print(2*a, a^2, a%2, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- note that all numbers are implicitly floats/doubles!\n", "print(a/7, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- if you want to ensure you get something integral, use math.ceil or math.floor\n", "print(math.floor(a/7), math.ceil(a/7), \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- some other useful math functions\n", "print(math.min(1, 22, 44), math.max(1, 22, 44), \"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Control Flow" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- while loops are enclosed in while-do-end blocks\n", "i = 1\n", "while i < 3 do\n", " print(i)\n", " i = i + 1 -- N.B. no 'i += 1' or 'i++' syntax in Lua \n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- for-loops iterate over a range of numbers, INCLUSIVE!\n", "for i = 3, 5 do\n", " print(i)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- like in python, you can specify the step size with a 3rd loop argument\n", "for i = 10, 1, -4 do\n", " print(i)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- conditional statements go in if-then-elseif-else-end blocks\n", "val = 24\n", "\n", "if val == 0 then\n", " print(\"zero!\")\n", "elseif val%2 == 0 then\n", " print(\"even and nonzero!\")\n", "elseif val ~= 13 then -- N.B. 
Lua uses '~=' to mean '!='; also works for strings!\n", " print(\"odd and not 13!\")\n", "else\n", " print(\"everything else!\")\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- lua allows the 'break' keyword\n", "for i = 1, 3 do\n", " if i % 2 == 0 then\n", " break\n", " end\n", " print(i)\n", "end\n", "\n", "-- but it doesn't have 'continue'; \n", "-- see http://lua-users.org/wiki/ContinueProposal and \n", "-- http://stackoverflow.com/questions/3524970/why-does-lua-have-no-continue-statement for some workarounds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Truth and Falsity" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- nil and false evaluate to false\n", "a, b = nil, false\n", "\n", "-- everything else evaluates to true\n", "c, d = \"taco\", 0\n", "\n", "if a or b then\n", " print(\"first!\")\n", "elseif c and d then\n", " print(\"second!\")\n", "else\n", " print(\"third!\")\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- 'and' and 'or' have interesting side effects; allow for 'ternary if' as follows:\n", "val2 = a and 1 or 2 -- a is falsey, so we get 2\n", "print(val2, \"\\n\")\n", "\n", "val3 = c and 3 or 4 -- c is truthy, so we get 3\n", "print(val3, \"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- local vs global variables\n", "\n", "var = 22 -- global\n", "\n", "function f1()\n", " local var = 33 -- N.B. local variables generally lead to faster code! \n", " return var + 1\n", "end\n", "\n", "print(f1(), \"\\n\")\n", "\n", "function f2()\n", " return var + 1\n", "end\n", "\n", "print(f2(), \"\\n\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- default and extra arguments\n", "\n", "function encodeDigits(a, b, c)\n", " local a = a or 0 -- common convention for specifying default args\n", " local b = b or 0\n", " local c = c or 0\n", " assert(a >= 0 and a < 10)\n", " assert(b >= 0 and b < 10)\n", " assert(c >= 0 and c < 10) \n", " return a*1 + b*10 + c*100\n", "end\n", "\n", "print(encodeDigits(1, 2, 3),\"\\n\") -- no defaults used\n", "print(encodeDigits(2),\"\\n\") -- defaults for b and c used\n", "print(encodeDigits(nil, 2),\"\\n\") -- defaults for a and c used\n", "print(encodeDigits(), \"\\n\") -- all defaults used\n", "print(encodeDigits(1, 2, 3, 4),\"\\n\") -- 4th argument ignored" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- returning multiple values\n", "\n", "function divWithRemainder(a, b)\n", " return math.floor(a/b), a%b\n", "end\n", "\n", "d, r = divWithRemainder(10, 3)\n", "print(d, r, \"\\n\")\n", "\n", "-- if you attempt to place multiple values in a single variable, lua just forgets the values after the first\n", "d = divWithRemainder(10 ,3)\n", "print(d) -- N.B. 
you don't get a tuple like in python; just 3\n", "\n", "-- (function stuff outside the scope of this tutorial: functions are first class objects, closures)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tables (more or less the only native data-structure provided by Lua)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Tables as Dictionaries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- tables can be used as hash-based associative arrays (like python dictionaries)\n", "t1 = {} -- construct an empty table\n", "t1[\"one\"] = 1\n", "t1[\"two\"] = 2\n", "t1[3] = \"three\"\n", "print(t1, \"\\n\")\n", "\n", "t2 = {[\"one\"]=1, [\"two\"]=2, [3]=\"three\"} -- constructing a table literal\n", "print(t2, \"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can access string attributes either with brackets, or with dot notation\n", "print(t2[\"one\"], t2[3], \"\\n\")\n", "print(t2.one)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- iterating over key, value pairs\n", "for k,v in pairs(t1) do\n", " print(k,v)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- remove elements from dictionaries by setting to nil\n", "t1[\"one\"] = nil\n", "print(t1,\"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Tables as (ordered) arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- when a table uses only integer keys 1..n, it can also function as an array!\n", "-- N.B. Tables (and tensors) are 1-indexed!!!\n", "arr = {} -- construct an empty array\n", "arr[1] = \"one\"\n", "arr[2] = \"two\"\n", "arr[3] = \"three\"\n", "print(arr,\"\\n\")\n", "\n", "arr2 = {\"one\", \"two\", \"three\"} -- construct an array literal\n", "print(arr2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can get the length of an array by prepending with '#'\n", "print(#arr,\"\\n\") \n", "\n", "--N.B. '#' only works with array-style tables (and not with dictionary-style tables)\n", " -- If you want to get the size of a dictionary in constant time, you need to store it somewhere; gross!\n", "ugh = {[\"one\"]=1, [\"two\"]=2}\n", "print(#ugh,\"\\n\") -- misleading!" 
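] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- (added sketch) a common workaround, if you don't mind paying linear time, is simply to count the keys\n", "-- with pairs(); 'numKeys' below is a hypothetical helper name, not something provided by Lua or this tutorial\n", "function numKeys(t)\n", "    local count = 0\n", "    for _ in pairs(t) do -- pairs() visits every key/value pair, so this is O(n)\n", "        count = count + 1\n", "    end\n", "    return count\n", "end\n", "\n", "print(numKeys(ugh),\"\\n\") -- 2, which is the number we actually wanted"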
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead to using integer keys to index, can also append to table as follows\n", "arr3 = {}\n", "table.insert(arr3, \"one\") -- equivalent to t[#t+1] = \"one\"\n", "table.insert(arr3, \"two\")\n", "print(arr3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can iterate over an array in order as follows\n", "for i, el in ipairs(arr2) do -- ipairs() is like enumerate() in python\n", " print(i, el)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- to remove elements from array, use table.remove (which is inefficient)\n", "table.remove(arr2,2)\n", "print(arr2,\"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Torch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Tensor Basics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "--[[ Tensors are multi-dimensional generalizations of arrays/matrices, and are the primary data-structure provided\n", " by Torch (just as arrays are the primary data-structure providedy by Numpy). Tensors are great, and anytime \n", " you can use them you probably should.\n", "\n", " Also check out https://github.com/torch/torch7/blob/master/doc/tensor.md for documentation on Tensor objects,\n", " and https://github.com/torch/torch7/blob/master/doc/maths.md for documentation on mathematical operations\n", " defined on Tensors\n", "--]]\n", "\n", "-- here are some ways of constructing Tensors (of different sizes and dimensions)\n", "A = torch.Tensor(3, 3) -- an empty 3x3 Tensor (initialized with garbage)\n", "B = torch.zeros(3, 3, 2) -- 3x3x2 Tensor initalized with zeros\n", "C = torch.ones(3, 1, 3) -- 3x1x3 Tensor initialized with ones\n", "D = torch.randn(2) -- 2-vector (still a Tensor) initialized with standard gaussian noise\n", "E = torch.rand(1, 1, 1, 1) -- 1x1x1x1 Tensor initialized with uniform noise\n", "F = torch.Tensor({{1, 1}, {2, 2}}) -- 2x2 literal tensor\n", "\n", "print(A,\"\\n\")\n", "print(B,\"\\n\")\n", "print(C,\"\\n\")\n", "print(D,\"\\n\")\n", "print(E,\"\\n\")\n", "print(F)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- by default Tensor() gives you a \"real\" Tensor, and you can set whether \"real\" defaults to float or double.\n", "-- if you want to explicitly pick one, there are also specialized constructors\n", "A = torch.FloatTensor(3, 3)\n", "print(A,\"\\n\")\n", "B = torch.LongTensor(3, 3) -- N.B. LongTensors hold integers and are very important; we use them to store indices\n", "print(B,\"\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- some important ways to get Tensor metadata\n", "A = torch.randn(2,3)\n", "print(A:dim(),\"\\n\") -- number of dimensions\n", "print(A:size(1),\"\\n\") -- size along 1st dimension; can do any (existing) dimension e.g. 
A:size(2)\n", "print(A:size()) -- gives a data structure with sizes of ALL dimensions; not actually that useful\n", "print(A:nElement(),\"\\n\") -- total number of element\n", "print(A:isContiguous()) -- does Tensor address a contiguous block of memory" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Views on Tensors" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can \"view\" a tensor in a different shape without doing any copy\n", "a = torch.range(1,6) -- numbers 1 thru 6\n", "print(a,\"\\n\")\n", "A = a:view(2,3) -- the ':' notation implicitly adds 'self' to a function call (when defined on objects)\n", "print(A)\n", "\n", "-- note view() reshapes along rows (like C and numpy), not along columns (like fortran and R) \n", "B = A:view(3,2)\n", "print(B,\"\\n\")\n", "\n", "-- note a, A, and B address the same memory!\n", "B:zero() -- zeroes out a tensor\n", "print(a)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Accessing Sub-Tensors" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- index notation allows you to index along the first dimension\n", "A = torch.range(1,6):view(2,3)\n", "firstRow = A[1]\n", "print(A,\"\\n\")\n", "print(firstRow)\n", "\n", "-- this does no memory copy!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- select() allows you to index along any dimension\n", "firstCol = A:select(2,1) -- select()'s first argument is the desired dimension\n", "print(firstCol)\n", "-- also does no memory copy!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead of accessing a single index, can narrow a Tensor along a chosen dimension\n", "firstRow = A:narrow(1,1,1) -- arguments are dim, startIdx, length along dim to extract\n", "last2Cols = A:narrow(2,2,2)\n", "print(firstRow,\"\\n\")\n", "print(last2Cols)\n", "\n", "-- this also addresses SAME memory as in original tensor\n", "last2Cols:zero()\n", "print(A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- while narrow() calls can be chained, can also use sub() to narrow along first 4 dimensions\n", "A = torch.range(1,6):view(2,3)\n", "firstRow = A:sub(1,1) -- arguments are start and stop idx (inclusive) for each dimension (up to 4)\n", "last2Cols = A:sub(1,2,2,3) -- using start and stop indices for first two dimensions here\n", "bottomRight = A:sub(2,2,3,3)\n", "\n", "print(firstRow,\"\\n\")\n", "print(last2Cols,\"\\n\")\n", "print(bottomRight,\"\\n\")\n", "\n", "-- as above, this addresses same memory as original" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead of using sub() and narrow(), can also specify ranges by indexing with tables\n", "firstRow = A[{{1,1},{}}] -- expects table of range-tables, 1 for each dimension; empty table means everything\n", "last2Cols = A[{{},{2,3}}] -- note similarity to sub()\n", "bottomRight = A[{{2,2},{3,3}}]\n", "\n", "print(firstRow,\"\\n\")\n", "print(last2Cols,\"\\n\")\n", "print(bottomRight,\"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sparse Indexing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- we can select 
non-contiguous items along the first dimension using index()\n", "A = torch.range(1,9):view(3,3)\n", "idxs = torch.LongTensor({1,3}) -- indices are often required to be stored in LongTensors\n", "firstAndThirdRows = A:index(1,idxs) -- first argument is the dimension\n", "print(A,\"\\n\")\n", "print(firstAndThirdRows,\"\\n\")\n", "\n", "--N.B. index() does a memory copy!\n", "firstAndThirdRows:zero()\n", "print(A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- can also update a matrix sparsely with indexAdd()\n", "A = torch.zeros(3,3)\n", "idxs = torch.LongTensor({1,3})\n", "U = torch.randn(2,3)\n", "A:indexAdd(1,idxs,U) -- U must be of dimension idxs:size(1) x A:size(2)\n", "print(A)\n", "-- there's also indexFill() and indexCopy(); see the documentation!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### In-place Operations vs. Copying Operations (Very Important!)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- for most (mathematical) operations defined on tensors, you will have a choice between allocating new memory for\n", "-- the result of the operation, or placing the result in some already-allocated tensor.\n", "-- for example, let's consider taking the element-wise absolute value of a tensor A\n", "A = torch.randn(3,3)\n", "\n", "-- if we want to allocate a NEW tensor B s.t. B = abs(A), we do the following\n", "B = torch.abs(A) -- in general, using torch.f to call a function f on a tensor will allocate new memory\n", "\n", "-- let's make sure A has not changed\n", "print(A,\"\\n\")\n", "print(B,\"\\n\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- suppose instead we have some tensor C lying around that we want to use to store abs(A)\n", "C = torch.Tensor(3,3)\n", "-- we can use C to store abs(A) as follows\n", "C:abs(A) -- recall the ':' notation is short-hand for passing 'self' to a function defined on an object\n", " -- in general, calling X:f(args) will use X's memory to store the result of f(args)\n", "print(C)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- often it is convenient to use a tensor to store its own result\n", "A:abs()\n", "-- now A has changed\n", "print(A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- the in-place/copying distinction is important for 2 reasons:\n", "-- 1) doing in-place operations is generally much faster than allocating memory repeatedly (e.g., in a loop)\n", "-- 2) on the other hand, easy to mess up your data by accidentally doing things in-place\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Element-wise mathematical operations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- many useful elementwise operations are defined, and (as above), can be used in-place or not\n", "torch.sqrt(A)\n", "A:sqrt()\n", "torch.tanh(A)\n", "A:tanh()\n", "-- can add or multiply by constant scalars\n", "A:add(0.5)\n", "A:mul(2.6)\n", "A:div(1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Row or Column-wise Operations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- can take sum, mean, stddev of rows or 
columns as follows\n", "A = torch.randn(2,3)\n", "colSums = A:sum(1) -- sum along first dimension; can also do A:mean(1), A:std(1), etc\n", "rowSums = A:sum(2) -- sum along second dimension; can also do A:mean(2), A:std(2), etc" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- torch combines max and argmax() \n", "a = torch.range(2,6)\n", "maxval, argmax = a:max(1) -- argument specifies dimension\n", "print(maxval, argmax)\n", "-- can also take min()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Linear Algebra" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- adding and multiplying tensors\n", "A = torch.randn(2,3)\n", "B = torch.randn(2,3)\n", "A:add(B) -- puts A+B in A; if want new memory, do torch.add(A,B)\n", "B:cmul(A) -- puts ELEMENTWISE multiplication of A and B in B\n", "B:cdiv(A) -- puts ELEMENTWISE division B/A in B" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- matrix multiplication\n", "Ans = torch.Tensor(2,2) -- we'll use this to store an answer\n", "-- computes (A B^T) and puts it in Ans\n", "Ans:mm(A,B:t()) -- N.B. B:t() transposes B\n", "\n", "-- dot products\n", "dotprod = A:dot(B) -- note, A and B don't need to be vectors (that is, they can have dim > 1)\n", "\n", "-- matrix-vector products\n", "mvAns = torch.Tensor(2) -- stores mv-prod answer\n", "v = torch.randn(3) -- 1 dimensional, so a vector\n", "mvAns:mv(A,v) -- note could also have done mvAns:view(2,1):mm(A,v:view(3,1))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### \"Add\"-style Linear Algebra" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- instead of overwriting memory w/ a result, you may want to add it to something already computed\n", "u = torch.ones(3)\n", "v = torch.Tensor(3):fill(2.2)\n", "w = torch.Tensor(3):fill(2)\n", "c = 3\n", "-- compute u = u + c * (v .* w), where .* is elementwise multiplication\n", "u:addcmul(c,v,w)\n", "print(u,\"\\n\")\n", "-- N.B. can also do addcdiv(), which will often be very handy!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- add-style matrix multiplication\n", "Ans = torch.ones(3,3)\n", "A = torch.eye(3) -- torch.eye makes an identity matrix\n", "B = torch.eye(3):mul(2)\n", "Ans:addmm(c,A,B) -- N.B. 
many more options here; see the documentation!\n", "print(Ans)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- possible to use infix notation like in numpy/matlab, but it always does a copy, so try to avoid it!\n", "A = torch.randn(3,3)\n", "B = torch.randn(3,3)\n", "A = A + B\n", "C = A * B\n", "-- etc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### OOP\n", " - We won't cover classes/object-oriented programming here, but you can easily define classes with torch.class, as described here https://github.com/torch/torch7/blob/master/doc/utility.md#torch.class (a small sketch is included in the appendix at the very end of this notebook)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Exercise\n", "### To tie everything together, let's implement finite difference gradient-checking for a simple function\n", "\n", "Let's consider the quadratic function $$f(\\mathbf{x}) = \\frac{1}{2} \\mathbf{x}^{T} \\mathbf{A} \\mathbf{x} + \\mathbf{b}^T \\mathbf{x} \\,$$, where $\\mathbf{A} \\,$ is symmetric and $\\mathbf{x} \\in \\mathbb{R}^D$.\n", "\n", "The gradient in this case is\n", "$$ \\nabla_{\\mathbf{x}}(f) = \\langle \\frac{\\partial f}{\\partial x_1}, \\ldots, \\frac{\\partial f}{\\partial x_D} \\rangle = \\mathbf{A} \\mathbf{x} + \\mathbf{b} \\,$$\n", "\n", "Suppose that we compute the gradient above analytically (in code). We can use finite differences to check that our implementation is bug-free. First, define the masking vector $\\mathbf{m}^j = \\epsilon \\cdot \\mathbf{\\delta}(j)$, where $\\mathbf{\\delta}(j) \\in \\mathbb{R}^D$ is the $j$-th standard basis vector (a 1 in position $j$ and 0s elsewhere) and $\\epsilon$ is some very small scalar value (e.g., 0.00001).\n", "\n", "If our computed gradient is correct, we expect\n", "$$ \\frac{\\partial f}{\\partial x_j} \\approx \\frac{f(\\mathbf{x} + \\mathbf{m}^j) - f(\\mathbf{x} - \\mathbf{m}^j)}{2\\epsilon} \\, $$\n", "The term on the RHS of the equation above is the \"central\" finite difference wrt $x_j$.\n", "\n", "We'll implement $f$, its gradient, and a finite difference checker below!\n", "\n", "(P.S. 
I have no idea how to prevent those bars to the right of the equations from showing up, but ignore them...)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- let's define some global memory we'll update, and some fixed, global parameters\n", "buf = nil\n", "grad = nil\n", "\n", "torch.manualSeed(287)\n", "D = 3 -- dimensionality of x\n", "A = torch.randn(D,D)\n", "-- ensure symmetric (note this does a memory copy!)\n", "A = A + A:t()\n", "b = torch.randn(D)\n", "x = torch.randn(D)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- implementation of f(x) = 0.5 * x^T A x + b^T x\n", "function f(x,A,b) \n", " if buf == nil then\n", " buf = torch.Tensor()\n", " end\n", " buf:resize(A:size(1))\n", " -- first do Ax\n", " buf:mv(A,x)\n", " return buf:dot(x)*0.5 + b:dot(x)\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- implementation of \\nabla_x(f) = Ax + b\n", "function dfdx(x,A,b)\n", " if grad == nil then\n", " grad = torch.Tensor()\n", " end\n", " grad:resizeAs(x)\n", " grad:mv(A,x)\n", " grad:add(b)\n", " return grad\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "function finiteDiffChecker(f,dfdx,x,A,b)\n", " -- first let's compute the gradient at our current point\n", " local grad = dfdx(x,A,b)\n", " -- now let's check it with finite differences\n", " local eps = 1e-5 -- use this for computing finite diffs\n", " local xcopy = x:clone()\n", " print(\"grad\", \" finite-difference\")\n", " for j = 1, grad:size(1) do\n", " -- perturb x[j]\n", " xcopy[j] = xcopy[j] + eps\n", " x[j] = x[j] - eps\n", " -- form finite difference: (f(x+eps,A,b) - f(x-eps,A,b))/(2*eps)\n", " local diff = f(xcopy,A,b) - f(x,A,b)\n", " local finiteDiff = diff/(2*eps)\n", " -- now compare to our analytic gradient\n", " print(grad[j], finiteDiff)\n", " assert(torch.abs(grad[j]-finiteDiff) <= 1e-4)\n", " -- reset x and xcopy\n", " xcopy[j] = xcopy[j] - eps\n", " x[j] = x[j] + eps\n", " end\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- let's check our gradients!\n", "finiteDiffChecker(f,dfdx,x,A,b)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# HDF5" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "-- hdf5 is a format that lets us write and read tensors in both torch and numpy\n", "-- you should install torch-hdf5 by following the instructions at https://github.com/deepmind/torch-hdf5\n", "-- here are some simple examples of using torch-hdf5 to read and write\n", "\n", "require 'hdf5'\n", "\n", "-- writing\n", "myFile = hdf5.open('myh5file.h5', 'w')\n", "myFile:write('dataset1', torch.rand(5, 5))\n", "-- can write multiple 'datasets' to the same .h5 file\n", "myFile:write('dataset2', torch.randn(3))\n", "-- etc\n", "myFile:close()\n", "\n", "-- reading\n", "myFile = hdf5.open('myh5file.h5', 'r')\n", "dataset1 = myFile:read('dataset1'):all()\n", "dataset2 = myFile:read('dataset2'):all()\n", "myFile:close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Final Note\n", "\n", "- Do NOT do your assignments in an iTorch notebook; they're slow and annoying." 
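] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Appendix: classes with torch.class\n", "\n", "- As mentioned in the OOP section above, here is a minimal sketch of defining a class with torch.class, following the pattern described in the linked documentation; the class and method names below (Point, norm) are made up purely for illustration." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "-- a tiny class wrapping a 2D point; 'Point' and its methods are illustrative names only\n", "do\n", "    local Point = torch.class('Point')\n", "\n", "    function Point:__init(x, y) -- called when we construct a Point\n", "        self.x = x or 0\n", "        self.y = y or 0\n", "    end\n", "\n", "    function Point:norm() -- Euclidean length of the point\n", "        return math.sqrt(self.x^2 + self.y^2)\n", "    end\n", "end\n", "\n", "p = Point(3, 4) -- torch.class also registers the class name globally, so we can construct instances here\n", "print(p:norm(),\"\\n\") -- 5"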
] } ], "metadata": { "kernelspec": { "display_name": "iTorch", "language": "lua", "name": "itorch" }, "language_info": { "name": "lua", "version": "20100" } }, "nbformat": 4, "nbformat_minor": 0 }