{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from bokeh.plotting import figure, show, output_notebook"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def gradient_descent(F, dF, x, steps=100, lr=0.001):\n",
    "    \"\"\"Minimise F with fixed-step gradient descent.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    F : callable\n",
    "        Objective; F(x) is evaluated after every update to record the loss.\n",
    "    dF : callable\n",
    "        Called as dF(x); its result is the direction subtracted from x.\n",
    "    x : array_like\n",
    "        Starting point. It is copied, so the caller's array is not modified.\n",
    "    steps : int\n",
    "        Number of updates.\n",
    "    lr : float\n",
    "        Step size multiplying dF(x).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (final x, list with one F(x) value per step).\n",
    "    \"\"\"\n",
    "    # Private float copy: the in-place update below would otherwise overwrite\n",
    "    # the caller's array and raise a casting error on integer input.\n",
    "    x = np.array(x, dtype=float)\n",
    "    loss = []\n",
    "\n",
    "    for _ in range(steps):\n",
    "        dx = dF(x)\n",
    "        x -= lr * dx\n",
    "        loss.append(F(x))\n",
    "\n",
    "    return x, loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def rmsprop(F, dF, x, steps=100, lr=0.001, decay=.9, eps=1e-8):\n",
    "    \"\"\"Minimise F with RMSprop-style scaled steps.\n",
    "\n",
    "    A running average of the squared dF(x) values is kept (weight `decay`\n",
    "    on the old average) and every step is divided element-wise by its\n",
    "    square root plus `eps`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    F : callable\n",
    "        Objective; F(x) is evaluated after every update to record the loss.\n",
    "    dF : callable\n",
    "        Called as dF(x); must return an array broadcastable against x.\n",
    "    x : array_like\n",
    "        Starting point. It is copied, so the caller's array is not modified.\n",
    "    steps : int\n",
    "        Number of updates.\n",
    "    lr : float\n",
    "        Step size applied to the scaled direction.\n",
    "    decay : float\n",
    "        Weight of the previous running average of squared directions.\n",
    "    eps : float\n",
    "        Added to the denominator to avoid division by zero.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (final x, list with one F(x) value per step).\n",
    "    \"\"\"\n",
    "    # Private float copy so the in-place update never touches the caller's data.\n",
    "    x = np.array(x, dtype=float)\n",
    "    loss = []\n",
    "    dx_mean_sqr = np.zeros(x.shape, dtype=float)\n",
    "\n",
    "    for _ in range(steps):\n",
    "        dx = dF(x)\n",
    "        dx_mean_sqr = decay * dx_mean_sqr + (1 - decay) * dx ** 2\n",
    "        x -= lr * dx / (np.sqrt(dx_mean_sqr) + eps)\n",
    "        loss.append(F(x))\n",
    "\n",
    "    return x, loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def rmsprop_momentum(F, dF, x, steps=100, lr=0.001, decay=.9, eps=1e-8, mu=.9):\n",
    "    \"\"\"Minimise F with RMSprop-style scaled steps accumulated in a momentum term.\n",
    "\n",
    "    Same scaling as `rmsprop`, but the scaled step is added to `mu` times\n",
    "    the previous update, and that accumulated value is subtracted from x.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    F : callable\n",
    "        Objective; F(x) is evaluated after every update to record the loss.\n",
    "    dF : callable\n",
    "        Called as dF(x); must return an array broadcastable against x.\n",
    "    x : array_like\n",
    "        Starting point. It is copied, so the caller's array is not modified.\n",
    "    steps : int\n",
    "        Number of updates.\n",
    "    lr : float\n",
    "        Step size applied to the scaled direction.\n",
    "    decay : float\n",
    "        Weight of the previous running average of squared directions.\n",
    "    eps : float\n",
    "        Added to the denominator to avoid division by zero.\n",
    "    mu : float\n",
    "        Fraction of the previous update carried over into the next one.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (final x, list with one F(x) value per step).\n",
    "    \"\"\"\n",
    "    # Private float copy so the in-place update never touches the caller's data.\n",
    "    x = np.array(x, dtype=float)\n",
    "    loss = []\n",
    "    dx_mean_sqr = np.zeros(x.shape, dtype=float)\n",
    "    momentum = np.zeros(x.shape, dtype=float)\n",
    "\n",
    "    for _ in range(steps):\n",
    "        dx = dF(x)\n",
    "        dx_mean_sqr = decay * dx_mean_sqr + (1 - decay) * dx ** 2\n",
    "        momentum = mu * momentum + lr * dx / (np.sqrt(dx_mean_sqr) + eps)\n",
    "        x -= momentum\n",
    "        loss.append(F(x))\n",
    "\n",
    "    return x, loss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def F(x):\n",
    "    \"\"\"Return the sum of squared entries of A @ x - I.\n",
    "\n",
    "    Reads the global matrix `A`, which must be defined before F is called;\n",
    "    I is the identity matrix of size len(A).\n",
    "    \"\"\"\n",
    "    residual = A @ x - np.eye(len(A), dtype=float)\n",
    "    return np.sum(residual ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": 
[],
   "source": [
    "def dF(x):\n",
    "    \"\"\"Return (2 * A.T) @ (A @ x - I), with I the identity of size len(A).\n",
    "\n",
    "    Reads the global matrix `A`.\n",
    "    \"\"\"\n",
    "    identity = np.eye(len(A), dtype=float)\n",
    "    residual = A @ x - identity\n",
    "    return (2 * A.T) @ residual"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Square 5x5 matrix read as a global by F and dF.\n",
    "A = np.array(\n",
    "    [\n",
    "        [2, 5, 1, 4, 6],\n",
    "        [3, 5, 0, 0, 0],\n",
    "        [1, 1, 0, 3, 8],\n",
    "        [6, 6, 2, 2, 1],\n",
    "        [8, 3, 5, 1, 4],\n",
    "    ],\n",
    "    dtype=float,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[ 0.79, -0.01, 0.18, 0.19, -0.08],\n",
       " [-0.01, 0.8 , 0. , 0.2 , -0.07],\n",
       " [ 0.18, 0. , 0.85, -0.15, 0.07],\n",
       " [ 0.19, 0.2 , -0.15, 0.66, 0.13],\n",
       " [-0.08, -0.07, 0.07, 0.13, 0.95]]), 0.54691984767143453)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start from an all-zero matrix shaped like A.\n",
    "X, loss1 = gradient_descent(F, dF, np.zeros_like(A), steps=300)\n",
    "(A @ X).round(2), loss1[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[ 0.84, -0.05, 0.1 , 0.1 , -0.06],\n",
       " [-0.04, 0.82, 0.03, 0.19, -0.03],\n",
       " [ 0.12, 0.03, 0.9 , -0.08, 0.03],\n",
       " [ 0.15, 0.2 , -0.12, 0.75, 0.06],\n",
       " [-0.08, -0.09, 0.04, 0.1 , 0.97]]), 0.32396954419819657)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start from an all-zero matrix shaped like A.\n",
    "X, loss2 = rmsprop(F, dF, np.zeros_like(A), steps=300)\n",
    "(A @ X).round(2), loss2[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[ 0.99, 0.01, 0. , -0.01, 0. ],\n",
       " [-0. , 1. , 0. , -0. , 0. ],\n",
       " [-0. , 0.01, 1. , -0.01, 0. ],\n",
       " [-0.01, 0.01, 0. , 0.99, 0. ],\n",
       " [-0.01, 0.01, 0. , -0.01, 1. ]]), 0.00062303887772378397)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start from an all-zero matrix shaped like A.\n",
    "X, loss3 = rmsprop_momentum(F, dF, np.zeros_like(A), steps=300)\n",
    "(A @ X).round(2), loss3[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force === true) {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"
\\n\"+\n", " \"\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"\\n\"+\n",
" \"