{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Performance comparison\n", "Presented by Chiyoung Ahn (https://github.com/chiyahn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Consider minimizing following objective function\n", "$$\n", "f(x) = \\sum_{i=1}^N \\left[ (x_i - m_i)^2 + \\log(x_i) \\right]\n", "$$\n", "with a fixed $m$ and some fairly large `N` by limited-memory BFGS (LBFGS):" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "using NLopt, BenchmarkTools, ForwardDiff, NLSolversBase, DiffResults, Flux, ReverseDiff, DiffResults" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "f (generic function with 1 method)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "N = 5000\n", "x0 = fill(1.0, N)\n", "m = range(2,5,length = N)\n", "f(x) = sum((x .- m).^2) + sum(log.(x))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Vanilla `ForwardDiff.jl`" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 696.862 ms (67239 allocations: 3.70 GiB)\n", "got 5966.9406953073385 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "function g!(G::Vector, x::Vector)\n", " ForwardDiff.gradient!(G, f, x)\n", "end\n", "\n", "function fg!(x::Vector, grad::Vector)\n", " if length(grad) > 0 # gradient of f(x)\n", " g!(grad, x)\n", " end\n", " f(x)\n", "end\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # 2 indicates the length of `x`\n", "lower_bounds!(opt, fill(1.0, N)) # find `x` above 0\n", "min_objective!(opt, fg!) # specifies that optimization problem is on minimization\n", "\n", "# solve the optimization problem\n", "(minf,minx,ret) = @btime optimize($opt, $x0) seconds = 30.0\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 2. `NLoptAdapter` with autodiff" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NLoptAdapter" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# See nlopt-tutorial.ipynb\n", "# NOTE: the type constraint belongs in the parameter list; writing\n", "# `struct NLoptAdapter{T} <: Function where T <: AbstractObjective` parses,\n", "# but the constraint on T is silently dropped.\n", "struct NLoptAdapter{T <: AbstractObjective} <: Function\n", "    nlsolver_base::T\n", "end\n", "\n", "# implement fg!; note that NLopt's callback order (x, grad) is the reverse of NLSolversBase's fdf(grad, x)\n", "(adapter::NLoptAdapter)(x, df) = adapter.nlsolver_base.fdf(df, x)\n", "(adapter::NLoptAdapter)(result, x, jacobian_transpose) = adapter.nlsolver_base.fdf(result, jacobian_transpose', x)\n", "\n", "# constructors\n", "NLoptAdapter(f, x, autodiff) = NLoptAdapter(OnceDifferentiable(f, x, autodiff = autodiff))\n", "NLoptAdapter(f!, x::Vector, F::Vector, autodiff) = NLoptAdapter(OnceDifferentiable(f!, x, F, autodiff = autodiff))" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 546.883 ms (67112 allocations: 3.69 GiB)\n", "got 5966.9406953073385 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "f_opt = NLoptAdapter(f, zeros(N), :forward)\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(0.0, N)) # constrain each x[i] to be at least 0.0\n", "min_objective!(opt, f_opt) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0) seconds = 30.0\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 3. `NLoptAdapter` with numerical gradients" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 6.025 s (1809596 allocations: 8.24 GiB)\n", "got 5966.94069530734 after 11 iterations (returned FTOL_REACHED)\n" ] } ], "source": [ "f_opt = NLoptAdapter(f, zeros(N), :finite)\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(0.0, N)) # constrain each x[i] to be at least 0.0\n", "min_objective!(opt, f_opt) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0) seconds = 30.0\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
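{ "cell_type": "markdown", "metadata": {}, "source": [ "The slowdown above is expected: finite differencing costs $O(N)$ objective evaluations per gradient, whereas forward-mode AD propagates derivatives in a single (chunked) sweep. As a rough illustration (a sketch added for exposition; the actual `:finite` backend is handled internally by `NLSolversBase`), a central-difference gradient looks like this:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Central-difference gradient sketch (exposition only): 2N evaluations of f\n", "# per gradient call, which is why this approach is far slower and more\n", "# allocation-heavy than AD for large N.\n", "function grad_central!(G::Vector, f, x::Vector; h = 1e-6)\n", "    xt = copy(x)\n", "    for i in eachindex(x)\n", "        xt[i] = x[i] + h; fp = f(xt)\n", "        xt[i] = x[i] - h; fm = f(xt)\n", "        G[i] = (fp - fm) / (2h)\n", "        xt[i] = x[i] # restore the perturbed coordinate\n", "    end\n", "    return G\n", "end" ] },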
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 4. `Flux.jl`" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 7.268 ms (3752 allocations: 5.35 MiB)\n", "got 5966.9406953073385 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "using Flux.Tracker: gradient_ # Flux itself is already loaded in the first cell\n", "\n", "function fg!(x::Vector, grad::Vector)\n", "    val = f(x)\n", "    grad[:] = gradient_(f, x)[1]\n", "    return val\n", "end\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(1.0, N)) # constrain each x[i] to be at least 1.0\n", "min_objective!(opt, fg!) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0)\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 5. `ReverseDiff.jl`" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 16.604 ms (62 allocations: 41.42 KiB)\n", "got 5966.9406953073385 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "# record and compile the gradient tape once, up front\n", "result = DiffResults.GradientResult(similar(x0));\n", "tape = ReverseDiff.compile(ReverseDiff.GradientTape(f, similar(x0)));\n", "\n", "function fg!(x::Vector, grad::Vector)\n", "    ReverseDiff.gradient!(result, tape, x)\n", "    grad[:] = DiffResults.gradient(result)\n", "    DiffResults.value(result)\n", "end\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(1.0, N)) # constrain each x[i] to be at least 1.0\n", "min_objective!(opt, fg!) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0)\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "The current version of `ReverseDiff.jl` (v0.3.1) has a broadcast implementation targeting an older version of Julia (v0.6). To fully exploit the performance of `ReverseDiff.jl`, some explicit broadcasting is needed:" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "f (generic function with 1 method)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# explicit broadcasting\n", "ReverseDiff.@forward g(x, m) = (x - m)^2\n", "f(x) = sum(broadcast(g, x, m)) + sum(broadcast(log, x))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Compare the performance:" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5.637 ms (125 allocations: 44.23 KiB)\n", "got 5966.9406953073385 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "# re-record and compile the tape for the redefined f\n", "result = DiffResults.GradientResult(similar(x0));\n", "tape = ReverseDiff.compile(ReverseDiff.GradientTape(f, similar(x0)));\n", "\n", "function fg!(x::Vector, grad::Vector)\n", "    ReverseDiff.gradient!(result, tape, x)\n", "    grad[:] = DiffResults.gradient(result)\n", "    DiffResults.value(result)\n", "end\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(1.0, N)) # constrain each x[i] to be at least 1.0\n", "min_objective!(opt, fg!) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0)\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
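{ "cell_type": "markdown", "metadata": {}, "source": [ "One caveat worth flagging (added here for exposition): `ReverseDiff.compile` freezes the operations recorded when the tape was built, so any value-dependent control flow in `f` would be baked in. Our `f` is branch-free, but a quick cross-check against another backend on a fresh point is cheap insurance:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sanity check (exposition only, not part of the timings): compare the\n", "# compiled-tape gradient with ForwardDiff on a point other than x0.\n", "xtest = fill(1.5, N)\n", "ReverseDiff.gradient!(result, tape, xtest)\n", "maximum(abs.(DiffResults.gradient(result) .- ForwardDiff.gradient(f, xtest)))" ] },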
{ "cell_type": "markdown", "metadata": {}, "source": [ "# `Flux.jl` and `ReverseDiff.jl` have remarkably good performance\n", "\n", "Let's see how they scale when `N = 1000000`:" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "f (generic function with 1 method)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "N = 1000000 # one million!\n", "x0 = fill(1.0, N)\n", "m = range(2, 5, length = N)\n", "f(x) = sum((x .- m).^2) + sum(log.(x))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 1. `Flux.jl`" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 3.266 s (3752 allocations: 1.01 GiB)\n", "got 1.193404758611993e6 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "function fg!(x::Vector, grad::Vector)\n", "    val = f(x)\n", "    grad[:] = gradient_(f, x)[1]\n", "    return val\n", "end\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(1.0, N)) # constrain each x[i] to be at least 1.0\n", "min_objective!(opt, fg!) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0)\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 2. `ReverseDiff.jl`" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1.363 s (125 allocations: 7.63 MiB)\n", "got 1.193404758611993e6 after 9 iterations (returned SUCCESS)\n" ] } ], "source": [ "# explicit broadcasting\n", "ReverseDiff.@forward g(x, m) = (x - m)^2\n", "f(x) = sum(broadcast(g, x, m)) + sum(broadcast(log, x))\n", "\n", "# re-record and compile the tape (N has changed, so the old tape is stale)\n", "result = DiffResults.GradientResult(similar(x0));\n", "tape = ReverseDiff.compile(ReverseDiff.GradientTape(f, similar(x0)));\n", "\n", "function fg!(x::Vector, grad::Vector)\n", "    ReverseDiff.gradient!(result, tape, x)\n", "    grad[:] = DiffResults.gradient(result)\n", "    DiffResults.value(result)\n", "end\n", "\n", "# define the optimization problem\n", "opt = Opt(:LD_LBFGS, N) # N is the dimension of `x`\n", "lower_bounds!(opt, fill(1.0, N)) # constrain each x[i] to be at least 1.0\n", "min_objective!(opt, fg!) # declare a minimization problem\n", "\n", "# solve the optimization problem\n", "(minf, minx, ret) = @btime optimize($opt, $x0)\n", "numevals = opt.numevals # the number of function evaluations\n", "println(\"got $minf after $numevals iterations (returned $ret)\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "🤯" ] } ], "metadata": { "kernelspec": { "display_name": "Julia 1.0.0", "language": "julia", "name": "julia-1.0" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.0.0" } }, "nbformat": 4, "nbformat_minor": 2 }