{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Random Sampling in Julia - speed check" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Written in Julia 1.7.0\n", "\n", "using Random, Distributions, BenchmarkTools\n", "# See docs: http://juliastats.org/Distributions.jl/stable/starting/" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# set seed\n", "Random.seed!(123);" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-1.4632513788889214" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# base Julia has rand() and randn()\n", "rand() # random [0, 1]\n", "randn() # random Normal(μ=0, σ=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "how long does it take to sample 1M from a normal distribution?" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BenchmarkTools.Trial: 946 samples with 1 evaluation.\n", " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m4.317 ms\u001b[22m\u001b[39m … \u001b[35m 10.960 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 25.03%\n", " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m4.923 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m5.282 ms\u001b[22m\u001b[39m ± \u001b[32m803.382 μs\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m5.40% ± 9.83%\n", "\n", " \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m█\u001b[39m▂\u001b[39m█\u001b[34m▁\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", " \u001b[39m▂\u001b[39m▃\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▅\u001b[39m▇\u001b[39m▄\u001b[39m▃\u001b[32m▄\u001b[39m\u001b[39m▄\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▃\n", " 4.32 ms\u001b[90m Histogram: frequency by time\u001b[39m 7.78 ms \u001b[0m\u001b[1m<\u001b[22m\n", "\n", " Memory estimate\u001b[90m: \u001b[39m\u001b[33m7.63 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m2\u001b[39m." ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# sample from normal distribution\n", "@benchmark s = rand(Normal(5, 2), 10^6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is....really fast." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "how long does it take to sample 1M from a triangular distribution?\n", "\n", "*note: Triangular takes arguments (min, max, mode) whereas numpy took (min, mode, max)*" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BenchmarkTools.Trial: 517 samples with 1 evaluation.\n", " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m8.436 ms\u001b[22m\u001b[39m … \u001b[35m17.522 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 20.14%\n", " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m9.096 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m9.669 ms\u001b[22m\u001b[39m ± \u001b[32m 1.212 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m3.82% ± 7.18%\n", "\n", " \u001b[39m \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▆\u001b[39m█\u001b[39m▂\u001b[34m▁\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", " \u001b[39m▃\u001b[39m▄\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▇\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▃\u001b[32m▃\u001b[39m\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▃\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▃\n", " 8.44 ms\u001b[90m Histogram: frequency by time\u001b[39m 14.1 ms \u001b[0m\u001b[1m<\u001b[22m\n", "\n", " Memory estimate\u001b[90m: \u001b[39m\u001b[33m7.63 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m2\u001b[39m." ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@benchmark rand(TriangularDist(0, 5, 3), 10^6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So ~10ms vs python's 435 ms. Wow!\n", "\n", "Let's try 1M samples from a discrete distribution with 3 potential values:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[0, 1, 2], p=[0.2, 0.7, 0.1])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = Distributions.DiscreteNonParametric([0, 1, 2], [0.2, 0.7, 0.1])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BenchmarkTools.Trial: 397 samples with 1 evaluation.\n", " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m10.955 ms\u001b[22m\u001b[39m … \u001b[35m18.581 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 0.00%\n", " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m12.150 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m12.602 ms\u001b[22m\u001b[39m ± \u001b[32m 1.195 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m2.94% ± 5.79%\n", "\n", " \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▄\u001b[39m█\u001b[39m▆\u001b[39m▄\u001b[39m \u001b[39m▂\u001b[34m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", " \u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m▇\u001b[39m\u001b[39m▆\u001b[39m█\u001b[39m▅\u001b[39m▅\u001b[32m▆\u001b[39m\u001b[39m▅\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▅\u001b[39m▂\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▃\u001b[39m▂\u001b[39m \u001b[39m▃\n", " 11 ms\u001b[90m Histogram: frequency by time\u001b[39m 16.2 ms \u001b[0m\u001b[1m<\u001b[22m\n", "\n", " Memory estimate\u001b[90m: \u001b[39m\u001b[33m7.63 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m7\u001b[39m." ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "@benchmark rand(d, 10^6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Discrete distribution takes about 13 ms vs the 43 ms it took Python, still very impressive." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bottom Line: Use Julia!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Julia 1.7.0", "language": "julia", "name": "julia-1.7" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.7.0" } }, "nbformat": 4, "nbformat_minor": 4 }