{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "295717a7-cb8a-4491-a5c5-1622b347e7e7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 2.978052 seconds\n",
      " 2.977178 seconds\n",
      " 2.951632 seconds\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "3.141649096"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Single-threaded Monte Carlo estimate of π (baseline).\n",
    "# Draws L points (rand(), rand()) and returns four times the fraction of\n",
    "# points whose squared distance from the origin is ≤ 1.\n",
    "function mcpi(L=10^9)\n",
    "    c = 0\n",
    "    for i in 1:L\n",
    "        # The comparison yields a Bool, which is added to the counter as 0 or 1.\n",
    "        c += rand()^2 + rand()^2 ≤ 1\n",
    "    end\n",
    "    4c/L\n",
    "end\n",
    "\n",
    "@time mcpi()\n",
    "@time mcpi()\n",
    "@time mcpi()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "050863f1-a3f1-4197-80c5-3c3722c5cd2c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 0.597769 seconds (14.40 k allocations: 962.984 KiB, 49.09% compilation time)\n",
      " 0.586309 seconds (186 allocations: 18.641 KiB)\n",
      " 0.569634 seconds (185 allocations: 18.266 KiB)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "3.141586064"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Multi-threaded Monte Carlo estimate of π.\n",
    "#\n",
    "# The trial range is split into one contiguous chunk per thread and every\n",
    "# chunk is counted by its own task in a task-local variable; the per-task\n",
    "# counts are summed at the end.\n",
    "#\n",
    "# A shared buffer indexed by Threads.threadid() is avoided on purpose:\n",
    "#   * thread ids are not guaranteed to lie in 1:Threads.nthreads() (for\n",
    "#     example when Julia is started with interactive threads), so such an\n",
    "#     index can go out of bounds;\n",
    "#   * tasks are not pinned to a thread under dynamic scheduling;\n",
    "#   * neighbouring Int counters share a cache line (false sharing).\n",
    "function mcpi_threads(L=10^9)\n",
    "    trials = 1:L\n",
    "    # max(1, ...) keeps the partition length valid when the range is empty.\n",
    "    chunksize = max(1, cld(length(trials), Threads.nthreads()))\n",
    "    tasks = map(Iterators.partition(trials, chunksize)) do chunk\n",
    "        Threads.@spawn begin\n",
    "            hits = 0\n",
    "            for _ in chunk\n",
    "                hits += rand()^2 + rand()^2 ≤ 1\n",
    "            end\n",
    "            hits\n",
    "        end\n",
    "    end\n",
    "    # init=0 makes the reduction well-defined when there are no chunks.\n",
    "    4sum(fetch, tasks; init=0)/L\n",
    "end\n",
    "\n",
    "@time mcpi_threads()\n",
    "@time mcpi_threads()\n",
    "@time mcpi_threads()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4b32fe2b-9729-4ca1-a925-eeea69c9db41",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 0.009801 seconds\n",
      " 0.010009 seconds\n",
      " 0.009699 seconds\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "4.0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "using LoopVectorization\n",
    "\n",
    "# Deliberately incorrect variant: the result is always either 4.0 or 0.0,\n",
    "# never an estimate of π.\n",
    "# NOTE(review): presumably @turbo treats the argument-free rand() calls as\n",
    "# loop-invariant and evaluates them once, so every iteration reuses the same\n",
    "# sample — confirm against the LoopVectorization documentation.\n",
    "function mcpi_turbo_incorrect(L=10^9)\n",
    "    c = 0\n",
    "    @turbo for i in 1:L\n",
    "        c += rand()^2 + rand()^2 ≤ 1\n",
    "    end\n",
    "    4c/L\n",
    "end\n",
    "\n",
    "@time 
mcpi_turbo_incorrect()\n",
    "@time mcpi_turbo_incorrect()\n",
    "@time mcpi_turbo_incorrect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d20cbc91-cf16-4a0e-a8f6-f701efde07a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 0.473665 seconds\n",
      " 0.477346 seconds\n",
      " 0.477735 seconds\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "3.141584064"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "using LoopVectorization\n",
    "\n",
    "# Draw one random point and report whether it lies inside the unit circle.\n",
    "# The argument i is not used in the body.\n",
    "# NOTE(review): presumably it is passed so that @turbo sees a call that\n",
    "# depends on the loop index and evaluates it per iteration (the variant that\n",
    "# calls rand() directly inside @turbo has a recorded result of 4.0) —\n",
    "# confirm against the LoopVectorization documentation.\n",
    "function rand_is_in_unit_circle(i)\n",
    "    x, y = rand(), rand()\n",
    "    return x^2 + y^2 ≤ 1\n",
    "end\n",
    "\n",
    "# Monte Carlo estimate of π with the counting loop wrapped in @turbo.\n",
    "function mcpi_turbo(L=10^9)\n",
    "    hits = 0\n",
    "    @turbo for i in 1:L\n",
    "        hits += rand_is_in_unit_circle(i)\n",
    "    end\n",
    "    return 4 * hits / L\n",
    "end\n",
    "\n",
    "@time mcpi_turbo()\n",
    "@time mcpi_turbo()\n",
    "@time mcpi_turbo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5c81d3f8-5a59-4da5-bcc4-d34ec83cd382",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Threads.nthreads() = 12\n",
      " 0.137297 seconds (27.16 k allocations: 1.736 MiB, 33.36% compilation time)\n",
      " 0.121686 seconds (9 allocations: 336 bytes)\n",
      " 0.111965 seconds (4 allocations: 496 bytes)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "3.141732224"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "using LoopVectorization\n",
    "\n",
    "# Same helper as in the @turbo cell, repeated so this cell runs on its own:\n",
    "# draw one random point and report whether it lies inside the unit circle.\n",
    "# The argument i is not used in the body.\n",
    "function rand_is_in_unit_circle(i)\n",
    "    x, y = rand(), rand()\n",
    "    return x^2 + y^2 ≤ 1\n",
    "end\n",
    "\n",
    "# Monte Carlo estimate of π with the counting loop wrapped in @tturbo,\n",
    "# the threaded counterpart of @turbo.\n",
    "function mcpi_tturbo(L=10^9)\n",
    "    hits = 0\n",
    "    @tturbo for i in 1:L\n",
    "        hits += rand_is_in_unit_circle(i)\n",
    "    end\n",
    "    return 4 * hits / L\n",
    "end\n",
    "\n",
    "@show Threads.nthreads()\n",
    "@time mcpi_tturbo()\n",
    "@time mcpi_tturbo()\n",
    "@time mcpi_tturbo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5d33fe0b-e6e4-4316-b03d-96c1becd1a46",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 292.623 ms (0 allocations: 0 bytes)\n",
      " 55.793 ms (62 allocations: 7.61 KiB)\n",
      " 46.542 ms (0 allocations: 0 bytes)\n",
      " 8.269 ms (0 allocations: 0 
bytes)\n",
      " 18.527 ms (122 allocations: 6.25 KiB)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(3.1416882, 3.14183216, 3.14119872, 3.14136, 3.14176212)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "using CUDA\n",
    "using BenchmarkTools\n",
    "\n",
    "# Monte Carlo estimate of π computed with CUDA.jl array operations.\n",
    "# Generates a 2-by-L array of random numbers with element type T, sums the\n",
    "# squares along each column, counts the columns whose sum is ≤ 1 and scales\n",
    "# the count by 4/L. The positional argument t only binds the type parameter T.\n",
    "function mcpi_cuda_count(L=10^8, t::Type{T}=Float32) where T\n",
    "    points = CUDA.rand(T, 2, L)\n",
    "    squared_norms = sum(x -> x^2, points; dims=1)\n",
    "    inside = count(≤(1), squared_norms)\n",
    "    return 4 * inside / L\n",
    "end\n",
    "\n",
    "# Benchmark every variant on the same number of samples (10^8).\n",
    "a = @btime mcpi(10^8)             # plain loop (single thread)\n",
    "b = @btime mcpi_threads(10^8)     # Threads.@threads\n",
    "c = @btime mcpi_turbo(10^8)       # LoopVectorization.@turbo (single thread)\n",
    "d = @btime mcpi_tturbo(10^8)      # LoopVectorization.@tturbo (multi-thread)\n",
    "e = @btime mcpi_cuda_count(10^8)  # CUDA (GPU)\n",
    "a, b, c, d, e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0552ed2-7d8c-4979-9b61-cde1fe35f5d1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "jupytext": {
   "encoding": "# -*- coding: utf-8 -*-",
   "formats": "ipynb,jl:hydrogen"
  },
  "kernelspec": {
   "display_name": "Julia 1.10.0",
   "language": "julia",
   "name": "julia-1.10"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}