{ "cells": [ { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Calculation Time: 17.7 seconds\n", "pi: 3.1737\n", "3.0655" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt1\n", "from datetime import datetime\n", "\n", "%matplotlib inline\n", "\n", "# returns the elapsed milliseconds since the start of the program\n", "def timesec(start_time):\n", " dt = datetime.now() - start_time\n", " ms = (dt.days * 24 * 60 * 60 + dt.seconds) + dt.microseconds /1000000\n", " return ms\n", "\n", "start_time = datetime.now()\n", "\n", "fig= plt1.figure(figsize=(15, 5), constrained_layout=True)\n", "widths = [5,5,5]\n", "heights = [5]\n", "gs=fig.add_gridspec(1,3,width_ratios=widths, height_ratios=heights, wspace=0.05)\n", "\n", "ax1a=fig.add_subplot(gs[0,:1])\n", "ax1b=fig.add_subplot(gs[0,1:2])\n", "ax1c=fig.add_subplot(gs[0,2:])\n", "\n", "ax1a.clear()\n", "ax1b.clear()\n", "ax1c.clear()\n", "\n", "#---------------------------------------Main Code---------------------------------------\n", "#Samples, trials (pick trials >1000 to avoid zero counts) and repeats\n", "samples=500\n", "trials=2000\n", "repeats=1000\n", "\n", "#Set arrays to zero\n", "piarray=np.zeros(repeats)\n", "narray=np.zeros(repeats)\n", "stdarray=np.zeros(repeats)\n", "countarray=np.zeros(repeats)\n", "\n", "#Sample array and expected value\n", "sa=np.array([0,1,2,3,4,5,6,7,8,9])\n", "#sa=np.array([0,1])\n", "ev=(np.max(sa)-np.min(sa))/2\n", "\n", "#Detemermine: Stdev, And dx\n", "var=((np.max(sa)-np.min(sa)+1)**2-1)/12\n", "stdev=np.sqrt(var)\n", "stdevt=stdev/np.sqrt(samples)\n", "dx=1/samples\n", "\n", "for p in range(repeats):\n", "\n", " #Create Array random numbers\n", " random=np.random.choice(sa,[trials,samples])\n", "\n", " #Determine mean from random no of numbers (of samples)\n", " m=np.mean(random,axis=1)\n", " mn=np.mean(m)\n", "\n", " #Create dataframe and count number of observation per interval\n", " df=pd.DataFrame({'m' : m})\n", " dfg=df.groupby(['m'])['m'].agg(['count']).reset_index()\n", "\n", " #Histogram x=bins and y is density\n", " x=dfg['m'].to_numpy()\n", " yt=dfg['count'].to_numpy()\n", " y=yt/(dx*trials)\n", "\n", " #Count number of observation on mean: 4.5\n", " out=dfg[(dfg['m']<(ev+dx/2)) & (dfg['m']>(ev-dx/2))]\n", " #out=dfg[((dfg['m'] < (4.5+stdevt+dx/2)) & (dfg['m'] > (4.5+stdevt-dx/2))) | ((dfg['m'] < (4.5-stdevt+dx/2)) & (dfg['m'] > (4.5-stdevt-dx/2)))]\n", " \n", " nc=out['count'].to_numpy()\n", " n=np.sum(nc)\n", " narray[p]=n\n", "\n", " #Stdev Array\n", " stdev=np.std(m)\n", " stdarray[p]=stdev\n", " \n", " #Countarray\n", " count=np.sum(yt)\n", " countarray[p]=count\n", " \n", " #Plot 200 distributions\n", " if (1000*p/repeats)%2==0: \n", " ax1a.plot(x,y, marker='o', color='grey', linestyle='', markersize=0.02, linewidth=0, zorder=10)\n", "\n", "#Calculate pi and fill array\n", "piarray=0.5*(trials*dx/(stdevt*narray))**2\n", "#piarray=0.5*(trials*dx/(stdevt*narray)*np.exp(-0.5*(1-ev/stdevt)**2))**2\n", "\n", "#Output \n", "nmean=np.mean(narray)\n", "nsum=np.sum(narray)\n", "nstdev=np.std(narray)\n", "p=nsum/np.sum(countarray)\n", "nstdevm=nstdev/np.sqrt(repeats)\n", "\n", "#probability mean value theoretical, sample size based upon samples from [0,1,2..9]\n", "pt=dx/(stdevt*np.sqrt(2*np.pi))\n", "n=(np.max(sa)-np.min(sa))*1/dx\n", "\n", "#Normality test https://en.wikipedia.org/wiki/Binomial_distribution#Normal_approximation\n", "ntest=True\n", "\n", "if (n*p<9):\n", " ntp='np=' + str(np.round(n*p,0)) + ', <9 fail'\n", " ntest=False\n", "else:\n", " ntp='np=' + str(np.round(n*p,0)) + ', ≥9 pass'\n", " \n", "if ((n*(1-p))<9):\n", " nomp='n(1-p)=' + str(np.round(n*(1-p),0)) + ', <9 fail'\n", " ntest=False\n", "else:\n", " nomp='n(1-p)=' + str(np.round(n*(1-p),0)) + ', ≥9 pass'\n", "\n", "if (ntest==False):\n", " nttext='normality test failed'\n", "else:\n", " nttext='normality test passed' \n", "\n", "#95% score\n", "#nerr=trials*1.96*np.sqrt(p*(1-p)/(trials*repeats))\n", "#nerr=n*1.96*np.sqrt(p*(1-p)/(n))\n", "\n", "pim=0.5*(dx*trials/(stdevt*nmean))**2\n", "#pim=0.5*(trials*dx/(stdevt*narray)*np.exp(-0.5*(1-ev/stdevt)**2))**2\n", "\n", "piminc=0.5*(dx*trials/(stdevt*(nmean+2*nstdevm)))**2\n", "pimaxc=0.5*(dx*trials/(stdevt*(nmean-2*nstdevm)))**2\n", "#---------------------------------------Main Code---------------------------------------\n", "\n", "xt=np.linspace(np.min(sa),np.max(sa),1000)\n", "yt=1/(stdevt*np.sqrt(2*np.pi))*np.exp(-0.5*((ev-xt)/stdevt)**2) \n", "\n", "ax1a.plot(xt,yt, marker='', color='blue', linestyle='-', markersize=0, linewidth=1.5,zorder=10,label='$\\pi$ (theory)')\n", "ax1a.plot(x,y, marker='o', color='black', linestyle='', markersize=0.1, linewidth=0, zorder=10,label='statistical')\n", "ax1a.set_xlabel('$Mean$',fontsize=10)\n", "ax1a.set_ylabel('$Density$',fontsize=10)\n", "ax1a.set_title('Mean of '+ str(samples) + ' samples, '+ str(sa) + '\\n' +str(trials) + ' trials, ' + str(repeats) + ' repeats',fontsize=12)\n", "ax1a.text(0.02,0.98,'Mean: ' + str(np.round(np.mean(m),4)) + ', Std: ' + str(np.round(np.mean(stdarray),4)) + '\\nn: ' + str(np.round(n,4)) + ' (bins)\\np: ' + str(np.round(p,4)) + ' (mean)\\n' + nomp + '\\n' + ntp + '\\n' + nttext , backgroundcolor='white', transform=ax1a.transAxes, fontsize=10,verticalalignment='top')\n", "ax1a.grid(b=True, which='major', color='#666666', linestyle='-', zorder=0) \n", "ax1a.legend(loc='upper right',fontsize=10,markerscale=15)\n", "ax1a.ticklabel_format(axis='y', style='sci', scilimits=None, useOffset=None, useLocale=None, useMathText=None)\n", "ax1a.axes.set_ylim([0,1.2*1/(np.std(m)*np.sqrt(2*np.pi))])\n", "ax1a.axes.set_xlim([ev-3*stdevt,ev+3*stdevt])\n", "\n", "#Determine Bins and Max n array\n", "step=(np.max(narray)-np.min(narray))/20\n", "nround=step*np.round(narray/step,0)\n", "df=pd.DataFrame({'nr' : nround})\n", "dfg=df.groupby(['nr'])['nr'].agg(['count']).reset_index()\n", "x=dfg['nr'].to_numpy()\n", "bins=dfg['nr'].to_numpy()\n", "y=dfg['count'].to_numpy()\n", "y=y/(step*np.sum(y))\n", "mx=np.max(y)\n", "\n", "ax1b.hist(narray, density=True, bins=(bins-0.5*step), rwidth=1, color='black', edgecolor='white', linewidth=1, zorder=10)\n", "ax1b.plot(x,y, marker='v', color='gray', linestyle='-', markersize=2, zorder=10,linewidth=0)\n", "ax1b.plot([nmean,nmean],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=1.5,zorder=12, label='$\\pi$ (statistical)')\n", "#ax1b.plot([nmean-3*nstdevm,nmean-3*nstdevm],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5,zorder=12)\n", "#ax1b.plot([nmean+3*nstdevm,nmean+3*nstdevm],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5,zorder=12)\n", "ax1b.set_xlabel('$Count$',fontsize=10)\n", "ax1b.set_ylabel('$Density$',fontsize=10)\n", "ax1b.set_title('Count $\\overline{n}$ (mean='+ str(ev) +')\\n' + str(trials) + ' trials, ' + str(repeats) + ' repeats',fontsize=12)\n", "ax1b.text(0.02,0.98,'Mean: ' + str(np.round(nmean,4)) + ', Std: '+ str(np.round(nstdev,4)) + '\\n' + str(np.round(nmean-2*nstdevm,4)) + '$<\\overline{n}$<'+ str(np.round(nmean+2*nstdevm,4)) + ' (95%)' , backgroundcolor='white', transform=ax1b.transAxes, fontsize=10,verticalalignment='top', zorder=15)\n", "ax1b.grid(b=True, which='major', color='#666666', linestyle='-', zorder=0)\n", "ax1b.ticklabel_format(axis='y', style='sci', scilimits=None, useOffset=None, useLocale=None, useMathText=None)\n", "ax1b.axes.set_ylim([0,1.2*mx])\n", "ax1b.axes.set_xlim([np.mean(narray)-4*np.std(narray),np.mean(narray)+4*np.std(narray)])\n", "ax1b.ticklabel_format(axis=\"x\", style=\"sci\", scilimits=(0,0))\n", "ax1b.legend(loc='upper right',fontsize=10)\n", "\n", "#Determine Bins and Max Piarray\n", "step=(np.max(piarray)-np.min(piarray))/20\n", "piround=step*np.round(piarray/step,0)\n", "df=pd.DataFrame({'pir' : piround})\n", "dfg=df.groupby(['pir'])['pir'].agg(['count']).reset_index()\n", "x=dfg['pir'].to_numpy()\n", "bins=dfg['pir'].to_numpy()\n", "y=dfg['count'].to_numpy()\n", "y=y/(step*np.sum(y))\n", "mx=np.max(y)\n", "\n", "ax1c.hist(piarray, density=True, rwidth=1, bins=(bins-0.5*step), color='black', edgecolor='white', linewidth=1, zorder=1)\n", "ax1c.plot(x,y, marker='v', color='gray', linestyle='-', markersize=2, zorder=10,linewidth=0)\n", "ax1c.plot([pim,pim],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=1.5, label='$\\pi$ (statistical)')\n", "#ax1c.plot([piminc,piminc],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5)\n", "#ax1c.plot([pimaxc,pimaxc],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5)\n", "ax1c.set_xlabel('$\\pi$',fontsize=10)\n", "ax1c.set_ylabel('$Density$',fontsize=10)\n", "ax1c.set_title('$\\pi$ Histogram\\n'+ str(repeats) + ' repeats',fontsize=12)\n", "ax1c.text(0.02,0.98,'$\\pi$: ' + str(np.round(pim,4)) + ' (mean $\\overline{n}$)'+ '\\n' + str(np.round(piminc,4)) + '$<\\pi$<'+ str(np.round(pimaxc,4)) + ' (95%)' , backgroundcolor='white', transform=ax1c.transAxes, fontsize=10,verticalalignment='top')\n", "ax1c.grid(b=True, which='major', color='#666666', linestyle='-', zorder=0)\n", "ax1c.legend(loc='upper right',fontsize=10)\n", "ax1c.axes.set_ylim([0,1.2*mx])\n", "#ax1c.axes.set_xlim([np.min(piarray),np.mx(piarray)])\n", "ax1c.ticklabel_format(axis='y', style='sci', scilimits=None, useOffset=None, useLocale=None, useMathText=None)\n", "\n", "plt1.savefig('Pi Random ' + str(sa) + ', Samples=' + str(int(samples)) + ', Trials=' + str(int(trials)) + ', Repeats=' + str(int(repeats)), dpi=300, bbox_inches='tight')\n", "\n", "print(\"Calculation Time: \" + str(round(timesec(start_time),1))+\" seconds\")\n", "print('pi: ' + str(np.round(pim,4)))\n", "print(str(np.round(piminc,4)) + '