# Chapter 7: "Goodness-of-fit tests"

import numpy as np
import scipy.stats

# ---------------------------------------------------------------------------
# p.137
# ---------------------------------------------------------------------------
# Notes carried over from the original cell:
#   - exponentiation in Python is written with ** (or pow);
#   - element-wise array arithmetic is done with numpy;
#   - scipy.stats is convenient for hypothesis tests.
# https://ryamada.hatenadiary.jp/entry/20150101/1419899280


def HWE_test(o1, o2, o3):
    """Chi-square goodness-of-fit test of Hardy-Weinberg equilibrium (HWE).

    The allele frequency ``p`` is estimated from the genotype counts as
    ``(2*o1 + o2) / (2*n)`` with ``n = o1 + o2 + o3``.  The expected genotype
    counts under HWE are ``n*p**2``, ``2*n*p*(1-p)`` and ``n*(1-p)**2``, and
    the statistic is the usual ``sum((observed - expected)**2 / expected)``.

    Parameters
    ----------
    o1 : int
        Observed number of homozygotes for the allele whose frequency is ``p``.
    o2 : int
        Observed number of heterozygotes.
    o3 : int
        Observed number of homozygotes for the other allele.

    Returns
    -------
    float
        Upper-tail probability of the chi-square distribution with one degree
        of freedom at the statistic.  The same value is printed, as before.

    Notes
    -----
    If only one allele is observed (``p`` is 0 or 1) two expected counts are
    zero, the statistic is ``nan`` and so is the returned p-value.

    Example
    -------
    ``HWE_test(50, 120, 30)`` prints a p-value of about 0.0027.
    """
    n = o1 + o2 + o3
    p = (2 * o1 + o2) / (2 * n)
    q = 1 - p

    observed = np.array([o1, o2, o3])
    expected = np.array([n * p**2, 2 * n * p * q, n * q**2])
    X = np.sum((observed - expected) ** 2 / expected)

    # One degree of freedom: three genotype classes, minus one, minus the one
    # parameter (p) estimated from the data.
    # sf() is the survival function (1 - cdf) evaluated directly; writing
    # ``1 - cdf`` rounds to exactly 0.0 once the cdf is within rounding
    # distance of 1, which happens for strongly significant statistics.
    p_value = scipy.stats.chi2.sf(X, 1)
    print(p_value)
    return p_value


# ---------------------------------------------------------------------------
# p.139
# ---------------------------------------------------------------------------
# https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.chi2_contingency.html


def Association_test(n10, n11, n12, n20, n21, n22, correction=False):
    """Chi-square test on the 2x2 table of allele counts of two groups.

    Each group's three genotype counts are collapsed into two allele counts
    (a homozygote contributes two copies of its allele, a heterozygote one of
    each), and the resulting 2x2 table is passed to
    ``scipy.stats.chi2_contingency``.

    Parameters
    ----------
    n10, n11, n12 : int
        Genotype counts of the first group (the ``case`` row): homozygote,
        heterozygote, other homozygote.
    n20, n21, n22 : int
        Genotype counts of the second group (the ``ctrl`` row), same order.
    correction : bool, optional
        Forwarded to ``chi2_contingency``; ``True`` applies Yates' continuity
        correction.  Defaults to ``False``, the value that was hard-coded
        before.

    Returns
    -------
    tuple-like
        Whatever ``chi2_contingency`` returns: statistic, p-value, degrees of
        freedom and the array of expected frequencies.  The result is also
        printed, as before.

    Example
    -------
    ``Association_test(50, 120, 30, 60, 150, 90)`` gives a statistic of about
    9.60 and a p-value of about 0.0019.
    """
    case = np.array([2 * n10 + n11, n11 + 2 * n12])
    ctrl = np.array([2 * n20 + n21, n21 + 2 * n22])
    table = np.array([case, ctrl])

    result = scipy.stats.chi2_contingency(table, correction=correction)
    print(result)
    return result


# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.fisher_exact.html


def Association_test_fisher(n10, n11, n12, n20, n21, n22, alternative="two-sided"):
    """Fisher's exact test on the 2x2 table of allele counts of two groups.

    The table is built exactly as in the chi-square variant: each group's
    genotype counts are collapsed into two allele counts, then the 2x2 table
    is passed to ``scipy.stats.fisher_exact``.

    Parameters
    ----------
    n10, n11, n12 : int
        Genotype counts of the first group (the ``case`` row): homozygote,
        heterozygote, other homozygote.
    n20, n21, n22 : int
        Genotype counts of the second group (the ``ctrl`` row), same order.
    alternative : {"two-sided", "less", "greater"}, optional
        Forwarded to ``fisher_exact``.  Defaults to ``"two-sided"``, which is
        what the function computed before this parameter existed.

    Returns
    -------
    tuple-like
        Whatever ``fisher_exact`` returns: the sample odds ratio and the
        p-value.  The result is also printed, as before.

    Example
    -------
    ``Association_test_fisher(50, 120, 30, 60, 150, 90)`` gives an odds ratio
    of about 1.494 and a p-value of about 0.0024.
    """
    case = np.array([2 * n10 + n11, n11 + 2 * n12])
    ctrl = np.array([2 * n20 + n21, n21 + 2 * n22])
    table = np.array([case, ctrl])

    result = scipy.stats.fisher_exact(table, alternative=alternative)
    print(result)
    return result


if __name__ == "__main__":
    # Worked examples from the notebook's call cells.  Kept inside a block so
    # that the (already printed) return values are not echoed a second time.
    HWE_test(50, 120, 30)
    Association_test(50, 120, 30, 60, 150, 90)
    Association_test_fisher(50, 120, 30, 60, 150, 90)