{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datascience import *\n",
    "import numpy as np\n",
    "import matplotlib\n",
    "from mpl_toolkits.mplot3d import Axes3D\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plots\n",
    "plots.style.use('fivethirtyeight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Decisions ##"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# np.array(list) converts list to an array\n",
    "# provided all the elements of list are of the same type\n",
    "\n",
    "# 100 students: 60% second years, 40% third years.\n",
    "n = 100\n",
    "second = round(n * 0.6)\n",
    "third = round(n * 0.4)\n",
    "\n",
    "# Half of the second years and 80% of the third years have declared.\n",
    "# The order of the pieces in `major` must line up with the order in `year`.\n",
    "year = np.array(['Second'] * second + ['Third'] * third)\n",
    "major = np.array(['Declared'] * (round(second * 0.5)) + ['Undeclared'] * (round(second * 0.5)) +\n",
    "                 ['Declared'] * (round(third * 0.8)) + ['Undeclared'] * (round(third * 0.2)))\n",
    "\n",
    "students = Table().with_columns(\n",
    "    'Year', year,\n",
    "    'Major', major\n",
    ")\n",
    "students.show(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "students.pivot('Major', 'Year')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chance of third year, given that they have declared\n",
    "# P(third year | declared) = 32/(30+32)\n",
    "\n",
    "32 / 62"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# P(second year | declared)\n",
    "\n",
    "30 / 62"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Bayes' Rule"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Proportion of 2nd Years among students who are Declared\n",
    "(0.6 * 0.5) / ((0.6 * 0.5) + (0.4 * 0.8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Proportion of 3rd Years among students who are Declared\n",
    "(0.4 * 0.8) / ((0.6 * 0.5) + (0.4 * 0.8))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Interpretation by Physicians of Clinical Laboratory Results (1978)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# P(Disease | Test +) with prior P(Disease) = 0.001 and a 5% false positive rate\n",
    "# Problem did not give the true positive rate (it was assumed to be 100%)\n",
    "(0.001 * 1) / ((0.001 * 1) + (0.999 * 0.05))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 10000\n",
    "disease = round(n * 0.001)\n",
    "# Take complements by subtraction instead of rounding each share separately,\n",
    "# so the group sizes always add up and both columns have the same length.\n",
    "no_disease = n - disease\n",
    "false_positives = round(no_disease * 0.05)\n",
    "\n",
    "status = np.array(['Disease'] * disease + ['No disease'] * no_disease)\n",
    "# Everyone with the disease tests positive (100% true positive rate assumed)\n",
    "result = np.array(['Test +'] * disease + ['Test +'] * false_positives +\n",
    "                  ['Test -'] * (no_disease - false_positives))\n",
    "\n",
    "persons = Table().with_columns(\n",
    "    'Status', status,\n",
    "    'Test Result', result\n",
    ")\n",
    "persons.show(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Among the Test+ column, more people do *not* have the disease than do\n",
    "persons.pivot('Test Result', 'Status')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number with \"No disease\"\n",
    "500 + 9490"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# P(Disease | Test +)\n",
    "10/510"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Using Subjective Prior Probabilities ##"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# P(Disease | positive), P(Disease) = .1\n",
    "# With a prior of 0.1 the complement is P(No disease) = 1 - 0.1 = 0.9\n",
    "(0.1 * 1) / ((0.1 * 1) + (0.9 * 0.05))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# P(Disease | positive), P(Disease) = .5\n",
    "(0.5 * 1) / ((0.5 * 1) + (0.5 * 0.05))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_population(prior_disease_prob, n, false_positive_rate=0.05):\n",
    "    \"\"\"Build a synthetic population and cross-tabulate status against test result.\n",
    "\n",
    "    The test is assumed to have a 100% true positive rate, so every person\n",
    "    with the disease is labelled 'Test +'.\n",
    "\n",
    "    Args:\n",
    "        prior_disease_prob: Prior probability of disease, between 0 and 1.\n",
    "        n: Total number of people in the population.\n",
    "        false_positive_rate: Chance that a person without the disease\n",
    "            tests positive, between 0 and 1. Defaults to 0.05.\n",
    "\n",
    "    Returns:\n",
    "        The result of pivoting 'Test Result' against 'Status'\n",
    "        (one row per status, one column per test result).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If either probability is outside [0, 1].\n",
    "    \"\"\"\n",
    "    if not 0 <= prior_disease_prob <= 1:\n",
    "        raise ValueError('prior_disease_prob must be between 0 and 1')\n",
    "    if not 0 <= false_positive_rate <= 1:\n",
    "        raise ValueError('false_positive_rate must be between 0 and 1')\n",
    "\n",
    "    disease = round(n * prior_disease_prob)\n",
    "    # Take complements by subtraction instead of rounding each share\n",
    "    # separately: independently rounded pieces need not add up, which would\n",
    "    # give the two columns below different lengths.\n",
    "    no_disease = round(n) - disease\n",
    "    false_positives = round(no_disease * false_positive_rate)\n",
    "    true_negatives = no_disease - false_positives\n",
    "\n",
    "    status = np.array(['Disease'] * disease + ['No disease'] * no_disease)\n",
    "    # The order of the pieces must line up with the order used in `status`.\n",
    "    result = np.array(['Test +'] * disease + ['Test +'] * false_positives +\n",
    "                      ['Test -'] * true_negatives)\n",
    "\n",
    "    t = Table().with_columns(\n",
    "        'Status', status,\n",
    "        'Test Result', result\n",
    "    )\n",
    "    return t.pivot('Test Result', 'Status')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "create_population(0.5, 10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# P(Disease | Test +) read off the pivot table above\n",
    "5000 / (5000 + 250)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}