{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. Conditional Probability Tables"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This Notebook shows how to create the CPTs for the Student example from Koller & Friedman."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Python version: 3.8.10\n",
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n",
      "\n",
      "available imports:\n",
      "  import os\n",
      "  import logging\n",
      "  import pandas as pd\n",
      "  import numpy as np\n",
      "\n",
      "connect to this kernel with:\n",
      "  jupyter console --existing 9fa5c31c-e49b-417e-882b-5f4ace153127\n",
      "\n",
      "Could not create logging directory \"../logs\"\n",
      "Logging to: \"../logs/notebook.log\"\n",
      "Current date/time: 11-06-2021, 21:27\n",
      "Current working directory: \"/Users/melle/software-development/thomas-master/notebooks\"\n"
     ]
    }
   ],
   "source": [
    "%run '_preamble.ipynb'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from thomas.core.factors import CPT\n",
    "\n",
    "from IPython.display import display, HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def subset(full_dict, keys):\n",
    "    \"\"\"Return a subset of a dict.\"\"\"\n",
    "    return {k: full_dict[k] for k in keys}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We're defining CPTs for multiple random variables. The dictionary\n",
    "# `states` keeps track the states each variable can take on.\n",
    "states = {\n",
    "    'I': ['i0', 'i1'],\n",
    "    'S': ['s0', 's1'],\n",
    "    'D': ['d0', 'd1'],\n",
    "    'G': ['g1', 'g2','g3'],\n",
    "    'L': ['l0', 'l1'],\n",
    "}\n",
    "\n",
    "# We'll store the CPTs in a dict, indexed by the name of the \n",
    "# conditioned variable.\n",
    "P = dict()\n",
    "\n",
    "# Create the CPT (which isn't really conditional probabilities, but rather prior\n",
    "# probabilities) for random variable I.\n",
    "P['I'] = CPT(\n",
    "    [0.7, 0.3], \n",
    "    states=subset(states, ['I']),\n",
    "    description='Intelligence'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <div style=\"margin-top:6px\">\n",
       "                    <span><b>P(I)</b></span>\n",
       "                    <span style=\"font-style: italic;\">Intelligence</span>\n",
       "                    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>I</th>\n",
       "      <th>i0</th>\n",
       "      <th>i1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <td>0.7</td>\n",
       "      <td>0.3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "                </div>\n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "P(I)\n",
       "I \n",
       "i0    0.7\n",
       "i1    0.3\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the CPT for random variable 'I': intelligence. The variable's states\n",
    "# are listed as columns.\n",
    "P['I']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the CPT for random variable 'S'. The probabilities for S are conditional\n",
    "# on I. In other words, the CPT defines S given I which can be written as \n",
    "# P(S|I).\n",
    "P['S'] = CPT(\n",
    "    [0.95, 0.05, \n",
    "     0.20, 0.80], \n",
    "    states=subset(states, ['I', 'S']),\n",
    "    description='SAT Score'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <div style=\"margin-top:6px\">\n",
       "                    <span><b>P(S|I)</b></span>\n",
       "                    <span style=\"font-style: italic;\">SAT Score</span>\n",
       "                    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>S</th>\n",
       "      <th>s0</th>\n",
       "      <th>s1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>I</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>i0</th>\n",
       "      <td>0.95</td>\n",
       "      <td>0.05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>i1</th>\n",
       "      <td>0.20</td>\n",
       "      <td>0.80</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "                </div>\n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "P(S|I)\n",
       "I   S \n",
       "i0  s0    0.95\n",
       "    s1    0.05\n",
       "i1  s0    0.20\n",
       "    s1    0.80\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the CPT for random variable 'S': SAT Score. Again, the variable's \n",
    "# states are listed as columns. The conditioning variables' states are listed\n",
    "# as rows.\n",
    "P['S']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P(S|I)\n",
      "I   S \n",
      "i0  s0    0.95\n",
      "    s1    0.05\n",
      "i1  s0    0.20\n",
      "    s1    0.80\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# Internally, P['S'] is essentially a multi-level factor\n",
    "print(P['S'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the remained of the CPTs\n",
    "P['D'] = CPT(\n",
    "    [0.6, 0.4], \n",
    "    states=subset(states, ['D']),\n",
    "    description='Difficulty'\n",
    ")\n",
    "\n",
    "P['G'] = CPT(\n",
    "    [0.30, 0.40, 0.30, \n",
    "     0.05, 0.25, 0.70, \n",
    "     0.90, 0.08, 0.02, \n",
    "     0.50, 0.30, 0.20],\n",
    "    states=subset(states, ['I', 'D', 'G']),\n",
    "    description='Grade'\n",
    ")\n",
    "\n",
    "P['L'] = CPT(\n",
    "    [0.10, 0.90,\n",
    "     0.40, 0.60,\n",
    "     0.99, 0.01],\n",
    "    states=subset(states, ['G', 'L']),\n",
    "    description='Letter'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <div style=\"margin-top:6px\">\n",
       "                    <span><b>P(G|I,D)</b></span>\n",
       "                    <span style=\"font-style: italic;\">Grade</span>\n",
       "                    <div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>G</th>\n",
       "      <th>g1</th>\n",
       "      <th>g2</th>\n",
       "      <th>g3</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>I</th>\n",
       "      <th>D</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">i0</th>\n",
       "      <th>d0</th>\n",
       "      <td>0.30</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d1</th>\n",
       "      <td>0.05</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">i1</th>\n",
       "      <th>d0</th>\n",
       "      <td>0.90</td>\n",
       "      <td>0.08</td>\n",
       "      <td>0.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d1</th>\n",
       "      <td>0.50</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>\n",
       "                </div>\n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "P(G|I,D)\n",
       "I   D   G \n",
       "i0  d0  g1    0.30\n",
       "        g2    0.40\n",
       "        g3    0.30\n",
       "    d1  g1    0.05\n",
       "        g2    0.25\n",
       "        g3    0.70\n",
       "i1  d0  g1    0.90\n",
       "        g2    0.08\n",
       "        g3    0.02\n",
       "    d1  g1    0.50\n",
       "        g2    0.30\n",
       "        g3    0.20\n",
       "dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# There can, of course, be more than one conditioning variable\n",
    "P['G']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The CPT can be accessed through the __getitem__ accessor:\n",
    "P['I']['i0']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "factor(I,S)\n",
       "I   S \n",
       "i0  s0    0.95\n",
       "    s1    0.05\n",
       "i1  s0    0.20\n",
       "    s1    0.80\n",
       "dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The same goes for multi-level CPTs\n",
    "P['S'].as_factor()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "thomas-jupyter3",
   "language": "python",
   "name": "thomas-jupyter3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}