{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Conditional Probability Tables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook shows how to create the conditional probability tables (CPTs) for the Student example from Koller & Friedman."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Python version: 3.8.10\n",
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"\n",
"available imports:\n",
" import os\n",
" import logging\n",
" import pandas as pd\n",
" import numpy as np\n",
"\n",
"connect to this kernel with:\n",
" jupyter console --existing 9fa5c31c-e49b-417e-882b-5f4ace153127\n",
"\n",
"Could not create logging directory \"../logs\"\n",
"Logging to: \"../logs/notebook.log\"\n",
"Current date/time: 11-06-2021, 21:27\n",
"Current working directory: \"/Users/melle/software-development/thomas-master/notebooks\"\n"
]
}
],
"source": [
"%run '_preamble.ipynb'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from thomas.core.factors import CPT\n",
"\n",
"from IPython.display import display, HTML"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def subset(full_dict, keys):\n",
" \"\"\"Return a subset of a dict.\"\"\"\n",
" return {k: full_dict[k] for k in keys}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# We're defining CPTs for multiple random variables. The dictionary\n",
"# `states` keeps track of the states each variable can take on.\n",
"states = {\n",
" 'I': ['i0', 'i1'],\n",
" 'S': ['s0', 's1'],\n",
" 'D': ['d0', 'd1'],\n",
" 'G': ['g1', 'g2','g3'],\n",
" 'L': ['l0', 'l1'],\n",
"}\n",
"\n",
"# We'll store the CPTs in a dict, indexed by the name of the \n",
"# conditioned variable.\n",
"P = dict()\n",
"\n",
"# Create the CPT for random variable I. As I isn't conditioned on anything, this\n",
"# table holds prior probabilities rather than conditional ones.\n",
"P['I'] = CPT(\n",
" [0.7, 0.3], \n",
" states=subset(states, ['I']),\n",
" description='Intelligence'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
"
\n",
"
P(I)\n",
"
Intelligence\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" I | \n",
" i0 | \n",
" i1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | \n",
" 0.7 | \n",
" 0.3 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" "
],
"text/plain": [
"P(I)\n",
"I \n",
"i0 0.7\n",
"i1 0.3\n",
"dtype: float64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the CPT for random variable 'I': intelligence. The variable's states\n",
"# are listed as columns.\n",
"P['I']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Create the CPT for random variable 'S'. The probabilities for S are conditional\n",
"# on I. In other words, the CPT defines S given I which can be written as \n",
"# P(S|I).\n",
"P['S'] = CPT(\n",
" [0.95, 0.05, \n",
" 0.20, 0.80], \n",
" states=subset(states, ['I', 'S']),\n",
" description='SAT Score'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
"
\n",
"
P(S|I)\n",
"
SAT Score\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" S | \n",
" s0 | \n",
" s1 | \n",
"
\n",
" \n",
" I | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" i0 | \n",
" 0.95 | \n",
" 0.05 | \n",
"
\n",
" \n",
" i1 | \n",
" 0.20 | \n",
" 0.80 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" "
],
"text/plain": [
"P(S|I)\n",
"I S \n",
"i0 s0 0.95\n",
" s1 0.05\n",
"i1 s0 0.20\n",
" s1 0.80\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the CPT for random variable 'S': SAT Score. Again, the variable's \n",
"# states are listed as columns. The conditioning variables' states are listed\n",
"# as rows.\n",
"P['S']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"P(S|I)\n",
"I S \n",
"i0 s0 0.95\n",
" s1 0.05\n",
"i1 s0 0.20\n",
" s1 0.80\n",
"dtype: float64\n"
]
}
],
"source": [
"# Internally, P['S'] is essentially a multi-level factor\n",
"print(P['S'])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Create the remainder of the CPTs\n",
"P['D'] = CPT(\n",
" [0.6, 0.4], \n",
" states=subset(states, ['D']),\n",
" description='Difficulty'\n",
")\n",
"\n",
"P['G'] = CPT(\n",
" [0.30, 0.40, 0.30, \n",
" 0.05, 0.25, 0.70, \n",
" 0.90, 0.08, 0.02, \n",
" 0.50, 0.30, 0.20],\n",
" states=subset(states, ['I', 'D', 'G']),\n",
" description='Grade'\n",
")\n",
"\n",
"P['L'] = CPT(\n",
" [0.10, 0.90,\n",
" 0.40, 0.60,\n",
" 0.99, 0.01],\n",
" states=subset(states, ['G', 'L']),\n",
" description='Letter'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
"
\n",
"
P(G|I,D)\n",
"
Grade\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" G | \n",
" g1 | \n",
" g2 | \n",
" g3 | \n",
"
\n",
" \n",
" I | \n",
" D | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" i0 | \n",
" d0 | \n",
" 0.30 | \n",
" 0.40 | \n",
" 0.30 | \n",
"
\n",
" \n",
" d1 | \n",
" 0.05 | \n",
" 0.25 | \n",
" 0.70 | \n",
"
\n",
" \n",
" i1 | \n",
" d0 | \n",
" 0.90 | \n",
" 0.08 | \n",
" 0.02 | \n",
"
\n",
" \n",
" d1 | \n",
" 0.50 | \n",
" 0.30 | \n",
" 0.20 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" "
],
"text/plain": [
"P(G|I,D)\n",
"I D G \n",
"i0 d0 g1 0.30\n",
" g2 0.40\n",
" g3 0.30\n",
" d1 g1 0.05\n",
" g2 0.25\n",
" g3 0.70\n",
"i1 d0 g1 0.90\n",
" g2 0.08\n",
" g3 0.02\n",
" d1 g1 0.50\n",
" g2 0.30\n",
" g3 0.20\n",
"dtype: float64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# There can, of course, be more than one conditioning variable\n",
"P['G']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The CPT can be accessed through the __getitem__ accessor:\n",
"P['I']['i0']"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"factor(I,S)\n",
"I S \n",
"i0 s0 0.95\n",
" s1 0.05\n",
"i1 s0 0.20\n",
" s1 0.80\n",
"dtype: float64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A multi-level CPT can also be converted to a plain factor over all of its\n",
"# variables:\n",
"P['S'].as_factor()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "thomas-jupyter3",
"language": "python",
"name": "thomas-jupyter3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}