{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 3. Conditional Probability Tables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This Notebook shows how to create the CPTs for the Student example from Koller & Friedman." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python version: 3.8.10\n", "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n", "\n", "available imports:\n", " import os\n", " import logging\n", " import pandas as pd\n", " import numpy as np\n", "\n", "connect to this kernel with:\n", " jupyter console --existing 9fa5c31c-e49b-417e-882b-5f4ace153127\n", "\n", "Could not create logging directory \"../logs\"\n", "Logging to: \"../logs/notebook.log\"\n", "Current date/time: 11-06-2021, 21:27\n", "Current working directory: \"/Users/melle/software-development/thomas-master/notebooks\"\n" ] } ], "source": [ "%run '_preamble.ipynb'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from thomas.core.factors import CPT\n", "\n", "from IPython.display import display, HTML" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def subset(full_dict, keys):\n", " \"\"\"Return a subset of a dict.\"\"\"\n", " return {k: full_dict[k] for k in keys}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# We're defining CPTs for multiple random variables. 
The dictionary\n", "# `states` keeps track of the states each variable can take on.\n", "states = {\n", " 'I': ['i0', 'i1'],\n", " 'S': ['s0', 's1'],\n", " 'D': ['d0', 'd1'],\n", " 'G': ['g1', 'g2','g3'],\n", " 'L': ['l0', 'l1'],\n", "}\n", "\n", "# We'll store the CPTs in a dict, indexed by the name of the \n", "# conditioned variable.\n", "P = dict()\n", "\n", "# Create the CPT (which isn't really conditional probabilities, but rather prior\n", "# probabilities) for random variable I.\n", "P['I'] = CPT(\n", " [0.7, 0.3], \n", " states=subset(states, ['I']),\n", " description='Intelligence'\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", " P(I)\n", " Intelligence\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Ii0i1
0.70.3
\n", "
\n", "
\n", "
\n", " " ], "text/plain": [ "P(I)\n", "I \n", "i0 0.7\n", "i1 0.3\n", "dtype: float64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the CPT for random variable 'I': intelligence. The variable's states\n", "# are listed as columns.\n", "P['I']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Create the CPT for random variable 'S'. The probabilities for S are conditional\n", "# on I. In other words, the CPT defines S given I which can be written as \n", "# P(S|I).\n", "P['S'] = CPT(\n", " [0.95, 0.05, \n", " 0.20, 0.80], \n", " states=subset(states, ['I', 'S']),\n", " description='SAT Score'\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", " P(S|I)\n", " SAT Score\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Ss0s1
I
i00.950.05
i10.200.80
\n", "
\n", "
\n", "
\n", " " ], "text/plain": [ "P(S|I)\n", "I S \n", "i0 s0 0.95\n", " s1 0.05\n", "i1 s0 0.20\n", " s1 0.80\n", "dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the CPT for random variable 'S': SAT Score. Again, the variable's \n", "# states are listed as columns. The conditioning variables' states are listed\n", "# as rows.\n", "P['S']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "P(S|I)\n", "I S \n", "i0 s0 0.95\n", " s1 0.05\n", "i1 s0 0.20\n", " s1 0.80\n", "dtype: float64\n" ] } ], "source": [ "# Internally, P['S'] is essentially a multi-level factor\n", "print(P['S'])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Create the remainder of the CPTs\n", "P['D'] = CPT(\n", " [0.6, 0.4], \n", " states=subset(states, ['D']),\n", " description='Difficulty'\n", ")\n", "\n", "P['G'] = CPT(\n", " [0.30, 0.40, 0.30, \n", " 0.05, 0.25, 0.70, \n", " 0.90, 0.08, 0.02, \n", " 0.50, 0.30, 0.20],\n", " states=subset(states, ['I', 'D', 'G']),\n", " description='Grade'\n", ")\n", "\n", "P['L'] = CPT(\n", " [0.10, 0.90,\n", " 0.40, 0.60,\n", " 0.99, 0.01],\n", " states=subset(states, ['G', 'L']),\n", " description='Letter'\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", " P(G|I,D)\n", " Grade\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Gg1g2g3
ID
i0d00.300.400.30
d10.050.250.70
i1d00.900.080.02
d10.500.300.20
\n", "
\n", "
\n", "
\n", " " ], "text/plain": [ "P(G|I,D)\n", "I D G \n", "i0 d0 g1 0.30\n", " g2 0.40\n", " g3 0.30\n", " d1 g1 0.05\n", " g2 0.25\n", " g3 0.70\n", "i1 d0 g1 0.90\n", " g2 0.08\n", " g3 0.02\n", " d1 g1 0.50\n", " g2 0.30\n", " g3 0.20\n", "dtype: float64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# There can, of course, be more than one conditioning variable\n", "P['G']" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The CPT can be accessed through the __getitem__ accessor:\n", "P['I']['i0']" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "factor(I,S)\n", "I S \n", "i0 s0 0.95\n", " s1 0.05\n", "i1 s0 0.20\n", " s1 0.80\n", "dtype: float64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The same goes for multi-level CPTs, which can also be converted to a factor:\n", "P['S'].as_factor()" ] } ], "metadata": { "kernelspec": { "display_name": "thomas-jupyter3", "language": "python", "name": "thomas-jupyter3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 }