{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 8 exercises"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools as it\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.special import comb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class bcolors:\n",
    "    \"\"\"ANSI escape codes used to style text printed to the terminal.\"\"\"\n",
    "    HEADER = \"\\033[95m\"\n",
    "    OKBLUE = \"\\033[94m\"\n",
    "    OKGREEN = \"\\033[92m\"\n",
    "    WARNING = \"\\033[93m\"\n",
    "    FAIL = \"\\033[91m\"\n",
    "    ENDC = \"\\033[0m\"  # resets the styling\n",
    "    BOLD = \"\\033[1m\"\n",
    "    UNDERLINE = \"\\033[4m\"\n",
    "\n",
    "\n",
    "def color_print(string, bcolor=bcolors.WARNING):\n",
    "    \"\"\"Print `string` wrapped between `bcolor` and the `ENDC` code.\"\"\"\n",
    "    styled = \"\".join((bcolor, string, bcolors.ENDC))\n",
    "    print(styled)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.3\n",
    "\n",
    "Consider three binary variables $a, b, c \\in \\{0, 1\\}$ having the joint distribution given by the table below. Show by direct evaluation that this distribution has the property that $a$ and $b$ are marginally dependent, so that $p(a,b) \\neq p(a)p(b)$, but that they become independent when conditioned on $c$, so that $p(a,b|c) = p(a|c)p(b|c)$ for both $c=0$ and $c=1$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Joint distribution p(a, b, c): one row per configuration, laid out as\n",
    "# (a, b, c, probability). The array is float, so the index labels are floats too.\n",
    "joint_rows = np.array([\n",
    "    [0, 0, 0, 0.192],\n",
    "    [0, 0, 1, 0.144],\n",
    "    [0, 1, 0, 0.048],\n",
    "    [0, 1, 1, 0.216],\n",
    "    [1, 0, 0, 0.192],\n",
    "    [1, 0, 1, 0.064],\n",
    "    [1, 1, 0, 0.048],\n",
    "    [1, 1, 1, 0.096],\n",
    "])\n",
    "\n",
    "p = (\n",
    "    pd.DataFrame(joint_rows, columns=[\"a\", \"b\", \"c\", \"p_abc\"])\n",
    "      .set_index([\"a\", \"b\", \"c\"])\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Factorization without conditioning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>p_abc</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.144</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         p_abc\n",
       "a   b         \n",
       "0.0 0.0  0.336\n",
       "    1.0  0.264\n",
       "1.0 0.0  0.256\n",
       "    1.0  0.144"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(a, b): marginalise c out by adding up the c=0 and c=1 cross-sections\n",
    "sum(p.xs(c_value, level=\"c\") for c_value in (0, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    b  \n",
       "0.0  0.0    0.3552\n",
       "     1.0    0.2448\n",
       "1.0  0.0    0.2368\n",
       "     1.0    0.1632\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(a)p(b)\n",
    "p_ab = p.xs(0, level=\"c\") + p.xs(1, level=\"c\")\n",
    "\n",
    "# Marginalise the other variable out of p(a, b)\n",
    "p_a = p_ab.xs(0, level=\"b\") + p_ab.xs(1, level=\"b\")\n",
    "p_b = p_ab.xs(0, level=\"a\") + p_ab.xs(1, level=\"a\")\n",
    "\n",
    "# A constant key on both sides turns the merge into a cross product\n",
    "for marginal in (p_a, p_b):\n",
    "    marginal[\"key\"] = 1\n",
    "\n",
    "p_marg = pd.merge(\n",
    "    p_a.reset_index(), p_b.reset_index(), on=\"key\", suffixes=(\"_a\", \"_b\")\n",
    ").drop(\"key\", axis=1)\n",
    "\n",
    "p_marg.set_index([\"a\", \"b\"]).prod(axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Factorization with conditioning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>p_abc</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         p_abc\n",
       "a   b         \n",
       "0.0 0.0    0.4\n",
       "    1.0    0.1\n",
       "1.0 0.0    0.4\n",
       "    1.0    0.1"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(a,b|c=0): take the c=0 slice of the joint and renormalise it\n",
    "p_c0 = p.xs(0, level=\"c\")\n",
    "p_marg = p_c0 / p_c0.sum()\n",
    "p_marg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    b  \n",
       "0.0  0.0    0.4\n",
       "     1.0    0.1\n",
       "1.0  0.0    0.4\n",
       "     1.0    0.1\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(a|c=0)p(b|c=0)\n",
    "# groupby(level=...).sum() replaces the sum(level=...) shortcut, which\n",
    "# pandas 2.0 no longer accepts.\n",
    "p_a = p_marg.groupby(level=\"a\").sum()\n",
    "p_b = p_marg.groupby(level=\"b\").sum()\n",
    "\n",
    "# A constant key on both sides turns the merge into a cross product\n",
    "p_a = p_a.reset_index().assign(key=1)\n",
    "p_b = p_b.reset_index().assign(key=1)\n",
    "\n",
    "p_fact = (pd.merge(p_a, p_b, on=\"key\", suffixes=(\"_a\", \"_b\"))\n",
    "            .drop(\"key\", axis=1)\n",
    "            .set_index([\"a\", \"b\"]))\n",
    "\n",
    "p_fact.prod(axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.4\n",
    "\n",
    "Evaluate the distributions $p(a)$, $p(b | c)$, and $p(c | a)$ corresponding to the joint distribution given in Table 8.2. Hence show by direct evaluation that $p(a, b, c) = p(a)p(c | a)p(b | c)$. Draw the corresponding directed graph."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# p(a): sum the joint over b and c. groupby(level=...).sum() is used because\n",
    "# pandas 2.0 no longer accepts sum(level=...).\n",
    "p_a = p.groupby(level=\"a\").sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>p_abc</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.0</th>\n",
       "      <td>0.6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     p_abc  key\n",
       "a              \n",
       "0.0    0.6    1\n",
       "1.0    0.4    2"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Merge key that identifies the value of a (a=0 -> 1, a=1 -> 2)\n",
    "p_a[\"key\"] = [1, 2]\n",
    "p_a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>p_abc</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <th>c</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.6</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         p_abc  key\n",
       "a   c              \n",
       "0.0 0.0    0.4    1\n",
       "    1.0    0.6    1\n",
       "1.0 0.0    0.6    2\n",
       "    1.0    0.4    2"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(c|a) = p(a, c) / p(a)\n",
    "# groupby(level=...).sum() is used because pandas 2.0 no longer accepts sum(level=...).\n",
    "p_ac = p.groupby(level=[\"a\", \"c\"]).sum()\n",
    "p_c_giv_a = p_ac / p_ac.groupby(level=\"a\").sum()\n",
    "# Merge key that identifies the value of a (a=0 -> 1, a=1 -> 2)\n",
    "p_c_giv_a[\"key\"] = [1, 1, 2, 2]\n",
    "p_c_giv_a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>p_abc</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         p_abc  key\n",
       "b   c              \n",
       "0.0 0.0    0.8    1\n",
       "    1.0    0.4    2\n",
       "1.0 0.0    0.2    1\n",
       "    1.0    0.6    2"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(b|c) = p(b, c) / p(c)\n",
    "# groupby(level=...).sum() is used because pandas 2.0 no longer accepts sum(level=...).\n",
    "p_bc = p.groupby(level=[\"b\", \"c\"]).sum()\n",
    "p_b_giv_c = p_bc / p_bc.groupby(level=\"c\").sum()\n",
    "# Merge key that identifies the value of c (c=0 -> 1, c=1 -> 2)\n",
    "p_b_giv_c[\"key\"] = [1, 2, 1, 2]\n",
    "p_b_giv_c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 1: p(a) p(c|a). The shared key pairs every p(a) row with the\n",
    "# p(c|a) rows that belong to the same value of a.\n",
    "fact = p_a.merge(p_c_giv_a.reset_index(), on=\"key\")\n",
    "fact = fact.drop([\"key\"], axis=1).set_index([\"a\", \"c\"])\n",
    "fact = fact.prod(axis=1)\n",
    "fact = pd.DataFrame(fact, columns=[\"p\"])\n",
    "# New key identifies the value of c (c=0 -> 1, c=1 -> 2), as in p_b_giv_c\n",
    "fact[\"key\"] = [1, 2, 1, 2]\n",
    "\n",
    "# Step 2: multiply by p(b|c), matching rows on the value of c\n",
    "fact = fact.reset_index().merge(p_b_giv_c.reset_index(), on=\"key\")\n",
    "fact = fact.rename({\"c_x\": \"c\"}, axis=1).drop([\"key\", \"c_y\"], axis=1)\n",
    "# Index in (a, b, c) order, the same order as p: DataFrame.join(on=[...])\n",
    "# matches index levels by position, so (a, c, b) would swap b and c.\n",
    "fact = fact.groupby([\"a\", \"b\", \"c\"]).sum().prod(axis=1)\n",
    "\n",
    "fact.name = \"p_factorized\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Final Answer:\n",
    "\n",
    "Contains a bug: c-level is swapped"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>p_abc</th>\n",
       "      <th>p_factorized</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">0.0</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.192</td>\n",
       "      <td>0.192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.144</td>\n",
       "      <td>0.048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.048</td>\n",
       "      <td>0.144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.216</td>\n",
       "      <td>0.216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">1.0</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">0.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.192</td>\n",
       "      <td>0.192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.064</td>\n",
       "      <td>0.048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">1.0</th>\n",
       "      <th>0.0</th>\n",
       "      <td>0.048</td>\n",
       "      <td>0.064</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>0.096</td>\n",
       "      <td>0.096</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             p_abc  p_factorized\n",
       "a   b   c                       \n",
       "0.0 0.0 0.0  0.192         0.192\n",
       "        1.0  0.144         0.048\n",
       "    1.0 0.0  0.048         0.144\n",
       "        1.0  0.216         0.216\n",
       "1.0 0.0 0.0  0.192         0.192\n",
       "        1.0  0.064         0.048\n",
       "    1.0 0.0  0.048         0.064\n",
       "        1.0  0.096         0.096"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Put the tabulated joint next to the factorised product, row by row\n",
    "join_levels = [\"a\", \"b\", \"c\"]\n",
    "p.join(fact, on=join_levels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.8\n",
    "Consider the graphical model represented by the *noisy*-OR\n",
    "$$\n",
    "    p(y=1|x_1, \\ldots, x_M) = 1 - (1 - \\mu_0)\\prod_{i=1}^M(1 - \\mu_i)^{x_i}\n",
    "$$\n",
    "\n",
    "Show that $p$ can be interpreted as a \"soft\" (probabilistic) form of the logical OR function (i.e., the function that gives $y=1$ whenever at least one of the $x_i=1$). Discuss the interpretation of $\\mu_0$."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p(y|mu_1=0, mu_2=0)=0\n",
      "p(y|mu_1=0, mu_2=1)=1\n",
      "p(y|mu_1=1, mu_2=0)=1\n",
      "p(y|mu_1=1, mu_2=1)=1\n"
     ]
    }
   ],
   "source": [
    "# ** Considering two parameters plus the null term: mu_0, mu_1, mu_2 **\n",
    "mu_0 = 0\n",
    "x1 = x2 = 1  # both inputs are switched on\n",
    "# Sweep every 0/1 setting of the two parameters\n",
    "for mu_1, mu_2 in it.product((0, 1), repeat=2):\n",
    "    p_off = (1 - mu_0) * (1 - mu_1) ** x1 * (1 - mu_2) ** x2\n",
    "    p_y = 1 - p_off\n",
    "    print(f\"p(y|mu_1={mu_1}, mu_2={mu_2})={p_y}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p(y|mu_1=0, mu_2=0, mu_3=0, mu_4=0)=0\n",
      "p(y|mu_1=0, mu_2=0, mu_3=0, mu_4=1)=1\n",
      "p(y|mu_1=0, mu_2=0, mu_3=1, mu_4=0)=1\n",
      "p(y|mu_1=0, mu_2=0, mu_3=1, mu_4=1)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=0, mu_4=0)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=0, mu_4=1)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=1, mu_4=0)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=1, mu_4=1)=1\n",
      "p(y|mu_1=1, mu_2=0, mu_3=0, mu_4=0)=1\n",
      "p(y|mu_1=1, mu_2=0, mu_3=0, mu_4=1)=1\n",
      "p(y|mu_1=1, mu_2=0, mu_3=1, mu_4=0)=1\n",
      "p(y|mu_1=1, mu_2=0, mu_3=1, mu_4=1)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=0, mu_4=0)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=0, mu_4=1)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=1, mu_4=0)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=1, mu_4=1)=1\n"
     ]
    }
   ],
   "source": [
    "# ** Considering four parameters plus the null term: mu_0, mu_1, mu_2, mu_3, mu_4 **\n",
    "# mu_0 is not set here; it keeps the value assigned in an earlier cell.\n",
    "\n",
    "x1 = x2 = x3 = x4 = 1  # all four inputs are switched on\n",
    "for mu_1, mu_2, mu_3, mu_4 in it.product((0, 1), repeat=4):\n",
    "    p_off = (1 - mu_0) * (1 - mu_1) ** x1 * (1 - mu_2) ** x2 * (1 - mu_3) ** x3 * (1 - mu_4) ** x4\n",
    "    p_y = 1 - p_off\n",
    "    print(f\"p(y|mu_1={mu_1}, mu_2={mu_2}, mu_3={mu_3}, mu_4={mu_4})={p_y}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First remarks: \n",
    "\n",
    "* The $\\{x_n\\}_{n=1}^N$ elements control which variables take part in the *soft*-OR function\n",
    "* If the model considers $\\forall n\\geq 1. \\mu_n\\in\\{0,1\\}$ and $\\mu_0 = 0$, then the model corresponds to the *hard* OR function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[93mp(y|mu_1=0, mu_2=0, mu_3=0, mu_4=0)=0\u001b[0m\n",
      "p(y|mu_1=0, mu_2=0, mu_3=0, mu_4=1)=1\n",
      "\u001b[93mp(y|mu_1=0, mu_2=0, mu_3=1, mu_4=0)=0\u001b[0m\n",
      "p(y|mu_1=0, mu_2=0, mu_3=1, mu_4=1)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=0, mu_4=0)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=0, mu_4=1)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=1, mu_4=0)=1\n",
      "p(y|mu_1=0, mu_2=1, mu_3=1, mu_4=1)=1\n",
      "\u001b[93mp(y|mu_1=1, mu_2=0, mu_3=0, mu_4=0)=0\u001b[0m\n",
      "p(y|mu_1=1, mu_2=0, mu_3=0, mu_4=1)=1\n",
      "\u001b[93mp(y|mu_1=1, mu_2=0, mu_3=1, mu_4=0)=0\u001b[0m\n",
      "p(y|mu_1=1, mu_2=0, mu_3=1, mu_4=1)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=0, mu_4=0)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=0, mu_4=1)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=1, mu_4=0)=1\n",
      "p(y|mu_1=1, mu_2=1, mu_3=1, mu_4=1)=1\n"
     ]
    }
   ],
   "source": [
    "# In this example, we \"turn off\" x1 and x3, meaning that the OR\n",
    "# function is computed using only x2 and x4.\n",
    "# mu_0 is not set here; it keeps the value assigned in an earlier cell.\n",
    "\n",
    "x1, x2, x3, x4 = 0, 1, 0, 1\n",
    "for mu_1, mu_2, mu_3, mu_4 in it.product([0, 1], repeat=4):\n",
    "    p_y = 1 - (1 - mu_0) * (1 - mu_1) ** x1 * (1 - mu_2) ** x2 * (1 - mu_3) ** x3 * (1 - mu_4) ** x4\n",
    "    p_y_str = f\"p(y|mu_1={mu_1}, mu_2={mu_2}, mu_3={mu_3}, mu_4={mu_4})={p_y}\"\n",
    "    # Highlight the rows where both active parameters (mu_2 and mu_4) are\n",
    "    # zero and the OR is therefore off; mu_1 and mu_3 are masked out by x.\n",
    "    if mu_2 == 0 and mu_4 == 0 and p_y == 0:\n",
    "        color_print(p_y_str)\n",
    "    else:\n",
    "        print(p_y_str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**The more general case**\n",
    "\n",
    "$\\mu_0$ controls the level of the *null* term: the bigger it is, the more the null terms (0) converge to 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p(y|mu_1=0, mu_2=0, mu_3=0, mu_4=1)=0.45\n",
      "p(y|mu_1=0, mu_2=0, mu_3=1, mu_4=1)=0.45\n",
      "p(y|mu_1=0, mu_2=1, mu_3=0, mu_4=1)=1.0\n",
      "p(y|mu_1=0, mu_2=1, mu_3=1, mu_4=1)=1.0\n",
      "p(y|mu_1=1, mu_2=0, mu_3=0, mu_4=1)=0.45\n",
      "p(y|mu_1=1, mu_2=0, mu_3=1, mu_4=1)=0.45\n",
      "p(y|mu_1=1, mu_2=1, mu_3=0, mu_4=1)=1.0\n",
      "p(y|mu_1=1, mu_2=1, mu_3=1, mu_4=1)=1.0\n"
     ]
    }
   ],
   "source": [
    "x1, x2, x3 = 0, 1, 0\n",
    "\n",
    "mu_0 = 0.45\n",
    "# Only x2 is switched on, so besides mu_0 only mu_2 affects p(y)\n",
    "for mu_1, mu_2, mu_3 in it.product([0, 1], repeat=3):\n",
    "    p_y = 1 - (1 - mu_0) * (1 - mu_1) ** x1 * (1 - mu_2) ** x2 * (1 - mu_3) ** x3\n",
    "    # Report only the three parameters of this model; there is no mu_4 here\n",
    "    p_y_str = f\"p(y|mu_1={mu_1}, mu_2={mu_2}, mu_3={mu_3})={p_y:.2}\"\n",
    "    print(p_y_str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$\\forall n\\geq 1.\\mu_n$ controls the value of the positive terms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p(y|x1=0, x2=0, x3=0)=0.00\n",
      "p(y|x1=0, x2=0, x3=1)=0.50\n",
      "p(y|x1=0, x2=1, x3=0)=0.20\n",
      "p(y|x1=0, x2=1, x3=1)=0.60\n",
      "p(y|x1=1, x2=0, x3=0)=0.30\n",
      "p(y|x1=1, x2=0, x3=1)=0.65\n",
      "p(y|x1=1, x2=1, x3=0)=0.44\n",
      "p(y|x1=1, x2=1, x3=1)=0.72\n"
     ]
    }
   ],
   "source": [
    "mu_0 = 0\n",
    "mu_1, mu_2, mu_3 = 0.3, 0.2, 0.5\n",
    "# Enumerate every on/off combination of the three inputs\n",
    "for x1, x2, x3 in it.product((0, 1), repeat=3):\n",
    "    p_off = (1 - mu_0) * (1 - mu_1) ** x1 * (1 - mu_2) ** x2 * (1 - mu_3) ** x3\n",
    "    p_y = 1 - p_off\n",
    "    p_y_str = f\"p(y|x1={x1}, x2={x2}, x3={x3})={p_y:.2f}\"\n",
    "    print(p_y_str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.11\n",
    "\n",
    "Consider the example of the car fuel system, and suppose that instead of observing the state of the fuel gauge $G$ directly, the gauge is seen by the driver $D$ who reports to us the reading of the gauge. This report is either that the gauge shows full $D=1$ or that it shows empty $D=0$.\n",
    "\n",
    "$$\n",
    "    p(D=1|G=1) = 0.9\\\\\n",
    "    p(D=0|G=0) = 0.9\n",
    "$$\n",
    "\n",
    "Suppose that the driver tells us that the fuel gauge reads empty (D=0).\n",
    "1. Evaluate the probability that the tank is empty given only this observation\n",
    "\n",
    "$$\n",
    "    p(F=0|D=0)\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Naming: pXv stands for p(X=v) and pXv_giv_Yw for p(X=v | Y=w).\n",
    "\n",
    "# Battery (B) and fuel tank (F)\n",
    "pB1 = 0.9\n",
    "pF1 = 0.9\n",
    "pB0 = 1 - pB1\n",
    "pF0 = 1 - pF1\n",
    "\n",
    "# Gauge (G): marginal, and conditional on an empty tank\n",
    "pG0 = 0.315\n",
    "pG0_giv_F0 = 0.81\n",
    "pG1 = 1 - pG0\n",
    "pG1_giv_F0 = 1 - pG0_giv_F0\n",
    "\n",
    "# Driver report (D) given the gauge reading\n",
    "pD1_giv_G1 = 0.9\n",
    "pD0_giv_G0 = 0.9\n",
    "pD0_giv_G1 = 1 - pD1_giv_G1\n",
    "pD1_giv_G0 = 1 - pD0_giv_G0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# p(D=0) = sum over G of p(D=0|G) p(G)\n",
    "pD0 = sum((\n",
    "    pG1 * pD0_giv_G1,\n",
    "    pG0 * pD0_giv_G0,\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# p(F=0, D=0) = sum over G of p(F=0) p(G|F=0) p(D=0|G)\n",
    "via_G0 = pF0 * pG0_giv_F0 * pD0_giv_G0\n",
    "via_G1 = pF0 * pG1_giv_F0 * pD0_giv_G1\n",
    "pF0_D0 = via_G0 + via_G1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.09999999999999998"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the prior p(F=0)\n",
    "pF0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.21249999999999997"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The probability that the tank is empty given that the driver\n",
    "# told us it is empty: p(F=0|D=0) = p(F=0, D=0) / p(D=0)\n",
    "pF0_giv_D0 = pF0_D0 / pD0\n",
    "pF0_giv_D0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. Evaluate the corresponding probability given also the observation that the battery is flat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# p(G=1 | B, F) for every battery / fuel combination\n",
    "pG1_giv_B1F1 = 0.8\n",
    "pG1_giv_B1F0 = 0.2\n",
    "pG1_giv_B0F1 = 0.2\n",
    "pG1_giv_B0F0 = 0.1\n",
    "\n",
    "# p(G=0 | B, F) is the complement of the matching p(G=1 | B, F)\n",
    "pG0_giv_B1F1 = 1 - pG1_giv_B1F1\n",
    "pG0_giv_B1F0 = 1 - pG1_giv_B1F0\n",
    "pG0_giv_B0F1 = 1 - pG1_giv_B0F1\n",
    "pG0_giv_B0F0 = 1 - pG1_giv_B0F0\n",
    "\n",
    "# p(G | B=0) = sum over F of p(G | B=0, F) p(F)\n",
    "pG0_giv_B0 = pG0_giv_B0F0 * pF0 + pG0_giv_B0F1 * pF1\n",
    "pG1_giv_B0 = 1 - pG0_giv_B0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "pD0_B0 = pB0 * pG0_giv_B0 * pD0_giv_G0 + pB0 * pG1_giv_B0 * pD0_giv_G1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "pF0_D0_B0 = pF0 * pB0 * (pG0_giv_B0F0 * pD0_giv_G0 + pG1_giv_B0F0 * pD0_giv_G1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.023295454545454536"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# p(F=0 | D=0, B=0) = p(F=0, D=0, B=0) / p(D=0, B=0).\n",
    "# The normaliser is the joint evidence p(D=0, B=0), not p(D=0) alone.\n",
    "pF0_D0_B0 / pD0_B0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.09999999999999998"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the prior p(F=0)\n",
    "pF0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.12\n",
    "Show that there are $2^{M(M-1)/2}$ distinct undirected graphs over a set of $M$ distinct random variables. Draw the eight possibilities for the case of $M=3$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A fully connected $M$-node Markov Random Field (MRF) has ${M \\choose 2}$ edges, one for every pair of nodes, so this is the number of *possible* edges of any $M$-node graph. Notice\n",
    "\n",
    "$$\n",
    "\\begin{aligned}\n",
    "{M\\choose 2} &= \\frac{M!}{(M-2)!2!}\\\\\n",
    "             &=\\frac{M(M-1)(M-2)!}{(M-2)!2!}\\\\\n",
    "             &=\\frac{M(M-1)}{2}\n",
    "\\end{aligned}\n",
    "$$\n",
    "\n",
    "Therefore, the number of possible edges $E$ is given by $E = M(M-1)/2$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A 2-node MRF has  1 edge(s)\n",
      "A 3-node MRF has  3 edge(s)\n",
      "A 4-node MRF has  6 edge(s)\n",
      "A 5-node MRF has 10 edge(s)\n",
      "A 6-node MRF has 15 edge(s)\n",
      "A 7-node MRF has 21 edge(s)\n"
     ]
    }
   ],
   "source": [
    "# Number of possible edges, M(M-1)/2, for graphs with 2 to 7 nodes\n",
    "for nodes in range(2, 8):\n",
    "    edges = nodes * (nodes - 1) // 2\n",
    "    print(f\"A {nodes}-node MRF has {edges:>2d} edge(s)\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To compute how many distinct graphs can be produced given an $M$-node MRF, we notice that, if the graph has no edges, then we are *choosing* 0 out of the $E$ possible edges, for which there exists only one possible configuration. If the graph has exactly one edge (connecting two nodes), then the total possible configurations are given by ${E\\choose 1}$. In general, if $E$ edges are possible, then the number of possible configurations with exactly $k$ edges is given by ${E\\choose k}$. Therefore, the total number of configurations ($C$) for a MRF is given by the sum, over $k$, of the number of configurations with $k$ edges. This is\n",
    "\n",
    "$$\n",
    "    C = \\sum_{k=0}^E{E \\choose k}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A 2-node MRF has       2 possible configurations\n",
      "A 3-node MRF has       8 possible configurations\n",
      "A 4-node MRF has      64 possible configurations\n",
      "A 5-node MRF has    1024 possible configurations\n",
      "A 6-node MRF has   32768 possible configurations\n",
      "A 7-node MRF has 2097152 possible configurations\n"
     ]
    }
   ],
   "source": [
    "# Count the graphs by adding up, for every k, the number of ways of\n",
    "# choosing k of the E possible edges\n",
    "for nodes in range(2, 8):\n",
    "    edges = nodes * (nodes - 1) // 2\n",
    "    ways_per_k = [comb(edges, k) for k in range(edges + 1)]\n",
    "    configurations = sum(ways_per_k)\n",
    "    print(f\"A {nodes}-node MRF has {configurations:>7.0f} possible configurations\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, by the binomial theorem, we know that\n",
    "$$\n",
    "    (x + y)^N = \\sum_{k=0}^N{N \\choose k}x^{N-k}y^k\n",
    "$$\n",
    "\n",
    "Thus,\n",
    "$$\n",
    "\\begin{aligned}\n",
    "    (1 + 1)^N = 2 ^ N &= \\sum_{k=0}^N{N \\choose k}1^{N-k}1^k \\\\\n",
    "    &= \\sum_{k=0}^N{N \\choose k}\n",
    "\\end{aligned}\n",
    "$$\n",
    "\n",
    "Therefore, the total number of configurations can be written as:\n",
    "$$\n",
    "    2^E = \\sum_{k=0}^E{E \\choose k} = \\sum_{k=0}^{M(M-1)/2}{M(M-1)/2 \\choose k}\n",
    "$$\n",
    "\n",
    "Which implies that there are $2^{M(M-1)/2}$ possible configurations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A 2-node MRF has       2 possible configurations\n",
      "A 3-node MRF has       8 possible configurations\n",
      "A 4-node MRF has      64 possible configurations\n",
      "A 5-node MRF has    1024 possible configurations\n",
      "A 6-node MRF has   32768 possible configurations\n",
      "A 7-node MRF has 2097152 possible configurations\n"
     ]
    }
   ],
   "source": [
    "# Closed form: every one of the E possible edges is either present or absent\n",
    "for nodes in range(2, 8):\n",
    "    edges = nodes * (nodes - 1) // 2\n",
    "    configurations = 1 << edges  # same integer as 2 ** edges\n",
    "    print(f\"A {nodes}-node MRF has {configurations:>7.0f} possible configurations\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}