{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "s1",
     "content",
     "l1"
    ]
   },
   "source": [
    "# Linearly Inseparable Datasets\n",
    "\n",
    "## The Non-Convex Regions\n",
    "\n",
    "### Non-Convex Regions\n",
    "\n",
    "* What are non-convex regions?\n",
    "\n",
    "### Linearly Inseparable Regions\n",
    "\n",
    "* What are linearly inseparable regions?\n",
    "\n",
    "Here is an image of linearly inseparable, non-convex regions that we would like to identify by clustering. \n",
    "\n",
    "\n",
    "\n",
    "Let us look at just the data points:\n",
    "\n",
    "<img src='https://s3.amazonaws.com/rfjh/media/CKEditorImages/2017/06/19/4_almonds.png'/>\n",
    "\n",
    "<br/>\n",
    "## Exercise:\n",
    "\n",
    "Let us now create this dataset of 4 almonds using half-moons.\n",
    "\n",
    " - Create a dataframe, moon_df, with the structure shown below from the given X and y, and plot it with sns.pairplot, storing the plot in g.\n",
    " \n",
    "<table border=\"1\" cellpadding=\"1\" cellspacing=\"1\" style=\"width:500px; float: left;\">\n",
    "\t<tbody>\n",
    "\t\t<tr>\n",
    "\t\t\t<td>X_0</td>\n",
    "\t\t\t<td>X_1</td>\n",
    "\t\t\t<td>y</td>\n",
    "\t\t</tr>\n",
    "\t\t<tr>\n",
    "\t\t\t<td>X[:,0]</td>\n",
    "\t\t\t<td>X[:,1]</td>\n",
    "\t\t\t<td>y</td>\n",
    "\t\t</tr>\n",
    "\t</tbody>\n",
    "</table> "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": [
     "s1",
     "ce",
     "l1"
    ]
   },
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "from sklearn.datasets import make_moons\n",
    "\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "\n",
    "# Plot defaults applied through matplotlib's rcParams.\n",
    "plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots\n",
    "plt.rcParams['image.interpolation'] = 'nearest'\n",
    "plt.rcParams['image.cmap'] = 'gray'\n",
    "\n",
    "N_Samples = 1000  # points per half-moon (each make_moons call draws 2*N_Samples)\n",
    "D = 2  # not used in this cell; presumably the data dimensionality\n",
    "K = 4  # not used in this cell; presumably the number of clusters\n",
    "SEED = 42  # fixed seed so Restart & Run All reproduces the same points\n",
    "\n",
    "# First pair of half-moons, labelled 0 and 1.\n",
    "X, y = make_moons(n_samples = 2*N_Samples, noise=0.05, shuffle = False, random_state = SEED)\n",
    "# Second, noisier pair; a different seed keeps its noise independent of the first pair.\n",
    "x_vec, y_vec = make_moons(n_samples = 2*N_Samples, noise=0.08, shuffle = False, random_state = SEED + 1)\n",
    "x_vec[:,0] += 2.5  # shift the second pair 2.5 units along the first coordinate\n",
    "y_vec += 2  # relabel the second pair as classes 2 and 3\n",
    "# Stack both pairs into a single dataset with four labelled half-moons.\n",
    "X = np.concatenate((X, x_vec), axis=0)\n",
    "y = np.concatenate((y, y_vec), axis=0)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "s1",
     "l1",
     "hint"
    ]
   },
   "source": [
    "<p>Use the pd.DataFrame() command and sns.pairplot(x_vars, y_vars, hue, data).</p>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "s1",
     "l1",
     "ans"
    ]
   },
   "outputs": [],
   "source": [
    "# One column per coordinate plus the class label, as laid out in the exercise table.\n",
    "moon_df = pd.DataFrame(dict(X_0=X[:, 0], X_1=X[:, 1], y=y))\n",
    "\n",
    "# Scatter X_1 against X_0, coloured by the label column.\n",
    "g = sns.pairplot(data=moon_df, x_vars=\"X_0\", y_vars=\"X_1\", hue=\"y\")\n",
    "g.fig.set_size_inches(14, 6)\n",
    "sns.despine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "s1",
     "hid",
     "l1"
    ]
   },
   "outputs": [],
   "source": [
    "# Grader for exercise 1: passes only if moon_df matches a reference dataframe\n",
    "# built from X and y, and the plot handle g exists.\n",
    "ref_tmp_var = False\n",
    "\n",
    "try:\n",
    "    ref_assert_var = False\n",
    "    import numpy as np\n",
    "\n",
    "    # Reference dataframe the submission is compared against.\n",
    "    moon_df_ = pd.DataFrame({'X_0':X[:,0], 'X_1':X[:,1], 'y':y})\n",
    "\n",
    "    # Column-by-column comparison; stops at the first mismatching column.\n",
    "    if all(np.all(moon_df[col] == moon_df_[col]) for col in ('X_0', 'X_1', 'y')):\n",
    "        ref_assert_var = True\n",
    "        out = g  # raises NameError (caught below) when no plot was stored in g\n",
    "except Exception:\n",
    "    print('Please follow the instructions given and use the same variables provided in the instructions.')\n",
    "else:\n",
    "    ref_tmp_var = ref_assert_var\n",
    "    if not ref_tmp_var:\n",
    "        print('Please follow the instructions given and use the same variables provided in the instructions.')\n",
    "\n",
    "\n",
    "assert ref_tmp_var"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "l2",
     "content",
     "s2"
    ]
   },
   "source": [
    "\n",
    "<br/><br/><br/>\n",
    "## Linearly Inseparable Datasets\n",
    "\n",
    "Applying k-means to the dataset: we see that k-means failed. Let us now try GMMs.\n",
    "\n",
    "<img src='https://s3.amazonaws.com/rfjh/media/CKEditorImages/2017/06/19/k-means-moons.png'/>\n",
    "\n",
    "<br/>\n",
    "## Exercise:\n",
    "\n",
    " - Apply GMMs on the dataset and visualize the graph, g.\n",
    " - Assign the cluster assignment of each data point to the gmm_clus column in the moon_df dataframe.\n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true,
    "tags": [
     "l2",
     "ce",
     "s2"
    ]
   },
   "outputs": [],
   "source": [
    "import matplotlib as mpl\n",
    "from matplotlib import pyplot as plt\n",
    "from sklearn.datasets import make_moons\n",
    "\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "\n",
    "from sklearn import datasets\n",
    "from sklearn import mixture\n",
    "from sklearn.mixture import GaussianMixture\n",
    "\n",
    "#Generate the half moon data-set (4-halfmoons)\n",
    "N_Samples = 1000  # points per half-moon (each make_moons call draws 2*N_Samples)\n",
    "SEED = 42  # fixed seed so Restart & Run All reproduces the same points\n",
    "\n",
    "# First pair of half-moons, labelled 0 and 1.\n",
    "X, y = make_moons(n_samples = 2*N_Samples, noise=0.05, shuffle = False, random_state = SEED)\n",
    "# Second, noisier pair; a different seed keeps its noise independent of the first pair.\n",
    "x_vec, y_vec = make_moons(n_samples = 2*N_Samples, noise=0.08, shuffle = False, random_state = SEED + 1)\n",
    "x_vec[:,0] += 2.5  # shift the second pair 2.5 units along the first coordinate\n",
    "y_vec += 2  # relabel the second pair as classes 2 and 3\n",
    "X = np.concatenate((X, x_vec), axis=0)\n",
    "y = np.concatenate((y, y_vec), axis=0)\n",
    "\n",
    "#visualizing using seaborn library\n",
    "moon_df = pd.DataFrame({'X_0':X[:,0],'X_1':X[:,1], 'y':y})\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "l2",
     "s2",
     "hint"
    ]
   },
   "source": [
    "<p>Use the code from GMMs.</p>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "l2",
     "s2",
     "ans"
    ]
   },
   "outputs": [],
   "source": [
    "# Fit a 4-component Gaussian mixture with spherical covariances.\n",
    "# random_state pins the initialisation so cluster ids and the plot are the same on every run.\n",
    "mixture_model = GaussianMixture(n_components = 4, covariance_type = 'spherical', random_state = 0)\n",
    "mixture_model.fit(X)\n",
    "# Store the predicted component index of every point next to its coordinates.\n",
    "moon_df['gmm_clus'] = mixture_model.predict(X)\n",
    "# Plot the clusters\n",
    "g = sns.pairplot(x_vars=\"X_0\", y_vars=\"X_1\", hue=\"gmm_clus\", data = moon_df)\n",
    "g.fig.set_size_inches(14, 6)\n",
    "sns.despine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "l2",
     "hid",
     "s2"
    ]
   },
   "outputs": [],
   "source": [
    "# Grader for exercise 2: passes if moon_df has a gmm_clus column with one entry\n",
    "# per data point and the plot handle g exists.\n",
    "ref_tmp_var = False\n",
    "\n",
    "try:\n",
    "    ref_assert_var = False\n",
    "    import numpy as np\n",
    "\n",
    "    # Reference clustering; only its length is used in the comparison below.\n",
    "    mixture_model_ = GaussianMixture(n_components = 4, covariance_type = 'spherical')\n",
    "    mixture_model_.fit(X)\n",
    "    y_hat = mixture_model_.predict(X)\n",
    "\n",
    "    if len(y_hat) == len(moon_df['gmm_clus']):\n",
    "        ref_assert_var = True\n",
    "        out = g  # raises NameError (caught below) when no plot was stored in g\n",
    "except Exception:\n",
    "    print('Please follow the instructions given and use the same variables provided in the instructions.')\n",
    "else:\n",
    "    ref_tmp_var = ref_assert_var\n",
    "    if not ref_tmp_var:\n",
    "        print('Please follow the instructions given and use the same variables provided in the instructions.')\n",
    "\n",
    "\n",
    "assert ref_tmp_var"
   ]
  }
 ],
 "metadata": {
  "executed_sections": [],
  "rf_version": 1
 },
 "nbformat": 4,
 "nbformat_minor": 2
}