{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ML Basics, warming up with small data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Automatically created module for IPython interactive environment\n" ] } ], "source": [ "print(__doc__)\n", "\n", "import matplotlib.pyplot as plt\n", "from sklearn import datasets\n", "from sklearn.decomposition import PCA\n", "from sklearn import metrics\n", "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read poll data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimestampQ1_General background in data analysis?Q2_Hands-on experience in data analysis using Python?Q3_Experience in programming in general?Q4_General background in machine learning?Q5_Hands-on experience in running machine learning applications?Q6_Which one would you prefer on a Sunday afternoon?Q7_Hands-on experience in image analysis using satellite images?Q8_Level of interest in mathematics?Q9_Level of interest in reading?Q10_Level of stress about this class?Q11_Your overall motivation about this class?
02020/01/14 5:11:10 PM EST85467Running53573
12020/01/14 5:15:45 PM EST88556Reading77678
22020/01/14 10:10:14 PM EST66665Watching a movie77777
32020/01/15 10:02:48 AM EST53644Watching a movie388510
42020/01/15 10:03:20 AM EST66543Reading454108
\n", "
" ], "text/plain": [ " Timestamp Q1_General background in data analysis? \\\n", "0 2020/01/14 5:11:10 PM EST 8 \n", "1 2020/01/14 5:15:45 PM EST 8 \n", "2 2020/01/14 10:10:14 PM EST 6 \n", "3 2020/01/15 10:02:48 AM EST 5 \n", "4 2020/01/15 10:03:20 AM EST 6 \n", "\n", " Q2_Hands-on experience in data analysis using Python? \\\n", "0 5 \n", "1 8 \n", "2 6 \n", "3 3 \n", "4 6 \n", "\n", " Q3_Experience in programming in general? \\\n", "0 4 \n", "1 5 \n", "2 6 \n", "3 6 \n", "4 5 \n", "\n", " Q4_General background in machine learning? \\\n", "0 6 \n", "1 5 \n", "2 6 \n", "3 4 \n", "4 4 \n", "\n", " Q5_Hands-on experience in running machine learning applications? \\\n", "0 7 \n", "1 6 \n", "2 5 \n", "3 4 \n", "4 3 \n", "\n", " Q6_Which one would you prefer on a Sunday afternoon? \\\n", "0 Running \n", "1 Reading \n", "2 Watching a movie \n", "3 Watching a movie \n", "4 Reading \n", "\n", " Q7_Hands-on experience in image analysis using satellite images? \\\n", "0 5 \n", "1 7 \n", "2 7 \n", "3 3 \n", "4 4 \n", "\n", " Q8_Level of interest in mathematics? Q9_Level of interest in reading? \\\n", "0 3 5 \n", "1 7 6 \n", "2 7 7 \n", "3 8 8 \n", "4 5 4 \n", "\n", " Q10_Level of stress about this class? \\\n", "0 7 \n", "1 7 \n", "2 7 \n", "3 5 \n", "4 10 \n", "\n", " Q11_Your overall motivation about this class? \n", "0 3 \n", "1 8 \n", "2 7 \n", "3 10 \n", "4 8 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfInit = pd.read_csv(('./Data/MUSA-650WelcomePoll.csv'))\n", "dfInit.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Calculate relative timestamp" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimestamptsRel
02020-01-14 17:11:100.0
12020-01-14 17:15:45275.0
22020-01-14 22:10:1417944.0
32020-01-15 10:02:4860698.0
42020-01-15 10:03:2060730.0
52020-01-15 10:03:4360753.0
62020-01-15 10:03:5060760.0
72020-01-15 10:03:5360763.0
82020-01-15 10:03:5960769.0
92020-01-15 10:04:0360773.0
\n", "
" ], "text/plain": [ " Timestamp tsRel\n", "0 2020-01-14 17:11:10 0.0\n", "1 2020-01-14 17:15:45 275.0\n", "2 2020-01-14 22:10:14 17944.0\n", "3 2020-01-15 10:02:48 60698.0\n", "4 2020-01-15 10:03:20 60730.0\n", "5 2020-01-15 10:03:43 60753.0\n", "6 2020-01-15 10:03:50 60760.0\n", "7 2020-01-15 10:03:53 60763.0\n", "8 2020-01-15 10:03:59 60769.0\n", "9 2020-01-15 10:04:03 60773.0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfInit.Timestamp = pd.to_datetime(dfInit.Timestamp, format='%Y/%m/%d %I:%M:%S %p EST')\n", "dfInit['tsRel'] = (dfInit.Timestamp - dfInit.Timestamp.min()).dt.total_seconds()\n", "dfInit[['Timestamp', 'tsRel']].head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Column names" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Q1_General background in data analysis?',\n", " 'Q2_Hands-on experience in data analysis using Python?',\n", " 'Q3_Experience in programming in general?',\n", " 'Q4_General background in machine learning?',\n", " 'Q5_Hands-on experience in running machine learning applications?',\n", " 'Q6_Which one would you prefer on a Sunday afternoon?',\n", " 'Q7_Hands-on experience in image analysis using satellite images?',\n", " 'Q8_Level of interest in mathematics?',\n", " 'Q9_Level of interest in reading?',\n", " 'Q10_Level of stress about this class?',\n", " 'Q11_Your overall motivation about this class?',\n", " 'tsRel']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = dfInit[dfInit.columns[1:]]\n", "initCol = df.columns.tolist()\n", "initCol" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q6Q7Q8Q9Q10Q11tsRel
085467Running535730.0
188556Reading77678275.0
266665Watching a movie7777717944.0
353644Watching a movie38851060698.0
466543Reading45410860730.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 tsRel\n", "0 8 5 4 6 7 Running 5 3 5 7 3 0.0\n", "1 8 8 5 5 6 Reading 7 7 6 7 8 275.0\n", "2 6 6 6 6 5 Watching a movie 7 7 7 7 7 17944.0\n", "3 5 3 6 4 4 Watching a movie 3 8 8 5 10 60698.0\n", "4 6 6 5 4 3 Reading 4 5 4 10 8 60730.0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns = df.columns.str.split('_', 1).str[0].tolist()\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize correlations" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#sns.pairplot(df, kind = 'reg')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11tsRel
Q11.0000000.7667250.7138770.6240630.7388570.6761500.405270-0.244388-0.4418830.0422060.313668
Q20.7667251.0000000.6390080.4818360.5682440.6922750.399874-0.321784-0.3299910.0993480.383485
Q30.7138770.6390081.0000000.5648250.5440570.6164670.596732-0.083087-0.6316570.4005800.457261
Q40.6240630.4818360.5648251.0000000.9455410.4507520.426714-0.461877-0.1688680.0367390.440323
Q50.7388570.5682440.5440570.9455411.0000000.4449400.467669-0.463201-0.263556-0.0239300.407507
Q70.6761500.6922750.6164670.4507520.4449401.0000000.184545-0.198770-0.2844180.0452760.152171
Q80.4052700.3998740.5967320.4267140.4676690.1845451.000000-0.126656-0.2056980.5010680.036620
Q9-0.244388-0.321784-0.083087-0.461877-0.463201-0.198770-0.1266561.000000-0.2174100.228420-0.193531
Q10-0.441883-0.329991-0.631657-0.168868-0.263556-0.284418-0.205698-0.2174101.000000-0.133846-0.311850
Q110.0422060.0993480.4005800.036739-0.0239300.0452760.5010680.228420-0.1338461.0000000.430875
tsRel0.3136680.3834850.4572610.4403230.4075070.1521710.036620-0.193531-0.3118500.4308751.000000
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 \\\n", "Q1 1.000000 0.766725 0.713877 0.624063 0.738857 0.676150 0.405270 \n", "Q2 0.766725 1.000000 0.639008 0.481836 0.568244 0.692275 0.399874 \n", "Q3 0.713877 0.639008 1.000000 0.564825 0.544057 0.616467 0.596732 \n", "Q4 0.624063 0.481836 0.564825 1.000000 0.945541 0.450752 0.426714 \n", "Q5 0.738857 0.568244 0.544057 0.945541 1.000000 0.444940 0.467669 \n", "Q7 0.676150 0.692275 0.616467 0.450752 0.444940 1.000000 0.184545 \n", "Q8 0.405270 0.399874 0.596732 0.426714 0.467669 0.184545 1.000000 \n", "Q9 -0.244388 -0.321784 -0.083087 -0.461877 -0.463201 -0.198770 -0.126656 \n", "Q10 -0.441883 -0.329991 -0.631657 -0.168868 -0.263556 -0.284418 -0.205698 \n", "Q11 0.042206 0.099348 0.400580 0.036739 -0.023930 0.045276 0.501068 \n", "tsRel 0.313668 0.383485 0.457261 0.440323 0.407507 0.152171 0.036620 \n", "\n", " Q9 Q10 Q11 tsRel \n", "Q1 -0.244388 -0.441883 0.042206 0.313668 \n", "Q2 -0.321784 -0.329991 0.099348 0.383485 \n", "Q3 -0.083087 -0.631657 0.400580 0.457261 \n", "Q4 -0.461877 -0.168868 0.036739 0.440323 \n", "Q5 -0.463201 -0.263556 -0.023930 0.407507 \n", "Q7 -0.198770 -0.284418 0.045276 0.152171 \n", "Q8 -0.126656 -0.205698 0.501068 0.036620 \n", "Q9 1.000000 -0.217410 0.228420 -0.193531 \n", "Q10 -0.217410 1.000000 -0.133846 -0.311850 \n", "Q11 0.228420 -0.133846 1.000000 0.430875 \n", "tsRel -0.193531 -0.311850 0.430875 1.000000 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.corr()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAfKklEQVR4nO3de5gdVZnv8e8vCQExco0gJGACRGeAGVCuXlAEMidw1OjD+Ig6BhznRM6AOo4egcMw4hz1QRyvA2OMctPxwJzxAlGDDiCKgjAhECAhREJE0iaKgCKBQOju9/xR1Vhp9u6u3XtV9e7i98lTT/al9rtq7955s3rVqvUqIjAzs4lv0ngfgJmZpeGEbmbWEE7oZmYN4YRuZtYQTuhmZg3hhG5m1hBO6GZmiUm6WNKDkla2eV6SviBpraQ7Jb08RbtO6GZm6V0KzBvh+eOBOfm2EPhiikad0M3MEouIG4BHRthlPvDVyNwM7CRpj27bndJtgCo9/dC6Si9j7b9lSZXhYdOjlYaPX2+sND6A9p5VbQO/H+k7373NS26pND7A+27btdL4K57cUGn8syfvW2l8gKP2qv67OvOWH6qb13eSb6a+cN/3kPWshyyOiMUdNDcDWF+435c/1tUH1dMJ3cysF+XJu5MEPlyr/3y67sA6oZuZAQwO1NlaH7BX4f5MoOtfxTyGbmYGMNBffuveEmBBPtvlSODRiOh6XMo9dDMzIGIwWSxJlwNHA9Ml9QEfAbbJ2olFwFLgBGAt8ATwrhTtOqGbmQEMpkvoEfG2UZ4P4LRkDeac0M3MABL20MdL0jF0STMlXSXpXknrJF0gaVtJu0q6XtImSRekbNPMLInBgfJbj0qW0CUJ+BZwZUQMXQH1POB84EngHOBDqdozM0sqBstvPSplD/0Y4MmIuAQgIgaADwALAEXET8kSu5lZz4mB/tJbr0qZ0A8AlhcfiIg/APcD+5UNImmhpFsl3fqVr16e8PDMzEYwOFh+61EpT4qK1lc6dXQ5bvEKrKov/Tcze0YPD6WUlTKhrwJOLD4gaQdgd2BNwnbMzNLr4ZOdZaUccrkO2F7SAgBJk4FPAxdExOaE7ZiZpeeTon+UT5R/M/CXku4FHgYGI+LjAJLuBz4DnCKpT9L+qdo2M+tavZf+VyLphUURsR54I4CkVwKXSzokIpZHxKyUbZmZJdXDJzvLquxK0Yi4CXhxVfHNzFLKZlpPbL7038wMenpsvCwndDMz8JCLmVljuIderaprfk454o2Vxn/63z9TafyB1fdXGh9g0qOPVRt/1t6VxqeGf6MPD1Y7K3fG1J0rjb/jlurHjp83fQKMTw88Pd5H0LWeTuhmZrXxkIuZWUN4yMXMrCEa0EN3kWgzM0i+2qKkeZLWSFor6cwWz+8o6TuS7pC0SlLXdUXdQzczAyLhSdF8LasLgblAH7BM0pKIuLuw22nA3RHxBkkvBNZI+npEbBlru+6hm5lB6sW5DgfWRsS6PEFfAcwf3iLwgrza2zTgEaCrhWLqqik6V9JySXflfx+Tsl0zs651MORSLMSTbwuHRZsBrC/c78sfK7oA+FNgA3AX8P6I7s7MJhtyKdQU/WJEzM9/5VhMVlP0UuANEbFB0oHAD3j2mzMzGz8d5NJiIZ42WhX2GV6w578BK8jKd+4LXCPpJ3mltzGpq6bovRGxId9vFbCdpG0Ttm1m1p20J0X7gL0K92eS9cSL3gV8KzJrgV8Af9LNWxiPmqInArdHxFOtghR/lbno+zclPDwzsxGkHUNfBsyRNFvSVOAkYPil7w8AxwJI2h14KbCum7dQa01RSQcAnwT+ol2Q4q8ym7/3OdcUNbN69KcrXBER/ZJOJxtengxcHBGrJJ2aP78I+D/ApZLuIsuTZ0TEQ920W1tNUUkzgW8DCyLivoTtmpl1L/GVohGxFFg67LFFhdsbGKFzOxa11BQFtgW+B5wVETcmbNPMLI3EFxa
Nh7pqip5ONo5+jqQV+bZbqrbNzLrWgCLRddUU/RjwsZRtmZkl1cM977JcU9TMDHq6512W13IxM4Oks1zGixO6mRlATPxZ0r2d0Dc9Wmn4qkvEbfPWv680fvz27ErjA/DUmBd+KyUe6mra7ai2e+U+lcYHeM2KakuXnbPx+krjX378fqPv1KXJu2xTeRtd8xi6mVlDOKGbmTWET4qamTXEwMB4H0HXnNDNzMBDLmZmjeGEbmbWEB5DNzNrhhic+PPQ66openhhUa47JL05ZbtmZl3zaot/VKgpemVEzAHmAM8jqym6Ejg0Ig4G5gFfkuTfDsysdwwMlN96VF01RSdFxNBCCdvRurKRmdn4cQ99KyPWFJV0hKRVwF3AqYUEv5Wtaopee2vCwzMzG4ET+lZGrCkaEbdExAHAYcBZkrZrFSQiFkfEoRFx6LuPOzTh4ZmZjSCi/FaCpHmS1khaK+nMNvscnZ9bXCXpx92+hdpqig49FhGrJT0OHAi4C25mvSFhzzsvwXkhMBfoA5ZJWhIRdxf22Qn4V2BeRDyQoopbXTVFXzR0ElTSi4GXkg3FmJn1hsEov43ucGBtRKyLiC3AFcD8Yfu8HfhWRDwAEBEPdvsW6qop+mrgDkkrgG8DfxsR1a6bambWiQ5muRTP9eXbwmHRZgDrC/f78seKXgLsLOlHkpYPdYa7UVdN0a8BX0vZlplZStHBkEtELAYWj7CLWr1s2P0pwCHAsWRTvH8m6eaI+HnpA2kRsBKuKWpmE0raK0X7gL0K92cCG1rs81BEPA48LukG4CBgzAk96ZWiZmYTVgyW30a3DJgjabakqcBJwJJh+1wFHCVpiqTtgSOA1d28BV+taWYGSXvoEdEv6XTgB8Bk4OKIWCXp1Pz5RfmMv+8DdwKDwFciYmU37fZ0Qo9fb6w0/sDq+yuNX3XNz6mnf7zS+ACx6ZFK42/+yBmVxo8t1V8EctnmTZXGP23PoyqNf/Wy6tPA64+t9t9yEv1pL+mPiKXA0mGPLRp2/1PAp1K12dMJ3cysNl4+18ysIRqwfK4TupkZnU1b7FVO6GZm4B66mVljOKGbmTVEDxeuKKuWEnSF5/eWtEnSh1K2a2bWrRiM0luvqqsE3ZDPAlenatPMLJm0qy2Oi5RDLs8qQSfpA8AvJZ0NHAesAx5P2KaZWRoNmOVSVwm6g4AzgI8mbM/MLJ0G9NDrKkH3UeCzETHqNdLFdYYvvunu0XY3M0ujAQm9rhJ0TwPnSzof2AkYlPRkRFwwPEhxneEnPn9q735yZtYoMeAhl6K2Jegi4rCImBURs4DPAZ9olczNzMZNA3rodZWgMzPraU2YtlhXCbrlhX3OTdmmmVkSPZyoy3IJOjMzyEpMTHC+9N/MDIj+iZ/RXVPUzAyyHnrZrQRJ8yStkbRW0pkj7HeYpAFJf9ndG3AP3cwMIOnJznyW34XAXKAPWCZpSUTc3WK/T5LVHu1aTyd07T2r0viTHn2s0vg8taXS8FXX+wTQtF2qbaDi33InTdum2gZq8Ot4stL4j06eVml8gMEnJsBKhmm/i4cDayNiHYCkK4D5wPCrJd8LfBM4LEWjHnIxM6OzaYvFK9rzbeGwcDOA9YX7ffljz5A0g2yq91aFo7vR0z10M7PadNBDL17R3oZavWzY/c8BZ+QLGZZvfARO6GZmQPQnDdcH7FW4PxPYMGyfQ4Er8mQ+HThBUn9EXDnWRp3QzcyASDuGvgyYI2k28CvgJODtW7UXMXvotqRLge92k8zBCd3MLJMwoUdEv6TTyWavTAYujohVkk7Nn082bl7khG5mRvIeOhGxFFg67LGWiTwiTknRZi01RSXNkrRZ0op8q+R/JzOzsYrB8luvqrOm6H0RcXC+nZqqXTOzFGJApbdeVUtNUeDLCdsxM0uul3veZdVVU3QKMFvS7ZJ+LOmodkGKE/Yv+s//Snh4ZmbtxaBKb70qZQ99pJqi2wJ7R8TDkg4BrpR0QJ7wt1KcsL/
52+dN/AWKzWxCcA99a6vIJso/o1BT9M6IeBggL3ZxH/CShG2bmXUlQqW3XlVLTVFgWn4fSfuQnTBdl7BtM7OueJZLwSg1RV8D3CnpDuAbwKkRUf1SgWZmJQ0OqPTWq+qqKfpNsiUizcx6Ui+f7CzLNUXNzHBCNzNrjGjAnDondDMz3EM3M2uMXp6OWFZvJ/TfVzsRZtKsvSuNHw89VGn8zR85o9L4QOU1P7f/bLWrQtxx8N9XGh/gtdvvWGn838ZTlcbftb/6sYaVN06vvI22l5+XNNDDs1fK6u2EbmZWE/fQzcwawmPoZmYN4VkuZmYN0YQeetKKRWZmE9XA4KTSWxmS5klaI2mtpDNbPP8OSXfm202SDur2PbiHbmZG2iGXfDHCC4G5QB+wTNKSiLi7sNsvgNdGxO8kHU+2bPgR3bRbV03RdxTqia6QNCjp4JRtm5l1YzBUeivhcGBtRKyLiC3AFcD84g4RcVNE/C6/ezMws9v3UEtN0Yj4+lA9UeCdwP0RsSJV22Zm3epkPfRiZbV8Wzgs3AxgfeF+X/5YO+8Gru72PdRSU1TS2RGxKd/vbcDlCds1M+taJ0MuxcpqbbTqxrdsQdLryBL6q8sfQWt11RTdr/DwWxkhoW9VU/RHdyQ8PDOz9hIPufQBexXuzwQ2DN9J0p8DXwHmD1V160ZdNUWzG9IRwBMRsbJdkK1qil7y4QbMDDWziaDs7JWSlgFzJM0GfgWcBLy9uIOkvcmGqd8ZET9P0WjKhL4KOLH4QKGm6Jr8oZPwcIuZ9aCUvceI6Jd0OvADYDJwcUSsknRq/vwi4B+BXYF/zU5B0h8Rh7aLWUbKhH4dcJ6kBRHx1WJN0YjYLGkS8BaycnRmZj2l5FBKaRGxFFg67LFFhdt/A/xNyjbrqikKWSLviwgXhzazntPJLJdeVVdN0eUR8SPgyJTtmZmlUvFK0bVwTVEzMyBazjScWHzpv5kZ0N/DQyllOaGbmeEeeuU2L7ml2gYqHjTb7pX7VBo/tlQ/6jdp2jaVxq+6RNxBKz5TaXyA6Yf+Q6Xxq17V9YS3PFptA8CSb+xUeRvd8hi6mVlDuIduZtYQ7qGbmTXEgHvoZmbN0IAKdE7oZmYAg+6hm5k1QxOWdnVCNzOjGSdF66opuo2kyyTdJWm1pLNStmtm1q1BqfTWq2qpKUq2bO62EfFnwCHAeyTNStW2mVm3BjrYelUtNUWBW4HnS5pCluS3AH9I2LaZWVeaMMulrpqi9wCPAxuBB4B/johHWgUp1hS97P6NCQ/PzKy9QVR661UpE/pINUWnkv2msicwG/igpJYLnUTE4og4NCIOPXnWHgkPz8ysvehgK0PSPElrJK2VdGaL5yXpC/nzd0p6ebfvIWVCXwVsVQ+vUFP0r4HvR8TTEfEgcOPwfc3MxtOgym+jyUtwXggcD+wPvE3S/sN2O57sXOMcYCHwxW7fQ8qEfh2wvaQF8Mwb+jRwAfBz4Jj8f6Tnk1Uuuidh22ZmXRnsYCvhcGBtRKyLiC3AFcD8YfvMB74amZuBnSR1NSxRV03RC4FpwEpgGXBJRNyZqm0zs24NqPxWwgxgfeF+X/5Yp/t0pLaaomRTF83MelInFxZJWkg2TDJkcUQsLu7S4mXDh9/L7NMR1xQ1M6OzhJ4n78Uj7NIH7FW4PxPYMIZ9OpL0SlEzs4kqVH4rYRkwR9JsSVOBk4Alw/ZZAizIzy0eCTwaEV3N1fZaLmZmpF3LJSL6JZ0O/ACYDFwcEasknZo/vwhYCpwArAWeAN7Vbbs9ndDfd9uulcZ/eHBzpfFfs+LpSuNftnlTpfHr8Nrtd6w0ftX1PgHOvfVjlcZ/z6EfrjT+L79baXgAfjp1S+VtvKPL16e+pD8ilpIl7eJjiwq3AzgtZZs9ndDNzOrShEv/ndDNzGjG8rlO6GZmOKGbmTWGKxaZmTWEx9DNzBqilwtXlFVXCbqpki7JS9DdIenolO2amXVrkCi99aq6StD9D4C
8BN1c4NOSfJWqmfWMxKstjouUSfVZJeiADwALgJeTLa9Lvh767/F66GbWQ1IXuBgPdZWgWwPMlzRF0myyQtF7PSuCmdk4aUIPPeVJ0ZFK0P0Q2IOsWPQvgZuA/pZBCstSHr7LwcyZNjvhIZqZtdavXu57l1NXCbpVEfGBiDg4IuYDOwH3tgpSrCnqZG5mdfGQy9ZGKkE3VHoOSXOB/oi4O2HbZmZdacKQS10l6HYDbpO0GjgDeGeqds3MUmjCtMU6S9C9NGVbZmYp9W6aLs8l6MzM6O2hlLJ86b+ZGTDQgD66E7qZGc3oofvyezMzIDr40w1Ju0i6Jl/z6hpJO7fYZy9J10taLWmVpPeXid3TPfQVT26oNP6Mqc/6HJM6Z+P1lcY/bc+jKo0P8Ot4stL4v42nKo1fx5KoVdf8/NKt51ca/x9rqLu6fvDRytvoVo099DOB6yLiPEln5vfPGLZPP/DBiLhN0guA5ZKuGW26t3voZmbUOm1xPnBZfvsy4E3Dd4iIjRFxW377MWA1MGO0wE7oZmZ0dqWopIWSbi1sCztoaveI2AhZ4ia7TqctSbOAlwG3jBa4p4dczMzq0t9BzzsiFgOL2z0v6VrgRS2eOruTY5I0Dfgm8Hf5YocjckI3M4OuT3ZuFSviuHbPSfqNpD0iYqOkPYAH2+y3DVky/3pEfKtMux5yMTOj1rVclgAn57dPBq4avkNeMOgiYHVEfKZsYCd0MzPqm7YInAfMzde8mpvfR9Kekpbm+7yKbM2rYyStyLcTRgs8piEXSTOBC4H9gcnAUuCDwDTgG8BhwKURcXrhNYcAl5KVpVsKvD9f0MvMbNzVNW0xIh4Gjm3x+AbghPz2T8lqSXSk4x76KLVDnwTOAT7U4qVfJCtcMfSaeZ22bWZWlYGI0luvGsuQy0i1Q5X/z7LV1Sj5wP8OEfGzvFf+VVrMvTQzGy9NWD53LAl9pNqh+7V5zQygr3C/jzaT5IvzOx/Z3PLkr5lZcjWOoVdmLAl9pNqhI71muJafSrEE3S7PG3G+vZlZMs/VikUj1Q5d0+Y1fcDMwv2ZQLULtZiZdeC5OuTStnZoRGxu9YL88tbHJB2Zn1RdQIu5l2Zm4+U5OeQySu1QJN0PfAY4RVKfpP3zl/5P4CvAWuA+4OruD9/MLI0mzHIZ0zz0kWqHRsSsNq+5FThwrAdqZlalXh5KKavrtVxcO9TMmqCXT3aW5cW5zMxIuzjXeHFCNzPDQy5mZo3RhKWlejqhnz1530rj77hloNL4lx/f7sLZNK5eVv2P79HJ0yqNv2t/tf+ITnhL9bUsf/ndauNXXfPzn279WKXxAR5/77srb6NbA+6hm5k1g4dczMwawkMuZmYN4R66mVlDeNqimVlD9PIl/WW5pqiZGfWttihpF0nXSLo3/3vnEfadLOl2SaXmUo0poUuaKemq/IDWSbpA0raSdpV0vaRNki4Y9pqPS1ovadNY2jQzq1KNy+eeCVyXl/C8Lr/fzvuB1WUD11lT9DvA4Z22Z2ZWh4govXVpPnBZfvsy2pTjlDQT+O9kq9SWUktN0Xy/m/N10c3Mek4nPfRiqcx8W9hBU7sP5cL873al2T4HfJgO1g0by0nRljVF83XQ9wNWjCHmM/IPZiHAu3c8nGO3r/ZqSzMz6GyWS0QsBha3e17StcCLWjx1dpn4kl4PPBgRyyUdXfa4xpLQx1JTtLTiB3X5nu+Y+KedzWxCGIh0C+hGxHHtnpP0G0l7RMRGSXsAD7bY7VXAGyWdAGwH7CDp3yLir0Zqt66aomZmPa3GMfQlwMn57ZNpUY4zIs6KiJl5waCTgB+OlsyhppqiZma9rsZZLucBc/MSnnPz+0jaU9LSbgLXVlNU0vmS+sj+M+iTdG43B25mllJdRaIj4uGIODYi5uR/P5I/viEiTmix/48i4vVlYtdZU/TDZGdszcx6zmADrhR1TVEzM7yWi5lZY6Sc5TJenNDNzPCQi5lZYzRhyEW9XKW
j74hjKj24502vtqbolN22qTQ+FdfjBBh8otrPaOWN0yuN/8Ck7SqND/DTqVsqjb9+8PFK4//bq56oND7A8//losrb2Gb6Pl1d3Ljv9JeX/gd130O3JbmQMjX30M3MaEYP3QndzAwYiGp/G62DE7qZGS4SbWbWGC4SbWbWEO6hm5k1RBPmoY+4OJeknST97Sj73C/pLkl3SvqxpBGXAZB0yvB6o2Zm462uxbmqNNpqizsBIyb03Osi4s+BHwH/0O1BmZnVbSAGS2+9arSEfh6wr6QVkr4s6Yb89kpJR7XY/2fADABJL5T0TUnL8u1VqQ/ezCyVGgtcVGa0hH4mcF9EHAzcA/wgv30QrWuHzgOuzG9/HvhsRBwGnEjJytXF4qtff3BDmZeYmXVtMKL01qs6OSm6DLhY0jbAlRFRTOjXS9qdrDbe0JDLccD+0jNXyO4g6QWjNVKsKVr1pf9mZkN6ueddVumKRRFxA/Aa4FfA14ZK0OVeR7Ym+irgnwqxXxERB+fbjIh4LNFxm5klVVcJOkm7SLpG0r353zu32W8nSd+QdI+k1ZJeMVrs0RL6Y8AL8uAvBh6MiC8DFwEvL+6Y1xP9O2CBpF2A/wROLxzcwaMdjJnZeKlxDP1M4LqImENWo/nMNvt9Hvh+RPwJ2TD36tECjzjkEhEPS7pR0krg+cDjkp4GNgELWuy/UdLlwGnA+4ALJd2Zt3MDcOpoB2RmNh5qnL0yHzg6v30Z2ezAM4o7SNqBbETkFICI2AKMuqznqGPoEfH2UZ6fNez+ewt339pi/0uBS0dr18ysTjWe7Nw9IjbCM53g3Vrssw/wW+ASSQcBy4H3R8SIaymXHkM3M2uyToZcirPx8m1hMZaka/Pp3cO3+SUPZwrZsPYXI+JlwOO0H5rZ6kVmZs95nVwBWpyN1+b549o9J+k3kvbIe+d7kM0OHK4P6IuIW/L736BEQncP3cyMWk+KLgFOzm+fDFzV4lh+DayX9NL8oWOBu0cL7B66mRm1jqGfB/w/Se8GHgDeAiBpT+ArEXFCvt97ga9LmgqsA941auRO/lfq9Q1YOJHjN+E9+DMa//hNeA91fEZN3Jo25LJw9F16On4dbUz0+HW0MdHj19HGRI/fSE1L6GZmz1lO6GZmDdG0hN52GtEEiV9HGxM9fh1tTPT4dbQx0eM3kvITEGZmNsE1rYduZvac5YRuZtYQEzKhS5op6ap8PeF1ki6QtK2kXSVdL2lTt4WoR2hjrqTleWHs5ZKOSRz/8LzM3wpJd0h6c8r4hef3zj+nD40l/ijvYZakzYX3sShx/HcUYq+QNDiW5ZlHiL+NpMvyn/FqSWeN5fhHaWOqpEvyNu6QdHSCmG2//5IOydtaK+kL0h8rzySK/3FJ6yVtahPTBefrMN4T4cdwwYGA/wLeld+fTLY+++fJlvh9NdkyvRdU1MbLgD3zxw8EfpU4/vbAlPzxoXUepqSKX9jnm8B/AB+q4DOaBays6uc8bL8/A9YlPv63A1fkj28P3A/MStzGacAl+eO7ka2mN6mq73/+mlfkr78aOD5x/CPz7+umNnFH/U7kn/P0/PZHgS+Psv8pw4/jub6N+wF0fMDZmgY3DHtsB+B3wLQUP+gybeSPCXgY2Lai+LOB39B5Qh8xPvAm4FPAuYw9oY/UxoGj/eNN+Bl9Avh44vjvBL5DtjTGrsDPgV0St3ER8FeFx68DDk/xuQz//ueJ9p7C/bcBX0oVf9i+7RL6FcBmslrEXyarj7ACWAkcle9zP39M6POApfntF5J1QJbl26tGO47n6jYRh1wOIOvNPCMi/kD2Zdiv5jZOBG6PiKdSxpd0hKRVwF3AqRHRnzD+QWSL6X+0w5idtDEFmC3p9vxX56MSxy/+DN4KXJ44/j1ky5VuJFtr458j4pHEbawB5kuaImk2cAiwV5cx233/Z5Ct3jekL38sVfwyai84/1w0ERfnErRc57LtmGAVbUg6APgk8Bep40e2ZOYBkv4
UuEzS1RHxZKL4HyX7x7FphGHUbtvYFtg7sopXhwBXSjogTwwp4mc3pCOAJyJiZQdxy8SfCgwAewI7Az+RdG1ErEvYxg/Jes63Ar8EbgLK/Mc9lu9/q+fazVeu499XLQXnn4smYg99FXBo8QFl5Zp2J+v1VN6GpJnAt4EFEXFf6vhDj0XEarKe4oEJ4+8InC/pfrIasP9b0unPitBdG3dGxMP5e1gO3Ae8JGH8oc/oJMbWOx8t/l+T1XJ8OiIeBG4cvm+CNlZFxAciK6A+H9gJuLfLmO2+/33AzML9mcCGhPE7Ei44X5mJmNCvA7Yf+hJImgx8mmwsbXPVbZD1Pr8HnBURN1YQ/0WSpuSPvxh4Kdmvu0niR8RhETErstKBnwM+ERFjmSkw0nuYlt9H0j7AHLLlP1O9h82SJpEtO3rFGI59tOP/OXCMMs8nO+F3T+I2hmIjaS7QHxGjrnc9Usx23//Iyp09JunIfHbLAlqswT3W+CW54HwdxnsQfywb2VjjErIeze8pnOAhS36PkBWy7gP2T9kG2a+Bj5ON+w1tuyWM/06y3skK4DbgTak/o8I+5zLGk6KjvIcT8/dwR/4e3lDBz/lo4OYqvkdkJ47/I38PdwP/q4I2ZpH1eFcD1wIvrvL7T9brXkn229IF5FeJJ4x/fn5/MP/73BZx/29+DL/I/74d+AkwuxB7emH/fwHOAaYD/w7cmf88FuXPn4JPim79GY/3AXT9BuCVZGOQh0zUNiZ6/Ca8h4n6GTXhc/GWbvNaLmZmDTERx9DNzKwFJ3Qzs4ZwQjczawgndDOzhnBCNzNrCCd0M7OG+P8fAUIPfuGowAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "corr = df.corr()\n", "sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Q1_General background in data analysis?',\n", " 'Q2_Hands-on experience in data analysis using Python?',\n", " 'Q3_Experience in programming in general?',\n", " 'Q4_General background in machine learning?',\n", " 'Q5_Hands-on experience in running machine learning applications?',\n", " 'Q6_Which one would you prefer on a Sunday afternoon?',\n", " 'Q7_Hands-on experience in image analysis using satellite images?',\n", " 'Q8_Level of interest in mathematics?',\n", " 'Q9_Level of interest in reading?',\n", " 'Q10_Level of stress about this class?',\n", " 'Q11_Your overall motivation about this class?',\n", " 'tsRel']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "initCol" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Handling categorical variables (visualization)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.catplot(x=\"Q6\", y=\"Q11\", data=df);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Handling categorical variables (Data analysis)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "df2 = pd.get_dummies(df, columns=['Q6'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11tsRelQ6_ReadingQ6_RunningQ6_Watching a movie
085467535730.0010
18855677678275.0100
2666657777717944.0001
35364438851060698.0001
46654345410860730.0100
58783384102860753.0010
64311111108860760.0100
7737654686960763.0100
8555444455760769.0001
9666664665660773.0001
10444535278760783.0001
11777227675860790.0010
12888668786860800.0001
13444111577860801.0001
1487777710561060812.0010
15776666665760817.0001
16766551774760823.0100
17666552997960939.0001
1899999765410443956.0001
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 Q11 tsRel Q6_Reading \\\n", "0 8 5 4 6 7 5 3 5 7 3 0.0 0 \n", "1 8 8 5 5 6 7 7 6 7 8 275.0 1 \n", "2 6 6 6 6 5 7 7 7 7 7 17944.0 0 \n", "3 5 3 6 4 4 3 8 8 5 10 60698.0 0 \n", "4 6 6 5 4 3 4 5 4 10 8 60730.0 1 \n", "5 8 7 8 3 3 8 4 10 2 8 60753.0 0 \n", "6 4 3 1 1 1 1 1 10 8 8 60760.0 1 \n", "7 7 3 7 6 5 4 6 8 6 9 60763.0 1 \n", "8 5 5 5 4 4 4 4 5 5 7 60769.0 0 \n", "9 6 6 6 6 6 4 6 6 5 6 60773.0 0 \n", "10 4 4 4 5 3 5 2 7 8 7 60783.0 0 \n", "11 7 7 7 2 2 7 6 7 5 8 60790.0 0 \n", "12 8 8 8 6 6 8 7 8 6 8 60800.0 0 \n", "13 4 4 4 1 1 1 5 7 7 8 60801.0 0 \n", "14 8 7 7 7 7 7 10 5 6 10 60812.0 0 \n", "15 7 7 6 6 6 6 6 6 5 7 60817.0 0 \n", "16 7 6 6 5 5 1 7 7 4 7 60823.0 1 \n", "17 6 6 6 5 5 2 9 9 7 9 60939.0 0 \n", "18 9 9 9 9 9 7 6 5 4 10 443956.0 0 \n", "\n", " Q6_Running Q6_Watching a movie \n", "0 1 0 \n", "1 0 0 \n", "2 0 1 \n", "3 0 1 \n", "4 0 0 \n", "5 1 0 \n", "6 0 0 \n", "7 0 0 \n", "8 0 1 \n", "9 0 1 \n", "10 0 1 \n", "11 1 0 \n", "12 0 1 \n", "13 0 1 \n", "14 1 0 \n", "15 0 1 \n", "16 0 0 \n", "17 0 1 \n", "18 0 1 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "dfTmp = df2[['Q8', 'Q9', 'Q10', 'Q11', 'Q6_Reading', 'Q6_Running', 'Q6_Watching a movie',]].copy()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "#sns.pairplot(dfTmp, kind = 'reg')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "corr = df.corr()\n", "sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dealing with outliers (focusing on tsRel)\n", "\n", "### Visualize data" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Q1_General background in data analysis?',\n", " 'Q2_Hands-on experience in data analysis using Python?',\n", " 'Q3_Experience in programming in general?',\n", " 'Q4_General background in machine learning?',\n", " 'Q5_Hands-on experience in running machine learning applications?',\n", " 'Q6_Which one would you prefer on a Sunday afternoon?',\n", " 'Q7_Hands-on experience in image analysis using satellite images?',\n", " 'Q8_Level of interest in mathematics?',\n", " 'Q9_Level of interest in reading?',\n", " 'Q10_Level of stress about this class?',\n", " 'Q11_Your overall motivation about this class?',\n", " 'tsRel']" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "initCol" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Q: Is there a correlation between how fast a student answered the poll and answers to questions?" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tsRel 1.000000\n", "Q3 0.457261\n", "Q10 -0.311850\n", "Q11 0.430875\n", "Name: tsRel, dtype: float64\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "dfTmp = df[['tsRel','Q3', 'Q10', 'Q11']].copy()\n", "corr = dfTmp.corr()\n", "sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)\n", "print(corr['tsRel'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### WARNING: Outliers may lead to incorrect conclusions!" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.regplot(x='tsRel', y='Q3', data=dfTmp, color=\"g\")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.distplot(dfTmp.tsRel, hist=True, rug=True, color=\"g\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### What is an outlier? Let's zoom into the data" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.0, 100000.0)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAacAAAEGCAYAAADBr1rTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXhV1bn48e+bhIQpCTPGBAJUFBkcUbFqreIAXCv0p1aoFrS2lIpFW70t6LW3tVqxelscUAQn0IpQ61Vs8aLghLaAoIiMEhkDSCJDBsh0kvf3x9kn7IQkZ+fk5JzAfj/Pc56zz9prrb32FnlZe6+9lqgqxhhjTEuSEO8GGGOMMbVZcDLGGNPiWHAyxhjT4lhwMsYY0+JYcDLGGNPiJMW7AfHSpUsX7dWrV7ybYYwxx5RVq1Z9o6pdm/s4vg1OvXr1YuXKlfFuhjHGHFNEZHssjmO39YwxxrQ4FpyMMca0OBacjDHGtDgWnIwxxrQ4FpyMMca0OBacjDHGtDgWnIwxxrQ4noKTiAwTkU0ikiMik+vYLyLymLN/jYicFa6siHQSkXdEZLPz3dFJ7ywi74lIsYg84cqfKiKrXZ9vRGSas+8mEcl37ftJUy6KMcaY+AobnEQkEZgODAf6A2NEpH+tbMOBvs5nPPCUh7KTgSWq2hdY4vwGKAXuBe5yH0BVi1T1jNAH2A685soyz7X/GU9nb4yp4eIXLmbasmnxboYxnnpO5wI5qrpFVcuBV4CRtfKMBOZo0DKgg4hkhCk7EpjtbM8GRgGo6iFV/YhgkKqTiPQFugFLvZykMcab5bnL+Xjnx/FuhjGeglMmsNP1O9dJ85KnobLdVXUPgPPdzXuzGUOwp+Rexvca55biqyLSoxF1GWOAskAZZZVl7CrcFe+mGOMpOEkdabXXdq8vj5eykRgNzHX9fhPopaqnAYs50iOrQUTGi8hKEVmZn58fhWYYc/woLCsEILcwN84tMcZbcMoF3D2RLGC3xzwNld3r3PrD+c7z0mAROR1IUtVVoTRV3aeqZc7PWcDZdZVV1ZmqOlhVB3ft2uyT6hpzTAkFpz3Fe6isqoxza4zfeQlOnwB9RaS3iCQT7LUsqJVnATDWGbU3BChwbtU1VHYBMM7ZHge84bHNY6jZawoFt5CrgQ0e6zLGOArKCgAIVAXIO+Tp34rGNJuwS2aoakBEbgMWAYnAc6q6TkQmOPtnAAuBEUAOcBi4uaGyTtVTgfkicguwA7gudEwR2QakAckiMgq4QlXXO7t/4BzLbZKIXA0EgP3ATY25CMaYIz0nCN7ay0jNaCC3Mc3L03pOqrqQYAByp81wbSsw0WtZJ30fMLSeMr0aaEufOtKmAFPqK2OMCa+gtKB6e1fRLs7hnDi2xvidzRBhjAGO7jkZE08WnIwxwJFnToANJzdxZ8HJGAMc6TlltM8gt8h6Tia+LDgZY4DgM6eUxBT6dOxjt/VM3FlwM
sYAwZ5TWkoaWWlZdlvPxJ0FJ2MMAIXlhaS3TiczNZPcwlxqzg5mTGxZcDLGAMHbeqGeU0mghIOlB+PdJONjFpyMMUDwtl56SjpZaVmADSc38WXByRgDBIeSp6WkkZkWXDjAgpOJJwtOxhjA6Tm1PtJz2lVkgyJM/FhwMsYAzjOn5DQy2mcgiPWcTFxZcDLGoKrVPadWia3o3r67DSc3cWXByRjD4YrDVGolaSlpAGSlZdksESauLDgZY6qnLkpPSQcgMzXTek4mriw4GWOqJ32t0XOyZ04mjiw4GWOqe06h4JSZmsmB0gMcrjgcz2YZH7PgZIypXmgwvXXwtl71cHK7tWfixIKTMebonpO9iGvizIKTMab6mVNoQIS9iGvizYKTMabOZ05gPScTPxacjDHVz5xCwaldcjs6tO5gwcnEjafgJCLDRGSTiOSIyOQ69ouIPObsXyMiZ4UrKyKdROQdEdnsfHd00juLyHsiUiwiT9Q6zvtOXaudTzcnPUVE5jnHWC4ivSK7HMb4U2FZIe1atSMxIbE6LSsty27rmbgJG5xEJBGYDgwH+gNjRKR/rWzDgb7OZzzwlIeyk4ElqtoXWOL8BigF7gXuqqdJN6jqGc4nz0m7BTigqicBfwEeCndexpgjCsoKqkfqhYQWHTQmHrz0nM4FclR1i6qWA68AI2vlGQnM0aBlQAcRyQhTdiQw29meDYwCUNVDqvoRwSDllbuuV4GhIiKNKG+Mr4WWaHez5dpNPHkJTpnATtfvXCfNS56GynZX1T0Aznc3j21+3rmld68rAFUfR1UDQAHQuXZBERkvIitFZGV+fr7Hwxlz/CsoK6geqReSlZbF18VfU1FZEadWGT/zEpzq6oGoxzxeyjbGDao6CLjI+fwozPFrJqjOVNXBqjq4a9euTWiGMceXunpOmamZKMqe4j1xapXxMy/BKRfo4fqdBez2mKehsnudW38433mEoaq7nO8i4GWCtw1rHF9EkoB0YH+4+owxQaHlMtxslggTT16C0ydAXxHpLSLJwGhgQa08C4Cxzqi9IUCBc6uuobILgHHO9jjgjYYaISJJItLF2W4FXAWsraOua4F3VbUpPTRjfCW00KCbzRJh4ikpXAZVDYjIbcAiIBF4TlXXicgEZ/8MYCEwAsgBDgM3N1TWqXoqMF9EbgF2ANeFjiki24A0IFlERgFXANuBRU5gSgQWA7OcIs8CL4pIDsEe0+jILocx/tRgz8mGk5s4CBucAFR1IcEA5E6b4dpWYKLXsk76PmBoPWV61dOUs+vJX4oruBljvKusqqSovOioZ04dW3ekTVIb6zmZuLAZIozxueLyYoCjRuuJCJlp9q6TiQ8LTsb4XO2FBt1slggTLxacjPG52pO+utksESZeLDgZ43O1Fxp0y0rLYnfRbqq0KtbNMj5nwckYn2uo55SVlkV5ZTnfHP4m1s0yPmfByRifq73QoJut62TixYKTMT4XrucENkuEiT0LTsb4XEPPnGyWCBMvFpyM8bnCskISJIF2rdodta97u+4kSqINJzcxZ8HJGJ8rKCsgLSWNupZAS0xI5MTUE63nZGLOgpMxPlfXchlumWmZ1nMyMWfByRifq2uhQbestCzrOZmYs+BkjM+F7TnZLBEmDiw4GeNzBaUFdY7UC8lKy6K4vLh6yLkxsWDByRifC9dzCr3rZL0nE0sWnIzxuXDPnEKzRNiLuCaWLDgZ43PWczItkQUnY3ysvLKc0kBpgz2nE1NPBCw4mdiy4GSMjzU0r15ISlIKXdt2tXedTExZcDLGx7wEJ7B3nUzseQpOIjJMRDaJSI6ITK5jv4jIY87+NSJyVriyItJJRN4Rkc3Od0cnvbOIvCcixSLyhCt/WxH5p4hsFJF1IjLVte8mEckXkdXO5yeRXhBj/KShSV/dbJYIE2thg5OIJALTgeFAf2CMiPSvlW040Nf5jAee8lB2MrBEVfsCS5zfAKXAvcBddTTnEVXtB5wJXCAiw
1375qnqGc7nmXDnZYxpRM8p1XpOJra89JzOBXJUdYuqlgOvACNr5RkJzNGgZUAHEckIU3YkMNvZng2MAlDVQ6r6EcEgVU1VD6vqe852OfApkNW40zXGuDW00KBbZlom3xz+htJAaYP5jIkWL8EpE9jp+p3rpHnJ01DZ7qq6B8D57ua10SLSAfgewR5XyDXOLcVXRaRHPeXGi8hKEVmZn5/v9XDGHLca88wJYHfR7mZvkzHgLTgdPY8+qMc8Xso2iogkAXOBx1R1i5P8JtBLVU8DFnOkR1bzwKozVXWwqg7u2rVrU5phzHHB6zMne9fJxJqX4JQLuHsiWUDtfz7Vl6ehsnudW38433ke2zwT2Kyq00IJqrpPVcucn7OAsz3WZYyvee052SwRJta8BKdPgL4i0ltEkoHRwIJaeRYAY51Re0OAAudWXUNlFwDjnO1xwBvhGiIi9wPpwB210jNcP68GNng4L2N8r6CsgOTEZFontW4wn/WcTKwlhcugqgERuQ1YBCQCz6nqOhGZ4OyfASwERgA5wGHg5obKOlVPBeaLyC3ADuC60DFFZBuQBiSLyCjgCqAQuAfYCHzqrNr5hDMyb5KIXA0EgP3ATZFeEGP8JNzURSGpKamkJqdacDIxEzY4AajqQoIByJ02w7WtwESvZZ30fcDQesr0qqcpdT3DQlWnAFPqKWOMqUe4SV/dstKy7F0nEzM2Q4QxPua15wQ2S4SJLQtOxvhYuIUG3WyWCBNLFpyM8bFG9ZxSs9hTtIdAVaCZW2WMBSdjfK0xz5wy0zKp1Er2Fu9t5lYZY8HJGF9r7DMnwG7tmZiw4GSMT6lq8JlTI0brgb3rZGLDgpMxPlUSKKFSKz33nGyWCBNLFpyM8Smv8+qFdGnbheTEZOs5mZiw4GSMT3mdVy9ERMhMteHkJjYsOBnjU6G1nLwGJ7AXcU3sWHAyxqdCPSevAyIgOJzcgpOJBQtOxvhUY2/rQfBF3F1FuwhOp2lM87HgZIxPNXZABARv65UGStlfsr+5mmUMYMHJGN+KpOeUmeYMJ7dBEaaZWXAyxqciHRAB9iKuaX4WnIzxqcKyQtq2aktSgqdl3YAjL+JacDLNzYKTMT7VmKmLQk5ofwIJkmCzRJhmZ8HJGJ8qLPc+6WtIq8RWnND+BOs5mWZnwckYn2rMQoNuNkuEiQULTsb4VGOWy3CzWSJMLHgKTiIyTEQ2iUiOiEyuY7+IyGPO/jUicla4siLSSUTeEZHNzndHJ72ziLwnIsUi8kSt45wtIl84dT0mIuKkp4jIPCd9uYj0iuxyGOMfjVlo0C0z1WaJMM0vbHASkURgOjAc6A+MEZH+tbINB/o6n/HAUx7KTgaWqGpfYInzG6AUuBe4q47mPOXUHzrWMCf9FuCAqp4E/AV4KNx5GeN3Tek5FZQVUFxe3AytMibIS8/pXCBHVbeoajnwCjCyVp6RwBwNWgZ0EJGMMGVHArOd7dnAKABVPaSqHxEMUtWc+tJU9d8anDtlTqhMrbpeBYaGelXGmLpFMloPXCvi2og904y8BKdMYKfrd66T5iVPQ2W7q+oeAOe7m4d2uO8luOuqPo6qBoACoHOY+ozxrSqtoqi8KKKek80SYWLBS3CqqwdSe9bH+vJ4KetVQ3V5Oo6IjBeRlSKyMj8/P8JmGHPsKyorAho3r16IzRJhYsFLcMoFerh+ZwG7PeZpqOxe51Zd6JZdnod2ZNVTV/VxRCQJSAeOmplSVWeq6mBVHdy1a9cwhzPm+BXJvHohNkuEiQUvwekToK+I9BaRZGA0sKBWngXAWGfU3hCgwLlV11DZBcA4Z3sc8EZDjXDqKxKRIc7zpLGuMu66rgXeVZvT35h6RTKvXkibVm3o1KaTPXMyzSrspFqqGhCR24BFQCLwnKquE5EJzv4ZwEJgBJADHAZubqisU/VUYL6I3ALsAK4LHVNEtgFpQLKIjAKuUNX1wM+BF4A2wFvOB+BZ4EURySHYYxod0dUwx
iciWWjQLSsti9wi6zmZ5uNpxkdVXUgwALnTZri2FZjotayTvg8YWk+ZXvWkrwQG1pFeiiu4GWMaFlrLKZKeEzizRFjPyTQjmyHCGB+q7jlFMCACbJYI0/wsOBnjQ0155gTBnlPeoTzKK8uj2SxjqllwMsaHovHMSVH2FO2JZrOMqWbByRgfKigtQBDaJbeLqLy962SamwUnY3woNK9egkT2V4DNEmGamwUnY3wokoUG3aznZJqbBSdjfCjShQZD0lPSaduqrQ0nN83GgpMxPhTpchkhImIv4ppm5eklXGNMyzRz1cyIyuXszyE1OTXi8uPPHm/vOplmZT0nY3yoNFBK61atm1SHzRJhmpMFJ2N8qCRQQpukNk2qIysti11Fu6jSqii1ypgjLDgZ40MlFSW0Tmp6zylQFSD/kK2NZqLPgpMxPlNZVUlFVQVtWjW95wQ2nNw0DwtOxvhMSaAEICq39QB2Fu5scpuMqc2CkzE+U1IRneDUM70nADsKdjS5TcbUZsHJGJ8pDZQCNPmZU5e2XWiT1MaCk2kWFpyM8ZnQbb2mDiUXEXqm92R7wfZoNMuYGiw4GeMz0XrmBJDdIdt6TqZZWHAyxmei9cwJoGdaT7YftJ6TiT4LTsb4TOiZU1OHkkNwUMTeQ3ur6zQmWiw4GeMz1c+cmjggAoK39cDedTLR5yk4icgwEdkkIjkiMrmO/SIijzn714jIWeHKikgnEXlHRDY73x1d+6Y4+TeJyJVOWqqIrHZ9vhGRac6+m0Qk37XvJ025KMYcz0orSkmURFoltGpyXaHh5HZrz0Rb2OAkIonAdGA40B8YIyL9a2UbDvR1PuOBpzyUnQwsUdW+wBLnN87+0cAAYBjwpIgkqmqRqp4R+gDbgddcbZjn2v9MYy+EMX5REghOXSQiTa4rOz3Yc7JBESbavPSczgVyVHWLqpYDrwAja+UZCczRoGVABxHJCFN2JDDb2Z4NjHKlv6KqZaq6Fchx6qkmIn2BbsDSRpyrMQZn0tcoPG+C4HLtgthwchN1XoJTJuCenyTXSfOSp6Gy3VV1D4Dz3a0RxxtDsKekrrRrnFuKr4pIj7pORETGi8hKEVmZn2+TVRp/Kqlo+ozkIcmJyZyYeqL1nEzUeQlOdfX91WMeL2UjOd5oYK7r95tAL1U9DVjMkR5ZzUpUZ6rqYFUd3LVr1zDNMOb4VBoojVpwAuxFXNMsvASnXMDdE8kCdnvM01DZvc6tP5zvPC/HE5HTgSRVXRVKU9V9qlrm/JwFnO3hvIzxpWgsNOjWM72n9ZxM1HkJTp8AfUWkt4gkE+y1LKiVZwEw1hm1NwQocG7VNVR2ATDO2R4HvOFKHy0iKSLSm+AgixWuY42hZq8pFNxCrgY2eDgvY3wpGgsNumWnB2eJsEUHTTQlhcugqgERuQ1YBCQCz6nqOhGZ4OyfASwERhAcvHAYuLmhsk7VU4H5InILsAO4zimzTkTmA+uBADBRVStdTfqBcyy3SSJytZN/P3BTo66CMT4SjYUG3Xqm96S8spy8Q3mc0P6EqNVr/C1scAJQ1YUEA5A7bYZrW4GJXss66fuAofWUeQB4oJ59fepImwJMqf8MjDEAqhrV0Xpw5EXcHQU7LDiZqLEZIozxkYqqCqq0KuoDIsBexDXRZcHJGB+J5qSvIfYirmkOFpyM8ZFoLTTolt46nbSUNBtObqLKgpMxPhKthQZrs+HkJtosOBnjI9FcaNAtOz3bek4mqiw4GeMjzfHMCaznZKLPgpMxPhLNhQbdstOz2V+yn+Ly4qjWa/zLgpMxPhLqOUVzQAQcGU5uvScTLRacjPGR6p5TtJ85OS/i2rtOJlosOBnjIyWBEloltCIxITGq9VrPyUSbBSdjfCTaUxeFZLTPICkhyYKTiRoLTsb4SDQXGnRLTEgkKy3LhpObqLHgZIyPRHuhQTcbTm6iyYKTMT5SEiiJ+uwQIfYirokmC07G+Ehz95x2Fe4iUBVolvqNv1hwM
sZHor3QoFt2ejaVWsnuot3NUr/xFwtOxvhIc43WAxtObqLLgpMxPlGlVZQFyprttp69iGuiyYKTMT5RFihD0WYLTj3SegDWczLRYcHJGJ9ojoUG3dolt6Nzm84WnExUeApOIjJMRDaJSI6ITK5jv4jIY87+NSJyVriyItJJRN4Rkc3Od0fXvilO/k0icqUr/X0nbbXz6eakp4jIPKfMchHpFdnlMOb41VwLDbpld7Dh5CY6wgYnEUkEpgPDgf7AGBHpXyvbcKCv8xkPPOWh7GRgiar2BZY4v3H2jwYGAMOAJ516Qm5Q1TOcT56TdgtwQFVPAv4CPOT9EhjjD8210KCbvYhrosVLz+lcIEdVt6hqOfAKMLJWnpHAHA1aBnQQkYwwZUcCs53t2cAoV/orqlqmqluBHKeehrjrehUYKiLi4dyM8Y3mWmjQLfQirqo22zGMP3gJTpnATtfvXCfNS56GynZX1T0Aznc3j8d73rmld68rAFWXUdUAUAB0rn0iIjJeRFaKyMr8/Pz6z9iY41BzLTTo1jO9J8XlxRwsPdhsxzD+4CU41dUDqf3PovryeCnbmOPdoKqDgIucz48a0UZUdaaqDlbVwV27dg3TDGOOL8210KBbdroznNyeO5km8hKccoEert9ZQO1XwOvL01DZvc6tP5zv0POjesuo6i7nuwh4mSO3+6rLiEgSkA7s93BuxvhGcy006GYv4ppo8RKcPgH6ikhvEUkmOFhhQa08C4Cxzqi9IUCBc6uuobILgHHO9jjgDVf6aGcEXm+CgyxWiEiSiHQBEJFWwFXA2jrquhZ4V+2mtzE1lARKEISUpJRmO4a9iGuiJSlcBlUNiMhtwCIgEXhOVdeJyARn/wxgITCC4OCFw8DNDZV1qp4KzBeRW4AdwHVOmXUiMh9YDwSAiapaKSLtgEVOYEoEFgOznLqeBV4UkRyCPabRTbkoxhyPSgIlpCSlkCDN93pj17ZdSUlMsZ6TabKwwQlAVRcSDEDutBmubQUmei3rpO8DhtZT5gHggVpph4Cz68lfihPcjDF1a66FBt1EJDicvNCCk2kamyHCGJ8oDZQ260i9kOwO2XZbzzSZBSdjfKIk0HzLZbj1TLMXcU3TWXAyxidKK5pvoUG37A7Z7CneQ1mgrNmPZY5fFpyM8YmY9Zyc4eS5hbnNfixz/LLgZIxPNOdCg272Iq6JBgtOxvhELEbrgb2Ia6LDgpMxPlBZVUlFVUVMglNWWhZgL+KaprHgZIwPVK/lFINnTilJKWS0z7Cek2kSC07G+EAsZiR3sxdxTVNZcDLGB2IxI7mbvYhrmsqCkzE+EItVcN1CL+La/MsmUhacjPGBWN/Wy+6QTVllGXmH8sJnNqYOFpyM8YFY39az4eSmqSw4GeMDsVho0M1exDVNZcHJGB+ofuYUw9F6YD0nEzkLTsb4QEmghARJoFVCq5gcr0PrDrRPbm8j9kzELDgZ4wOhqYtEJCbHExGy07PtXScTMQtOxvhArBYadOuZbus6mchZcDLGB2K1XIZbdrq9iGsiZ8HJGB+I1UKDbj3Te7KvZB+Hyg/F9Ljm+OApOInIMBHZJCI5IjK5jv0iIo85+9eIyFnhyopIJxF5R0Q2O98dXfumOPk3iciVTlpbEfmniGwUkXUiMtWV/yYRyReR1c7nJ5FeEGOOR3HpOXUIDie3W3smEmGDk4gkAtOB4UB/YIyI9K+VbTjQ1/mMB57yUHYysERV+wJLnN84+0cDA4BhwJNOPQCPqGo/4EzgAhEZ7mrDPFU9w/k804hrYMxxL1YLDbrZcHLTFF56TucCOaq6RVXLgVeAkbXyjATmaNAyoIOIZIQpOxKY7WzPBka50l9R1TJV3QrkAOeq6mFVfQ/AqetTICuCczbGd2K10KBbKDjZi7gmEl6CUyaw0/U710nzkqehst1VdQ+A893N6/FEpAPwPYI9rpBrnFuKr4pIj7pORETGi8hKEVmZn59fVxZjjjuqGhytF
+PgdGLqiSRKovWcTES8BKe6XoyoPdVwfXm8lG3U8UQkCZgLPKaqW5zkN4FeqnoasJgjPbKalajOVNXBqjq4a9euYZphzPGhoqqCSq2kdavYPnNKSkgiMy3Tek4mIl6CUy7g7olkAbs95mmo7F7n1h/Od2j64nDHmwlsVtVpoQRV3aeqZc7PWcDZHs7ruLd532amLJ5CoCoQ76aYOIr1vHpu2enZ1nMyEfESnD4B+opIbxFJJjhYYUGtPAuAsc6ovSFAgXOrrqGyC4BxzvY44A1X+mgRSRGR3gQHWawAEJH7gXTgDvfBQ0HOcTWwwcN5Hffufvdupn48ldc3vh7vppgo+Wr/V4x7fRyFZYWey8R6RnI3exHXRCpscFLVAHAbsIjgX/rzVXWdiEwQkQlOtoXAFoKDF2YBtzZU1ikzFbhcRDYDlzu/cfbPB9YD/wdMVNVKEckC7iE46u/TWkPGJznDyz8HJgE3RXpBjhfbD27ntQ2vATBt2bQwuc2x4vcf/J45n89h5qqZnsvEeqFBt+z0bHILc6msqoz5sc2xLclLJlVdSDAAudNmuLYVmOi1rJO+DxhaT5kHgAdqpeVS9/MoVHUKMKXBk/CZJ1Y8gSD8asiv+POyP/PJrk84J/OceDfLNEFuYS5z184lQRKYtmwak86b5KlcrGckd+uZ3pNAVYA9xXvISrPBtcY7myHiOFRcXsysT2dxbf9r+e/v/jepyak8uvzReDfLNNFjyx9DVXli+BPsKtrF/HXzPZUrrQg+c4rXbT3ApjEyjWbB6Tj0wuoXKCgr4I4hd5CWksYtZ97CvHXz2F1UexyLOVYUlhXy9KqnuW7AdUwYPIGB3Qby8L8eJnjTomFxva1ns0SYCFlwOs5UaRWPLn+U8zLPY0jWEAB+cd4vqKyqZPqK6XFunYnUrFWzKCwr5M7z70REuPP8O1mzdw0bvgk/9qd6tF6cbuuBvYhrGs+C03Fm4eaF5OzP4Y4hRwY09unYh5H9RvL0qqc5XHE4jq0zkaiorGDa8ml8t9d3GXziYADGDBxDRvsM3tnyTtjy8Ryt1z65PZ3adLKek2k0C07HmWnLppGVlsU1p15TI/2XQ37JvpJ9/HXNX+PUMhOp+evmk1uYy13n31WdlpKUwqTzJrE+fz25hbkNli8JlNAqoRVJCZ7GP0Vdz/Se1nMyjWbB6Tjyxd4vWLJ1CbedcxutEmsux31Rz4s484QzmbZ8mqfnFKZlUFUe/tfD9O/an+F9h9fYN2HwBFISU3jnq4Z7T/FYaNDNXsQ1kbDgdByZtmwabZLa8NOzf3rUPhHhjiF3sD5/PYu3LI5D60wklmxdwud7P+fO8+8kQWr+79qhdQcu7HkhK3av4EDJgXrriMdyGW4903uy/eB2+0eRaRQLTseJvEN5/PWLvzLu9HF0atOpzjzXD7ie7u2685dlf4lx60ykHvnXI3Rv150bBt1Q5/5Le18KwLtb3623jngsNOiWnZ5NUXkRBWUFcWuDOfZYcDpOPL3yacoqy7h9yO315klJSmHiORN5K+ctNn6zMYatM5FYs3cNi75axKTzJpGSlFJnni5tu3B2xtl8uOPD6oEPtbWEnhPYcHLTOBacjgNlgRg5+swAABXnSURBVDKeXPkkw08aTr8u/RrM+7PBPyMlMYXHlj8Wo9aZSP3Pv/+Hdq3aMWHwhAbzXd7nckoDpXy046M698djoUE3exHXRCI+w3dagPzD+Y2anyxaxp89Pup1zls3j6+Lv64xfLw+3dp144ZBNzD789ncf+n99d4CNPGVW5jLy1+8zK2Dbw373yi7QzandD6FJVuXcGnvS0lMSKyxPx4LDbrZi7gmEtZzOsapKtOWTaN/1/5c3udyT2XuGHIHhysOM2vVrGZunT89/9nzvLD6hSbV8fjyx6nSKk//4IBg7+lA6QFW7l551L54LDTo1q1dN5ITk204uWkUC07HuKU7lvLZ159xx3l3IFLnvLhHGdR9EEN7D
+WJT56gorKimVvoL1M/msqPF/yYm9+4mYc/fjiiOgrLCpmxagbX9r+W3h17eyozoNsAMtpn8PaWt2uMiqvSKkoDpTFfaNAtQRJs6QzTaBacjnHTlk2jc5vO3HjajY0qd8eQO8gtzK1eVsM03dSPpjJlyRTGDBzD9QOu59eLfx1RgHrm02coLCvkP7/9n57LJEgCl/e5nNzC3BqDXcory1E0rj0nsBdxTeNZcDqGbTmwhdc3vs7Pzv5Zox94j+g7gpM6ncS05bbWUzSEAtMPB/2QOd+fw0v/76WIAlRFZQXTlk3j4uyLq6cq8urczHNJS0nj7S1vV6fFc+oiN3sR1zSWBacwVu1exZ8+/hOrdq+Ket0VlRU8vvxx5q2dF9ELio8vf5zEhERuPefWRpdNkARuP+92luUuY1nuskaXP5ZVaRUfbv+wekLUpnpw6YPVgWn2qNkkJSSRlJBUI0D96eM/earrb+v/xs7Cndz17bvCZ66lVWIrLu19aY0pjeK5lpNbz/Se7CnaQ3lleVzbYY4dFpzqEVoTaeanM8ktzGXmpzN55tNnKC4vjkr9q79ezXnPnMek/5vE6L+P5qq5V7GzYKfn8oVlhTz72bNcP+B6MtMyI2rDTWfcRHpKekQr5QaqAvzjy3+Qsz8nomPHy9q8tVz43IVc/MLFnD7jdN7b+l6T6ntw6YPc/e7dNQJTSChAjR44mt8s/k3YABWaqqhfl36M6DsiovZ8p+d3glMaORPCxnO5DLee6T1RNOw8gMaE+HYoeUNWf72al9a8xOGKw4w8ZSSX9bmMd7a8wz+//Ceb9m3ixkE3cvoJp0dUd1mgjD98+Ace+vghOrfpzN+u+xu7Cndx97t3M+DJATx8+cP89OyfHjVVTW3Pf/Y8ReVFnkdz1Tds/tzMc/nb+r9xxkdneBpWrqp8kfcFr214jT3Fe0iQBC7OvpirTr6K9snta+SNdNj8F3u/4L4P72NnwU5+df6vuLb/tWGvRzglFSX84cM/8PC/HiY9JZ0/XPIHnl/9PJfOuZSbz7iZhy9/mM5tOzeqTndgmjNqzlFDuCEYoF78/osA/GbxbwD49QW/rrO+d7e+y+qvV/PM956J+HzbJbfjgh4X8P729xl1yqi4LjTolp1+ZDh5n4594toWc2ywnpPLofJDPP/Z8zy18inSW6cz5aIpjOg7guTEZP6j738w5aIppKWk8eTKJ3n+s+c5VH6oUfVvObCFM58+kweWPsANg25g/cT1XNv/Wm4fcjtf/PwLzsk8hwn/nMDQOUP5av9X9dZTWVXJo8sf5YIeFzT6uURtl/S6BFXl/W3vh827o2AHf1n2F6Z/Mp1KreSWM28J/kW47X3uefceFuUsatLovw35G7j+1es5bcZpvP3V2xwoPcD1r17PmU+fyesbX494brbFWxYz6KlBPPjRg/xw0A/ZeNtG/us7/8UXP/+CyRdM5sU1L9Jvej9eWvOS52N4CUwhoQAV6kE99NFDdeZ75N/OVEWn1T1VkVdD+wxFVXl327stqucE9iKu8c6Ck2Nt3lru++A+VuxeEQxEF06hR1qPGnl6pPVgyoVT+I++/8GK3Su474P7WJu3NmzdZYEy5q+bz58+/hPF5cW8dcNbvDDqhRo9lT4d+7D4R4uZ9b1ZfLrnUwY9NYg///vPVFZVHlXfm1++ydaDWz33mhrSuW1nzsw4k6U7llIWKKszz/6S/Tz32XM8sPQBdhXtYvSA0fzu4t9xbua53Hjajfz24t/St1NfXtv4Gr99/7csz11OlVZ5bsPmfZu58bUbGfDkABZuXsg9F93Dttu3sf7W9fz1//2V0kAp35/3fQbPGsw/v/yn5wCSfyifsf87lstfvBwRYfGPFjN71Gy6tO0CQNtWbXnwsgf5dPynnNTpJH70vz/iypeubPAfBgB/XPpHz4EpxB2gJi+ZfFSAWpu3lv/L+T9+ce4vmtzL6dK2C2dlnMWH2z/kQGlwQth4P3PqkR78f
8kGRRivfB+cSipKmPP5HB5f8Thtk9sy+YLJXH3K1fWufZOUkMTVp1zN5Asm0za5LY+veJw5n8+pd16zTd9s4r4P72PJ1iV8J/s7rLt1HcNOGlZnXhHhJ2f9hHW3rmNon6Hc+fadXPDcBazLW1cj37Rl08hOz2ZUv1FNO3nHZb0v43DF4aMGRpRUlPDahte49717WbVnFcO+NYz7L7mfS3pfUuMv5BNTT+S2c2/jl0N+SbtW7Xhu9XM8+NGDYXtjWw5s4eY3bubU6afy2obX+M9v/ydbb9/K/ZfeT8c2HUlMSOSHg37IulvX8cLIFzhYepCr5l7F+c+ez9tfvV1vkFJVXlj9AqdOP5W5a+dyz0X3sGbCGob2GVpn/kHdB/HRzR8xfcR0lu9azsCnBvLg0gfr7AX+cekfuefde7hh0A2eA1NIKECNGTjmqAD1yL8eoW2rtmGnKvLqim9dQWmgtPqZWrx7Tq2TWtO9XXcbTm488/TMSUSGAY8CicAzqjq11n5x9o8ADgM3qeqnDZUVkU7APKAXsA34gaoecPZNAW4BKoFJqrrIST8beAFoAywEbldVFZEUYA5wNrAPuF5Vt4U7rw35G5j9+WwOlh5k2LeGcdXJVx21DlJ9sjtkc/eFd/Pml2/y9ldvs+GbDYw9fSyndjkVCP7F/vcNf2fpjqV0a9uNO8+/k5M7n0xqSmrYurPSslgwegFz185l0luTOGvmWdz7nXv5zQW/YW3eWj7Y/gGPXP5I1BaP69OxD73Se7Fk6xIuyr4IVeXD7R/yj83/oLi8mCGZQxjZb2TYZ1L9uvTj7ovuZsWuFby+8XUumX0J3zv5ezx02UOc2vXU6nw7CnZw/4f38/zq50mURH5x7i+YfOFkurfvXme9SQlJjDtjXHDQweez+cOHf+DKl67kwp4Xct937+OS3pdU5/1y35dM+McE3tv2Ht/u8W1mXjWTAd0GhL0GoVGPo/qNYtJbk7j73buZu3YuM783s3q5e3dgmj1qdqMCk/tc5nx/DgCTl0wG4MbTbuTlL15mwuAJjX7uVZ9eHXpxcqeT+XL/lwD1ThwbS9kdojecXFU5WHqQPcV7+Lr4a74u/pr8Q/l0btuZ3h1607tjb05of0JEz+5KKkrY8M0G1uatZW3eWr7I+4K1eWspryxnYLeBDOw6MPjdbSADug0gLSWtUfVXVlWyo2AHX+77kk37NrHpm03kFuXSI60HJ3c+mVM6n8IpXU6hZ3rPiJ89VlRWkFuYy7aD29h2cButElvRu0NvenXoRUZqRpOf4caChLtFIiKJwJfA5UAu8AkwRlXXu/KMAH5BMDidBzyqquc1VFZE/gTsV9WpIjIZ6KiqvxGR/sBc4FzgRGAxcLKqVorICuB2YBnB4PSYqr4lIrcCp6nqBBEZDXxfVa9v6LxSe6dq8U3FdG/XnZvOuKlJD2m3HNjCC6tfYO+hvVycfTH9uvRj3rp5FJQWcFmfy7j6lKtJTkwGGj9IIO9QHpPemsS8dfM4vfvpdGvXjX/t/Be5v8qlQ+sOnusJN4/gil0rePazZ7nyW1fy2defkXcoj1M6n8K1/a+tfl7QGOWV5RSXF/PgRw9yqPwQPz3rp/xs8M+YtWoWsz6dhYjw07N+yt0X3c2JqSc2qu6yQFmN24yX9LqE3178W5ZuX8oDSx+gdVJrHrrsIU8DS+rz5qY3mbhwIrmFufx88M/p2q4rv//g900KTG6BqgBj/3csc9fOZVC3QazLX8fmX2xu9J/Dhv67rtm7humfTKd1UmseHfZok9pbWySDXa7723Ws2buGjRM3UlZZRmmglJKKkuB3oKTO7f0l+/m6+OsaQSi0HW5Yeuuk1mSnZ9O7Y+9gwHKCVug7LSWNzfs2VwehtfnB75z9OdW3pVMSUzi166kM7DaQ5IRk1uavZV3eOg5VHHnenJ2eXR2sQp9+XfpxuOIwm77ZxKZ9m2oEopz9OZRVHrmFnpaSRo+0Huws3ElhWWF1e
kpiCn079w0Gq86nBANXl+B2akoquwp3se3gNrYe3FodhEK/cwtz6721npyYTHZ6Nr069Kr+hAJXrw696N6+O4crDrOrcBe7inbV+N5dvJvXrn9tlao27WG3B16C0/nA71T1Suf3FABVfdCV52ngfVWd6/zeBHyXYK+ozrKhPKq6R0QynPKn1K5fRBYBvyPYu3pPVfs56WOc8j8L5VHVf4tIEvA10FUbODk5UfSyhy5j5CkjqwNHU5RXlvPGpjdYsmUJinJi6omMPW3sUdPPRDqC7fWNr3PrP29lT/EebjvnNh4f8XijyocLToGqAPcsuYeDZQfJaJ/BNadew8BuAz1PiVSX8WePJ/9QPvd9cB8zVs0gUBUgKSGJH5/xY+75zj0RBT230kApM1fN5I9L/8jeQ3sB+MGAHzDtymlkpGY0qW6AorIifvveb3lsxWNUaVXUAlNIoCrAuNfH8fIXL3Nd/+uYf938RtfR0H/XKq3i9x/8nrJAGVMvm1pvvkhE8uf4rrfv4n/+/T8IgtK4wS1d23blhPYncEL7E8hIzeCEdidU/w6ldWnbhfxD+Ww9uJWtB7ZW/6Ud+h16/haSIAnVf4EnSAIndTqJQd0G1QgyJ3U66ag7FFVaxfaD24/qWW38ZiMVVcFbwbXPMSkhiW91/FaNnlFou1u7bogIqsreQ3uDgaxWUNtyYAuBqkCdbQ8dLzMts85gk52eTUVVxVEBLLSddyivxvklSiKVevSz7vSUdDLTMlk/cX2LCU7XAsNU9SfO7x8B56nqba48/wCmqupHzu8lwG8IBqc6y4rIQVXt4KrjgKp2FJEngGWq+pKT/izwFsHgNFVVL3PSLwJ+o6pXicha5zi5zr6vnON8U+tcxgOh/6sGAuFHM/hDF+CbsLn8wa7FEXYtjrBrccQpqhr++UQTeXloUdc/nWtHtPryeCnr9XgN1eXpOKo6E5gJICIrYxH9jwV2LY6wa3GEXYsj7FocISJHT33fDLzckM8F3GOqs4DdHvM0VHavczsP5zvUt2yorqx66qou49zWSwf2ezg3Y4wxLZCX4PQJ0FdEeotIMjAaWFArzwJgrAQNAQpUdU+YsguAcc72OOANV/poEUkRkd5AX2CFU1+RiAxxRgeOrVUmVNe1wLsNPW8yxhjTsoW9raeqARG5DVhEcDj4c6q6TkQmOPtnEBw5NwLIITiU/OaGyjpVTwXmi8gtwA7gOqfMOhGZD6wHAsBE1eqncz/nyFDyt5wPwLPAiyKSQ7DHNNrDucd+GdyWy67FEXYtjrBrcYRdiyNici3CDogwxhhjYq3lv4lljDHGdyw4GWOMaXF8GZxEZJiIbBKRHGd2imOeiPQQkfdEZIOIrBOR2530TiLyjohsdr47uspMca7BJhG50pV+toh84ex7zBmAgjNIZZ6TvlxEesX6PL0SkUQR+cx5B8+31wFARDqIyKsistH583G+X6+HiPzS+f9jrYjMFZHWfrkWIvKciORJ8L3QUFpMzl1ExjnH2CwiocFrDVNVX30IDsz4CugDJAOfA/3j3a4onFcGcJaznUpw2qj+wJ+AyU76ZOAhZ7u/c+4pQG/nmiQ6+1YA5xN8f+wtYLiTfisww9keDcyL93k3cD1+BbwM/MP57cvr4LRxNvATZzsZ6ODH6wFkAluBNs7v+cBNfrkWwHeAs4C1rrRmP3egE7DF+e7obHcM2954X7A4/Ac6H1jk+j0FmBLvdjXDeb5BcE7DTUCGk5YBbKrrvAmOqDzfybPRlT4GeNqdx9lOIvjGvMT7XOs49yxgCXApR4KT766D0740gn8hS610310PgsFpp/OXZBLwD+AKP10LgrP2uINTs5+7O4+z72mCc6w22FY/3tYL/QENyXXSjhtOd/pMYDnQXYPviOF8d3Oy1XcdMp3t2uk1yqhqACgAojONdnRNA34NuGe+9ON1gOAdgnzgeec25zMi0g4fXg9V3QU8QvDVlT0E38d8Gx9eC5dYnHtEf
+f6MThFMqXSMUNE2gN/B+5Q1cKGstaRFpVpouJJRK4C8lR1ldcidaQd89fBJYngrZynVPVM4BDB2zf1OW6vh/M8ZSTB21QnAu1E5MaGitSRdlxcCw+iee4RXRM/Bicv0zEdk0SkFcHA9FdVfc1J9ts0URcAV4vINuAV4FIReQn/XYeQXCBXVZc7v18lGKz8eD0uA7aqar6qVgCvAd/Gn9ciJBbnHtHfuX4MTl6mYzrmOCNmngU2qOqfXbt8NU2Uqk5R1SxV7UXwv+27qnojPrsOIar6NbBTRE5xkoYSnH3Fj9djBzBERNo65zAU2IA/r0VILM59EXCFiHR0eq9XOGkNi/cDujg9FBxBcDTbV8A98W5PlM7pQoJd5TXAauczguA93yXAZue7k6vMPc412IQz4sZJH0xwOZGvgCc4MpNIa+BvBKepWgH0ifd5h7km3+XIgAg/X4czgJXOn43XCY6Y8uX1AH4PbHTO40WCo9F8cS0ILuK6B6gg2Ju5JVbnDvzYSc8BbvbSXpu+yBhjTIvjx9t6xhhjWjgLTsYYY1ocC07GGGNaHAtOxhhjWhwLTsYYY1ocC07GNDMJzgp+a5g825yZnteIyAcikh0m/00i8kR0W2pMy2HByZjm14HgjM3hXKKqpwHvA//VrC0ypoWz4GRM85sKfEtEVovILBH50NleKyIX1ZH/3zgTY4pIVxH5u4h84nwuiGnLjYkTC07GNL/JwFeqegbB2QkWOdunE5zJo7ZhBGdyAHgU+IuqngNcAzwTg/YaE3dJ8W6AMT7zCfCcM0nv66rqDk7viUh3gpNvhm7rXQb0dxYbBUgTkdSYtdaYOLGekzExpKofElyRdBfwooiMde2+BMgG1gH3OWkJBBdwO8P5ZKpqUUwbbUwcWHAypvkVAakAzii8PFWdRXAW+bPcGVW1BLgDGCsinYC3gdtC+0XkjFg12ph4stt6xjQzVd0nIh+LyFqgHXBIRCqAYoJLDtTOv0dE5gITgUnAdBFZQ/D/1w+BCbFrvTHxYbOSG2OMaXHstp4xxpgWx4KTMcaYFseCkzHGmBbHgpMxxpgWx4KTMcaYFseCkzHGmBbHgpMxxpgW5/8Dy+awTEoqPUMAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "ax = sns.distplot(dfTmp.tsRel, hist = True, color=\"g\")\n", "ax.set_xlim(0, 100000)\n", "#ax.set_ylim(0, 0.008)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60773.00000000001" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfTmp.tsRel.median()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 -60773.0\n", "1 -60498.0\n", "2 -42829.0\n", "3 -75.0\n", "4 -43.0\n", "5 -20.0\n", "6 -13.0\n", "7 -10.0\n", "8 -4.0\n", "9 0.0\n", "10 10.0\n", "11 17.0\n", "12 27.0\n", "13 28.0\n", "14 39.0\n", "15 44.0\n", "16 50.0\n", "17 166.0\n", "18 383183.0\n", "Name: tsRel, dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfTmp.tsRel - dfTmp.tsRel.median()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2020-01-14 17:11:10\n", "1 2020-01-14 17:15:45\n", "2 2020-01-14 22:10:14\n", "3 2020-01-15 10:02:48\n", "4 2020-01-15 10:03:20\n", "5 2020-01-15 10:03:43\n", "6 2020-01-15 10:03:50\n", "7 2020-01-15 10:03:53\n", "8 2020-01-15 10:03:59\n", "9 2020-01-15 10:04:03\n", "10 2020-01-15 10:04:13\n", "11 2020-01-15 10:04:20\n", "12 2020-01-15 10:04:30\n", "13 2020-01-15 10:04:31\n", "14 2020-01-15 10:04:42\n", "15 2020-01-15 10:04:47\n", "16 2020-01-15 10:04:53\n", "17 2020-01-15 10:06:49\n", "18 2020-01-19 20:30:26\n", "Name: Timestamp, dtype: datetime64[ns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfInit.Timestamp" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Detect outliers" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### What about using standard scaling (z-score transformation) + thresholding\n", "#### 
Typical outlier threshold: more than 2 standard deviations from the mean (z < -2 or z > 2)\n", "\n", "" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 -0.783125\n", "1 -0.780147\n", "2 -0.588803\n", "3 -0.125805\n", "4 -0.125459\n", "5 -0.125209\n", "6 -0.125134\n", "7 -0.125101\n", "8 -0.125036\n", "9 -0.124993\n", "10 -0.124885\n", "11 -0.124809\n", "12 -0.124700\n", "13 -0.124690\n", "14 -0.124571\n", "15 -0.124516\n", "16 -0.124451\n", "17 -0.123195\n", "18 4.024628\n", "Name: tsRel, dtype: float64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tsRel_z = (dfTmp.tsRel - dfTmp.tsRel.mean()) / dfTmp.tsRel.std()\n", "tsRel_z" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Outlier detection is a serious task!\n", "\n", "#### scikit-learn methods\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example: a more advanced outlier detection" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tsRelQ3Q10Q11outScore
00.0473-971.093118
1275.0578-969.221765
217944.0677-848.985675
360698.06510-2.550890
460730.05108-1.646488
560753.0828-1.071656
660760.0188-0.988349
760763.0769-0.988349
860769.0557-1.012356
960773.0656-1.025628
1060783.0487-0.890923
1160790.0758-0.915438
1260800.0868-1.035467
1360801.0478-1.023812
1460812.07610-1.085007
1560817.0657-1.085007
1660823.0647-1.194836
1760939.0679-5.640073
18443956.09410-13811.188253
\n", "
" ], "text/plain": [ " tsRel Q3 Q10 Q11 outScore\n", "0 0.0 4 7 3 -971.093118\n", "1 275.0 5 7 8 -969.221765\n", "2 17944.0 6 7 7 -848.985675\n", "3 60698.0 6 5 10 -2.550890\n", "4 60730.0 5 10 8 -1.646488\n", "5 60753.0 8 2 8 -1.071656\n", "6 60760.0 1 8 8 -0.988349\n", "7 60763.0 7 6 9 -0.988349\n", "8 60769.0 5 5 7 -1.012356\n", "9 60773.0 6 5 6 -1.025628\n", "10 60783.0 4 8 7 -0.890923\n", "11 60790.0 7 5 8 -0.915438\n", "12 60800.0 8 6 8 -1.035467\n", "13 60801.0 4 7 8 -1.023812\n", "14 60812.0 7 6 10 -1.085007\n", "15 60817.0 6 5 7 -1.085007\n", "16 60823.0 6 4 7 -1.194836\n", "17 60939.0 6 7 9 -5.640073\n", "18 443956.0 9 4 10 -13811.188253" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import LocalOutlierFactor\n", "\n", "# fit the model for outlier detection (default)\n", "X = np.array(dfTmp.tsRel).reshape(dfTmp.shape[0],1)\n", "X.shape\n", "clf = LocalOutlierFactor(n_neighbors=5, contamination=0.1)\n", "clf.fit_predict(X)\n", "dfTmp['outScore'] = clf.negative_outlier_factor_.tolist()\n", "dfTmp" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "dfTmp.plot.scatter(x='tsRel', y='outScore', c='DarkBlue')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Correlations for filtered data" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tsRelQ3Q10Q11outScore
360698.06510-2.550890
460730.05108-1.646488
560753.0828-1.071656
660760.0188-0.988349
760763.0769-0.988349
860769.0557-1.012356
960773.0656-1.025628
1060783.0487-0.890923
1160790.0758-0.915438
1260800.0868-1.035467
1360801.0478-1.023812
1460812.07610-1.085007
1560817.0657-1.085007
1660823.0647-1.194836
\n", "
" ], "text/plain": [ " tsRel Q3 Q10 Q11 outScore\n", "3 60698.0 6 5 10 -2.550890\n", "4 60730.0 5 10 8 -1.646488\n", "5 60753.0 8 2 8 -1.071656\n", "6 60760.0 1 8 8 -0.988349\n", "7 60763.0 7 6 9 -0.988349\n", "8 60769.0 5 5 7 -1.012356\n", "9 60773.0 6 5 6 -1.025628\n", "10 60783.0 4 8 7 -0.890923\n", "11 60790.0 7 5 8 -0.915438\n", "12 60800.0 8 6 8 -1.035467\n", "13 60801.0 4 7 8 -1.023812\n", "14 60812.0 7 6 10 -1.085007\n", "15 60817.0 6 5 7 -1.085007\n", "16 60823.0 6 4 7 -1.194836" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfTmpFil = dfTmp[np.logical_and(dfTmp.outScore>-5, dfTmp.outScore<5)]\n", "dfTmpFil" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.distplot(dfTmpFil.tsRel, hist = True, color=\"g\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tsRel 1.000000\n", "Q3 0.130838\n", "Q10 -0.163241\n", "Q11 -0.344576\n", "outScore 0.702471\n", "Name: tsRel, dtype: float64\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "corr = dfTmpFil.corr()\n", "sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)\n", "print(corr['tsRel'])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.regplot(x='tsRel', y='Q3', data=dfTmpFil, color=\"g\")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q6Q7Q8Q9Q10Q11tsRel
085467Running535730.0
188556Reading77678275.0
266665Watching a movie7777717944.0
353644Watching a movie38851060698.0
466543Reading45410860730.0
587833Running84102860753.0
643111Reading11108860760.0
773765Reading4686960763.0
855544Watching a movie4455760769.0
966666Watching a movie4665660773.0
1044453Watching a movie5278760783.0
1177722Running7675860790.0
1288866Watching a movie8786860800.0
1344411Watching a movie1577860801.0
1487777Running710561060812.0
1577666Watching a movie6665760817.0
1676655Reading1774760823.0
1766655Watching a movie2997960939.0
1899999Watching a movie765410443956.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 tsRel\n", "0 8 5 4 6 7 Running 5 3 5 7 3 0.0\n", "1 8 8 5 5 6 Reading 7 7 6 7 8 275.0\n", "2 6 6 6 6 5 Watching a movie 7 7 7 7 7 17944.0\n", "3 5 3 6 4 4 Watching a movie 3 8 8 5 10 60698.0\n", "4 6 6 5 4 3 Reading 4 5 4 10 8 60730.0\n", "5 8 7 8 3 3 Running 8 4 10 2 8 60753.0\n", "6 4 3 1 1 1 Reading 1 1 10 8 8 60760.0\n", "7 7 3 7 6 5 Reading 4 6 8 6 9 60763.0\n", "8 5 5 5 4 4 Watching a movie 4 4 5 5 7 60769.0\n", "9 6 6 6 6 6 Watching a movie 4 6 6 5 6 60773.0\n", "10 4 4 4 5 3 Watching a movie 5 2 7 8 7 60783.0\n", "11 7 7 7 2 2 Running 7 6 7 5 8 60790.0\n", "12 8 8 8 6 6 Watching a movie 8 7 8 6 8 60800.0\n", "13 4 4 4 1 1 Watching a movie 1 5 7 7 8 60801.0\n", "14 8 7 7 7 7 Running 7 10 5 6 10 60812.0\n", "15 7 7 6 6 6 Watching a movie 6 6 6 5 7 60817.0\n", "16 7 6 6 5 5 Reading 1 7 7 4 7 60823.0\n", "17 6 6 6 5 5 Watching a movie 2 9 9 7 9 60939.0\n", "18 9 9 9 9 9 Watching a movie 7 6 5 4 10 443956.0" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dimensionality reduction" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[9.99999996e-01 1.81226177e-09 6.72112392e-10 5.91733094e-10\n", " 3.30434976e-10]\n", "0.9999999994658001\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAWr0lEQVR4nO3de5BU9ZnG8e8rF2USFZVJglxm0OAmeMHAiBoTReMFEcV4BfFGd4piN25la//YmE1tsltb1tb+sVtJNmZdKvESmUC8ECWWivdoKlFpFFEkmhERRjSARk2ChmDe/eP0SNP00KdnTvfv9OnnUzXF9OnD9FO/gofmnNPvMXdHRESa3z6hA4iISDJU6CIiGaFCFxHJCBW6iEhGqNBFRDJiaKgXHjVqlHd2doZ6eRGRprRq1apt7t5e6blghd7Z2UmhUAj18iIiTcnMXuvvOR1yERHJCBW6iEhGqNBFRDJChS4ikhEqdBGRjKha6GZ2o5ltMbMX+nnezOx7ZtZjZmvMbEryMYu6u6GzE/bZJ/q1u7tuLyUi0mzivEO/GZixl+fPBiYWvxYA/zv4WBV0d8OCBfDaa+Ae/bpggUpdRKSoaqG7++PA23vZZTbwY488CYw0s9FJBfzIN78J27fvvm379mi7iIgkcgx9DLCp5HFvcdsezGyBmRXMrLB169baXmXjxtq2i4i0mCQK3Spsq3jXDHdf5O5d7t7V3l7xk6v9Gz++tu0iIi0miULvBcaVPB4LbE7g5+7uuuugrW33bSNGRNtFRCSRQl8OXFm82uUE4F13fyOBn7u7efNg0SLo6AAr/qdg1qxou4iIVB/OZWZLgOnAKDPrBb4NDANw9xuAe4GZQA+wHZhfr7DMm7erwKdPh2efja54sUpHfUREWkvVQnf3uVWed+CriSWKK5+HK6+EJ56Ak09u+MuLiKRN835S9MIL4YAD4Ec/Cp1ERCQVmrfQ29pg7ly4/XZ4773QaUREgmveQgfI5eD992Hp0tBJRESCa+5CP+44OOooHXYREaHZC90sepf+9NPwQsXZYSIiLaO5Cx3g8sth2DC48cbQSUREgmr+Qm9vh/POg1tvhR07QqcREQmm+QsdomvSt22Dn/88dBIRkWCyUehnngljxuiwi4i0tGwU+pAhcPXVcP/98PrrodOIiASRjUIHmD8f/vpXuOWW0ElERILITqEffng0sOvGG6NiFxFpMdkpdIhOjr7ySjSwS0SkxWSr0C+4QAO7RKRlZavQ29rgssvgjjvg3XdDpxERaahsFTpoYJeItKzsFXpXFxx9tA67iEjLyV6h9w3sWrkSnn8+dBoRkYbJXqGDBnaJSEvKZqGPGgWzZ2tgl4i0lGwWOkTXpL/1FixfHjqJiEhDZLfQzzgDxo7VYRcRaRnZLfS+gV0rVkBvb+g0IiJ1l91CBw3sEpGWku1CP+wwOPVUDewSkZaQ7UKH6OTo+vXw+OOhk4iI1FX2C/2CC+DAA/XJURHJvOwX+ogRuwZ2vfNO6DQiInWT/UKHaBTABx9oYJeIZFprFPrUqXDMMTrsIiKZ1hqF3jewq1CANWtCpxERqYtYhW5mM8zsJTPrMbNrKzx/oJn93MyeM7O1ZjY/+aiDdPnlMHy4PjkqIplVtdDNbAhwPXA2MAmYa2aTynb7KvCiu08GpgP/ZWbDE846OIccAuefD4sXw5//HDqNiEji4rxDnwb0uPt6d98BLAVml+3jwP5mZsDHgbeBnYkmTUIup4FdIpJZcQp9DLCp5HFvcVup7wOfBTYDzwNfc/c9PpppZgvMrGBmha1btw4w8iCcfjqMG6fDLiKSSXEK3Sps87LHZwGrgUOBY4Hvm9kBe/wm90Xu3uXuXe3t7TWHHbTSgV2bNlXdXUSkmcQp9F5gXMnjsUTvxEvNB5Z5pAd4FfhMMhETNn8+uGtgl4hkTpxCXwlMNLMJxROdc4Dyg9AbgS8BmNkngb8B1icZNDE
TJsBpp2lgl4hkTtVCd/edwDXACmAdcJu7rzWzhWa2sLjbvwOfN7PngYeBr7v7tnqFHrR8Hl59FR57LHQSEZHEmHv54fDG6Orq8kKhEOS1ef99GD0aZs2KLmMUEWkSZrbK3bsqPdcanxQtN2IEzJsHd96pgV0ikhmtWeiwa2DXkiWhk4iIJKJ1C33KFJg8WQO7RCQzWrfQ+wZ2rVoFzz0XOo2IyKC1bqFDdBxdA7tEJCNau9APOQS+/GUN7BKRTGjtQofosMvbb8Pdd4dOIiIyKCr0L30Jxo/XYRcRaXoq9L6BXQ88ABs3hk4jIjJgKnTQwC4RyQQVOkBnZ3ToRQO7RKSJqdD75POwYQM8+mjoJCIiA6JC73P++TBypE6OikjTUqH3KR3Y9fvfh04jIlIzFXqpXC76gJEGdolIE1Khl5oyBY49VgO7RKQpqdDL5XLwzDOwenXoJCIiNVGhl5s3D/bdVydHRaTpqNDLHXzwroFdH3wQOo2ISGwq9EpyuehKFw3sEpEmokKvRAO7RKQJqdAr2WefaL7Lgw/Ca6+FTiMiEosKvT/z50e/3nxz0BgiInGp0PvT0REdernpJg3sEpGmoELfm3w+OuTyyCOhk4iIVKVC35vzz4eDDtLJURFpCir0vdlvv+iDRsuWaWCXiKSeCr2avoFdP/lJ6CQiInulQq/mc5+LvjSwS0RSToUeRz4Pzz4bfYmIpFSsQjezGWb2kpn1mNm1/ewz3cxWm9laM/tFsjEDu+wyDewSkdSrWuhmNgS4HjgbmATMNbNJZfuMBH4AnOfuRwIX1yFrOAcdBBdcAN3dGtglIqkV5x36NKDH3de7+w5gKTC7bJ/LgGXuvhHA3bckGzMF+gZ23XVX6CQiIhXFKfQxwKaSx73FbaWOAA4ys8fMbJWZXVnpB5nZAjMrmFlh69atA0scymmnRZ8e1WEXEUmpOIVuFbZ52eOhwFTgHOAs4F/M7Ig9fpP7Infvcveu9vb2msMG1Tew66GHNLBLRFIpTqH3AuNKHo8FNlfY5353/5O7bwMeByYnEzFFrr46+vWmm4LGEBGpJE6hrwQmmtkEMxsOzAGWl+1zN/BFMxtqZm3A8cC6ZKOmQEcHnH66BnaJSCpVLXR33wlcA6wgKunb3H2tmS00s4XFfdYB9wNrgKeBH7r7C/WLHVA+Dxs3wsMPh04iIrIbcy8/HN4YXV1dXigUgrz2oHzwARx6KJx1FixZEjqNiLQYM1vl7l2VntMnRWu1335w+eXws5/B22+HTiMi8hEV+kBoYJeIpJAKfSCOPRamTNHALhFJFRX6QOXzsHq1BnaJSGqo0Adq7txoYJfepYtISqjQB+qgg+DCCzWwS0RSQ4U+GLkcvPNOdMWLiEhgKvTBOPVU6OzUYRcRSQUV+mD0Dex6+GHYsCF0GhFpcSr0wbr6ajDTwC4RCU6FPljjx8MZZ0SF/uGHodOISAtToSchn4dNmzSwS0SCUqEnYfZsOPhg3c1IRIJSoSdh3313Dex6663QaUSkRanQk5LLwY4dGtglIsGo0JMyeTJMnRpdkx5oxryItDYVepLyeXjuOQ3sEpEgVOhJmjs3ugGGPjkqIgGo0JM0cuSugV3vvx86jYi0GBV60nI5ePddDewSkYZToSdt+nSYMEGHXUSk4VToSesb2PXII/Dqq6HTiEgLUaHXgwZ2iUgAKvR6GDcOzjwTbr5ZA7tEpGFU6PXSN7DroYdCJxGRFqFCr5fzzoNDDtHALhFpGBV6vfQN7LrrLg3sEpGGUKHXUz4fDezq7g6dRERagAq9no4+Grq6NLBLRBpChV5v+TysWQPPPBM6iYhknAq93ubM0cAuEWmIWIVuZjPM7CUz6zGza/ey33Fm9qGZXZRcxCY3ciRcdFF04wsN7BKROqpa6GY2BLgeOBuYBMw1s0n97PefwIqkQza9voFdy5aFTiIiGRbnHfo0oMfd17v7DmApMLvCfn8P3AlsSTBfNpxyChx2mA6
7iEhdxSn0McCmkse9xW0fMbMxwJeBG/b2g8xsgZkVzKywdevWWrM2r76BXY8+CuvXh04jIhkVp9Ctwrbya/C+A3zd3fc6uMTdF7l7l7t3tbe3x82YDRrYJSJ1FqfQe4FxJY/HApvL9ukClprZBuAi4Admdn4iCbNi7Fg46ywN7BKRuolT6CuBiWY2wcyGA3OA5aU7uPsEd+90907gDuDv3P2uxNM2u3weenvhwQdDJxGRDKpa6O6+E7iG6OqVdcBt7r7WzBaa2cJ6B8yUc8/VwC4RqZuhcXZy93uBe8u2VTwB6u5XDz5WRu27L1xxBVx/PWzbBqNGhU4kIhmiT4o2Wj4Pf/mLBnaJSOJU6I121FFw3HEa2CUiiVOhh5DPw/PPQ6EQOomIZIgKPYQ5c2DECJ0cFZFEqdBDOPDAXQO7tm8PnUZEMkKFHkouB++9p4FdIpIYFXoop5wChx+ugV0ikhgVeihm0cCuxx6DV14JnUZEMkCFHtJVV0WTGDWwS0QSoEIPSQO7RCRBKvTQ8nl4/XV44IHQSUSkyanQQzv33Gimi65JF5FBUqGHNnx4NLDr7ruhle7iJCKJU6GnQS6ngV0iMmgq9DQ46iiYNk0Du0RkUFToaZHPwwsvwMqVoZOISJNSoafFpZdqYJeIDIoKPS0OPBAuvhiWLNHALhEZEBV6mvQN7LrzztBJRKQJqdDT5OST4dOf1sAuERkQFXqa9A3s+sUvoKcndBoRaTIq9LTRwC4RGSAVetqMGQMzZmhgl4jUTIWeRvk8bN4MK1aETiIiTUSFnkazZkF7u65JF5GaqNDTqG9g1/LlGtglIrGp0NOqb2DXrbeGTiIiTUKFnlZHHgnHH6+BXSISmwo9zfJ5ePFFePrp0ElEpAmo0NPs0kuhrU0nR0UklliFbmYzzOwlM+sxs2srPD/PzNYUv35lZpOTj9qCDjhg18CuP/0pdBoRSbmqhW5mQ4DrgbOBScBcM5tUtturwCnufgzw78CipIO2rFwO/vAHDewSkarivEOfBvS4+3p33wEsBWaX7uDuv3L33xcfPgmMTTZmC/viFzWwS0RiiVPoY4BNJY97i9v6kwfuq/SEmS0ws4KZFbbq+up4zKJ36Y8/Dr/9beg0IpJicQrdKmyreB2dmZ1KVOhfr/S8uy9y9y5372pvb4+fstVpYJeIxBCn0HuBcSWPxwKby3cys2OAHwKz3f2tZOIJAIceCmefDbfcAjt3hk4jIikVp9BXAhPNbIKZDQfmAMtLdzCz8cAy4Ap3fzn5mKKBXSJSTdVCd/edwDXACmAdcJu7rzWzhWa2sLjbt4BDgB+Y2WozK9QtcauaNQs+8QmdHBWRfg2Ns5O73wvcW7bthpLvvwJ8Jdlospthw6KBXd/9LmzZEpW7iEgJfVK0meRy0TF0DewSkQpU6M1k0iQ44YRoFIAGdolIGRV6s+kb2PXUU6GTiEjKqNCbzSWXaGCXiFSkQm82BxwQlfrSpRrYJSK7UaE3o76BXXfcETqJiKSICr0ZfeELMHGirkkXkd2o0JtR38CuJ56Al/XBXBGJqNCb1VVXwZAhGtglIh9RoTer0aNh5kwN7BKRj6jQm1kuB2+8AfffHzqJiKSACr2ZnXOOBnaJyEdU6M1s2DC48kq45x743e9CpxGRwFTozU4Du0SkSIXe7D77WTjxRA3sEhEVeibk87BuHTz5ZOgkIhKQCj0LLrkEPvYxDewSaXEq9CzYf/9dA7v++MfQaUQkEBV6VuRyUZlrYJdIy1KhZ8VJJ8ERR+iadJEWpkLPir6BXb/8pQZ2ibQoFXqW9A3s0slRkZakQs+ST30qGgeggV0iLUmFnjW5HLz5Jtx3X+gkItJgKvSsmTkTPvlJnRwVaUEq9KwpHdj15puh04hIA6nQsyiXgw8/1MAukRajQs+iz3wGPv95DewSaTEq9KzK5+E3v4Ff/zp0EhFpEBV6Vl18sQZ2ibQYFXpW7b8/XHop/PSnGtgl0iJ
iFbqZzTCzl8ysx8yurfC8mdn3is+vMbMpyUeVmvUN7OrshH32iX7t7g6dKt26u7VetdB61abe6+Xue/0ChgCvAIcBw4HngEll+8wE7gMMOAF4qtrPnTp1qkudLV7sbuYenRqNvtraou2yp8WLo/XResWj9apNQusFFLyfXjWvchWEmZ0I/Ku7n1V8/I3iPwT/UbLP/wGPufuS4uOXgOnu/kZ/P7erq8sLhcJA/g2SuDo74bXX9tw+dGg0mVF29/LLlUcmaL0q03rVpr/16uiADRti/xgzW+XuXZWeGxrj948BNpU87gWOj7HPGGC3QjezBcACgPHjx8d4aRmUjRsrb9+5EyZNamyWZvDii5W3a70q03rVpr/16u/v6QDEKXSrsK38bX2cfXD3RcAiiN6hx3htGYzx4yu/Q+/ogNtvb3yetOvvfzRar8q0XrXpb70SfHMb56RoLzCu5PFYYPMA9pFGu+46aGvbfVtbW7Rd9qT1qo3WqzaNWK/+Dq73fRG9i18PTGDXSdEjy/Y5h91Pij5d7efqpGiDLF7s3tERnRzt6NAJq2q0XrXRetUmgfViMCdFAcxsJvAdoitebnT368xsYfEfhBvMzIDvAzOA7cB8d9/rGU+dFBURqd1gT4ri7vcC95Ztu6Hkewe+OpiQIiIyOPqkqIhIRqjQRUQyQoUuIpIRKnQRkYyIdZVLXV7YbCtQ4Sr7WEYB2xKMk5S05oL0ZlOu2ihXbbKYq8Pd2ys9EazQB8PMCv1dthNSWnNBerMpV22UqzatlkuHXEREMkKFLiKSEc1a6ItCB+hHWnNBerMpV22UqzYtlaspj6GLiMiemvUduoiIlFGhi4hkRKoLPa03p46Ra7qZvWtmq4tf32pQrhvNbIuZvdDP86HWq1quhq+XmY0zs0fNbJ2ZrTWzr1XYp+HrFTNXiPXaz8yeNrPnirn+rcI+IdYrTq4gfx+Lrz3EzJ41s3sqPJf8evU3Vzf0F3W6OXWDck0H7gmwZicDU4AX+nm+4esVM1fD1wsYDUwpfr8/8HJK/nzFyRVivQz4ePH7YcBTwAkpWK84uYL8fSy+9j8CP6n0+vVYrzS/Q58G9Lj7enffASwFZpftMxv4sUeeBEaa2egU5ArC3R8H3t7LLiHWK06uhnP3N9z9meL3fwDWEd0Ht1TD1ytmroYrrsEfiw+HFb/Kr6gIsV5xcgVhZmOJbv7zw352SXy90lzo/d14utZ9QuQCOLH438D7zOzIOmeKK8R6xRVsvcysE/gc0bu7UkHXay+5IMB6FQ8frAa2AA+6eyrWK0YuCPPn6zvAPwF/7ef5xNcrzYWe2M2pExbnNZ8hmrcwGfgf4K46Z4orxHrFEWy9zOzjwJ3AP7j7e+VPV/gtDVmvKrmCrJe7f+juxxLdM3iamR1VtkuQ9YqRq+HrZWazgC3uvmpvu1XYNqj1SnOhp/Xm1FVf093f6/tvoEd3expmZqPqnCuOVN7MO9R6mdkwotLsdvdlFXYJsl7VcoX+8+Xu7wCPEd1yslTQP1/95Qq0XicB55nZBqLDsqeZ2eKyfRJfrzQX+kpgoplNMLPhwBxgedk+y4Eri2eLTwDedfc3Qucys0+ZmRW/n0a0zm/VOVccIdarqhDrVXy9HwHr3P2/+9mt4esVJ1eg9Wo3s5HF70cApwO/KdstxHpVzRVivdz9G+4+1t07iTriEXe/vGy3xNcr1j1FQ3D3nWZ2DbCCXTenXmslN6cmus/pTKCH4s2pU5LrIuBvzWwn8D4wx4untevJzJYQndEfZWa9wLeJThIFW6+YuUKs10nAFcDzxeOvAP8MjC/JFWK94uQKsV6jgVvMbAhRId7m7veE/vsYM1eQv4+V1Hu99NF/EZGMSPMhFxERqYEKXUQkI1ToIiIZoUIXEckIFbqISEao0EVEMkKFLiKSEf8PNme4Gay9ql4AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "mdlPCA = PCA(n_components=5)\n", "XPCA = mdlPCA.fit_transform(df2)\n", "\n", "print(mdlPCA.explained_variance_ratio_)\n", "print(np.sum(mdlPCA.explained_variance_ratio_))\n", "plt.plot(np.arange(0,mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 5.23403869e-06 7.40036105e-06 8.97709992e-06 9.61319140e-06\n", " 9.32793474e-06 3.94503126e-06 8.95009702e-07 -3.58017421e-06\n", " -6.06213801e-06 7.55257058e-06 1.00000000e+00 -7.70261980e-07\n", " -6.96515391e-07 1.46677737e-06]\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYhElEQVR4nO3df4wcd3nH8fdzd7bjc+w4tS/+cT/2gmQgbkRIcg2hhDYtpTgBxVRqpYQUaEpkRUoorSo1Qag/JJT+EG1FaQKuFdJQYSVCkJaUGgKipfxBAZ8phJg0xA3c3vrO8TkxTmInts/39I/Zqff29nZnd2d2ZnY/L+m0t7Pj7z45Ox9/Pft9vmPujoiI5F9f2gWIiEg8FOgiIl1CgS4i0iUU6CIiXUKBLiLSJQbSeuONGzf6+Ph4Wm8vIpJLBw4cOObuQ7VeSy3Qx8fHmZycTOvtRURyycymlntNl1xERLqEAl1EpEso0EVEuoQCXUSkSyjQRUS6RMNAN7MHzeyomT25zOtmZp8ws0Nm9oSZXRV/mSKSK3v3wvg49PUFj3v3pl1RT4gyQ38I2FHn9RuAbeWvXcCn2i9LRHJr717YtQumpsA9eNy1S6HeAQ0D3d2/CbxQ55SdwD954NvAejPbEleBIpIzH/kInDq1+NipU8FxSVQc19CHgemK56XysSXMbJeZTZrZ5NzcXAxvLSKZUyw2d1xiE0egW41jNe+a4e573H3C3SeGhmp2ropI3o2NNXdcYhNHoJeA0YrnI8BMDOOKSB7dey+sWLH42OBgcFwSFUegPwa8r7za5VrghLvPxjCuiOTRrbfCddedf14owJ49wXFJVMPNuczsYeB6YKOZlYA/BVYAuPtuYB9wI3AIOAXcllSxIpITq1YFj1ddBQcOpFtLD2kY6O5+S4PXHbgztopEJP+mphY/SkeoU1RE4uUerGjp74fnn4eTJ9OuqGco0EUkXsePByF+5ZXB8+np+udLbBToIhKv8DLLW9+6+LkkToEuIvEKG4jCQFdDUcco0EUkXmGAX3ttsDmXAr1jFOgiEq+pKVi9GjZvhuFhXXLpIAW6iMSr
WAza/M2CpiLN0DtGgS4i8QoDHYJHBXrHKNBFJF5TU4sDfXoazp1Lt6YeoUAXkficPg1HjgSXWiB4nJ8PjkniFOgiEp9SKXisnKGDLrt0iAJdROITrmipDnStdOkIBbqIxCeciYeXXDRD7ygFuojEp1gMlisOl+9CuW4drF+vQO8QBbqIxGdqKmgoCvdDh2CWrksuHaFAF5H4VK5BD2ktesco0EUkPsXi+evnIXWLdowCXUTiEd7YotYM/Wc/gxdfTKeuHqJAF5F4zM3Bq6/WDnTQLL0DFOgiEo/qJYuh8LkCPXEKdBGJR3VTUUjNRR2jQBeReIQz8OpA37wZBgY0Q+8ABbqIxKNYhAsvhIsvXny8vx9GRxXoHaBAF5F4VN7YoprWoneEAl1E4lG5D3o1dYt2hAJdROJRq6koVCjA4cPB3uiSGAW6iLTvlVeCdej1ZugLCzAz09m6eowCXUTat9wKl5CWLnaEAl1E2rdcU1FIzUUdoUAXkfY1mqGPji4+TxKhQBeR9k1NQV8fbN1a+/U1a2DDBl1ySVikQDezHWb2tJkdMrN7arx+kZn9q5n9wMwOmtlt8ZcqIplVLAZ3KVqxYvlztI1u4hoGupn1A/cDNwDbgVvMbHvVaXcCP3L3K4Drgb8xs5Ux1yoiWVVr29xqai5KXJQZ+jXAIXd/1t3PAI8AO6vOcWCtmRlwIfACoAWnIr2iXlNRKGwucu9MTT0oSqAPA9MVz0vlY5XuAy4DZoAfAh9y94Xqgcxsl5lNmtnk3NxciyWLSKYsLMD09PIrXEKFArz8cnCzC0lElECvsTED1X/FvgP4PrAVeCNwn5mtW/KL3Pe4+4S7TwwNDTVdrIhk0HPPwdmz0WbooMsuCYoS6CVgtOL5CMFMvNJtwKMeOAT8BHh9PCWKSKYttw96NTUXJS5KoO8HtpnZpeUPOm8GHqs6pwi8DcDMNgGvA56Ns1ARyahGTUUhNRclbqDRCe4+b2Z3AY8D/cCD7n7QzO4ov74b+CjwkJn9kOASzd3ufizBukUkKxo1FYWGhmDVKgV6ghoGOoC77wP2VR3bXfH9DPDr8ZYmIrkwNQUXXQTrlnxstlhfX9AxqksuiVGnqIi0p962udXUXJQoBbqItCdKU1FIzUWJUqCLSHuiNBWFxsZgdhbOnEm2ph6lQBeR1r30Ehw/Hv2Sy9hY0ClaKiVbV49SoItI66bLTeRRZ+haupgoBbqItC5qU1FIzUWJUqCLSOuirkEP6UYXiVKgi0jrikUYGIAtW6Kdf8EFsGmTAj0hCnQRad3UFIyMQH9/9F8TbqMrsVOgi0jrmlmDHtJa9MQo0EWkdc10iYbCblHd6CJ2CnQRac38fLCevJUZ+iuvwDHt3xc3BbqItGZ2Fs6day3QQZddEqBAF5HWRN0HvZqaixKjQBeR1jTbVBRSc1FiFOgi0ppmm4pCGzbA6tWaoSdAgS4irSkWg3Bes6a5X2emfdETokAXkda0sgY9pLXoiVCgi0hrmtkHvZq6RROhQBeR1rTSVBQqFODo0WA9usRGgS4izTtxAl58sb0ZOuhGFzFToItI81pdshjS0sVEKNBFpHmtNhWF1FyUCAW6iDSv1TXooeHhYPmiAj1WCnQRad7UFKxcCZdc0tqvX7kyuCmGLrnESoEuIs0L16D3tREhai6KnQJdRJrXTlNRSM1FsVOgi0jz2mkqCoWBvrAQT02iQBeRJp09CzMzra9wCRUKcOZM0GAksVCgi0hzDh8Obh8XxwwddNklRpEC3cx2mNnTZnbIzO5Z5pzrzez7ZnbQzP4z3jJFJDPabSoKqbkodgONTjCzfuB+4O1ACdhvZo+5+48qzlkPfBLY4e5FM2txLZOIZF67TUUhzdBjF2WGfg1wyN2fdfczwCPAzqpz3gM86u5FAHfXRTGRbhUG8MhIe+OsXw9r1yrQYxQl0IeB6YrnpfKxSq8FLjazb5jZATN7X62BzGyXmU2a2eTc3FxrFYtIuqam
goai1avbG8dM2+jGLEqgW41jXvV8ALgaeCfwDuCPzey1S36R+x53n3D3iaGhoaaLFZEMiGMNekhr0WMVJdBLwGjF8xFgpsY5X3H3k+5+DPgmcEU8JYpIprSzD3o1dYvGKkqg7we2mdmlZrYSuBl4rOqcLwJvNbMBMxsE3gQ8FW+pIpI693iaikJjY/D883DyZDzj9biGge7u88BdwOMEIf05dz9oZneY2R3lc54CvgI8AXwXeMDdn0yubBFJxQsvwKlT8QY6aJYek4bLFgHcfR+wr+rY7qrnHwM+Fl9pIpI5cS1ZDFXui37ZZfGM2cPUKSoi0cXVVBRSc1GsFOgiEl27N7aotnUr9PfrkktMFOgiEl2xGKw/37gxnvEGBoK7FynQY6FAF5HowhUuVqs9pUVqLoqNAl1EoouzqSik5qLYKNBFJLo4m4pChQKUSnDuXLzj9iAFuohE8+qrcORIMjP0+XmYnY133B6kQBeRaEql4DGJQAdddomBAl1Eoom7qShU2VwkbVGgi0g0cTcVhUZHF48vLVOgi0g0xWKwXLHdG1tUW7cuuNmFZuhtU6CLSDTFImzZAitXxj+2ttGNhQJdRKJJYg16SGvRY6FAF5Fo4twHvZq6RWOhQBeRxtyTaSoKFQpw4kTwJS1ToItIY3NzcPp0sjN0gOnp+udJXQp0EWksqSWLIe2LHgsFuog0llRTUUjNRbFQoItIY3Hf2KLa5s2wYoUCvU0KdBFpbGoKLrwwaABKQl9f0LCkSy5tUaCLSGPhCpc4b2xRTWvR26ZAF5HGkmwqCqlbtG0KdBFpLMmmotDYGBw+DGfPJvs+XUyBLiL1nToFx44lt8IlNDYGCwswM5Ps+3QxBbqI1Bc2+3TikgvosksbFOgiUl/STUUhNRe1TYEuIvUlvQY9FN7oQjP0linQRaS+YjFYJz48nOz7rFkDGzcq0NugQBeR+qamgjAfGEj+vbSNblsU6CJSXyfWoIfUXNQWBbqI1JfkPujVwuYi9868X5eJFOhmtsPMnjazQ2Z2T53zfsHMzpnZb8ZXooikZmEhWLbYyRn6yy/D8eOdeb8u0zDQzawfuB+4AdgO3GJm25c576+Ax+MuUkRScuRI0LnZyUAHXXZpUZQZ+jXAIXd/1t3PAI8AO2uc90HgC8DRGOsTkTQlvQ96NTUXtSVKoA8DlfeFKpWP/T8zGwZ+A9hdbyAz22Vmk2Y2OTc312ytItJpnWoqCqm5qC1RAr3WfpnVn1h8HLjb3c/VG8jd97j7hLtPDA0NRa1RRNLSqaai0NAQrFqlGXqLoiwsLQGjFc9HgOrdcyaARyzYK3kjcKOZzbv7v8RSpYiko1gMbmqxbl1n3q+vT0sX2xAl0PcD28zsUuAwcDPwnsoT3P3S8Hszewj4ksJcpAt0YtvcamoualnDSy7uPg/cRbB65Sngc+5+0MzuMLM7ki5QRFLUyaaikGboLYvUy+vu+4B9VcdqfgDq7r/TflkikgnFIlx3XWffs1CA2Vk4fTq4ni6RqVNURGp76aWgwSeNGTpAqdTZ9+0CCnQRqa3TK1xCai5qmQJdRGrrdFNRSM1FLVOgi0htnW4qCo2MLH5/iUyBLiK1FYvBHuibN3f2fS+4ADZt0gy9BQp0EamtWAxuC9ff3/n3DrfRlaYo0EWktjTWoIe0Fr0lCnQRqS2NLtFQGOi60UVTFOgistT8PBw+3PkVLqGxMXjlFTh2LJ33zykFuogsNTsL586lN0PX0sWWKNBFZKm0liyGtC96SxToIrJUWk1FIXWLtkSBLiJLhUE6Olr/vKRs2ACDgwr0JinQRWSpqakgVNesSef9zbQvegsU6CKyVJpr0ENai940BbqILFUspnf9PKRu0aYp0EVkMfd0m4pCY2Nw9GiwHl0iUaCLyGInTgQ3t8hCoANMT6dbR44o0EVksbSXLIbUXNQ0BbqILJZ2U1FIzUVNU6CLyGJp3Xqu2vBwsHxRM/TIFOgislixCKtW
wSWXpFvHypWwdasCvQkKdBFZbGoq6BDty0A8qLmoKRn4HRORTMlCU1FIzUVNUaCLyGJZaCoKFQrBssWFhbQryQUFuoicd+YMzMxka4Z+5gw891zaleSCAl1Ezjt8OOgUzVKggy67RKRAF5HzstJUFFJzUVMU6CJyXlaaikJqLmqKAl1Ezkv7xhbVLroI1q7VDD2iSIFuZjvM7GkzO2Rm99R4/VYze6L89S0zuyL+UkUkccUibNoEF1yQdiUBM22j24SGgW5m/cD9wA3AduAWM9teddpPgF929zcAHwX2xF2oiHRAFrbNrabmosiizNCvAQ65+7PufgZ4BNhZeYK7f8vdj5effhsYibdMEemILDUVhdRcFFmUQB8GKjckLpWPLecDwJdrvWBmu8xs0swm5+bmolcpIslzz1ZTUahQgBdegJdfTruSzIsS6FbjmNc80exXCAL97lqvu/sed59w94mhoaHoVYpI8p5/Hk6dyuYMHTRLjyBKoJeAyo+8R4CZ6pPM7A3AA8BOd38+nvJEpGOysm1uNQV6ZFECfT+wzcwuNbOVwM3AY5UnmNkY8CjwXnf/cfxlikjistZUFFKgRzbQ6AR3nzezu4DHgX7gQXc/aGZ3lF/fDfwJsAH4pJkBzLv7RHJli0jsstZUFNq6Ffr7tdIlgoaBDuDu+4B9Vcd2V3x/O3B7vKWJSEcVi7B6NWzYkHYliw0MBHcv0gy9IXWKikggXOFitdZBpExLFyNRoItIIItr0EPqFo1EgS4igSx2iYbGxqBUgnPn0q4k0xToIgKvvhrcRCJrK1xCY2MwPw+zs2lXkmkKdBEJZr+Q3Rm69kWPRIEuItldshjSvuiRKNBFJLtdoiE1F0WiQBeRICjNYCSjG6WuXQsXX6xAb0CBLiLBpYwtW2DlyrQrWZ72RW9IgS4i2V6DHlJzUUMKdBHJ5j7o1dRc1JACXaTXLSzkZ4Z+4kTwJTUp0EV63dwcnD6dj0AHzdLrUKCL9Lqs7oNeTc1FDSnQRXpd1puKQmouakiBLtLrst5UFNq8GVas0Ay9DgW6SK8rFoPGnfXr066kvr4+GB1VoNehQBfpdeG2uVm8sUU1NRfVpUAX6XV5WLIYUnNRXQp0kV6Xh6aiUKEAMzNw9mzalWSSAl2kl508CceO5WuGvrAAhw+nXUkmKdBFetn0dPCYp0AHXXZZhgJdpJflpakopOaiuhToIr0sL01FodHR4FErXWpSoIv0smIR+vth69a0K4lmcBA2btQMfRkKdJFeVizC8DAMDKRdSXRaurgsBbpILwubivKkUNAll2Uo0EV6WZ6aikLhDN097UoyR4Eu0qvOnYNSKT8rXEJjY8H6+ePH064kcxToIr3qyJGg4zJvM/TwLyBddllCgS7Sq/KybW41NRctK1Kgm9kOM3vazA6Z2T01Xjcz+0T59SfM7Kr4SwX27oXx8WAbzfHx4HlexlftnR876fHzXvtNNwXf3357/LUnaf/+4PHd79bvaTV3r/sF9AP/C7wGWAn8ANhedc6NwJcBA64FvtNo3Kuvvtqb8tnPug8OugcfhQRfg4PB8TgkOb5q7/zYSY+v2tOR559LTOMDk75Mrpo3+KTYzN4M/Jm7v6P8/MPlvwj+ouKcfwC+4e4Pl58/DVzv7rPLjTsxMeGTk5PR/+YZH699zWxgALZtiz7Ocp55Bubnkxk/ybGTHj+vYyc9fjfWXijAT3/a3thJSzIH0vo9bfLnbmYH3H2i1mtRugmGgemK5yXgTRHOGQYWBbqZ7QJ2AYw1e91uuetl8/Nw+eXNjVXLU08lN36SYyc9fl7HTnr8bqw9D9ekk8yBtH5P4/y5Lzd1D7+A3wIeqHj+XuDvq875N+C6iudfB66uN27Tl1wKhcX/VAm/CoXmxkljfNXe+bGTHl+1pyPPP5eYxqfOJZcoH4qWgNGK5yPATAvntOfee4N9HCoNDgbHsz6+au/82EmPr9rTkeefSyd+7sslffhFcFnm
WeBSzn8o+vNV57yTxR+KfrfRuE3P0N2DDw8KBXez4DHuD3GSHF+1d37spMdX7enI888lhvFp50NRADO7Efg4wYqXB939XjO7o/wXwm4zM+A+YAdwCrjN3et+4tn0h6IiItL2h6K4+z5gX9Wx3RXfO3BnO0WKiEh71CkqItIlFOgiIl1CgS4i0iUU6CIiXSLSKpdE3thsDmh1/8uNwLEYy+kk1Z4O1Z6OvNae5boL7j5U64XUAr0dZja53LKdrFPt6VDt6chr7XmtW5dcRES6hAJdRKRL5DXQ96RdQBtUezpUezryWnsu687lNXQREVkqrzN0ERGpokAXEekSuQv0RjesziozGzWz/zCzp8zsoJl9KO2ammFm/Wb232b2pbRraYaZrTezz5vZ/5R/9m9Ou6aozOwPyn9WnjSzh83sgrRrWo6ZPWhmR83syYpjP2dmXzOzZ8qPF6dZ43KWqf1j5T8zT5jZP5vZ+jRrjCpXgW5m/cD9wA3AduAWM9ueblWRzQN/6O6XEewZf2eOagf4ELDMPbQy7e+Ar7j764EryMl/g5kNA78HTLj75QRbV9+cblV1PUSwfXale4Cvu/s2gruYZXUC9hBLa/8acLm7vwH4MfDhThfVilwFOnANcMjdn3X3M8AjwM6Ua4rE3Wfd/Xvl718iCJbhdKuKxsxGCG5i8kDatTTDzNYBvwR8GsDdz7j7z9KtqikDwGozGwAGifsuYDFy928CL1Qd3gl8pvz9Z4B3d7SoiGrV7u5fdffwjs7fJrgLW+blLdCXuxl1rpjZOHAl8J10K4ns48AfAQtpF9Kk1wBzwD+WLxc9YGZr0i4qCnc/DPw1UCS42foJd/9qulU1bZO7z0IwoQEuSbmeVv0uwR3ZMi9vgW41juVq3aWZXQh8Afh9d38x7XoaMbN3AUfd/UDatbRgALgK+JS7XwmcJLv/7F+kfL15J8GtH7cCa8zst9OtqveY2UcILpfuTbuWKPIW6MnfjDpBZraCIMz3uvujadcT0VuAm8zspwSXuH7VzD6bbkmRlYCSu4f/Evo8QcDnwa8BP3H3OXc/CzwK/GLKNTXrOTPbAlB+PJpyPU0xs/cD7wJu9Zw07OQt0PcD28zsUjNbSfAh0WMp1xRJ+b6rnwaecve/TbueqNz9w+4+4u7jBD/vf3f3XMwU3f0IMG1mrysfehvwoxRLakYRuNbMBst/dt5GTj7QrfAY8P7y9+8HvphiLU0xsx3A3cBN7n4q7XqiylWglz+kuAt4nOAP9+fc/WC6VUX2FuC9BDPc75e/bky7qB7wQWCvmT0BvBH485TriaT8r4rPA98Dfkjw/2pm29HN7GHgv4DXmVnJzD4A/CXwdjN7Bnh7+XnmLFP7fcBa4Gvl/1d31x0kI9T6LyLSJXI1QxcRkeUp0EVEuoQCXUSkSyjQRUS6hAJdRKRLKNBFRLqEAl1EpEv8H2SstMCZeOepAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "print(mdlPCA.components_[0,:])\n", "plt.plot(np.arange(0,mdlPCA.components_[0,:].shape[0]), mdlPCA.components_[0,:], '-ro')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Question: What is \"wrong\" in the data?" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11tsRelQ6_ReadingQ6_RunningQ6_Watching a movie
count19.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.000000
mean6.4736845.7894745.7894744.7894744.6315794.7894745.7368426.8421056.0000007.78947472315.0526320.2631580.2105260.526316
std1.5408661.7819761.8128842.0160182.1137262.3939492.2568931.7082531.7950551.61860592341.6913300.4524140.4188540.512989
min4.0000003.0000001.0000001.0000001.0000001.0000001.0000004.0000002.0000003.0000000.0000000.0000000.0000000.000000
25%5.5000004.5000005.0000004.0000003.0000003.5000004.5000005.5000005.0000007.00000060741.5000000.0000000.0000000.000000
50%7.0000006.0000006.0000005.0000005.0000005.0000006.0000007.0000006.0000008.00000060773.0000000.0000000.0000001.000000
75%8.0000007.0000007.0000006.0000006.0000007.0000007.0000008.0000007.0000008.50000060806.5000000.5000000.0000001.000000
max9.0000009.0000009.0000009.0000009.0000008.00000010.00000010.00000010.00000010.000000443956.0000001.0000001.0000001.000000
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 \\\n", "count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n", "mean 6.473684 5.789474 5.789474 4.789474 4.631579 4.789474 \n", "std 1.540866 1.781976 1.812884 2.016018 2.113726 2.393949 \n", "min 4.000000 3.000000 1.000000 1.000000 1.000000 1.000000 \n", "25% 5.500000 4.500000 5.000000 4.000000 3.000000 3.500000 \n", "50% 7.000000 6.000000 6.000000 5.000000 5.000000 5.000000 \n", "75% 8.000000 7.000000 7.000000 6.000000 6.000000 7.000000 \n", "max 9.000000 9.000000 9.000000 9.000000 9.000000 8.000000 \n", "\n", " Q8 Q9 Q10 Q11 tsRel Q6_Reading \\\n", "count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n", "mean 5.736842 6.842105 6.000000 7.789474 72315.052632 0.263158 \n", "std 2.256893 1.708253 1.795055 1.618605 92341.691330 0.452414 \n", "min 1.000000 4.000000 2.000000 3.000000 0.000000 0.000000 \n", "25% 4.500000 5.500000 5.000000 7.000000 60741.500000 0.000000 \n", "50% 6.000000 7.000000 6.000000 8.000000 60773.000000 0.000000 \n", "75% 7.000000 8.000000 7.000000 8.500000 60806.500000 0.500000 \n", "max 10.000000 10.000000 10.000000 10.000000 443956.000000 1.000000 \n", "\n", " Q6_Running Q6_Watching a movie \n", "count 19.000000 19.000000 \n", "mean 0.210526 0.526316 \n", "std 0.418854 0.512989 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 1.000000 \n", "75% 0.000000 1.000000 \n", "max 1.000000 1.000000 " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Let's normalize the data" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "df2_norm = (df2-df2.min())/(df2.max()-df2.min())" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11tsRelQ6_ReadingQ6_RunningQ6_Watching a movie
00.80.3333330.3750.6250.7500.5714290.2222220.1666670.6250.0000000.0000000.01.00.0
10.80.8333330.5000.5000.6250.8571430.6666670.3333330.6250.7142860.0006191.00.00.0
20.40.5000000.6250.6250.5000.8571430.6666670.5000000.6250.5714290.0404180.00.01.0
30.20.0000000.6250.3750.3750.2857140.7777780.6666670.3751.0000000.1367210.00.01.0
40.40.5000000.5000.3750.2500.4285710.4444440.0000001.0000.7142860.1367931.00.00.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n", "0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n", "1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n", "2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n", "3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n", "4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n", "\n", " Q11 tsRel Q6_Reading Q6_Running Q6_Watching a movie \n", "0 0.000000 0.000000 0.0 1.0 0.0 \n", "1 0.714286 0.000619 1.0 0.0 0.0 \n", "2 0.571429 0.040418 0.0 0.0 1.0 \n", "3 1.000000 0.136721 0.0 0.0 1.0 \n", "4 0.714286 0.136793 1.0 0.0 0.0 " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2_norm.head()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11tsRelQ6_ReadingQ6_RunningQ6_Watching a movie
count19.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.00000019.000000
mean0.4947370.4649120.5986840.4736840.4539470.5413530.5263160.4736840.5000000.6842110.1628880.2631580.2105260.526316
std0.3081730.2969960.2266110.2520020.2642160.3419930.2507660.2847090.2243820.2312290.2079970.4524140.4188540.512989
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%0.3000000.2500000.5000000.3750000.2500000.3571430.3888890.2500000.3750000.5714290.1368190.0000000.0000000.000000
50%0.6000000.5000000.6250000.5000000.5000000.5714290.5555560.5000000.5000000.7142860.1368900.0000000.0000001.000000
75%0.8000000.6666670.7500000.6250000.6250000.8571430.6666670.6666670.6250000.7857140.1369650.5000000.0000001.000000
max1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 \\\n", "count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n", "mean 0.494737 0.464912 0.598684 0.473684 0.453947 0.541353 \n", "std 0.308173 0.296996 0.226611 0.252002 0.264216 0.341993 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.300000 0.250000 0.500000 0.375000 0.250000 0.357143 \n", "50% 0.600000 0.500000 0.625000 0.500000 0.500000 0.571429 \n", "75% 0.800000 0.666667 0.750000 0.625000 0.625000 0.857143 \n", "max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n", "\n", " Q8 Q9 Q10 Q11 tsRel Q6_Reading \\\n", "count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n", "mean 0.526316 0.473684 0.500000 0.684211 0.162888 0.263158 \n", "std 0.250766 0.284709 0.224382 0.231229 0.207997 0.452414 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.388889 0.250000 0.375000 0.571429 0.136819 0.000000 \n", "50% 0.555556 0.500000 0.500000 0.714286 0.136890 0.000000 \n", "75% 0.666667 0.666667 0.625000 0.785714 0.136965 0.500000 \n", "max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n", "\n", " Q6_Running Q6_Watching a movie \n", "count 19.000000 19.000000 \n", "mean 0.210526 0.526316 \n", "std 0.418854 0.512989 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 1.000000 \n", "75% 0.000000 1.000000 \n", "max 1.000000 1.000000 " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2_norm.describe()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.31431827 0.29012089 0.15348486 0.07788586 0.04541328]\n", "0.8812231522564273\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "mdlPCA = PCA(n_components=5)\n", "XPCA = mdlPCA.fit_transform(df2_norm)\n", "\n", "print(mdlPCA.explained_variance_ratio_)\n", "print(np.sum(mdlPCA.explained_variance_ratio_))\n", "\n", "plt.plot(np.arange(0,mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[-0.28489905 -0.3141605 -0.26107415 -0.25405393 -0.27500032 -0.36871538\n", " -0.17301488 0.12875075 0.16485422 -0.03296608 -0.13459097 0.4957779\n", " -0.16340968 -0.33236822]\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "print(mdlPCA.components_[0,:])\n", "plt.plot(np.arange(0,mdlPCA.components_[0,:].shape[0]), mdlPCA.components_[0,:], '-ro')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Remove tsRel column" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11Q6_ReadingQ6_RunningQ6_Watching a movie
00.80.3333330.3750.6250.7500.5714290.2222220.1666670.6250.0000000.01.00.0
10.80.8333330.5000.5000.6250.8571430.6666670.3333330.6250.7142861.00.00.0
20.40.5000000.6250.6250.5000.8571430.6666670.5000000.6250.5714290.00.01.0
30.20.0000000.6250.3750.3750.2857140.7777780.6666670.3751.0000000.00.01.0
40.40.5000000.5000.3750.2500.4285710.4444440.0000001.0000.7142861.00.00.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n", "0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n", "1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n", "2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n", "3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n", "4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n", "\n", " Q11 Q6_Reading Q6_Running Q6_Watching a movie \n", "0 0.000000 0.0 1.0 0.0 \n", "1 0.714286 1.0 0.0 0.0 \n", "2 0.571429 0.0 0.0 1.0 \n", "3 1.000000 0.0 0.0 1.0 \n", "4 0.714286 1.0 0.0 0.0 " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2_noTs = df2_norm[df2_norm.columns[df2_norm.columns.str.contains('tsRel')==False]]\n", "df2_noTs.head()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11Q6_ReadingQ6_RunningQ6_Watching a movie
00.80.3333330.3750.6250.7500.5714290.2222220.1666670.6250.0000000.01.00.0
10.80.8333330.5000.5000.6250.8571430.6666670.3333330.6250.7142861.00.00.0
20.40.5000000.6250.6250.5000.8571430.6666670.5000000.6250.5714290.00.01.0
30.20.0000000.6250.3750.3750.2857140.7777780.6666670.3751.0000000.00.01.0
40.40.5000000.5000.3750.2500.4285710.4444440.0000001.0000.7142861.00.00.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n", "0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n", "1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n", "2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n", "3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n", "4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n", "\n", " Q11 Q6_Reading Q6_Running Q6_Watching a movie \n", "0 0.000000 0.0 1.0 0.0 \n", "1 0.714286 1.0 0.0 0.0 \n", "2 0.571429 0.0 0.0 1.0 \n", "3 1.000000 0.0 0.0 1.0 \n", "4 0.714286 1.0 0.0 0.0 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remove the tsRel column by label; every other column is kept unchanged.\n", "df2_noTs = df2_norm.drop(labels='tsRel', axis='columns')\n", "df2_noTs.head()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.31894549 0.29885772 0.1547158 0.07966809 0.04678632]\n", "0.898973407637008\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "mdlPCA = PCA(n_components=5)\n", "XPCA = mdlPCA.fit_transform(df2_noTs)\n", "\n", "# Variance share of each retained component, then the total they capture.\n", "print(mdlPCA.explained_variance_ratio_)\n", "print(np.sum(mdlPCA.explained_variance_ratio_))\n", "\n", "# Scree plot, labelled so the figure can be read on its own.\n", "plt.plot(np.arange(mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')\n", "plt.xlabel('Principal component (0-based)')\n", "plt.ylabel('Explained variance ratio')\n", "plt.title('PCA on df2_noTs');" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[-0.30244657 -0.32214438 -0.26410732 -0.25227651 -0.27659662 -0.3869891\n", " -0.17634825 0.12991123 0.1677439 -0.02453888 0.49300779 -0.20299601\n", " -0.29001178]\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Weights of the first principal component, one per column of df2_noTs\n", "# (the frame the PCA above was fitted on).\n", "pc1 = mdlPCA.components_[0, :]\n", "print(pc1)\n", "\n", "plt.plot(np.arange(pc1.shape[0]), pc1, '-ro')\n", "# Name each position after its input column so the weights can be interpreted.\n", "plt.xticks(np.arange(pc1.shape[0]), df2_noTs.columns, rotation=90)\n", "plt.ylabel('Weight in first component')\n", "plt.title('First principal component (df2_noTs)');" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Alternative approach: Keep tsRel, but exclude outliers" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11tsRelQ6_ReadingQ6_RunningQ6_Watching a movie
30.250.00.7142860.5000000.5000000.2857140.7777780.6666670.3751.000.0000000.00.01.0
40.500.60.5714290.5000000.3333330.4285710.4444440.0000001.0000.500.1327801.00.00.0
51.000.81.0000000.3333330.3333331.0000000.3333331.0000000.0000.500.2282160.01.00.0
60.000.00.0000000.0000000.0000000.0000000.0000001.0000000.7500.500.2572611.00.00.0
70.750.00.8571430.8333330.6666670.4285710.5555560.6666670.5000.750.2697101.00.00.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 \\\n", "3 0.25 0.0 0.714286 0.500000 0.500000 0.285714 0.777778 0.666667 \n", "4 0.50 0.6 0.571429 0.500000 0.333333 0.428571 0.444444 0.000000 \n", "5 1.00 0.8 1.000000 0.333333 0.333333 1.000000 0.333333 1.000000 \n", "6 0.00 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 \n", "7 0.75 0.0 0.857143 0.833333 0.666667 0.428571 0.555556 0.666667 \n", "\n", " Q10 Q11 tsRel Q6_Reading Q6_Running Q6_Watching a movie \n", "3 0.375 1.00 0.000000 0.0 0.0 1.0 \n", "4 1.000 0.50 0.132780 1.0 0.0 0.0 \n", "5 0.000 0.50 0.228216 0.0 1.0 0.0 \n", "6 0.750 0.50 0.257261 1.0 0.0 0.0 \n", "7 0.500 0.75 0.269710 1.0 0.0 0.0 " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep the rows whose tsRel lies strictly between 60000 and 61000, then\n", "# min-max rescale every column using the min and max of the remaining rows.\n", "df2Fil = df2[(df2.tsRel > 60000) & (df2.tsRel < 61000)]\n", "df2Fil_norm = df2Fil.sub(df2Fil.min()).div(df2Fil.max() - df2Fil.min())\n", "df2Fil_norm.head()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0.34631596 0.27017825 0.14469026 0.07712654 0.05616314]\n", "0.8944741480656595\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "mdlPCA = PCA(n_components=5)\n", "XPCA = mdlPCA.fit_transform(df2Fil_norm)\n", "\n", "# Variance share of each retained component, then the total they capture.\n", "print(mdlPCA.explained_variance_ratio_)\n", "print(np.sum(mdlPCA.explained_variance_ratio_))\n", "\n", "# Scree plot, labelled so the figure can be read on its own.\n", "plt.plot(np.arange(mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')\n", "plt.xlabel('Principal component (0-based)')\n", "plt.ylabel('Explained variance ratio')\n", "plt.title('PCA on df2Fil_norm');" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[-0.43656834 -0.36520933 -0.30003171 -0.22317841 -0.26032745 -0.38821589\n", " -0.21312891 0.05576004 0.17168435 -0.07017853 -0.05979342 0.27181642\n", " -0.3812503 0.10943387]\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Weights of the first principal component, one per column of df2Fil_norm\n", "# (the frame the PCA above was fitted on).\n", "pc1 = mdlPCA.components_[0, :]\n", "print(pc1)\n", "\n", "plt.plot(np.arange(pc1.shape[0]), pc1, '-ro')\n", "# Name each position after its input column so the weights can be interpreted.\n", "plt.xticks(np.arange(pc1.shape[0]), df2Fil_norm.columns, rotation=90)\n", "plt.ylabel('Weight in first component')\n", "plt.title('First principal component (df2Fil_norm)');" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Predictive models\n", "\n", "#### Can we predict motivation?\n", "#### Can we predict stress?" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Q1Q2Q3Q4Q5Q7Q8Q9Q10Q11Q6_ReadingQ6_RunningQ6_Watching a movie
00.80.3333330.3750.6250.7500.5714290.2222220.1666670.6250.0000000.01.00.0
10.80.8333330.5000.5000.6250.8571430.6666670.3333330.6250.7142861.00.00.0
20.40.5000000.6250.6250.5000.8571430.6666670.5000000.6250.5714290.00.01.0
30.20.0000000.6250.3750.3750.2857140.7777780.6666670.3751.0000000.00.01.0
40.40.5000000.5000.3750.2500.4285710.4444440.0000001.0000.7142861.00.00.0
\n", "
" ], "text/plain": [ " Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n", "0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n", "1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n", "2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n", "3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n", "4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n", "\n", " Q11 Q6_Reading Q6_Running Q6_Watching a movie \n", "0 0.000000 0.0 1.0 0.0 \n", "1 0.714286 1.0 0.0 0.0 \n", "2 0.571429 0.0 0.0 1.0 \n", "3 1.000000 0.0 0.0 1.0 \n", "4 0.714286 1.0 0.0 0.0 " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop tsRel from the raw answers, then min-max rescale each remaining column.\n", "# df3_norm is only displayed here; X and y below are taken from df3 itself.\n", "df3 = df2.drop(labels='tsRel', axis='columns')\n", "df3_norm = df3.sub(df3.min()).div(df3.max() - df3.min())\n", "df3_norm.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Select X and y\n", "" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ ":1: FutureWarning: The pandas.np module is deprecated and will be removed from pandas in a future version. 
Import numpy directly instead\n", " colX = df3.columns[pd.np.r_[0:8,10:13]]\n" ] }, { "data": { "text/plain": [ "['Q1',\n", " 'Q2',\n", " 'Q3',\n", " 'Q4',\n", " 'Q5',\n", " 'Q7',\n", " 'Q8',\n", " 'Q9',\n", " 'Q6_Reading',\n", " 'Q6_Running',\n", " 'Q6_Watching a movie']" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Feature columns: positions 0-7 and 10-12 of df3. Positions 8 and 9 (Q10, Q11)\n", "# are skipped because they are the candidate targets.\n", "# np.r_ is taken from numpy directly; the pandas.np alias is deprecated.\n", "colX = df3.columns[np.r_[0:8, 10:13]]\n", "colX.tolist()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Q11']" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Target column. Use exactly one of:\n", "#   ['Q10']  level of stress\n", "#   ['Q11']  level of motivation\n", "colY = ['Q11']\n", "\n", "colY" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(19, 11)\n" ] }, { "data": { "text/plain": [ "array([[ 8, 5, 4, 6, 7, 5, 3, 5, 0, 1, 0],\n", " [ 8, 8, 5, 5, 6, 7, 7, 6, 1, 0, 0],\n", " [ 6, 6, 6, 6, 5, 7, 7, 7, 0, 0, 1],\n", " [ 5, 3, 6, 4, 4, 3, 8, 8, 0, 0, 1],\n", " [ 6, 6, 5, 4, 3, 4, 5, 4, 1, 0, 0],\n", " [ 8, 7, 8, 3, 3, 8, 4, 10, 0, 1, 0],\n", " [ 4, 3, 1, 1, 1, 1, 1, 10, 1, 0, 0],\n", " [ 7, 3, 7, 6, 5, 4, 6, 8, 1, 0, 0],\n", " [ 5, 5, 5, 4, 4, 4, 4, 5, 0, 0, 1],\n", " [ 6, 6, 6, 6, 6, 4, 6, 6, 0, 0, 1],\n", " [ 4, 4, 4, 5, 3, 5, 2, 7, 0, 0, 1],\n", " [ 7, 7, 7, 2, 2, 7, 6, 7, 0, 1, 0],\n", " [ 8, 8, 8, 6, 6, 8, 7, 8, 0, 0, 1],\n", " [ 4, 4, 4, 1, 1, 1, 5, 7, 0, 0, 1],\n", " [ 8, 7, 7, 7, 7, 7, 10, 5, 0, 1, 0],\n", " [ 7, 7, 6, 6, 6, 6, 6, 6, 0, 0, 1],\n", " [ 7, 6, 6, 5, 5, 1, 7, 7, 1, 0, 0],\n", " [ 6, 6, 6, 5, 5, 2, 9, 9, 0, 0, 1],\n", " [ 9, 9, 9, 9, 9, 7, 6, 5, 0, 0, 1]])" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = np.array(df3[colX])\n", "print(X.shape)\n", "X" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text":
[ "(19, 1)\n" ] }, { "data": { "text/plain": [ "array([[ 3],\n", " [ 8],\n", " [ 7],\n", " [10],\n", " [ 8],\n", " [ 8],\n", " [ 8],\n", " [ 9],\n", " [ 7],\n", " [ 6],\n", " [ 7],\n", " [ 8],\n", " [ 8],\n", " [ 8],\n", " [10],\n", " [ 7],\n", " [ 7],\n", " [ 9],\n", " [10]])" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = np.array(df3[colY])\n", "print(y.shape)\n", "y" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Normalize data" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n", "\n", "# Min-max scaling is used here; StandardScaler() can be swapped in instead.\n", "scaler = MinMaxScaler()\n", "\n", "# fit() returns the fitted scaler, so learning the ranges and applying them chain.\n", "Xnorm = scaler.fit(X).transform(X)\n", "Xnorm.max(axis=0)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from sklearn.svm import SVR\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Cross validation with leave one out" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TRAIN: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [0]\n", "TRAIN: [ 0 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [1]\n", "TRAIN: [ 0 1 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [2]\n", "TRAIN: [ 0 1 2 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [3]\n", "TRAIN: [ 0 1 2 3 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [4]\n", "TRAIN: [ 0 1 2 3 4 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [5]\n", "TRAIN: [ 0 1 2 3 4 5 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [6]\n", "TRAIN: [ 0 1 2 3 4 5 6 8 9 10 11 12 13 14 15 16 17 18] TEST: [7]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 16 17 18] TEST: [8]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 10 11 12 13 14 15 16 17 18] TEST: [9]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 11 12 13 14 15 16 17 18] TEST: [10]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 12 13 14 15 16 17 18] TEST: [11]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 17 18] TEST: [12]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 14 15 16 17 18] TEST: [13]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 15 16 17 18] TEST: [14]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 16 17 18] TEST: [15]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18] TEST: [16]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18] TEST: [17]\n", "TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17] TEST: [18]\n" ] } ], "source": [ "from sklearn.model_selection import LeaveOneOut\n", "\n", "loo = LeaveOneOut()\n", "for train_index, test_index in loo.split(Xnorm):\n", " print('TRAIN: ' + str(train_index) + ' TEST: ' + str(test_index))\n" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Y : [[3]] , pred as: [6.71770473]\n", "Y : [[8]] , pred as: [8.44358673]\n", "Y : [[7]] , pred as: [8.57175498]\n", "Y : [[10]] , pred as: [7.62213136]\n", "Y : [[8]] , pred as: [6.70446542]\n", "Y : [[8]] , pred as: [8.9269527]\n", "Y : [[8]] , pred as: [5.14777884]\n", "Y : [[9]] , pred as: [8.02397316]\n", "Y : [[7]] , pred as: [6.22595715]\n", "Y : [[6]] , pred as: [7.94496256]\n", "Y : [[7]] , pred as: [6.21621235]\n", "Y : [[8]] , pred as: [7.00076291]\n", "Y : [[8]] , pred as: [9.45348764]\n", "Y : [[8]] , pred as: [6.27163448]\n", "Y : [[10]] , pred as: [6.519619]\n", "Y : [[7]] , pred as: [8.10618284]\n", "Y : [[7]] , pred as: [8.58400087]\n", "Y : [[9]] , pred as: [7.92971992]\n", "Y : [[10]] , pred as: [6.78948034]\n" ] } ], "source": [ "import warnings\n", "\n", "from sklearn.exceptions import ConvergenceWarning\n", "from sklearn.svm import LinearSVR\n",
"\n", "loo = LeaveOneOut()\n", "\n", "# One held-out prediction per sample, written to that sample's own row.\n", "predAll = np.zeros([y.shape[0], 1])\n", "\n", "with warnings.catch_warnings():\n", "    # Mute only the solver's non-convergence warning, and only inside this block;\n", "    # a global filterwarnings('ignore') would also hide every later warning.\n", "    warnings.simplefilter('ignore', category=ConvergenceWarning)\n", "\n", "    for train_index, test_index in loo.split(Xnorm):\n", "        X_train, X_test = Xnorm[train_index], Xnorm[test_index]\n", "        y_train, y_test = y[train_index], y[test_index]\n", "\n", "        regr = LinearSVR(random_state=0, tol=1e-5)\n", "\n", "        # y is a column vector; fit() expects a 1-D target, so flatten it here.\n", "        regr.fit(X_train, y_train.ravel())  # Train the model\n", "\n", "        ypred = regr.predict(X_test)  # Apply the model\n", "\n", "        # test_index is the single left-out row, so no manual counter is needed.\n", "        predAll[test_index, 0] = ypred\n", "\n", "        print('Y : ' + str(y_test) + ' , pred as: ' + str(ypred))" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 0.01086243],\n", " [0.01086243, 1. ]])" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.corrcoef(y.T, predAll.T)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPxUlEQVR4nO3dXWxkZ33H8e8/XqPMRkVOE4Pw0nRBQg5NULKphXhRo0KgBgphiVopSFQUVSwXiLcLV+xNEypVFC0XcIW0glKk0rRkWRboRUxEW3pFKidmtVkSC/EWMgvEiJgCGYFj/r3weLPe2OsZe86c5+x8P9LK9uPx+Cdr5zfnPOec50RmIkkq1xV1B5AkXZpFLUmFs6glqXAWtSQVzqKWpMLtq+JJr7322jx48GAVTy1Jl6UHH3zwZ5k5udX3KinqgwcPsrCwUMVTS9JlKSJ+uN33nPqQpMJZ1JJUOItakgpnUUtS4SxqSSpcJWd9SCrPqcU2x+aXOLfSYWqixdzsNIcPHag7lnpgUUsj4NRim6Mnz9BZXQOgvdLh6MkzAJZ1Azj1IY2AY/NL50t6Q2d1jWPzSzUlUj8samkEnFvp9DWusljU0giYmmj1Na6yWNTSCJibnaY1PrZprDU+xtzsdE2J1A8PJkojYOOAoWd9NJNFLY2Iw4cOWMwN5dSHJBXOopakwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXAWtSQVzqKWpMJZ1JJUOItakgpnUUtS4Xoq6oj4QEQ8HBFnI+KDVYeSJD1jx6KOiBuBdwMvB24C3hwRL6k6mCRpXS9b1C8FvpmZT2Xm08A3gLdVG0uStKGXGwc8DPxDRFwDdIA3AQsXPygijgBHAK677rpBZpQ0AKcW297hpaF2LOrMfCQiPgbcD/wKOA08vcXjjgPHAWZmZnLAOSXtwanFNkdPnqGzugZAe6XD0ZNnACzrBujpYGJmfiYzb8nMW4GfA9+pNpakQTo2v3S+pDd0Vtc4Nr9UUyL1o6d7JkbE8zLziYi4DrgDeGW1sSQN0rmVTl/jKkuvN7f9YneOehV4b2Y+WWEmSQM2NdGivUUpT020akijfvU69fEnmflHmXlTZn696lCSBmtudprW+Nimsdb4GHOz0zUlUj963aKW1GAbBww966OZLGppRBw+dMBibijX+pCkwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXAWtSQVzgtepD1wjWcNg0Ut7dKpxTZzJ06zura+/Hp7pcPcidOAazxrsJz6kHbpI189e76kN6yuJR/56tmaEulyZVFLu/TkU6t9jUu7ZVFLUuEsammXJlrjfY1Lu2VRS7t09+03MH5FbBobvyK4+/Ybakqky5VnfUi71LTF+D2VsLksamkPmrIY/6nFNkdPnjl/J/L2SoejJ88AnkrYBE59SCPg2PzS+ZLe0Fld49j8Uk2J1A+LWhoB57a4A/mlxlUWi1oaAVMTrb7GVRaLWhoBc7PTtMbHNo21xseYm52uKZH64cFEaQQ07QwVbWZRSyOiKWeo6Nmc+pCkwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXCeRy3tgUuHahgsammXXDpUw+LUh7RLLh2qYbGopV1y6VANi1Mf0i5NTbRob1HKLh06GM7/P6OnLeqI+FBEnI2IhyPinoi4supgUulcOrQ6G/P/7ZUOyTPz/6cW23VHq8WORR0RB4D3AzOZeSMwBtxZdTCpdIcPHeCjd7yMAxMtAjgw0eKjd7xsZLf6Bsn5/816nfrYB7QiYhXYD5yrLpJGWdN2d106tBrO/2+24xZ1ZraBjwOPAT8GfpGZX7v4cRFxJCIWImJheXl58El12XN3Vxu8ddhmvUx9XA28FXgRMAVcFRHvuPhxmXk8M2cyc2ZycnLwSXXZc3dXG15z/dYdst345a6Xg4mvA76fmcuZuQqcBF5VbSyNInd3teG/Ht1
6r3y78ctdL0X9GPCKiNgfEQHcBjxSbSyNInd3tcE37c16maN+ADgBPASc6f7M8YpzaQR5ups2+Ka9WU/nUWfmXZl5fWbemJl/lZm/qTqYRo+nu2mDb9qbeWWiiuLpboJnFrVq0qmaVbKoJRXJN+1nuCiTJBXOopakwlnUklQ4i1qSCmdRS1LhLGpJKpyn50nSHlW9PK9FLUl7MIy70Tv1IUl7MIzleS1qSdqDYaz0Z1FL0h4MY6U/i1qS9mAYK/15MFGS9mAYK/1Z1JK0R1Wv9OfUhyQVzqKWpMJZ1JJUOItakgpnUUtS4SxqSSqcRS1JhbOoJalwFrUkFc6ilqTCWdSSVDiLWpIKZ1FLUuEsakkqnEUtSYWzqCWpcBa1JBXOopakwlnUklQ4i1qSCmdRS1LhdrwLeURMA/9+wdCLgb/LzE9UlkrSwJ1abHNsfolzKx2mJlrMzU5XeudsDc6ORZ2ZS8DNABExBrSBL1WcS9IAnVpsc/TkGTqrawC0VzocPXkGwLJugH6nPm4DvpuZP6wijKRqHJtfOl/SGzqraxybX6opkfrRb1HfCdyz1Tci4khELETEwvLy8t6TSRqYcyudvsZVlp6LOiKeA9wO3LvV9zPzeGbOZObM5OTkoPJJGoCpiVZf4ypLP1vUbwQeysyfVhVGUjXmZqdpjY9tGmuNjzE3O11TIvVjx4OJF3g720x7SCrbxgFDz/popp6KOiL2A68H3lNtHElVOXzogMXcUD0VdWY+BVxTcRZJ0ha8MlGSCtfPHLUkaQtVX/VpUUvSHgzjqk+nPiRpD4Zx1WcxW9QuGCNVy9dYNYZx1WcRRe2CMVK1fI1VZ2qiRXuLUh7kVZ9FTH24YIxULV9j1RnGVZ9FbFG7YIxULV9j1RnGVZ9FFPUwdh2kUeZrrFpVX/VZxNSHC8ZI1fI11mxFbFG7YIxULV9jzRaZOfAnnZmZyYWFhYE/ryRdriLiwcyc2ep7RUx9SJK2V8TUhyQ1mWt9SFLBXOtDkgo3jIuJLGpJ2oNhXExkUUvSHgzjDu8WtSTtwcis9SFJTTUya31IUpONxFofkqTtWdSSVDiLWpIK5xy1NCK8Z2JzWdTSCPCeic1mUUt70JSt1Etd5lxiXm1mUUu7dGqxzdy9p1n93fqa7u2VDnP3ngbK20r1nonN5sFEaZfu/srZ8yW9YfV3yd1fOVtTou0N4zJnVceilnZppbPa13idvGdiszn1IY0A75nYbBa1tEtX7x/nyaeevfV89f7xGtLsrOrLnFUdpz6kXbrrLTcwPhabxsbHgrveckNNiXS5cota2iWnEzQsFrW0B04naBic+pCkwlnUklS4noo6IiYi4kREPBoRj0TEK6sOJkla1+sc9SeB+zLzLyLiOcD+CjNJki6wY1FHxHOBW4G/BsjM3wK/rTaWJGlDL1MfLwaWgc9GxGJEfDoirrr4QRFxJCIWImJheXl54EElaVT1UtT7gFuAT2XmIeDXwIcvflBmHs/MmcycmZycHHBMSRpdvRT148DjmflA9+sTrBe3JGkIdizqzPwJ8KOI2Fhm6zbg25WmkiSd1+tZH+8DPt894+N7wLuqiyRJulBPRZ2Z3wJmKs4iSdqCVyZKUuEsakkqnEUtSYWzqCWpcBa1JBXOopakwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXAWtSQVzqKWpMJZ1JJUOItakgpnUUtS4SxqSSqcRS1JhbOoJalwFrUkFc6ilqTCWdSSVDiLWpIKZ1FLUuEsakkqnEUtSYWzqCWpcBa1JBXOopakwu2rO4Ck4Ti12ObY/BLnVjpMTbSYm53m8KEDdcdSDyxqaQScWmxz9OQZOqtrALRXOhw9eQbAsm4Apz6kEXBsful8SW/orK5xbH6ppkTqh0UtjYBzK52+xlUWi1oaAVMTrb7GVRaLWhoBc7PTtMbHNo21xseYm52uKZH64cFEaQRsHDD0rI9m6qmoI+IHwC+BNeDpzJy
pMpSkwTt86IDF3FD9bFG/JjN/VlkSSdKWnKOWpML1WtQJfC0iHoyII1s9ICKORMRCRCwsLy8PLqEkjbhei/rVmXkL8EbgvRFx68UPyMzjmTmTmTOTk5MDDSlJo6ynos7Mc92PTwBfAl5eZShJ0jMiMy/9gIirgCsy85fdz+8H/j4z77vEzywDP9xlpmuBphy0bFJWaFbeJmWFZuVtUlZoVt69ZP3DzNxyOqKXsz6eD3wpIjYe/6+XKmmA7X5ZLyJioSmn/zUpKzQrb5OyQrPyNikrNCtvVVl3LOrM/B5w06B/sSSpN56eJ0mFK7Goj9cdoA9NygrNytukrNCsvE3KCs3KW0nWHQ8mSpLqVeIWtSTpAha1JBWuiKKOiCsj4n8j4nREnI2Ij9SdqRcRMRYRixHxH3VnuZSI+EFEnImIb0XEQt15dhIRExFxIiIejYhHIuKVdWfaSkRMd/+mG//+LyI+WHeuS4mID3VfYw9HxD0RcWXdmbYTER/o5jxb4t81Iv4pIp6IiIcvGPv9iLg/Ir7T/Xj1IH5XEUUN/AZ4bWbeBNwMvCEiXlFzpl58AHik7hA9ek1m3tyQ81E/CdyXmdezfmpokX/jzFzq/k1vBv4YeIr1K3eLFBEHgPcDM5l5IzAG3Flvqq1FxI3Au1m/Cvom4M0R8ZJ6Uz3LPwNvuGjsw8DXM/MlwNe7X+9ZEUWd637V/XK8+6/oo5wR8ULgz4FP153lchIRzwVuBT4DkJm/zcyVelP15Dbgu5m52ytyh2Uf0IqIfcB+4FzNebbzUuCbmflUZj4NfAN4W82ZNsnM/wF+ftHwW4HPdT//HHB4EL+riKKG89MI3wKeAO7PzAfqzrSDTwB/C/yu7iA92HH1w4K8GFgGPtudVvp0d+mC0t0J3FN3iEvJzDbwceAx4MfALzLza/Wm2tbDwK0RcU1E7AfeBPxBzZl68fzM/DFA9+PzBvGkxRR1Zq51dyFfCLy8u+tTpIh4M/BEZj5Yd5Ye7bj6YUH2AbcAn8rMQ8CvGdDuY1Ui4jnA7cC9dWe5lO586VuBFwFTwFUR8Y56U20tMx8BPsb62kL3AaeBp2sNVaNiinpDdzf3v3n23E9JXg3c3r1F2b8Br42If6k30vYatvrh48DjF+xRnWC9uEv2RuChzPxp3UF28Drg+5m5nJmrwEngVTVn2lZmfiYzb8nMW1mfYvhO3Zl68NOIeAFA9+MTg3jSIoo6IiYjYqL7eYv1/1CP1ptqe5l5NDNfmJkHWd/l/c/MLHLLJCKuiojf2/gc+DPWdyuLlJk/AX4UERu3x74N+HaNkXrxdgqf9uh6DHhFROyP9VXWbqPQA7UAEfG87sfrgDtoxt/4K8A7u5+/E/jyIJ60lLuQvwD4XESMsf7m8YXMLPqUtwbpe/XDArwP+Hx3SuF7wLtqzrOt7vzp64H31J1lJ5n5QEScAB5ifRphkbIvz/5iRFwDrALvzcwn6w50oYi4B/hT4NqIeBy4C/hH4AsR8TesvzH+5UB+l5eQS1LZipj6kCRtz6KWpMJZ1JJUOItakgpnUUtS4SxqSSqcRS1Jhft/4YHCuW/gYY8AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Leave-one-out prediction against the true value of the chosen target column.\n", "# Both arrays are flattened to 1-D so scatter receives plain point lists.\n", "plt.scatter(y.ravel(), predAll.ravel())\n", "plt.xlabel('Actual ' + colY[0])\n", "plt.ylabel('Leave-one-out prediction')\n", "plt.title('LinearSVR: actual vs. predicted');" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 2 }