{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ML Basics, warming up with small data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Automatically created module for IPython interactive environment\n"
]
}
],
"source": [
"print(__doc__)\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import datasets\n",
"from sklearn.decomposition import PCA\n",
"from sklearn import metrics\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Read poll data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Timestamp | \n",
" Q1_General background in data analysis? | \n",
" Q2_Hands-on experience in data analysis using Python? | \n",
" Q3_Experience in programming in general? | \n",
" Q4_General background in machine learning? | \n",
" Q5_Hands-on experience in running machine learning applications? | \n",
" Q6_Which one would you prefer on a Sunday afternoon? | \n",
" Q7_Hands-on experience in image analysis using satellite images? | \n",
" Q8_Level of interest in mathematics? | \n",
" Q9_Level of interest in reading? | \n",
" Q10_Level of stress about this class? | \n",
" Q11_Your overall motivation about this class? | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2020/01/14 5:11:10 PM EST | \n",
" 8 | \n",
" 5 | \n",
" 4 | \n",
" 6 | \n",
" 7 | \n",
" Running | \n",
" 5 | \n",
" 3 | \n",
" 5 | \n",
" 7 | \n",
" 3 | \n",
"
\n",
" \n",
" 1 | \n",
" 2020/01/14 5:15:45 PM EST | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 5 | \n",
" 6 | \n",
" Reading | \n",
" 7 | \n",
" 7 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
"
\n",
" \n",
" 2 | \n",
" 2020/01/14 10:10:14 PM EST | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" Watching a movie | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
"
\n",
" \n",
" 3 | \n",
" 2020/01/15 10:02:48 AM EST | \n",
" 5 | \n",
" 3 | \n",
" 6 | \n",
" 4 | \n",
" 4 | \n",
" Watching a movie | \n",
" 3 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 10 | \n",
"
\n",
" \n",
" 4 | \n",
" 2020/01/15 10:03:20 AM EST | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 3 | \n",
" Reading | \n",
" 4 | \n",
" 5 | \n",
" 4 | \n",
" 10 | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Timestamp Q1_General background in data analysis? \\\n",
"0 2020/01/14 5:11:10 PM EST 8 \n",
"1 2020/01/14 5:15:45 PM EST 8 \n",
"2 2020/01/14 10:10:14 PM EST 6 \n",
"3 2020/01/15 10:02:48 AM EST 5 \n",
"4 2020/01/15 10:03:20 AM EST 6 \n",
"\n",
" Q2_Hands-on experience in data analysis using Python? \\\n",
"0 5 \n",
"1 8 \n",
"2 6 \n",
"3 3 \n",
"4 6 \n",
"\n",
" Q3_Experience in programming in general? \\\n",
"0 4 \n",
"1 5 \n",
"2 6 \n",
"3 6 \n",
"4 5 \n",
"\n",
" Q4_General background in machine learning? \\\n",
"0 6 \n",
"1 5 \n",
"2 6 \n",
"3 4 \n",
"4 4 \n",
"\n",
" Q5_Hands-on experience in running machine learning applications? \\\n",
"0 7 \n",
"1 6 \n",
"2 5 \n",
"3 4 \n",
"4 3 \n",
"\n",
" Q6_Which one would you prefer on a Sunday afternoon? \\\n",
"0 Running \n",
"1 Reading \n",
"2 Watching a movie \n",
"3 Watching a movie \n",
"4 Reading \n",
"\n",
" Q7_Hands-on experience in image analysis using satellite images? \\\n",
"0 5 \n",
"1 7 \n",
"2 7 \n",
"3 3 \n",
"4 4 \n",
"\n",
" Q8_Level of interest in mathematics? Q9_Level of interest in reading? \\\n",
"0 3 5 \n",
"1 7 6 \n",
"2 7 7 \n",
"3 8 8 \n",
"4 5 4 \n",
"\n",
" Q10_Level of stress about this class? \\\n",
"0 7 \n",
"1 7 \n",
"2 7 \n",
"3 5 \n",
"4 10 \n",
"\n",
" Q11_Your overall motivation about this class? \n",
"0 3 \n",
"1 8 \n",
"2 7 \n",
"3 10 \n",
"4 8 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw welcome-poll responses and preview the first rows\n",
"dfInit = pd.read_csv('./Data/MUSA-650WelcomePoll.csv')\n",
"dfInit.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate relative timestamp"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Timestamp | \n",
" tsRel | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2020-01-14 17:11:10 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2020-01-14 17:15:45 | \n",
" 275.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2020-01-14 22:10:14 | \n",
" 17944.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 2020-01-15 10:02:48 | \n",
" 60698.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 2020-01-15 10:03:20 | \n",
" 60730.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 2020-01-15 10:03:43 | \n",
" 60753.0 | \n",
"
\n",
" \n",
" 6 | \n",
" 2020-01-15 10:03:50 | \n",
" 60760.0 | \n",
"
\n",
" \n",
" 7 | \n",
" 2020-01-15 10:03:53 | \n",
" 60763.0 | \n",
"
\n",
" \n",
" 8 | \n",
" 2020-01-15 10:03:59 | \n",
" 60769.0 | \n",
"
\n",
" \n",
" 9 | \n",
" 2020-01-15 10:04:03 | \n",
" 60773.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Timestamp tsRel\n",
"0 2020-01-14 17:11:10 0.0\n",
"1 2020-01-14 17:15:45 275.0\n",
"2 2020-01-14 22:10:14 17944.0\n",
"3 2020-01-15 10:02:48 60698.0\n",
"4 2020-01-15 10:03:20 60730.0\n",
"5 2020-01-15 10:03:43 60753.0\n",
"6 2020-01-15 10:03:50 60760.0\n",
"7 2020-01-15 10:03:53 60763.0\n",
"8 2020-01-15 10:03:59 60769.0\n",
"9 2020-01-15 10:04:03 60773.0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the poll timestamps (12-hour clock with a literal 'EST' suffix), then express\n",
"# each response time as seconds elapsed since the earliest response\n",
"dfInit['Timestamp'] = pd.to_datetime(dfInit['Timestamp'], format='%Y/%m/%d %I:%M:%S %p EST')\n",
"dfInit['tsRel'] = dfInit['Timestamp'].sub(dfInit['Timestamp'].min()).dt.total_seconds()\n",
"dfInit.loc[:, ['Timestamp', 'tsRel']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Column names"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Q1_General background in data analysis?',\n",
" 'Q2_Hands-on experience in data analysis using Python?',\n",
" 'Q3_Experience in programming in general?',\n",
" 'Q4_General background in machine learning?',\n",
" 'Q5_Hands-on experience in running machine learning applications?',\n",
" 'Q6_Which one would you prefer on a Sunday afternoon?',\n",
" 'Q7_Hands-on experience in image analysis using satellite images?',\n",
" 'Q8_Level of interest in mathematics?',\n",
" 'Q9_Level of interest in reading?',\n",
" 'Q10_Level of stress about this class?',\n",
" 'Q11_Your overall motivation about this class?',\n",
" 'tsRel']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep every column except the first one (Timestamp) and remember the full question texts\n",
"df = dfInit.drop(columns=dfInit.columns[0])\n",
"initCol = list(df.columns)\n",
"initCol"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q6 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 8 | \n",
" 5 | \n",
" 4 | \n",
" 6 | \n",
" 7 | \n",
" Running | \n",
" 5 | \n",
" 3 | \n",
" 5 | \n",
" 7 | \n",
" 3 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 5 | \n",
" 6 | \n",
" Reading | \n",
" 7 | \n",
" 7 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 275.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" Watching a movie | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 17944.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 5 | \n",
" 3 | \n",
" 6 | \n",
" 4 | \n",
" 4 | \n",
" Watching a movie | \n",
" 3 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 10 | \n",
" 60698.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 3 | \n",
" Reading | \n",
" 4 | \n",
" 5 | \n",
" 4 | \n",
" 10 | \n",
" 8 | \n",
" 60730.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 tsRel\n",
"0 8 5 4 6 7 Running 5 3 5 7 3 0.0\n",
"1 8 8 5 5 6 Reading 7 7 6 7 8 275.0\n",
"2 6 6 6 6 5 Watching a movie 7 7 7 7 7 17944.0\n",
"3 5 3 6 4 4 Watching a movie 3 8 8 5 10 60698.0\n",
"4 6 6 5 4 3 Reading 4 5 4 10 8 60730.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Shorten 'Q1_General background ...' to 'Q1': split once on the first underscore and keep\n",
"# the prefix. `n` is passed by keyword because pandas >= 2.0 rejects it as a positional arg.\n",
"df.columns = df.columns.str.split('_', n=1).str[0].tolist()\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Visualize correlations"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#sns.pairplot(df, kind = 'reg')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
"
\n",
" \n",
" \n",
" \n",
" Q1 | \n",
" 1.000000 | \n",
" 0.766725 | \n",
" 0.713877 | \n",
" 0.624063 | \n",
" 0.738857 | \n",
" 0.676150 | \n",
" 0.405270 | \n",
" -0.244388 | \n",
" -0.441883 | \n",
" 0.042206 | \n",
" 0.313668 | \n",
"
\n",
" \n",
" Q2 | \n",
" 0.766725 | \n",
" 1.000000 | \n",
" 0.639008 | \n",
" 0.481836 | \n",
" 0.568244 | \n",
" 0.692275 | \n",
" 0.399874 | \n",
" -0.321784 | \n",
" -0.329991 | \n",
" 0.099348 | \n",
" 0.383485 | \n",
"
\n",
" \n",
" Q3 | \n",
" 0.713877 | \n",
" 0.639008 | \n",
" 1.000000 | \n",
" 0.564825 | \n",
" 0.544057 | \n",
" 0.616467 | \n",
" 0.596732 | \n",
" -0.083087 | \n",
" -0.631657 | \n",
" 0.400580 | \n",
" 0.457261 | \n",
"
\n",
" \n",
" Q4 | \n",
" 0.624063 | \n",
" 0.481836 | \n",
" 0.564825 | \n",
" 1.000000 | \n",
" 0.945541 | \n",
" 0.450752 | \n",
" 0.426714 | \n",
" -0.461877 | \n",
" -0.168868 | \n",
" 0.036739 | \n",
" 0.440323 | \n",
"
\n",
" \n",
" Q5 | \n",
" 0.738857 | \n",
" 0.568244 | \n",
" 0.544057 | \n",
" 0.945541 | \n",
" 1.000000 | \n",
" 0.444940 | \n",
" 0.467669 | \n",
" -0.463201 | \n",
" -0.263556 | \n",
" -0.023930 | \n",
" 0.407507 | \n",
"
\n",
" \n",
" Q7 | \n",
" 0.676150 | \n",
" 0.692275 | \n",
" 0.616467 | \n",
" 0.450752 | \n",
" 0.444940 | \n",
" 1.000000 | \n",
" 0.184545 | \n",
" -0.198770 | \n",
" -0.284418 | \n",
" 0.045276 | \n",
" 0.152171 | \n",
"
\n",
" \n",
" Q8 | \n",
" 0.405270 | \n",
" 0.399874 | \n",
" 0.596732 | \n",
" 0.426714 | \n",
" 0.467669 | \n",
" 0.184545 | \n",
" 1.000000 | \n",
" -0.126656 | \n",
" -0.205698 | \n",
" 0.501068 | \n",
" 0.036620 | \n",
"
\n",
" \n",
" Q9 | \n",
" -0.244388 | \n",
" -0.321784 | \n",
" -0.083087 | \n",
" -0.461877 | \n",
" -0.463201 | \n",
" -0.198770 | \n",
" -0.126656 | \n",
" 1.000000 | \n",
" -0.217410 | \n",
" 0.228420 | \n",
" -0.193531 | \n",
"
\n",
" \n",
" Q10 | \n",
" -0.441883 | \n",
" -0.329991 | \n",
" -0.631657 | \n",
" -0.168868 | \n",
" -0.263556 | \n",
" -0.284418 | \n",
" -0.205698 | \n",
" -0.217410 | \n",
" 1.000000 | \n",
" -0.133846 | \n",
" -0.311850 | \n",
"
\n",
" \n",
" Q11 | \n",
" 0.042206 | \n",
" 0.099348 | \n",
" 0.400580 | \n",
" 0.036739 | \n",
" -0.023930 | \n",
" 0.045276 | \n",
" 0.501068 | \n",
" 0.228420 | \n",
" -0.133846 | \n",
" 1.000000 | \n",
" 0.430875 | \n",
"
\n",
" \n",
" tsRel | \n",
" 0.313668 | \n",
" 0.383485 | \n",
" 0.457261 | \n",
" 0.440323 | \n",
" 0.407507 | \n",
" 0.152171 | \n",
" 0.036620 | \n",
" -0.193531 | \n",
" -0.311850 | \n",
" 0.430875 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 \\\n",
"Q1 1.000000 0.766725 0.713877 0.624063 0.738857 0.676150 0.405270 \n",
"Q2 0.766725 1.000000 0.639008 0.481836 0.568244 0.692275 0.399874 \n",
"Q3 0.713877 0.639008 1.000000 0.564825 0.544057 0.616467 0.596732 \n",
"Q4 0.624063 0.481836 0.564825 1.000000 0.945541 0.450752 0.426714 \n",
"Q5 0.738857 0.568244 0.544057 0.945541 1.000000 0.444940 0.467669 \n",
"Q7 0.676150 0.692275 0.616467 0.450752 0.444940 1.000000 0.184545 \n",
"Q8 0.405270 0.399874 0.596732 0.426714 0.467669 0.184545 1.000000 \n",
"Q9 -0.244388 -0.321784 -0.083087 -0.461877 -0.463201 -0.198770 -0.126656 \n",
"Q10 -0.441883 -0.329991 -0.631657 -0.168868 -0.263556 -0.284418 -0.205698 \n",
"Q11 0.042206 0.099348 0.400580 0.036739 -0.023930 0.045276 0.501068 \n",
"tsRel 0.313668 0.383485 0.457261 0.440323 0.407507 0.152171 0.036620 \n",
"\n",
" Q9 Q10 Q11 tsRel \n",
"Q1 -0.244388 -0.441883 0.042206 0.313668 \n",
"Q2 -0.321784 -0.329991 0.099348 0.383485 \n",
"Q3 -0.083087 -0.631657 0.400580 0.457261 \n",
"Q4 -0.461877 -0.168868 0.036739 0.440323 \n",
"Q5 -0.463201 -0.263556 -0.023930 0.407507 \n",
"Q7 -0.198770 -0.284418 0.045276 0.152171 \n",
"Q8 -0.126656 -0.205698 0.501068 0.036620 \n",
"Q9 1.000000 -0.217410 0.228420 -0.193531 \n",
"Q10 -0.217410 1.000000 -0.133846 -0.311850 \n",
"Q11 0.228420 -0.133846 1.000000 0.430875 \n",
"tsRel -0.193531 -0.311850 0.430875 1.000000 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Correlate numeric columns only: Q6 holds strings, and pandas >= 2.0 raises on it\n",
"# instead of silently dropping it as older versions did\n",
"df.select_dtypes(include='number').corr()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Restrict to numeric columns so the string-valued Q6 does not break corr() on pandas >= 2.0\n",
"corr = df.select_dtypes(include='number').corr()\n",
"sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Q1_General background in data analysis?',\n",
" 'Q2_Hands-on experience in data analysis using Python?',\n",
" 'Q3_Experience in programming in general?',\n",
" 'Q4_General background in machine learning?',\n",
" 'Q5_Hands-on experience in running machine learning applications?',\n",
" 'Q6_Which one would you prefer on a Sunday afternoon?',\n",
" 'Q7_Hands-on experience in image analysis using satellite images?',\n",
" 'Q8_Level of interest in mathematics?',\n",
" 'Q9_Level of interest in reading?',\n",
" 'Q10_Level of stress about this class?',\n",
" 'Q11_Your overall motivation about this class?',\n",
" 'tsRel']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"initCol"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Handling categorical variables (visualization)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAax0lEQVR4nO3deZRcdZ338feXJBC2KEIrS5BFNkUFpQOCEDZxEGZQEVGPjvKMMwwu4/LI4zDnzFHn0Rn3GRdkNG6gsqiA+rixiyiL0kCAIGGZEPalQ8gesn6fP+omdJLuNEuqvknX+3VOn7731q/u/VbdW5/69a9v3YrMRJLUeRtVFyBJ3coAlqQiBrAkFTGAJamIASxJRUZXF/B0HH300XnRRRdVlyFJz1YMtnCD6AHPmDGjugRJWuc2iACWpJHIAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVKRtARwR34uIxyJiyoBlL4iISyPirub3Vu3afpWFi5cxY96i6jKkDdrjCx9n4dKFg942f8l8nnjyiQ5X1B7t7AGfCRy92rLTgMszc3fg8mZ+xPjeH+9hv89cSu9nLuM93/sz8xYtrS5J2qDMXzKfUy49hcN+chiH/vhQzr797FVu/8bkbzDxvIkc+uNDOfX3p7Jk2ZKiSteNtgVwZl4FzFxt8RuBs5rps4A3tWv7nXb/zAV85td/YcHiZQD8/s5+vvOHacVVSRuWs247i6sfuhqAhUsX8oXrv8BD8x4C4LYZt/HNm7/J4uWLSZKLp1/Mz+7+WWW5z1mnx4BflJkPAzS/XzhUw4g4OSL6IqKvv7+/YwU+W3f3z2P5al+vd+ejc2uKkTZQd8+6e5X55bmcabNbHZm7Zt21Rvu7nlhz2YZkvf0nXGZOyszezOzt6empLmdY++20FVtusurVPQ/bY8j3F0mDOHiHg1eZ33LMluzbsy8AB2x7AGM2GrPK7YeMP6RjtbVDp68H/GhEbJeZD0fEdsBjHd5+24wbO4Yz/25/vnzJHcyYt4i3vHo8J07YsbosaYPy5t3ezKxFs/jl//ySrcduzQdf9UG22HgLALbbYjtOP+J0vnnLN5m/ZD5v2/NtTBw/sbji5yba+bX0EbEz8KvMfHkz/0Xg8cz8XEScBrwgMz8+3Hp6e3uzr6+vbXVKUpt19oLsEXEucC2wZ0Q8EBHvBT4HHBURdwFHNfOS1JXaNgSRme8Y4qYj27VNSdqQrLf/hJOkkc4AlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKmIAS1IRA1iSihjAklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSkZIAjogPR8SUiLgtIj5SUYO6RP+dMPeR6io0iOW5nDufuJPZi2ZXl1JmdKc3GBEvB/4B2B9YDFwUEb/OzLs6XYtGsCdnw9knwv3XQWwEB34AXv+Z6qrUeHDeg5xy6SlMnzOdjTfamFMnnMo79npHdVkdV9EDfilwXWYuyMylwO+BNxfUoZHsz5Na4QuQy+Gar8PDt9TWpJXOmHwG0+dMB2Dx8sV86fovdWVPuCKApwATI2LriNgMOAbYcfVGEXFyRPRFRF9/f3/Hi9QGbub0NZc9cU/Hy9DgHpj7wCrzi5cv5pH53TdU1PEAzszbgc8DlwIXATcDSwdpNykzezOzt6enp8NVaoP3suNWnR/7PNjl0JpatIYjX3zkKvM7j9uZ3bfavaiaOh0fAwbIzO8C3wWIiP8AHlj7PaRnaI+/guO/DTf+ADZ9Pkz8P63fWi/87cv+FoDL7ruMHbfckffv+342iu47KSsys/MbjXhhZj4WES8GLgEOzMwnhmrf29ubfX19nStQktatGGxhSQ8YuCAitgaWAB
9YW/hK0khVNQRxSMV2JWl90n2DLpK0njCAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKmIAS1IRA1iSihjAklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUpCSAI+KjEXFbREyJiHMjYmxFHRWWLFvOlXc8xoU3PsAjs5+sLmfkW7wApv0eZj/Ymp8/A6ZdCY9NbS1fPL+0PK1qeS7nqvuv4oI7L2D2otnV5bRdZGZnNxixA/BH4GWZuTAifgL8JjPPHOo+vb292dfX16kS2+b+mQs4/oxr6J+3CIAA/u2Ne/PuA3curWvEevhm+OGbYcHjEKNgn7fBrRfAskVPtdl0K3jnBTB+v7o6BcCsJ2dx4q9O5OH5DwMwKkbxtSO+xsTxE4srWydisIVVQxCjgU0jYjSwGfBQUR0ddcaVd68MX4AEPvubqSxYvLSuqJHsis+0whcgl8Hkc1YNX4CFT8AV/7fztWkN595x7srwBViWy/j0tZ8urKj9Oh7Amfkg8CXgPuBhYHZmXrJ6u4g4OSL6IqKvv7+/02W2xaNzFq2xbOGSZcx70gBui7kPD98GYM7TbKe26l+w5ut85pMzCyrpnI4HcERsBbwR2AXYHtg8It61ervMnJSZvZnZ29PT0+ky2+JNr9phjWUH7ro1LxzXNUPgnfWKE1ed32ybwdu98sTBl6ujjtnlmDWWHbvLsQWVdM7ogm2+DrgnM/sBIuJC4CDgRwW1dNRx+2zPRrSGIuY+uZTD93ohH3v9ntVljVwH/ROMHQd3Xgzb7AH7/yNMPhvuuxaWLYExm8Ier4fe91ZXKqB3215OP+J0vnLjV5i/ZD5H7XQUH3n1R6rLaquKf8IdAHwPmAAsBM4E+jLz60PdZ6T8E05S11o//gmXmX8CzgduBG5tapjU6TokqVrFEASZ+UngkxXblqT1hZ+Ek6QiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKmIAS1IRA1iSihjAklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkop0PIAjYs+ImDzgZ05EfKTTdaiLzeuHKRfAY7dXV9J1ps6cysXTL2bWk7NWLluybAlXPXAV59x+DhfdcxFzF88ddj19j/Rx+X2Xs3DpwnaW23ajO73BzLwD2BcgIkYBDwI/63Qd6lL3/AHOfiuseOEe+Qk45GO1NXWJr974Vb5z63cA2HzM5kw6ahI7jduJd//23UybPW1lu3Ebj+Oso89it612G3Q9H7riQ/zu/t8BsO3m2/LDN/yQbTfftv0PoA2qhyCOBP4nM+8trkPd4srPPhW+AL//IiyaV1dPl5j55EzOnHLmyvn5S+bzrVu+xc/v/vkq4QswZ/Ecvjvlu4Ou58ZHb1wZvgCPzH+Ec6ae05aaO6E6gN8OnDvYDRFxckT0RURff39/h8vSiLVw1qrzS59s/aitFixZwNJcusqyOYvmMGfxnEHbD7V8sOGJOYsGb7shKAvgiNgYOA746WC3Z+akzOzNzN6enp7OFqeRa7/3rDq/17Gw+TY1tXSR8VuO54DtDlhl2Vv2eAvH7nosY0eNXaP98bsfP+h6XrP9a9hhix1Wzo+O0bxptzet22I7KDKzZsMRbwQ+kJmvH65tb29v9vX1daAqdYXbfg53XQovfClMeC+M2bS6oq6wYMkCzrvjPKbPns4RLz6Cw3Y8DIA7Zt7BuVPPZdqsabxo8xdx/O7Hc+D2Bw65nv4F/Zw79VxmL5rNcbsdxz49+3ToETwnMejCwgA+D7g4M78/XFsDWNIGbtAALhmCiIjNgKOACyu2L0nrg46fhgaQmQuArSu2LU
nri+qzICSpaxnAklTEAJakIgawJBUxgCWpiAEsSUUMYEkq8qwDOCI+sS4LkaRu81x6wH+/zqqQpC601k/CRcRQ13kLwCuYSNJzMNxHkWcBEzLz0dVviIj721OSJHWH4YYgfgDsNMRtG+5l6CVpPbDWHnBm/utabvvndV+OJHWP53IWxF7rshBJ6jbP5SyIS9ZZFZLUhYY7C+JrQ90EPH/dlyNJ3WO4syD+F/AxYNEgt71j3ZcjSd1juAC+HpiSmdesfkNEfKotFUlSlxgugE8AFjbf4bZbs+yOzFyUmbu0tzRJGtmG+yfcXOCzwAPA94GzgGkRcRpARLyqveVJ0sg1XA/4y8BmwE6ZORcgIsYBX4qI/waOBuwJS9KzMFwAHwPsnpm5YkFmzomI9wEzgDe0szhJGsmGG4JYPjB8V8jMZUB/Zl7XnrIkaeQbLoD/EhHvXn1hRLwLuL09JUlSdxhuCOIDwIUR8XfADUACE2hdivLNba5Nkka04S7G8yBwQEQcAexN6xNwv83MyztRnCSNZMP1gAHIzCuAK9pciyR1Fb+UU5KKGMCSVMQAlqQiBrAkFSkJ4Ih4fkScHxFTI+L2iDiwog5JNe6dcy/nTT2P6x+5vrqUUk/rLIg2+CpwUWaeEBEb07rehKQucNUDV/HhKz7M0lwKwHte9h5OnXBqcVU1Ot4Dbi7mMxH4LkBmLs7MWZ2uQ1KNb9/y7ZXhC3D21LOZu3huYUV1KoYgdgX6ge9HxE0R8Z2I2Hz1RhFxckT0RURff39/56uU1BaLly9eZX55Lmfp8qVDtB7ZKgJ4NPBq4L8z81XAfOC01Rtl5qTM7M3M3p6enk7XKKlN3vnSd64yf8wux7DV2K2KqqlVMQb8APBAZv6pmT+fQQJY0sh03EuOY7vNt+MPD/yBlzz/JRy767HVJZXpeABn5iMRcX9E7JmZdwBHAn/pdB2S6kzYdgITtp1QXUa5qrMg/gk4uzkDYhqtb1+WpK5SEsCZORnordi2JK0v/CScJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKmIAS1IRA1iSihjAklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVGR0xUYjYjowF1gGLM3M3oo6JKlSSQA3Ds/MGYXbl6RSDkFIUpGqAE7gkoi4ISJOHqxBRJwcEX0R0dff39/h8iSp/aoC+LWZ+WrgDcAHImLi6g0yc1Jm9mZmb09PT+crlKQ2KwngzHyo+f0Y8DNg/4o6JKlSxwM4IjaPiC1XTAOvB6Z0ug5JqlZxFsSLgJ9FxIrtn5OZFxXUIUmlOh7AmTkN2KfT25Wk9Y2noUlSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKmIAS1IRA1iSihjAklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFygI4IkZFxE0R8auqGiSpUmUP+MPA7YXbl6RSJQEcEeOBY4HvVGxfktYHVT3grwAfB5YP1SAiTo6Ivojo6+/v71xlktQhHQ/giPhr4LHMvGFt7TJzUmb2ZmZvT09Ph6qTpM6p6AG/FjguIqYD5wFHRMSPCuqQpFIdD+DM/JfMHJ+ZOwNvB67IzHd1ug5JquZ5wJJUZHTlxjPzSuDKyhokqYo9YEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJamIASxJRQxgSSpiAEtSEQNYkooYwJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKmIAS1IRA1iSihjAklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKGMCSVMQAlqQiBrAkFTGAJanI6E
5vMCLGAlcBmzTbPz8zP9npOqrcfP8szvnTfWwyZiNOOmhndu3ZorokSUU6HsDAIuCIzJwXEWOAP0bEbzPzuoJaOurOR+fy1m9dy+KlywH4xeSHuOJjh7L1FpsUVyapQseHILJlXjM7pvnJTtdR4ReTH1wZvgCzFy7h0r88WliRpEolY8ARMSoiJgOPAZdm5p8GaXNyRPRFRF9/f3/ni2yDF2y+Zk/X3q/UvUoCODOXZea+wHhg/4h4+SBtJmVmb2b29vT0dL7INnhr73j22nbLlfMH77YNh+85Mh6bpGeuYgx4pcycFRFXAkcDUypr6YRxY8fw6w8dwnXTHmeT0RvRu/MLqkuSVKjiLIgeYEkTvpsCrwM+3+k6qozaKHjtbttUlyFpPVDRA94OOCsiRtEaAvlJZv6qoA5JKtXxAM7MW4BXdXq7krS+8ZNwklTEAJakIgawJBUxgCWpiAEsSUUMYEkqYgBLUhEDWJKKROb6fyXIiOgH7q2uYx3ZBphRXYRW4T5ZP42k/TIjM49efeEGEcAjSUT0ZWZvdR16ivtk/dQN+8UhCEkqYgBLUhEDuPMmVRegNbhP1k8jfr84BixJRewBS1IRA1iSihjAw4iIZRExOSKmRMQvI+L5bdjGKRHx7nW93pGsXfslIqZHxDbN9DXrYp3rg4j4r4j4yID5iyPiOwPmvxwR/3st9z8pIrYfZhsnRcTpQ9z2m3a8djopIo6LiNPW5ToN4OEtzMx9M/PlwEzgA+t6A5n5zcz8wbpe7wjXif1y0LpeZ6FrgIMAImIjWh9y2HvA7QcBV6/l/icBaw3gtcnMYzJz1rO9//ogM/9fZn5uXa7TAH5mrgV2AIiIKyOit5neJiKmN9MnRcSFEXFRRNwVEV9YceeImBcR/x4RN0fEdRHxomb5pyLi1AHr/XxE/Dki7oyIQ5rlm0XETyLiloj4cUT8acX2tcp+eUnz3N8QEX+IiL2a5X/TPGc3RcRlA577rSPikmb5t4BYsdKImNf8PqzZL+dHxNSIODsiorntmGbZHyPiaxGxvn6/4dU0AUwreKcAcyNiq4jYBHgpcFNEfCIirm/+spgULScAvcDZzV8dm0bEhIi4pjmW/xwRWzbr3n6IY3968zrZOSJuj4hvR8RtzXO/adNmQnN8XxsRX4yINb4pPSK2iIjLI+LGiLg1It442INtXmufb46DyyJi/2YfTouI45o2YyPi+816boqIw5vlf4qIvQes68qI2G9gDz8ieiLigua5uj4iXvus9kpm+rOWH2Be83sU8FPg6Gb+SqC3md4GmN5MnwRMA54HjKX1Eeodm9sS+Jtm+gvAvzbTnwJOHbDeLzfTxwCXNdOnAt9qpl8OLF2x/W78Wct+uRzYvZk+ALiimd6Kp876+fsBz/HXgE8008c2+2ib1bZxGDAbGE+r03ItcHCzf+8HdmnanQv8qvq5WctzNh14MfCPwCnAp5tj7LXAVU2bFwxo/8MBx+vA433j5hif0MyPo/X9kms79qc3r5Odm2N332b5T4B3NdNTgIOa6c8BUwZ5DKOBcc30NsDdK/brau0SeEMz/TPgEmAMsA8wuVn+MeD7zfRewH1N3R8F/q1Zvh1wZzN9EnB6M30OcHAz/WLg9mezTyq+FXlDs2lETKZ14NwAXPo07nN5Zs4GiIi/ADvReqEuBlb0kG4Ajhri/hcOaLNzM30w8FWAzJwSEbc8o0cx8qyxXyJiC1q9vJ82HVSATZrf44EfR8R2tALknmb5ROB4gMz8dUQ8McT2/pyZDwAM2O48YFpmrljXucDJ6+TRtceKXvBBwH/S+qvhIFpvLivGuw+PiI8DmwEvAG4DfrnaevYEHs7M6wEycw5A85wPdewPdE9mTm6mbwB2jtb48JaZuaKOc4C/HuQxBPAfETERWN48hhcBj6zWbjFwUTN9K7AoM5dExK2s+pr6evMYpkbEvcAetN4ULgU+CZxI6w1+da8DXjbgOBsXEVtm5txB2g
7JIYjhLczMfWkdSBvz1FjjUp56/saudp9FA6aX8dS3Ty/J5i1zteWrWzRImxiibbcabL9sBMzK1tjwip+XNu2/Tqv38gpaPcCB++zpnAw/2D7d0PbJinHgV9DqbV4HHNgsuzoixgJnACc0z9O3WfPYhtbjHuo5G+rYH67N030u3wn0APs1+//RIWoc+FpbvmKbmbmcYV5Tmfkg8HhEvBJ4G3DeIM02Ag4ccJzt8EzDd8VK9DQ07+ofAk6NiDG0/qTar7n5hA6U8Eda78ZExMtovYi63sD9AiwE7omItwI045f7NE2fBzzYTL9nwCquovWiJiLeQGuo4umaCuwaETs38297Fg+hk66m1aucmZnLMnMm8HxaIXwtTwXZjOaviYHH9VxgxTjvVFpjvRMAImLLiHhOf01n5hO0xqRf0yx6+xBNnwc81vRmD6f1BvxsDdz3e9AaSrijue084OPA8zLz1kHuewnwwRUzEbHvsynAAH4GMvMm4GZaB8eXgPdF61SlbTqw+TOAnmbo4Z+BW2j96dj1Vtsv7wTeGxE30/rzecU/aT5Fa2jiD6x6icN/AyZGxI3A62mNAz7d7S4E3g9cFBF/pNUbW5/3ya20jtXrVls2OzNnZOsshW83y34OXD+g3ZnAN5vhl1G03my+3jzPlzJ4L/SZei8wKSKupdU7Hey5PBvojYg+Wvt66nPY3hnAqGZY4sfASZm5ond+Pq3j6SdD3PdDTR23NEMtpzybAvwo8gYiIkYBYzLzyYh4Ca1/Nu2RmYuLS+tqEbFFZs5rzor4BnBXZv5XdV0bohXPZTN9GrBdZn64uKy28p9wG47NgN81wx8BvM/wXS/8Q0S8h9Y49E3At4rr2ZAdGxH/QiuX7qV11sGIZg9Ykoo4BixJRQxgSSpiAEtSEQNYXSUixkfEL5prFUyLiNOjdS0EIuKVzXUIbmuuD7AuTq2ShmQAq2s0p4pdCPw8M3cHdgc2Bb7QfJDgR8Apmbk3res/LKmqVd3B09DUTY4AnszM7wNk5rKI+CitU54uB27JzJub2x6vK1Pdwh6wusnetC7+slJzIZnpwK5ARutC5Tc2F6SR2soesLrJUBeRCVqvhYOBCcAC4PKIuCEzL+9gfeoy9oDVTW6jdWHxlSJiHK3LGT4I/L65JsIC4DfAqztforqJAaxucjmwWTTfv9dcX+PLwOm0rh37ymh988ho4FDgL2WVqisYwOoazfVh3wycEBF3AY8DyzPz35vLIf4nrSuATQZuzMxf11WrbuC1INS1IuIgWt9icXxm3jBce2ldM4AlqYhDEJJUxACWpCIGsCQVMYAlqYgBLElFDGBJKvL/ASv+BnCnaj02AAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Motivation score (Q11) for each Sunday-afternoon preference (Q6)\n",
"sns.catplot(data=df, x='Q6', y='Q11');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Handling categorical variables (Data analysis)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# One-hot encode the categorical answer Q6. dtype=int pins the dummies to 0/1 integers;\n",
"# without it the dtype depends on the pandas version (uint8 before 2.0, bool from 2.0 on).\n",
"df2 = pd.get_dummies(df, columns=['Q6'], dtype=int)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 8 | \n",
" 5 | \n",
" 4 | \n",
" 6 | \n",
" 7 | \n",
" 5 | \n",
" 3 | \n",
" 5 | \n",
" 7 | \n",
" 3 | \n",
" 0.0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 7 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 275.0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 17944.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 5 | \n",
" 3 | \n",
" 6 | \n",
" 4 | \n",
" 4 | \n",
" 3 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 10 | \n",
" 60698.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 4 | \n",
" 10 | \n",
" 8 | \n",
" 60730.0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" 8 | \n",
" 7 | \n",
" 8 | \n",
" 3 | \n",
" 3 | \n",
" 8 | \n",
" 4 | \n",
" 10 | \n",
" 2 | \n",
" 8 | \n",
" 60753.0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 6 | \n",
" 4 | \n",
" 3 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 10 | \n",
" 8 | \n",
" 8 | \n",
" 60760.0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 7 | \n",
" 7 | \n",
" 3 | \n",
" 7 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 6 | \n",
" 8 | \n",
" 6 | \n",
" 9 | \n",
" 60763.0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 5 | \n",
" 5 | \n",
" 7 | \n",
" 60769.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 4 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 6 | \n",
" 60773.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 10 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 5 | \n",
" 3 | \n",
" 5 | \n",
" 2 | \n",
" 7 | \n",
" 8 | \n",
" 7 | \n",
" 60783.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 11 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 2 | \n",
" 2 | \n",
" 7 | \n",
" 6 | \n",
" 7 | \n",
" 5 | \n",
" 8 | \n",
" 60790.0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 12 | \n",
" 8 | \n",
" 8 | \n",
" 8 | \n",
" 6 | \n",
" 6 | \n",
" 8 | \n",
" 7 | \n",
" 8 | \n",
" 6 | \n",
" 8 | \n",
" 60800.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 13 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 5 | \n",
" 7 | \n",
" 7 | \n",
" 8 | \n",
" 60801.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 14 | \n",
" 8 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 10 | \n",
" 5 | \n",
" 6 | \n",
" 10 | \n",
" 60812.0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 15 | \n",
" 7 | \n",
" 7 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 7 | \n",
" 60817.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 16 | \n",
" 7 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 5 | \n",
" 1 | \n",
" 7 | \n",
" 7 | \n",
" 4 | \n",
" 7 | \n",
" 60823.0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 17 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 5 | \n",
" 2 | \n",
" 9 | \n",
" 9 | \n",
" 7 | \n",
" 9 | \n",
" 60939.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 18 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 7 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 10 | \n",
" 443956.0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 Q11 tsRel Q6_Reading \\\n",
"0 8 5 4 6 7 5 3 5 7 3 0.0 0 \n",
"1 8 8 5 5 6 7 7 6 7 8 275.0 1 \n",
"2 6 6 6 6 5 7 7 7 7 7 17944.0 0 \n",
"3 5 3 6 4 4 3 8 8 5 10 60698.0 0 \n",
"4 6 6 5 4 3 4 5 4 10 8 60730.0 1 \n",
"5 8 7 8 3 3 8 4 10 2 8 60753.0 0 \n",
"6 4 3 1 1 1 1 1 10 8 8 60760.0 1 \n",
"7 7 3 7 6 5 4 6 8 6 9 60763.0 1 \n",
"8 5 5 5 4 4 4 4 5 5 7 60769.0 0 \n",
"9 6 6 6 6 6 4 6 6 5 6 60773.0 0 \n",
"10 4 4 4 5 3 5 2 7 8 7 60783.0 0 \n",
"11 7 7 7 2 2 7 6 7 5 8 60790.0 0 \n",
"12 8 8 8 6 6 8 7 8 6 8 60800.0 0 \n",
"13 4 4 4 1 1 1 5 7 7 8 60801.0 0 \n",
"14 8 7 7 7 7 7 10 5 6 10 60812.0 0 \n",
"15 7 7 6 6 6 6 6 6 5 7 60817.0 0 \n",
"16 7 6 6 5 5 1 7 7 4 7 60823.0 1 \n",
"17 6 6 6 5 5 2 9 9 7 9 60939.0 0 \n",
"18 9 9 9 9 9 7 6 5 4 10 443956.0 0 \n",
"\n",
" Q6_Running Q6_Watching a movie \n",
"0 1 0 \n",
"1 0 0 \n",
"2 0 1 \n",
"3 0 1 \n",
"4 0 0 \n",
"5 1 0 \n",
"6 0 0 \n",
"7 0 0 \n",
"8 0 1 \n",
"9 0 1 \n",
"10 0 1 \n",
"11 1 0 \n",
"12 0 1 \n",
"13 0 1 \n",
"14 1 0 \n",
"15 0 1 \n",
"16 0 0 \n",
"17 0 1 \n",
"18 0 1 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Work on an independent copy of the interest/stress/motivation scores plus the Q6 dummies\n",
"dfTmp = df2.loc[:, ['Q8', 'Q9', 'Q10', 'Q11', 'Q6_Reading', 'Q6_Running', 'Q6_Watching a movie']].copy()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"#sns.pairplot(dfTmp, kind = 'reg')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Correlate the one-hot encoded subset (dfTmp) rather than the original df, so the\n",
"# Q6_* dummy columns actually appear in the heatmap; df.corr() only repeated the earlier plot\n",
"corr = dfTmp.corr()\n",
"sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dealing with outliers (focusing on tsRel)\n",
"\n",
"### Visualize data"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Q1_General background in data analysis?',\n",
" 'Q2_Hands-on experience in data analysis using Python?',\n",
" 'Q3_Experience in programming in general?',\n",
" 'Q4_General background in machine learning?',\n",
" 'Q5_Hands-on experience in running machine learning applications?',\n",
" 'Q6_Which one would you prefer on a Sunday afternoon?',\n",
" 'Q7_Hands-on experience in image analysis using satellite images?',\n",
" 'Q8_Level of interest in mathematics?',\n",
" 'Q9_Level of interest in reading?',\n",
" 'Q10_Level of stress about this class?',\n",
" 'Q11_Your overall motivation about this class?',\n",
" 'tsRel']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"initCol"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Q: Is there a correlation between how soon a student answered the poll and their answers to the questions?"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tsRel 1.000000\n",
"Q3 0.457261\n",
"Q10 -0.311850\n",
"Q11 0.430875\n",
"Name: tsRel, dtype: float64\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Correlation of response time (tsRel) with programming experience, stress and motivation\n",
"dfTmp = df.loc[:, ['tsRel', 'Q3', 'Q10', 'Q11']].copy()\n",
"corr = dfTmp.corr()\n",
"sns.heatmap(data=corr, xticklabels=corr.columns, yticklabels=corr.columns)\n",
"print(corr['tsRel'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### WARNING: Outliers may lead to incorrect conclusions!"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.regplot(x='tsRel', y='Q3', data=dfTmp, color=\"g\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.distplot(dfTmp.tsRel, hist=True, rug=True, color=\"g\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What is an outlier? Let's zoom into the data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0, 100000.0)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"ax = sns.distplot(dfTmp.tsRel, hist = True, color=\"g\")\n",
"ax.set_xlim(0, 100000)\n",
"#ax.set_ylim(0, 0.008)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"60773.00000000001"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfTmp.tsRel.median()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 -60773.0\n",
"1 -60498.0\n",
"2 -42829.0\n",
"3 -75.0\n",
"4 -43.0\n",
"5 -20.0\n",
"6 -13.0\n",
"7 -10.0\n",
"8 -4.0\n",
"9 0.0\n",
"10 10.0\n",
"11 17.0\n",
"12 27.0\n",
"13 28.0\n",
"14 39.0\n",
"15 44.0\n",
"16 50.0\n",
"17 166.0\n",
"18 383183.0\n",
"Name: tsRel, dtype: float64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfTmp.tsRel - dfTmp.tsRel.median()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2020-01-14 17:11:10\n",
"1 2020-01-14 17:15:45\n",
"2 2020-01-14 22:10:14\n",
"3 2020-01-15 10:02:48\n",
"4 2020-01-15 10:03:20\n",
"5 2020-01-15 10:03:43\n",
"6 2020-01-15 10:03:50\n",
"7 2020-01-15 10:03:53\n",
"8 2020-01-15 10:03:59\n",
"9 2020-01-15 10:04:03\n",
"10 2020-01-15 10:04:13\n",
"11 2020-01-15 10:04:20\n",
"12 2020-01-15 10:04:30\n",
"13 2020-01-15 10:04:31\n",
"14 2020-01-15 10:04:42\n",
"15 2020-01-15 10:04:47\n",
"16 2020-01-15 10:04:53\n",
"17 2020-01-15 10:06:49\n",
"18 2020-01-19 20:30:26\n",
"Name: Timestamp, dtype: datetime64[ns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfInit.Timestamp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Detect outliers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What about using standard scaling (z-score transformation) + thresholding\n",
"#### Typical outlier threshold: more than +- 2 std. (z<-2 or z>2)\n",
"\n",
""
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 -0.783125\n",
"1 -0.780147\n",
"2 -0.588803\n",
"3 -0.125805\n",
"4 -0.125459\n",
"5 -0.125209\n",
"6 -0.125134\n",
"7 -0.125101\n",
"8 -0.125036\n",
"9 -0.124993\n",
"10 -0.124885\n",
"11 -0.124809\n",
"12 -0.124700\n",
"13 -0.124690\n",
"14 -0.124571\n",
"15 -0.124516\n",
"16 -0.124451\n",
"17 -0.123195\n",
"18 4.024628\n",
"Name: tsRel, dtype: float64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tsRel_z = (dfTmp.tsRel - dfTmp.tsRel.mean()) / dfTmp.tsRel.std()\n",
"tsRel_z"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Outlier detection is a serious task!\n",
"\n",
"#### SciKit methods\n",
"\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example: a more advanced outlier detection"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tsRel | \n",
" Q3 | \n",
" Q10 | \n",
" Q11 | \n",
" outScore | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 4 | \n",
" 7 | \n",
" 3 | \n",
" -971.093118 | \n",
"
\n",
" \n",
" 1 | \n",
" 275.0 | \n",
" 5 | \n",
" 7 | \n",
" 8 | \n",
" -969.221765 | \n",
"
\n",
" \n",
" 2 | \n",
" 17944.0 | \n",
" 6 | \n",
" 7 | \n",
" 7 | \n",
" -848.985675 | \n",
"
\n",
" \n",
" 3 | \n",
" 60698.0 | \n",
" 6 | \n",
" 5 | \n",
" 10 | \n",
" -2.550890 | \n",
"
\n",
" \n",
" 4 | \n",
" 60730.0 | \n",
" 5 | \n",
" 10 | \n",
" 8 | \n",
" -1.646488 | \n",
"
\n",
" \n",
" 5 | \n",
" 60753.0 | \n",
" 8 | \n",
" 2 | \n",
" 8 | \n",
" -1.071656 | \n",
"
\n",
" \n",
" 6 | \n",
" 60760.0 | \n",
" 1 | \n",
" 8 | \n",
" 8 | \n",
" -0.988349 | \n",
"
\n",
" \n",
" 7 | \n",
" 60763.0 | \n",
" 7 | \n",
" 6 | \n",
" 9 | \n",
" -0.988349 | \n",
"
\n",
" \n",
" 8 | \n",
" 60769.0 | \n",
" 5 | \n",
" 5 | \n",
" 7 | \n",
" -1.012356 | \n",
"
\n",
" \n",
" 9 | \n",
" 60773.0 | \n",
" 6 | \n",
" 5 | \n",
" 6 | \n",
" -1.025628 | \n",
"
\n",
" \n",
" 10 | \n",
" 60783.0 | \n",
" 4 | \n",
" 8 | \n",
" 7 | \n",
" -0.890923 | \n",
"
\n",
" \n",
" 11 | \n",
" 60790.0 | \n",
" 7 | \n",
" 5 | \n",
" 8 | \n",
" -0.915438 | \n",
"
\n",
" \n",
" 12 | \n",
" 60800.0 | \n",
" 8 | \n",
" 6 | \n",
" 8 | \n",
" -1.035467 | \n",
"
\n",
" \n",
" 13 | \n",
" 60801.0 | \n",
" 4 | \n",
" 7 | \n",
" 8 | \n",
" -1.023812 | \n",
"
\n",
" \n",
" 14 | \n",
" 60812.0 | \n",
" 7 | \n",
" 6 | \n",
" 10 | \n",
" -1.085007 | \n",
"
\n",
" \n",
" 15 | \n",
" 60817.0 | \n",
" 6 | \n",
" 5 | \n",
" 7 | \n",
" -1.085007 | \n",
"
\n",
" \n",
" 16 | \n",
" 60823.0 | \n",
" 6 | \n",
" 4 | \n",
" 7 | \n",
" -1.194836 | \n",
"
\n",
" \n",
" 17 | \n",
" 60939.0 | \n",
" 6 | \n",
" 7 | \n",
" 9 | \n",
" -5.640073 | \n",
"
\n",
" \n",
" 18 | \n",
" 443956.0 | \n",
" 9 | \n",
" 4 | \n",
" 10 | \n",
" -13811.188253 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tsRel Q3 Q10 Q11 outScore\n",
"0 0.0 4 7 3 -971.093118\n",
"1 275.0 5 7 8 -969.221765\n",
"2 17944.0 6 7 7 -848.985675\n",
"3 60698.0 6 5 10 -2.550890\n",
"4 60730.0 5 10 8 -1.646488\n",
"5 60753.0 8 2 8 -1.071656\n",
"6 60760.0 1 8 8 -0.988349\n",
"7 60763.0 7 6 9 -0.988349\n",
"8 60769.0 5 5 7 -1.012356\n",
"9 60773.0 6 5 6 -1.025628\n",
"10 60783.0 4 8 7 -0.890923\n",
"11 60790.0 7 5 8 -0.915438\n",
"12 60800.0 8 6 8 -1.035467\n",
"13 60801.0 4 7 8 -1.023812\n",
"14 60812.0 7 6 10 -1.085007\n",
"15 60817.0 6 5 7 -1.085007\n",
"16 60823.0 6 4 7 -1.194836\n",
"17 60939.0 6 7 9 -5.640073\n",
"18 443956.0 9 4 10 -13811.188253"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.neighbors import LocalOutlierFactor\n",
"\n",
"# fit the model for outlier detection (default)\n",
"X = np.array(dfTmp.tsRel).reshape(dfTmp.shape[0],1)\n",
"X.shape\n",
"clf = LocalOutlierFactor(n_neighbors=5, contamination=0.1)\n",
"clf.fit_predict(X)\n",
"dfTmp['outScore'] = clf.negative_outlier_factor_.tolist()\n",
"dfTmp"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"dfTmp.plot.scatter(x='tsRel', y='outScore', c='DarkBlue')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Correlations for filtered data"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" tsRel | \n",
" Q3 | \n",
" Q10 | \n",
" Q11 | \n",
" outScore | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 60698.0 | \n",
" 6 | \n",
" 5 | \n",
" 10 | \n",
" -2.550890 | \n",
"
\n",
" \n",
" 4 | \n",
" 60730.0 | \n",
" 5 | \n",
" 10 | \n",
" 8 | \n",
" -1.646488 | \n",
"
\n",
" \n",
" 5 | \n",
" 60753.0 | \n",
" 8 | \n",
" 2 | \n",
" 8 | \n",
" -1.071656 | \n",
"
\n",
" \n",
" 6 | \n",
" 60760.0 | \n",
" 1 | \n",
" 8 | \n",
" 8 | \n",
" -0.988349 | \n",
"
\n",
" \n",
" 7 | \n",
" 60763.0 | \n",
" 7 | \n",
" 6 | \n",
" 9 | \n",
" -0.988349 | \n",
"
\n",
" \n",
" 8 | \n",
" 60769.0 | \n",
" 5 | \n",
" 5 | \n",
" 7 | \n",
" -1.012356 | \n",
"
\n",
" \n",
" 9 | \n",
" 60773.0 | \n",
" 6 | \n",
" 5 | \n",
" 6 | \n",
" -1.025628 | \n",
"
\n",
" \n",
" 10 | \n",
" 60783.0 | \n",
" 4 | \n",
" 8 | \n",
" 7 | \n",
" -0.890923 | \n",
"
\n",
" \n",
" 11 | \n",
" 60790.0 | \n",
" 7 | \n",
" 5 | \n",
" 8 | \n",
" -0.915438 | \n",
"
\n",
" \n",
" 12 | \n",
" 60800.0 | \n",
" 8 | \n",
" 6 | \n",
" 8 | \n",
" -1.035467 | \n",
"
\n",
" \n",
" 13 | \n",
" 60801.0 | \n",
" 4 | \n",
" 7 | \n",
" 8 | \n",
" -1.023812 | \n",
"
\n",
" \n",
" 14 | \n",
" 60812.0 | \n",
" 7 | \n",
" 6 | \n",
" 10 | \n",
" -1.085007 | \n",
"
\n",
" \n",
" 15 | \n",
" 60817.0 | \n",
" 6 | \n",
" 5 | \n",
" 7 | \n",
" -1.085007 | \n",
"
\n",
" \n",
" 16 | \n",
" 60823.0 | \n",
" 6 | \n",
" 4 | \n",
" 7 | \n",
" -1.194836 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" tsRel Q3 Q10 Q11 outScore\n",
"3 60698.0 6 5 10 -2.550890\n",
"4 60730.0 5 10 8 -1.646488\n",
"5 60753.0 8 2 8 -1.071656\n",
"6 60760.0 1 8 8 -0.988349\n",
"7 60763.0 7 6 9 -0.988349\n",
"8 60769.0 5 5 7 -1.012356\n",
"9 60773.0 6 5 6 -1.025628\n",
"10 60783.0 4 8 7 -0.890923\n",
"11 60790.0 7 5 8 -0.915438\n",
"12 60800.0 8 6 8 -1.035467\n",
"13 60801.0 4 7 8 -1.023812\n",
"14 60812.0 7 6 10 -1.085007\n",
"15 60817.0 6 5 7 -1.085007\n",
"16 60823.0 6 4 7 -1.194836"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfTmpFil = dfTmp[np.logical_and(dfTmp.outScore>-5, dfTmp.outScore<5)]\n",
"dfTmpFil"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEKCAYAAAAb7IIBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de1xVdb7/8deHmwqioHgBBG+ZSHhDxEuWmVnZVGBao+ZoNRM5djmdmpmc0zlN02OaR3X6TWcqx9LK0bHSsjTzbqZWKiaYonhJRFQUFRXwgsjt+/tjbzvEQdnAhrUvn+fjsR/svdb3u3mvzWJ/9lp7re8SYwxKKaW8j4/VAZRSSllDC4BSSnkpLQBKKeWltAAopZSX0gKglFJeSguAUkp5KYcKgIjcKSL7RSRLRKbXMF9E5E37/AwRia8y7wMROSUiu6v1+W8R2Wdvv1hEQhq+OEoppRxVawEQEV9gBjAaiAUmiEhstWajgR72Wwows8q8fwJ31vDUa4E4Y0wf4Efgj3UNr5RSqv4c2QJIBLKMMdnGmFJgAZBUrU0SMM/YpAIhIhIOYIz5Bjhb/UmNMWuMMeX2h6lAp/ouhFJKqbrzc6BNJHC0yuNcYJADbSKBPAdzPAIsrGmGiKRg26ogKChoQExMjINPqZRSCiA9Pf20MaZd9emOFACpYVr18SMcaVPzk4s8D5QDH9Y03xgzC5gFkJCQYNLS0hx5WqWUUnYicrim6Y4UgFwgqsrjTsDxerSpKdQU4G5gpNFBiZRSqkk58h3ANqCHiHQVkQBgPLC0WpulwGT70UCDgSJjzDV3/4jIncBzwL3GmOJ6ZFdKKdUAtRYA+xe1TwCrgb3AJ8aYTBGZKiJT7c1WANlAFjAbmHalv4h8DGwBeopIroj82j7rbSAYWCsiO0TkHWctlFJKqdqJO+150e8AlFKq7kQk3RiTUH26ngmslFJeSguAUkp5KS0ASinlpbQAKKWUl9ICoJRSXsqRE8GUUrWYlT7L6ghuL2VAitURvI5uASillJfSAqCUUl5KC4BSSnkpLQBKKeWltAAopZSX0gKglFJeSguAUkp5KS0ASinlpbQAKKWUl9ICoJRSXkoLgFJKeSktAEop5aW0ACillJfSAqCUUl5KC4BSSnkpLQBKKeWltAAopZSX0gKglJsoqyijorLC6hjKg+glIZVyMZWmkqyzWfx45keyC7I5ceEE50vPU1pRCoAgtAxoSVhgGO2D2tMlpAtdQ7oS3ToaXx9fi9Mrd6IFQCkXce7yOTbkbGBL7hbOXjqLIIQHh9M9tDutmrciyD8IYwxllWWcu3yO08Wn2X96P1uPbQWghV8LYtvF0rdDX/p17Eczv2YWL5FydVoAlLJYaUUpX2V/xaqsVZRWlNKrXS/ui7mPuPZxtPBvUWv/gksFZBdkk5mfye5Tu0nPS6eZbzP6dezH8C7D6RbSDRFpgiVR7sahAiAidwJ/B3yB94wxr1SbL/b5dwHFwEPGmO32eR8AdwOnjDFxVfq0ARYCXYAc4AFjTEEDl0cpt3Kk6Aiz0meRX5xPv479uC/mPjq07FCn5whtEcqAFgMYEDGASlPJwbMH2XpsK9uOb2Prsa10ad2F27vfTv/w/viIfu2n/letBUBEfIEZwCggF9gmIkuNMXuqNBsN9LDfBgEz7T8B/gm8Dcyr9tTTgXXGmFdEZLr98XP1XxSl3Icxho2HN/Lpnk8JDgjmmcHP0DOsZ4Of10d86NG2Bz3a9mBc7Di25G5h/aH1zNo+i8jgSO7teS99O/TVLQIFOLYFkAhkGWOyAURkAZAEVC0AScA8Y4wBUkUkRETCjTF5xphvRKRLDc+bBNxivz8X2IAWAOUFjDF8tvcz1mavJa59HA/3e5iWAS2d/nua+zVnRJcRDO88nG3HtrHswDJmps2kV1gvfnnDLwkPDnf671TuxZHtwUjgaJXHufZpdW1TXQ
djTB6A/Wd7B7Io5dYqTSUf7/6YtdlrGdFlBI8PfLxR3vyr8hEfBnUaxIvDX2T8DePJKczhpW9eYun+pXpYqZdzZAugpm1FU4829SIiKUAKQHR0tDOeUilLGGNYuHshGw9v5Pbut3NfzH1NuivG18eXEV1HMCBiAIv2LGL5geXsPrWbR/o/QseWHZssh3IdjmwB5AJRVR53Ao7Xo011J0UkHMD+81RNjYwxs4wxCcaYhHbt2jkQVynXtD5nPRsOb2BUt1FN/uZfVatmrXik/yM8NuAxThef5q/f/pXtedstyaKs5UgB2Ab0EJGuIhIAjAeWVmuzFJgsNoOBoiu7d65hKTDFfn8K8EUdcivlVnaf2s0nmZ/Qr0M/7utl3Zt/VfHh8bww/AUigiN4N/1dluxbQqWptDqWakK1FgBjTDnwBLAa2At8YozJFJGpIjLV3mwFkA1kAbOBaVf6i8jHwBagp4jkisiv7bNeAUaJyAFsRxj97NBSpTzF6eLTzN4+m06tOvFI/0dc6lDMkOYhPDvkWYZFDWNl1krm7Jij3wt4EYfOAzDGrMD2Jl912jtV7hvg8av0nXCV6WeAkQ4nVcoNVVRW8MEPHyAIv034rUuenevv68+kPpMICwpjyb4lXCq7RMqAFAJ8A6yOphqZ63wUUcoDrcpaxcGCg0zsPZG2gW2tjnNVIsLo60YzsfdEdp/azT+2/YOyijKrY6lGpgVAqUaSU5jDsgPLSIxIJDEy0eo4DhneeTiT+05m7+m9fLDjA/1OwMNpAVCqEVSaSuZnzKdVQCsm9K5xL6jLGho1lPtj72d73nY+zPgQ2x5e5Yl0MDilGsHGnI0cPXeUlPgUAv0DrY5TZ7d1u40LpRdYmbWSjsEdGdVtlNWRVCPQLQClnKyopIgl+5fQK6wX8eHxVsept6SeScR3jOezPZ+xN3+v1XFUI9ACoJSTfb73c8ory5kQN8EljvevLxFhSr8phAeHM2v7LPIv5lsdSTmZFgClnOhI0RFSj6VyW9fb6jyssytq7tecaQnTMMbwwY4P9BwBD6MFQCknWrxvMYH+gdxx3R1WR3GadkHteLD3g2QXZLMya6XVcZQTaQFQykn2n97Pnvw9jL5utFt+8XstAyMHMihyEMsPLCe7INvqOMpJtAAo5QTGGD7f9zmhzUMZ0WWE1XEaxYS4CYQ2D2XOjjl6kpiH0AKglBNknMogpzCHu6+/G39ff6vjNIoW/i2Y1GcSpy6eYlXWKqvjKCfQAqBUAxljWHFgBW1btGVIpyFWx2lUse1iGRgxkFUHV3Hiwgmr46gG0gKgVAOtO7SOnMIc7rjuDnx9fK2O0+juj70ffx9/Ptr1kZ4l7Oa0ACjVQC9/+zKtm7VmaKehVkdpEq2bt2ZMrzHsP7Of9Lx0q+OoBtACoFQDbD66mQ05GxjVfZTH7vuvyU3RNxEZHMnifYv1C2E3pgVAqQZ4ddOrtG3Rlpujb7Y6SpPyER/G9hrL6eLTbDy80eo4qp60AChVT1lns/hy/5cue6GXxnZD+xuIbRfL8gPLuVh60eo4qh60AChVT29tfQs/Hz+mDZxWe2MPNbbXWC6VXdLDQt2UFgCl6qGopIgPdnzA+LjxhAeHWx3HMp1adSIxMpENhzdw7vI5q+OoOtICoFQ9vP/D+1wovcDTg5+2Oorl7upxF2UVZazNXmt1FFVHWgCUqqOKygre+v4tbu58s1uP9+8sHVt2ZGDEQDbmbORC6QWr46g60AKgVB2tzFpJTmEOTyY+aXUUl3FXj7sorShl7UHdCnAnWgCUqqOZaTMJbxlOUs8kq6O4jPDgcBIiElifs16PCHIjWgCUqoOcwhxWHljJb+J/41Unfjnijuvu4HLFZb498q3VUZSDtAAoVQez0mchIjwa/6jVUVxOVKsoeoX1Yv2h9ZRXllsdRzlAC4BSDiqtKOX9H97nnuvvIap1lNVxXNKobqMovFzItuPbrI6iHKAFQCkHLd67mFMXTz
E1YarVUVxWbLtYIoIj+OrgVzpSqBvQAqCUg+bsmEN062hu73671VFclogwqtsocs/nsvf0XqvjqFo4VABE5E4R2S8iWSIyvYb5IiJv2udniEh8bX1FpJ+IpIrIDhFJE5FE5yySUs6Xey6XNQfXMKXvFHxEPzddy8CIgQQHBLMhZ4PVUVQtal2TRcQXmAGMBmKBCSISW63ZaKCH/ZYCzHSg72vAn40x/YAX7I+Vcknzds7DYHio30NWR3F5/r7+DIseRsbJDM5eOmt1HHUNjnyUSQSyjDHZxphSYAFQ/QDoJGCesUkFQkQkvJa+Bmhlv98aON7AZVGqURhjmLNjDsM7D6dbaDer47iFm6JvAuCbw99YnERdiyMFIBI4WuVxrn2aI22u1fdp4L9F5CjwOvDHmn65iKTYdxGl5efnOxBXKefadHQTWWezeLjfw1ZHcRttA9vSu0NvvjvynR4S6sIcKQBSw7TqX+9frc21+v4W+HdjTBTw78D7Nf1yY8wsY0yCMSahXbt2DsRVyrnm/DCHlgEtGRc7zuoobmV45+GcLz3PD3k/WB1FXYUjBSAXqHrQcyf+7+6aq7W5Vt8pwOf2+59i212klEu5WHqRT/Z8wgOxDxAUEGR1HLcS2y6WsMAwNhzeYHUUdRWOFIBtQA8R6SoiAcB4YGm1NkuByfajgQYDRcaYvFr6HgeG2+/fChxo4LIo5XSf7f2MC6UX9MvfevARH26Kvomss1mcvHDS6jiqBrUWAGNMOfAEsBrYC3xijMkUkakicuWMmBVANpAFzAamXauvvc+jwP8TkZ3AX7EdPaSUS5mzYw7XtbmOYdHDrI7ilgZ3GowgbM7dbHUUVQM/RxoZY1Zge5OvOu2dKvcN8Lijfe3TvwMG1CWsUk0puyCbDTkb+MuIvyBS09dZqjYhzUOIax/HlqNbuPf6e/H18bU6kqpCz2hR6irm7piLIEzuO9nqKG7txqgbKbpcxJ78PVZHUdVoAVCqBpWmkrk75zKq+ygd+K2BenfoTXBAMJuObrI6iqpGC4BSNdiYs5HDRYd5qO9DVkdxe34+fgzuNJidJ3fqheNdjBYApWrw4a4PaRnQkqQYveqXMwzpNIRKU0na8TSro6gqtAAoVU1JeQmf7vmUsb3GEugfaHUcjxDZKpKoVlFsPbbV6iiqCi0ASlWz/MflnLt8jgd7P2h1FI8yKHIQOYU5ek6AC9ECoFQ1H+76kI4tO3Jr11utjuJRBkYORBDdCnAhWgCUqqLgUgHLDyxnQtwEPWbdyUKahxATFsPWY1v1amEuQguAUlUs2rOI0opS3f3TSAZ1GsTp4tNkF2RbHUWhBUCpn5m/az4xYTHEh8fX3ljVWf+O/fH38dfdQC5CC4BSdkeKjvDN4W94sPeDOvRDI2nu15w+HfqwPW87labS6jheTwuAUnYf7foIgIm9J1qcxLMNCB/A+dLzHDijAwBbTQuAUtgu+zg/Yz5Do4bqZR8bWe8OvQnwDSAtT08Ks5oWAKWAjJMZZOZnMqn3JKujeLwA3wB6t+/ND3k/UFFZYXUcr6YFQClsx/77+fhx/w33Wx3FKyREJNh2A53V3UBWcuh6AEpZbVb6rEZ77kpTyezts+kV1ovP935eewfVYHHt42jm24z04+nEhMVYHcdr6RaA8noHzhygsKSQQZ0GWR3FawT4BtiOBjqxXXcDWUgLgPJ6W49tpZlvM/p26Gt1FK8SHx7PhdIL/HjmR6ujeC0tAMqrlVWUkZ6XTnx4PAG+AVbH8So/7QbKS7c6itfSAqC82q5TuygpLyExMtHqKF7np91AebobyCpaAJRX25q7lVbNWukXkRZJiEjgYtlF9p/Zb3UUr6QFQHmti6UX2Z2/m4ERA/ER/Vewwg3tbvjpaCDV9HStV14rPS+d8spyBkXq0T9W8ff1p2+Hvvxw4gfKKsqsjuN1tAAor7X12FbCW4YT3Tra6ihebUDEAC6WXWR9znqro3gdLQDKK50uPk
3W2SwGdRqkI39a7MpuID0Jr+lpAVBeaWuubTx63f1jPX9ff25ofwNf7P9Ch4huYloAlNcxxpB6LJXr215PmxZtrI6jsF0o5sSFE3x/7Huro3gVhwqAiNwpIvtFJEtEptcwX0TkTfv8DBGJd6SviDxpn5cpIq81fHGUql1OYQ6nLp5icORgq6Mou7j2cfj5+LF472Kro3iVWguAiPgCM4DRQCwwQURiqzUbDfSw31KAmbX1FZERQBLQxxhzA/C6MxZIqdqk5qbi7+Ovl310IYH+gYzoMoLF+xbrBeObkCNbAIlAljEm2xhTCizA9sZdVRIwz9ikAiEiEl5L398CrxhjLgMYY045YXmUuqbyynK2Hd9G3459aeHfwuo4qooxMWM4cPYA+07vszqK13CkAEQCR6s8zrVPc6TNtfpeD9wkIltFZKOIDKzpl4tIioikiUhafn6+A3GVurrMU5lcLLuou39c0L097wVg8T7dDdRUHCkANR0jV30b7WptrtXXDwgFBgO/Bz6RGo7HM8bMMsYkGGMS2rVr50Bcpa4u9VgqwQHBxLarvhdTWS2yVSSDIgexZN8Sq6N4DUcKQC4QVeVxJ+C4g22u1TcX+Ny+2+h7oBIIczy6UnVTXFZMxskMEiIS8PXxtTqOqkFyTDLbjm8j91yu1VG8giMFYBvQQ0S6ikgAMB5YWq3NUmCy/WigwUCRMSavlr5LgFsBROR6IAA43eAlUuoq0o/bhn4Y3El3/7iq5JhkAL7Y94XFSbxDrQXAGFMOPAGsBvYCnxhjMkVkqohMtTdbAWQDWcBsYNq1+tr7fAB0E5Hd2L4cnmL063/ViFKPpdIhqAOdW3e2Ooq6ipiwGGLCYliyX3cDNQWHrglsjFmB7U2+6rR3qtw3wOOO9rVPLwUm1SWsUvV1ZeiHpJ5JOvSDi0vumczrW16n4FIBoS1CrY7j0fRMYOUVNh/djCC6+8cNJMckU15ZzvIDy62O4vG0ACiPV2kq2Xx0M7HtYnXoBzcwMHIgEcERejRQE9ACoDzenvw9FJQUcGPUjVZHUQ7wER+SeiaxMmsll8ouWR3Ho2kBUB5v05FNtAxoSd+Ofa2OohyUHJNMcVkxX2V/ZXUUj6YFQHm085fPs/PkTgZFDsLPx6FjHpQLuKXLLbRu1lp3AzUyLQDKo6UeS6XCVDAsepjVUVQdBPgG8Ivrf8HSH5dSXlludRyPpQVAeSxjDJuObKJrSFcigiOsjqPqKLlnMqeLT7P56Garo3gsLQDKY2UXZpN3IU8//bupO6+7k2a+zXQ3UCPSAqA81qYjm2jm24yEiASro6h6CG4WzG3dbtNrBDQiLQDKI5WUl5B2PI2EiASa+zW3Oo6qpzExY8gpzCHjZIbVUTySFgDlkdKOp3G54rIe++/m7ul5D4LoNQIaiRYA5ZE2Hd1Ex5Yd6RbazeooqgHaB7Xnxugb9XuARqIFQHmco+eOkl2QzbCoYTrwmwdI7pnMzpM7OVRwyOooHkcLgPI4G3I24O/jz9CooVZHUU7w0zUC9us1ApxNC4DyKMVlxXx/7HsSIxMJCgiyOo5ygu5tutO7fW/dDdQItAAoj7L56GZKK0q5pcstVkdRTpQck8y3R74l/2K+1VE8ihYA5TEqTSUbD2+ka0hXoltHWx1HOVFyTDKVppJlPy6zOopH0QKgPMbe/L2cunhKP/17oP4d+xPdOlovFelkWgCUx1h3aB2tmrViQPgAq6MoJxMRknsms+bgGi6WXrQ6jsfQAqA8wvHzx8nMz+SWLrfg7+tvdRzVCJJjkikpL2H1wdVWR/EYWgCUR1iXvQ5/H3+Gdx5udRTVSG7qfBOhzUP1aCAn0gKg3N65y+dIPZbKkKghtAxoaXUc1Uj8fPy4p+c9LPtxGWUVZVbH8QhaAJTb23h4I+WV5YzsOtLqKKqRJfdMpqCkgG+PfGt1FI+gBUC5tZLyEtYfWk+fDn3o2LKj1XFUI7vjujto4deCxXt1cDhn0A
Kg3Nq3R77lYtlFRl832uooqgkE+gdye/fbWbJ/iV4jwAm0ACi3VVZRxtqDa+nZtqeO+ulFkmOSyT2XS3peutVR3J4WAOW2tuRuoehyEaN76Kd/b3L39XfjK758vvdzq6O4PYcKgIjcKSL7RSRLRKbXMF9E5E37/AwRia9D39+JiBGRsIYtivImFZUVrD64mq4hXYlpG2N1HNWEwgLDGNF1BJ/u+VR3AzVQrQVARHyBGcBoIBaYICKx1ZqNBnrYbynATEf6ikgUMAo40uAlUV5lS+4WThefZnSP0Trmvxca12scWWez2HVql9VR3JojWwCJQJYxJtsYUwosAJKqtUkC5hmbVCBERMId6PsG8AdAy7hyWFlFGct+XEaXkC70ad/H6jjKAmN6jcFHfFi0Z5HVUdyaIwUgEjha5XGufZojba7aV0TuBY4ZY3Ze65eLSIqIpIlIWn6+DgWrbEf+FJQUkNwzWT/9e6n2Qe0Z3nm47gZqIEcKQE3/YdVf8au1qXG6iAQCzwMv1PbLjTGzjDEJxpiEdu3a1RpWebbL5ZdZcWAF17e9npgw3ffvzcbFjmPf6X3syd9jdRS35UgByAWiqjzuBBx3sM3VpncHugI7RSTHPn27iOiZPOqa1h1ax/nS8/rpXzEmZgyC6G6gBnCkAGwDeohIVxEJAMYDS6u1WQpMth8NNBgoMsbkXa2vMWaXMaa9MaaLMaYLtkIRb4w54awFU56nqKSIVVmr6NuhL93bdLc6jrJYeHA4w6KHsWivFoD6qrUAGGPKgSeA1cBe4BNjTKaITBWRqfZmK4BsIAuYDUy7Vl+nL4XyCl/s/4LyynLG9hprdRTlIu6PvZ/dp3az7/Q+q6O4JYfOAzDGrDDGXG+M6W6Medk+7R1jzDv2+8YY87h9fm9jTNq1+tbw/F2MMaedsUDKMx0pOsLmo5u5teutdGjZweo4ykXc1+s+AN0NVE96JrByecYYPs38lKCAIO7qcZfVcZQLiWwVydCooVoA6kkLgHJ583bO48ezP5LUM4lA/0Cr4ygXM67XOHae3MmBMwesjuJ2tAAol3a6+DTPrnmW7qHdGRY9zOo4ygWNjbV9J6RbAXWnBUC5tGfXPMu5y+eY1GcSPqKrq/q/oltHMyhykB4NVA/6H6Vc1pqDa5i3cx5/uPEPRARHWB1HubBxsePYnredg2cPWh3FrWgBUC7pTPEZHv7iYXqF9eL5m563Oo5ycQ/c8AAAC3YvsDiJe9ECoFyOMYapy6eSfzGfD+/7kBb+LayOpFxcdOtohkUP48NdH+rYQHWgBUC5nHk757FozyJeGvES/cP7Wx1HuYmJcRPZe3ovGSczrI7iNrQAKJey6+Qupq2Yxs2db+b3Q39vdRzlRu6/4X78fPz4aNdHVkdxG1oAlMsoLCnkvk/uo1WzViwYuwBfH1+rIyk3EhYYxu3db2dB5gIqTaXVcdyCFgDlEipNJZMXTyanMIdF9y8iPDjc6kjKDU2Mm/jTsCGqdloAlEv43Zrf8eWPX/LGHW9wY/SNVsdRbiopJokWfi2YnzHf6ihuQQuAstzftvyNN1Lf4N8G/RuPD3zc6jjKjbUMaMl9ve5jYeZCSspLrI7j8rQAKEvNz5jPs2ueZVzsOP52x9/0Ii+qwab0nUJhSSFf7v/S6iguTwuAssy/dv6LKUumMKLLCP415l861INyilu73kpkcCRzd861OorL0/84ZYm5O+YyZckUbulyC8smLqO5X3OrIykP4evjy6/6/IpVWas4eeGk1XFcmhYA1aSMMfz127/y0BcPMbLbSL6c8KUO8aycbkq/KVSYCj7c9aHVUVyaFgDVZMoqynj0y0d5/uvnmdh7IssmLNM3f9UoYsJiSIxM5J87/qlDQ1yDFgDVJI4UHeHmf97M+z+8z3/d/F/MHzOfZn7NrI6lPNjD/R5m16ldpB1Pq72xl9ICoBrd0v1L6f9ufzJPZbJw3EJeGvGSHu2jGt3E3hMJ9A9kVv
osq6O4LC0AqtGcKT7DpM8nkbQgiejW0aSnpP80bK9Sja1Vs1aMv2E8H+/+mPOXz1sdxyVpAVBOV2kqmfPDHGL/EcvCzIX8afif2PqbrfRo28PqaMrLpAxI4WLZRT7e/bHVUVySFgDlNMYY1h9az6D3BvHI0kfoFtqNtEfTePGWFwnwDbA6nvJCiZGJ9G7fW3cDXYUWANVgxhi+yv6K4f8czq3zbiXvfB7zx8xn8yOb6duxr9XxlBcTEVIGpJCel872vO1Wx3E5WgBUvRljWHlgJTd+cCOj/jWKQ4WHeHv022Q9lcWDfR7UL3qVS5jUZxKB/oHM+H6G1VFcjp/VAZRrqMsmcmlFKam5qazPWc/x88dp06INE3tPZGinofj7+jNv57xGTKpU3YQ0D2Fyn8nM2TGHV0e9SlhgmNWRXIYWAOWwM8Vn2HB4A98d+Y7ismKiWkUxpe8UEiMT8fPRVUm5ricHPck76e/w3vb3mD5sutVxXIZD/7Uicifwd8AXeM8Y80q1+WKffxdQDDxkjNl+rb4i8t/APUApcBB42BhT6IyFUs5jjOHA2QN8fehrdpzYgYjQr2M/RnYdSffQ7rqbR7mF2HaxjOw6khnbZvC7ob/TDyx2tb4KIuILzABGAbnANhFZaozZU6XZaKCH/TYImAkMqqXvWuCPxphyEXkV+CPwnPMWTTVERWUF6XnprM1ey5GiIwT5B3FH9zsY3mU4bVq0sTqeUnX21KCnSFqQxJJ9SxgXO87qOC7BkTKYCGQZY7IBRGQBkARULQBJwDxjG3QjVURCRCQc6HK1vsaYNVX6pwL6F3EBJeUlfHfkO9YdWsfZS2fp2LIjD/Z+kMGdBuuhnMqt/aLHL+ga0pU3Ut/QAmDnSAGIBI5WeZyL7VN+bW0iHewL8Aiw0IEsqpGUlJewLnsda7PXcqn8Ej3a9GBC3ATi2sfpOP3KI/j6+PLMkGd4cuWTfHfkO4ZFD7M6kuUcKQA17eStPrze1drU2ldEngfKgRrHbRWRFCAFIDo6urasqo5KykuYuW0mL2x4gQulF+jXoR93XncnXUO7Wh1NKad7pP8j/Hnjn3l106taAHCsAOQCUVUedwKOO9gm4Fp9RWQKcDcw0lxlzFZjzICJ/E4AAA9OSURBVCxgFkBCQoKO6+okFZUVzNkxhz9v/DO553LpFdaL5JhkuoR0sTqaUo0m0D+QpxKf4oUNL7Dr5C56d+htdSRLObJtvw3oISJdRSQAGA8srdZmKTBZbAYDRcaYvGv1tR8d9BxwrzGm2EnLoxyQfjydwe8P5tEvH6VTq058Pflrnh78tL75K6/weOLjBPkH8drm16yOYrlaC4Axphx4AlgN7AU+McZkishUEZlqb7YCyAaygNnAtGv1tfd5GwgG1orIDhF5x3mLpWpSVFLEkyueJPG9RHLP5fLRfR+x+ZHNjOg6wupoSjWZNi3a8NiAx/h418dkF2RbHcdS4k5Xy0lISDBpaXpxh/pYuHsh/7bq38gvzmdawjT+cutfaN289U/zdbAsZbWUASlN9ruOnz9O9ze788ANDzA32fMvHi8i6caYhOrT9fAOD1dwqYAJn01g/GfjiWodxfe/+Z637nrrZ2/+SnmbiOAIHh/4OPMz5rM3f6/VcSyjBcCDfX3oa/q804dFexbxlxF/YcuvtzAgYoDVsZRyCc/d+ByB/oG8uPFFq6NYRguAByopL+HZ1c8yct5IgvyD2PLrLTx/8/N6+rtSVbQLasfTg57mk8xP2Hlip9VxLKEFwMMcKjjE0PeH8rfUvzEtYRrbH9tOQsT/2fWnlAKeHfosIc1D+P3a3+NO34c6ixYAD7LywEoGzBrAocJDLB2/lBm/mEGgf6DVsZRyWSHNQ3hx+IuszV7Lsh+XWR2nyWkB8ACVppKXNr7ELz76BdGto0l7NI17et5jdSyl3MK0gdOICYvhmTXPUFpRanWcJqUFwM0VXCrgno/v4U8b/sSkPpPY/OvNdG/T3e
pYSrkNf19//nb738g6m8WbW9+0Ok6T0gLgxnac2MGAWQNYe3At/7jrH8xNnqu7fJSqh9E9RnNXj7t4aeNL5J7LtTpOk9EC4Kbm7ZzHkPeHUFpRyjcPf8NvB/5WL86iVAO8NfotyivLmbpsqtd8IawFwM1cLr/MtOXTmLJkCkM6DWH7Y9sZ3Gmw1bGUcnvdQrvx8q0vs/zAchbsXmB1nCahBcCN5J7LZfg/hzMzbSZ/GPoH1vxqDe2D2lsdSymP8dSgp0iMTOSpVU+RfzHf6jiNTguAm/j60NfEvxtPZn4mi+5fxKujXtUTu5RyMl8fX96/933OXT7Hw1887PG7grQAuLhKU8mr373KqH+NIiwwjG2PbmNs7FirYynlseLax/H6qNdZfmA5b6S+YXWcRqUFwIWdKT7DvR/fy/R10xnbayxbf7OVmLAYq2Mp5fGeSHyC5Jhkpn81nW3Htlkdp9FoAXBRW3O3Ej8rnjUH1/D26LdZOG4hwc2CrY6llFcQEd6/933Cg8MZ+8lYTlw4YXWkRqEFwMUYY/if1P/hpjk34SM+bHpkE48nPq6HeCrVxNq0aMPiXy7mzKUzJC1IorjM8y5cqAXAheSdz+Oej+/h31f/O6N7jGZ7ynYGRg60OpZSXis+PJ6P7vuIbce2MWXJFCpNpdWRnEoLgIv4NPNT4mbGse7QOv7njv9hyS+XENoi1OpYSnm9pJgkXr/9dRbtWUTKlykeVQT0OEKLFVwq4ImVT/DRro8YGDGQeWPm6Re9SrmYZ4Y8Q1FJES998xK+4svMu2fiI+7/+VkLgEWMMSzas4inVz/NqYuneOmWl/jjTX/UY/uVclEv3vIi5ZXl/PW7v1JaWcq7d79LgG+A1bEaRN9tLJB5KpMnVz7J+pz19O3Ql6Xjl+qlGpVycSLCX279C838mvGnDX/iSNERPnvgM0Kah1gdrd7cfxvGjRSWFPL0qqfp+05fdpzYwYy7ZpCekq5v/kq5CRHhheEvMDd5Lt8e/pbB7w0m42SG1bHqTQtAEyguK+b1za/T460evLn1TX4T/xt+fPJHpg2chq+Pr9XxlFJ1NLnvZNb+ai1Fl4tInJ3IjO9nuOWwEVoAGlFhSSGvbXqNbn/vxu/X/p748HjSUtJ45+53CAsMszqeUqoBhncZTsbUDEZ2G8kTK59gxNwRZJ7KtDpWnWgBaAR78vfw9KqniX4jmue+eo649nF889A3rJ60mvjweKvjKaWcpF1QO76c8CXv3v0uGScz6PduP55e9bTbnDmsXwI7yZniMyzas4j5u+bz3ZHv8Pfx5/4b7ud3Q35H//D+VsdTSjUSH/EhZUAKY2LG8B/r/oO3v3+bd9Pf5bEBj/FE4hNc1+Y6qyNelRaAejLGkHEygzUH17D64Go2Ht5IeWU5Pdv25LXbXmNKvyk6Vr9SXqRdUDtm3zub54Y9x8vfvszb37/N37f+ndu7387kPpO5p+c9tGrWyuqYP6MFwEHFZcVknMzgh7wfSD2WypqDa37azItrH8czg59hfNx4+nXsp+P2KOXFrmtzHXOS5vDyrS/z3vb3mL19NpMWTyLAN4CRXUdyW7fbGNl1JHHt4yw/CMShAiAidwJ/B3yB94wxr1SbL/b5dwHFwEPGmO3X6isibYCFQBcgB3jAGFPQ8EWqv5LyEnLP5ZJTmENOYQ6HCw9zsOAgO07sYP+Z/T+dAt62RVtu63Ybd3S/g9u7305kq0grYyulXFBEcAQvDH+B/7z5P0nNTeXTzE9ZkbWClWtWAhDkH0R8eDxx7ePoFtqN7qHd6RbajW6h3Zps5N9aC4CI+AIzgFFALrBNRJYaY/ZUaTYa6GG/DQJmAoNq6TsdWGeMeUVEptsfP+e8RftfH/zwAWsOrqG8svxntwulFygoKaDgUgEFJQWUlJf8rJ+P+BDVKoreHXozLnYc/Tv2p394fzq37qyf8pVSDvERH4ZGDWVo1FDe4A2OFh1lQ84G0o6nkZaXxsLMhZ
y9dPZnfVr4tSCkeQihLUJtP5uH8uItL5IQkeDUbI5sASQCWcaYbAARWQAkAVULQBIwz9gOhE0VkRARCcf26f5qfZOAW+z95wIbaKQCcLToKDtO7MDPx+9nt6CAIHqF9SK0eSihLUIJbR5KRHAEXUK60DmkM5HBkfj7+jdGJKWUl4pqHcWv+v6KX/X91U/TCksKOXj2INkF2WQXZHO6+DQFJQUUlhRSWFJI3oU8yivLnZ7FkQIQCRyt8jgX26f82tpE1tK3gzEmD8AYkyciNX5jKiIpQIr94QUR2e9A5oYIA0438u9wZd6+/KCvgSXL/xiPNfWvvBaXWweGMKQh3TvXNNGRAlDTvo7qp7xdrY0jfa/JGDMLmFWXPg0hImnGGOduZ7kRb19+0NfA25cfvOc1cOREsFwgqsrjTsBxB9tcq+9J+24i7D9POR5bKaVUQzlSALYBPUSkq4gEAOOBpdXaLAUmi81goMi+e+dafZcCU+z3pwBfNHBZlFJK1UGtu4CMMeUi8gSwGtuhnB8YYzJFZKp9/jvACmyHgGZhOwz04Wv1tT/1K8AnIvJr4Ahwv1OXrP6abHeTi/L25Qd9Dbx9+cFLXgNxxxHslFJKNZwOBqeUUl5KC4BSSnkpjy0A9pPRFonIPhHZKyJDRKSNiKwVkQP2n6FV2vcRkS0ikikiu0SkuX36BhHZLyI77Lf29unNRGShiGSJyFYR6WLNktasLssvIg9WWb4dIlIpIv3s8wbYX48sEXnTPuyHyy8/OPU18IZ1wF9E5tr/1ntF5I9Vnsdb1oFrvQZuuQ7UyhjjkTdsZxf/xn4/AAgBXgOm26dNB1613/cDMoC+9sdtAV/7/Q1AQg3PPw14x35/PLDQ6mWu7/JX69cbyK7y+HtgCLZzOlYCo91h+Z38Gnj8OgBMBBbY7wdiG5+rizetA7W8Bm65DtT6+lgdoJH+6K2AQ9i/5K4yfT8Qbr8fDuy3378LmH+V57raH341MMR+3w/bWYPijPxNvfzV2vwVeLlKm31V5k0A3nX15Xfma+At64D9b/ulfTnaAj8CbbxpHbjaa+Cu64AjN0/dBdQNyAfmiMgPIvKeiARRbfgJ4MrwE9cDRkRWi8h2EflDteebY9/s+68rm79UGebCGFMOFGFbaVxBXZe/ql8CH9vvR2I7me+KK0N8XJnnqssPznsNrvD0dWARcBHIw3ZY9uvGmLN41zpwtdfgCndbB2rlqQXAD4gHZhpj+mP7o06vpf0w4EH7zzEiMtI+70FjTG/gJvvtyghODR7mohHVdfkBEJFBQLExZveVSTU0Mw7McwXOeg3AO9aBRKACiAC6As+KSDe8ax242msA7rkO1MpTC0AukGuM2Wp/vAjbinC14SdygY3GmNPGmGJsJ7bFAxhjjtl/ngc+wraSXOkTZX8uP6A18PMxXa1T1+W/Yjw//+Sbi234jiuqDuXhyssPznsNvGUdmAisMsaUGWNOAZuABLxrHbjaa+Cu60CtPLIAGGNOAEdFpKd90khsQ1BfbfiJ1UAfEQm0/xGHA3tExE9EwsB2hABwN3Dlk2HV5xoHfG3sOwKtVo/lR0R8sJ2NvaDK8+QB50VksH2Td3KVPi67/OC818CL1oEjwK1iEwQMxrbv35vWgRpfA3ddBxxi9ZcQjXUD+gFp2I7uWQKEYts3tw44YP/Zpkr7SUAmtj/sa/ZpQUC6/TkysV/ZzD6vOfAptuEvvge6Wb3MDVz+W4DUGp4nwf6aHATe5n/PHnfp5XfWa+At6wDQ0r4smdjeJH/vbevA1V4Dd14HarvpUBBKKeWlPHIXkFJKqdppAVBKKS+lBUAppbyUFgCllPJSWgCUUspLaQFQqgb2USSn1dImxz5yZIaIbBSRzrW0f0hE3nZuUqXqTwuAUjULwTbSY21GGGP6YBss7D8bNZFSTqYFQKmavQJ0tw/+NVtEvrHf3y0iN9XQfgv2QdJEpJ2IfCYi2+y3G5s0uVIO0gKgVM2mAw
eNMf2AfcBq+/2+wI4a2t+J7UxTsJ0p+oYxZiAwFnivCfIqVWd+VgdQyg1sAz6wjwOzxBhTtQCsF5EO2AYUu7IL6DYg9n9HDKaViAQ3WVqlHKRbAErVwhjzDXAzcAz4l4hMrjJ7BNAZ2xgxL9mn+WC7SEg/+y3S2EaRVMqlaAFQqmbngWAA+9E9p4wxs4H3sQ8VfoUx5hLwNDBZRNoAa4AnrswX+7WFlXI1ugtIqRoYY86IyCYR2Y1tNMiLIlIGXMA2JHL19nki8jHwOPAUMENEMrD9j30DTG269Eo5RkcDVUopL6W7gJRSyktpAVBKKS+lBUAppbyUFgCllPJSWgCUUspLaQFQSikvpQVAKaW81P8HDvrR6sd3O6cAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.distplot(dfTmpFil.tsRel, hist = True, color=\"g\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tsRel 1.000000\n",
"Q3 0.130838\n",
"Q10 -0.163241\n",
"Q11 -0.344576\n",
"outScore 0.702471\n",
"Name: tsRel, dtype: float64\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"corr = dfTmpFil.corr()\n",
"sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)\n",
"print(corr['tsRel'])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.regplot(x='tsRel', y='Q3', data=dfTmpFil, color=\"g\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q6 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 8 | \n",
" 5 | \n",
" 4 | \n",
" 6 | \n",
" 7 | \n",
" Running | \n",
" 5 | \n",
" 3 | \n",
" 5 | \n",
" 7 | \n",
" 3 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 5 | \n",
" 6 | \n",
" Reading | \n",
" 7 | \n",
" 7 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 275.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" Watching a movie | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 17944.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 5 | \n",
" 3 | \n",
" 6 | \n",
" 4 | \n",
" 4 | \n",
" Watching a movie | \n",
" 3 | \n",
" 8 | \n",
" 8 | \n",
" 5 | \n",
" 10 | \n",
" 60698.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 3 | \n",
" Reading | \n",
" 4 | \n",
" 5 | \n",
" 4 | \n",
" 10 | \n",
" 8 | \n",
" 60730.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 8 | \n",
" 7 | \n",
" 8 | \n",
" 3 | \n",
" 3 | \n",
" Running | \n",
" 8 | \n",
" 4 | \n",
" 10 | \n",
" 2 | \n",
" 8 | \n",
" 60753.0 | \n",
"
\n",
" \n",
" 6 | \n",
" 4 | \n",
" 3 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" Reading | \n",
" 1 | \n",
" 1 | \n",
" 10 | \n",
" 8 | \n",
" 8 | \n",
" 60760.0 | \n",
"
\n",
" \n",
" 7 | \n",
" 7 | \n",
" 3 | \n",
" 7 | \n",
" 6 | \n",
" 5 | \n",
" Reading | \n",
" 4 | \n",
" 6 | \n",
" 8 | \n",
" 6 | \n",
" 9 | \n",
" 60763.0 | \n",
"
\n",
" \n",
" 8 | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
" 4 | \n",
" 4 | \n",
" Watching a movie | \n",
" 4 | \n",
" 4 | \n",
" 5 | \n",
" 5 | \n",
" 7 | \n",
" 60769.0 | \n",
"
\n",
" \n",
" 9 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" Watching a movie | \n",
" 4 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 6 | \n",
" 60773.0 | \n",
"
\n",
" \n",
" 10 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 5 | \n",
" 3 | \n",
" Watching a movie | \n",
" 5 | \n",
" 2 | \n",
" 7 | \n",
" 8 | \n",
" 7 | \n",
" 60783.0 | \n",
"
\n",
" \n",
" 11 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 2 | \n",
" 2 | \n",
" Running | \n",
" 7 | \n",
" 6 | \n",
" 7 | \n",
" 5 | \n",
" 8 | \n",
" 60790.0 | \n",
"
\n",
" \n",
" 12 | \n",
" 8 | \n",
" 8 | \n",
" 8 | \n",
" 6 | \n",
" 6 | \n",
" Watching a movie | \n",
" 8 | \n",
" 7 | \n",
" 8 | \n",
" 6 | \n",
" 8 | \n",
" 60800.0 | \n",
"
\n",
" \n",
" 13 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" Watching a movie | \n",
" 1 | \n",
" 5 | \n",
" 7 | \n",
" 7 | \n",
" 8 | \n",
" 60801.0 | \n",
"
\n",
" \n",
" 14 | \n",
" 8 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" 7 | \n",
" Running | \n",
" 7 | \n",
" 10 | \n",
" 5 | \n",
" 6 | \n",
" 10 | \n",
" 60812.0 | \n",
"
\n",
" \n",
" 15 | \n",
" 7 | \n",
" 7 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" Watching a movie | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 7 | \n",
" 60817.0 | \n",
"
\n",
" \n",
" 16 | \n",
" 7 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 5 | \n",
" Reading | \n",
" 1 | \n",
" 7 | \n",
" 7 | \n",
" 4 | \n",
" 7 | \n",
" 60823.0 | \n",
"
\n",
" \n",
" 17 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
" 5 | \n",
" 5 | \n",
" Watching a movie | \n",
" 2 | \n",
" 9 | \n",
" 9 | \n",
" 7 | \n",
" 9 | \n",
" 60939.0 | \n",
"
\n",
" \n",
" 18 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" 9 | \n",
" Watching a movie | \n",
" 7 | \n",
" 6 | \n",
" 5 | \n",
" 4 | \n",
" 10 | \n",
" 443956.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 tsRel\n",
"0 8 5 4 6 7 Running 5 3 5 7 3 0.0\n",
"1 8 8 5 5 6 Reading 7 7 6 7 8 275.0\n",
"2 6 6 6 6 5 Watching a movie 7 7 7 7 7 17944.0\n",
"3 5 3 6 4 4 Watching a movie 3 8 8 5 10 60698.0\n",
"4 6 6 5 4 3 Reading 4 5 4 10 8 60730.0\n",
"5 8 7 8 3 3 Running 8 4 10 2 8 60753.0\n",
"6 4 3 1 1 1 Reading 1 1 10 8 8 60760.0\n",
"7 7 3 7 6 5 Reading 4 6 8 6 9 60763.0\n",
"8 5 5 5 4 4 Watching a movie 4 4 5 5 7 60769.0\n",
"9 6 6 6 6 6 Watching a movie 4 6 6 5 6 60773.0\n",
"10 4 4 4 5 3 Watching a movie 5 2 7 8 7 60783.0\n",
"11 7 7 7 2 2 Running 7 6 7 5 8 60790.0\n",
"12 8 8 8 6 6 Watching a movie 8 7 8 6 8 60800.0\n",
"13 4 4 4 1 1 Watching a movie 1 5 7 7 8 60801.0\n",
"14 8 7 7 7 7 Running 7 10 5 6 10 60812.0\n",
"15 7 7 6 6 6 Watching a movie 6 6 6 5 7 60817.0\n",
"16 7 6 6 5 5 Reading 1 7 7 4 7 60823.0\n",
"17 6 6 6 5 5 Watching a movie 2 9 9 7 9 60939.0\n",
"18 9 9 9 9 9 Watching a movie 7 6 5 4 10 443956.0"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dimensionality reduction"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[9.99999996e-01 1.81226177e-09 6.72112392e-10 5.91733094e-10\n",
" 3.30434976e-10]\n",
"0.9999999994658001\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAWr0lEQVR4nO3de5BU9ZnG8e8rF2USFZVJglxm0OAmeMHAiBoTReMFEcV4BfFGd4piN25la//YmE1tsltb1tb+sVtJNmZdKvESmUC8ECWWivdoKlFpFFEkmhERRjSARk2ChmDe/eP0SNP00KdnTvfv9OnnUzXF9OnD9FO/gofmnNPvMXdHRESa3z6hA4iISDJU6CIiGaFCFxHJCBW6iEhGqNBFRDJiaKgXHjVqlHd2doZ6eRGRprRq1apt7t5e6blghd7Z2UmhUAj18iIiTcnMXuvvOR1yERHJCBW6iEhGqNBFRDJChS4ikhEqdBGRjKha6GZ2o5ltMbMX+nnezOx7ZtZjZmvMbEryMYu6u6GzE/bZJ/q1u7tuLyUi0mzivEO/GZixl+fPBiYWvxYA/zv4WBV0d8OCBfDaa+Ae/bpggUpdRKSoaqG7++PA23vZZTbwY488CYw0s9FJBfzIN78J27fvvm379mi7iIgkcgx9DLCp5HFvcdsezGyBmRXMrLB169baXmXjxtq2i4i0mCQK3Spsq3jXDHdf5O5d7t7V3l7xk6v9Gz++tu0iIi0miULvBcaVPB4LbE7g5+7uuuugrW33bSNGRNtFRCSRQl8OXFm82uUE4F13fyOBn7u7efNg0SLo6AAr/qdg1qxou4iIVB/OZWZLgOnAKDPrBb4NDANw9xuAe4GZQA+wHZhfr7DMm7erwKdPh2efja54sUpHfUREWkvVQnf3uVWed+CriSWKK5+HK6+EJ56Ak09u+MuLiKRN835S9MIL4YAD4Ec/Cp1ERCQVmrfQ29pg7ly4/XZ4773QaUREgmveQgfI5eD992Hp0tBJRESCa+5CP+44OOooHXYREaHZC90sepf+9NPwQsXZYSIiLaO5Cx3g8sth2DC48cbQSUREgmr+Qm9vh/POg1tvhR07QqcREQmm+QsdomvSt22Dn/88dBIRkWCyUehnngljxuiwi4i0tGwU+pAhcPXVcP/98PrrodOIiASRjUIHmD8f/vpXuOWW0ElERILITqEffng0sOvGG6NiFxFpMdkpdIhOjr7ySjSwS0SkxWSr0C+4QAO7RKRlZavQ29rgssvgjjvg3XdDpxERaahsFTpoYJeItKzsFXpXFxx9tA67iEjLyV6h9w3sWrkSnn8+dBoRkYbJXqGDBnaJSEvKZqGPGgWzZ2tgl4i0lGwWOkTXpL/1FixfHjqJiEhDZLfQzzgDxo7VYRcRaRnZLfS+gV0rVkBvb+g0IiJ1l91CBw3sEpGWku1CP+wwOPVUDewSkZaQ7UKH6OTo+vXw+OOhk4iI1FX2C/2CC+DAA/XJURHJvOwX+ogRuwZ2vfNO6DQiInWT/UKHaBTABx9oYJeIZFprFPrUqXDMMTrsIiKZ1hqF3jewq1CANWtCpxERqYtYhW5mM8zsJTPrMbNrKzx/oJn93MyeM7O1ZjY/+aiDdPnlMHy4PjkqIplVtdDNbAhwPXA2MAmYa2aTynb7KvCiu08GpgP/ZWbDE846OIccAuefD4sXw5//HDqNiEji4rxDnwb0uPt6d98BLAVml+3jwP5mZsDHgbeBnYkmTUIup4FdIpJZcQp9DLCp5HFvcVup7wOfBTYDzwNfc/c9PpppZgvMrGBmha1btw4w8iCcfjqMG6fDLiKSSXEK3Sps87LHZwGrgUOBY4Hvm9kBe/wm90Xu3uXuXe3t7TWHHbTSgV2bNlXdXUSkmcQp9F5gXMnjsUTvxEvNB5Z5pAd4FfhMMhETNn8+uGtgl4hkTpxCXwlMNLMJxROdc4Dyg9AbgS8BmN
kngb8B1icZNDETJsBpp2lgl4hkTtVCd/edwDXACmAdcJu7rzWzhWa2sLjbvwOfN7PngYeBr7v7tnqFHrR8Hl59FR57LHQSEZHEmHv54fDG6Orq8kKhEOS1ef99GD0aZs2KLmMUEWkSZrbK3bsqPdcanxQtN2IEzJsHd96pgV0ikhmtWeiwa2DXkiWhk4iIJKJ1C33KFJg8WQO7RCQzWrfQ+wZ2rVoFzz0XOo2IyKC1bqFDdBxdA7tEJCNau9APOQS+/GUN7BKRTGjtQofosMvbb8Pdd4dOIiIyKCr0L30Jxo/XYRcRaXoq9L6BXQ88ABs3hk4jIjJgKnTQwC4RyQQVOkBnZ3ToRQO7RKSJqdD75POwYQM8+mjoJCIiA6JC73P++TBypE6OikjTUqH3KR3Y9fvfh04jIlIzFXqpXC76gJEGdolIE1Khl5oyBY49VgO7RKQpqdDL5XLwzDOwenXoJCIiNVGhl5s3D/bdVydHRaTpqNDLHXzwroFdH3wQOo2ISGwq9EpyuehKFw3sEpEmokKvRAO7RKQJqdAr2WefaL7Lgw/Ca6+FTiMiEosKvT/z50e/3nxz0BgiInGp0PvT0REdernpJg3sEpGmoELfm3w+OuTyyCOhk4iIVKVC35vzz4eDDtLJURFpCir0vdlvv+iDRsuWaWCXiKSeCr2avoFdP/lJ6CQiInulQq/mc5+LvjSwS0RSToUeRz4Pzz4bfYmIpFSsQjezGWb2kpn1mNm1/ewz3cxWm9laM/tFsjEDu+wyDewSkdSrWuhmNgS4HjgbmATMNbNJZfuMBH4AnOfuRwIX1yFrOAcdBBdcAN3dGtglIqkV5x36NKDH3de7+w5gKTC7bJ/LgGXuvhHA3bckGzMF+gZ23XVX6CQiIhXFKfQxwKaSx73FbaWOAA4ys8fMbJWZXVnpB5nZAjMrmFlh69atA0scymmnRZ8e1WEXEUmpOIVuFbZ52eOhwFTgHOAs4F/M7Ig9fpP7Infvcveu9vb2msMG1Tew66GHNLBLRFIpTqH3AuNKHo8FNlfY5353/5O7bwMeByYnEzFFrr46+vWmm4LGEBGpJE6hrwQmmtkEMxsOzAGWl+1zN/BFMxtqZm3A8cC6ZKOmQEcHnH66BnaJSCpVLXR33wlcA6wgKunb3H2tmS00s4XFfdYB9wNrgKeBH7r7C/WLHVA+Dxs3wsMPh04iIrIbcy8/HN4YXV1dXigUgrz2oHzwARx6KJx1FixZEjqNiLQYM1vl7l2VntMnRWu1335w+eXws5/B22+HTiMi8hEV+kBoYJeIpJAKfSCOPRamTNHALhFJFRX6QOXzsHq1BnaJSGqo0Adq7txoYJfepYtISqjQB+qgg+DCCzWwS0RSQ4U+GLkcvPNOdMWLiEhgKvTBOPVU6OzUYRcRSQUV+mD0Dex6+GHYsCF0GhFpcSr0wbr6ajDTwC4RCU6FPljjx8MZZ0SF/uGHodOISAtToSchn4dNmzSwS0SCUqEnYfZsOPhg3c1IRIJSoSdh3313Dex6663QaUSkRanQk5LLwY4dGtglIsGo0JMyeTJMnRpdkx5oxryItDYVepLyeXjuOQ3sEpEgVOhJmjs3ugGGPjkqIgGo0JM0cuSugV3vvx86jYi0GBV60nI5ePddDewSkYZToSdt+nSYMEGHXUSk4VToSesb2PXII/Dqq6HTiEgLUaHXgwZ2iUgAKvR6GDcOzjwTbr5ZA7tEpGFU6PXSN7DroYdCJxGRFqFCr5fzzoNDDtHALhFpGBV6vfQN7LrrLg3sEpGGUKHXUz4fDezq7g6dRERagAq9no4+Grq6NLBLRBpChV5v+TysWQPPPBM6iYhknAq93ubM0cAuEWmIWIVuZjPM7CUz6zGza/ey33Fm9qGZXZRcxCY3ciRcdFF04wsN7BKROqpa6GY2BLgeOBuYBMw1s0n97PefwIqkQza9voFdy5aFTiIiGRbnHfo0oMfd17v7DmApMLvCfn8P3AlsST
BfNpxyChx2mA67iEhdxSn0McCmkse9xW0fMbMxwJeBG/b2g8xsgZkVzKywdevWWrM2r76BXY8+CuvXh04jIhkVp9Ctwrbya/C+A3zd3fc6uMTdF7l7l7t3tbe3x82YDRrYJSJ1FqfQe4FxJY/HApvL9ukClprZBuAi4Admdn4iCbNi7Fg46ywN7BKRuolT6CuBiWY2wcyGA3OA5aU7uPsEd+90907gDuDv3P2uxNM2u3weenvhwQdDJxGRDKpa6O6+E7iG6OqVdcBt7r7WzBaa2cJ6B8yUc8/VwC4RqZuhcXZy93uBe8u2VTwB6u5XDz5WRu27L1xxBVx/PWzbBqNGhU4kIhmiT4o2Wj4Pf/mLBnaJSOJU6I121FFw3HEa2CUiiVOhh5DPw/PPQ6EQOomIZIgKPYQ5c2DECJ0cFZFEqdBDOPDAXQO7tm8PnUZEMkKFHkouB++9p4FdIpIYFXoop5wChx+ugV0ikhgVeihm0cCuxx6DV14JnUZEMkCFHtJVV0WTGDWwS0QSoEIPSQO7RCRBKvTQ8nl4/XV44IHQSUSkyanQQzv33Gimi65JF5FBUqGHNnx4NLDr7ruhle7iJCKJU6GnQS6ngV0iMmgq9DQ46iiYNk0Du0RkUFToaZHPwwsvwMqVoZOISJNSoafFpZdqYJeIDIoKPS0OPBAuvhiWLNHALhEZEBV6mvQN7LrzztBJRKQJqdDT5OST4dOf1sAuERkQFXqa9A3s+sUvoKcndBoRaTIq9LTRwC4RGSAVetqMGQMzZmhgl4jUTIWeRvk8bN4MK1aETiIiTUSFnkazZkF7u65JF5GaqNDTqG9g1/LlGtglIrGp0NOqb2DXrbeGTiIiTUKFnlZHHgnHH6+BXSISmwo9zfJ5ePFFePrp0ElEpAmo0NPs0kuhrU0nR0UklliFbmYzzOwlM+sxs2srPD/PzNYUv35lZpOTj9qCDjhg18CuP/0pdBoRSbmqhW5mQ4DrgbOBScBcM5tUtturwCnufgzw78CipIO2rFwO/vAHDewSkarivEOfBvS4+3p33wEsBWaX7uDuv3L33xcfPgmMTTZmC/viFzWwS0RiiVPoY4BNJY97i9v6kwfuq/SEmS0ws4KZFbbq+up4zKJ36Y8/Dr/9beg0IpJicQrdKmyreB2dmZ1KVOhfr/S8uy9y9y5372pvb4+fstVpYJeIxBCn0HuBcSWPxwKby3cys2OAHwKz3f2tZOIJAIceCmefDbfcAjt3hk4jIikVp9BXAhPNbIKZDQfmAMtLdzCz8cAy4Ap3fzn5mKKBXSJSTdVCd/edwDXACmAdcJu7rzWzhWa2sLjbt4BDgB+Y2WozK9QtcauaNQs+8QmdHBWRfg2Ns5O73wvcW7bthpLvvwJ8Jdlospthw6KBXd/9LmzZEpW7iEgJfVK0meRy0TF0DewSkQpU6M1k0iQ44YRoFIAGdolIGRV6s+kb2PXUU6GTiEjKqNCbzSWXaGCXiFSkQm82BxwQlfrSpRrYJSK7UaE3o76BXXfcETqJiKSICr0ZfeELMHGirkkXkd2o0JtR38CuJ56Al/XBXBGJqNCb1VVXwZAhGtglIh9RoTer0aNh5kwN7BKRj6jQm1kuB2+8AfffHzqJiKSACr2ZnXOOBnaJyEdU6M1s2DC48kq45x743e9CpxGRwFTozU4Du0SkSIXe7D77WTjxRA3sEhEVeibk87BuHTz5ZOgkIhKQCj0LLrkEPvYxDewSaXEq9CzYf/9dA7v++MfQaUQkEBV6VuRyUZlrYJdIy1KhZ8VJJ8ERR+iadJEWpkLPir6BXb/8pQZ2ibQoFXqW9A3s0slRkZakQs+ST30qGgeggV0iLUmFnjW5HLz5Jtx3X+gkItJgKvSsmTkTPvlJnRwVaUEq9KwpHdj15puh04hIA6nQsyiXgw8/1MAukRajQs+iz3wGPv95DewSaTEq9KzK5+E3v4Ff/zp0EhFpEBV6Vl18sQZ2ibQYFXpW7b8/XH
op/PSnGtgl0iJiFbqZzTCzl8ysx8yurfC8mdn3is+vMbMpyUeVmvUN7OrshH32iX7t7g6dKt26u7VetdB61abe6+Xue/0ChgCvAIcBw4HngEll+8wE7gMMOAF4qtrPnTp1qkudLV7sbuYenRqNvtraou2yp8WLo/XResWj9apNQusFFLyfXjWvchWEmZ0I/Ku7n1V8/I3iPwT/UbLP/wGPufuS4uOXgOnu/kZ/P7erq8sLhcJA/g2SuDo74bXX9tw+dGg0mVF29/LLlUcmaL0q03rVpr/16uiADRti/xgzW+XuXZWeGxrj948BNpU87gWOj7HPGGC3QjezBcACgPHjx8d4aRmUjRsrb9+5EyZNamyWZvDii5W3a70q03rVpr/16u/v6QDEKXSrsK38bX2cfXD3RcAiiN6hx3htGYzx4yu/Q+/ogNtvb3yetOvvfzRar8q0XrXpb70SfHMb56RoLzCu5PFYYPMA9pFGu+46aGvbfVtbW7Rd9qT1qo3WqzaNWK/+Dq73fRG9i18PTGDXSdEjy/Y5h91Pij5d7efqpGiDLF7s3tERnRzt6NAJq2q0XrXRetUmgfViMCdFAcxsJvAdoitebnT368xsYfEfhBvMzIDvAzOA7cB8d9/rGU+dFBURqd1gT4ri7vcC95Ztu6Hkewe+OpiQIiIyOPqkqIhIRqjQRUQyQoUuIpIRKnQRkYyIdZVLXV7YbCtQ4Sr7WEYB2xKMk5S05oL0ZlOu2ihXbbKYq8Pd2ys9EazQB8PMCv1dthNSWnNBerMpV22UqzatlkuHXEREMkKFLiKSEc1a6ItCB+hHWnNBerMpV22UqzYtlaspj6GLiMiemvUduoiIlFGhi4hkRKoLPa03p46Ra7qZvWtmq4tf32pQrhvNbIuZvdDP86HWq1quhq+XmY0zs0fNbJ2ZrTWzr1XYp+HrFTNXiPXaz8yeNrPnirn+rcI+IdYrTq4gfx+Lrz3EzJ41s3sqPJf8evU3Vzf0F3W6OXWDck0H7gmwZicDU4AX+nm+4esVM1fD1wsYDUwpfr8/8HJK/nzFyRVivQz4ePH7YcBTwAkpWK84uYL8fSy+9j8CP6n0+vVYrzS/Q58G9Lj7enffASwFZpftMxv4sUeeBEaa2egU5ArC3R8H3t7LLiHWK06uhnP3N9z9meL3fwDWEd0Ht1TD1ytmroYrrsEfiw+HFb/Kr6gIsV5xcgVhZmOJbv7zw352SXy90lzo/d14utZ9QuQCOLH438D7zOzIOmeKK8R6xRVsvcysE/gc0bu7UkHXay+5IMB6FQ8frAa2AA+6eyrWK0YuCPPn6zvAPwF/7ef5xNcrzYWe2M2pExbnNZ8hmrcwGfgf4K46Z4orxHrFEWy9zOzjwJ3AP7j7e+VPV/gtDVmvKrmCrJe7f+juxxLdM3iamR1VtkuQ9YqRq+HrZWazgC3uvmpvu1XYNqj1SnOhp/Xm1FVf093f6/tvoEd3expmZqPqnCuOVN7MO9R6mdkwotLsdvdlFXYJsl7VcoX+8+Xu7wCPEd1yslTQP1/95Qq0XicB55nZBqLDsqeZ2eKyfRJfrzQX+kpgoplNMLPhwBxgedk+y4Eri2eLTwDedfc3Qucys0+ZmRW/n0a0zm/VOVccIdarqhDrVXy9HwHr3P2/+9mt4esVJ1eg9Wo3s5HF70cApwO/KdstxHpVzRVivdz9G+4+1t07iTriEXe/vGy3xNcr1j1FQ3D3nWZ2DbCCXTenXmslN6cmus/pTKCH4s2pU5LrIuBvzWwn8D4wx4untevJzJYQndEfZWa9wLeJThIFW6+YuUKs10nAFcDzxeOvAP8MjC/JFWK94uQKsV6jgVvMbAhRId7m7veE/vsYM1eQv4+V1Hu99NF/EZGMSPMhFxERqYEKXUQkI1ToIiIZoUIXEckIFbqISEao0EVEMkKFLiKSEf8PNme4Gay9ql4AAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"mdlPCA = PCA(n_components=5)\n",
"XPCA = mdlPCA.fit_transform(df2)\n",
"\n",
"print(mdlPCA.explained_variance_ratio_)\n",
"print(np.sum(mdlPCA.explained_variance_ratio_))\n",
"plt.plot(np.arange(0,mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 5.23403869e-06 7.40036105e-06 8.97709992e-06 9.61319140e-06\n",
" 9.32793474e-06 3.94503126e-06 8.95009702e-07 -3.58017421e-06\n",
" -6.06213801e-06 7.55257058e-06 1.00000000e+00 -7.70261980e-07\n",
" -6.96515391e-07 1.46677737e-06]\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"print(mdlPCA.components_[0,:])\n",
"plt.plot(np.arange(0,mdlPCA.components_[0,:].shape[0]), mdlPCA.components_[0,:], '-ro')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Question: What is \"wrong\" in the data?"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 6.473684 | \n",
" 5.789474 | \n",
" 5.789474 | \n",
" 4.789474 | \n",
" 4.631579 | \n",
" 4.789474 | \n",
" 5.736842 | \n",
" 6.842105 | \n",
" 6.000000 | \n",
" 7.789474 | \n",
" 72315.052632 | \n",
" 0.263158 | \n",
" 0.210526 | \n",
" 0.526316 | \n",
"
\n",
" \n",
" std | \n",
" 1.540866 | \n",
" 1.781976 | \n",
" 1.812884 | \n",
" 2.016018 | \n",
" 2.113726 | \n",
" 2.393949 | \n",
" 2.256893 | \n",
" 1.708253 | \n",
" 1.795055 | \n",
" 1.618605 | \n",
" 92341.691330 | \n",
" 0.452414 | \n",
" 0.418854 | \n",
" 0.512989 | \n",
"
\n",
" \n",
" min | \n",
" 4.000000 | \n",
" 3.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 4.000000 | \n",
" 2.000000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 5.500000 | \n",
" 4.500000 | \n",
" 5.000000 | \n",
" 4.000000 | \n",
" 3.000000 | \n",
" 3.500000 | \n",
" 4.500000 | \n",
" 5.500000 | \n",
" 5.000000 | \n",
" 7.000000 | \n",
" 60741.500000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 7.000000 | \n",
" 6.000000 | \n",
" 6.000000 | \n",
" 5.000000 | \n",
" 5.000000 | \n",
" 5.000000 | \n",
" 6.000000 | \n",
" 7.000000 | \n",
" 6.000000 | \n",
" 8.000000 | \n",
" 60773.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 8.000000 | \n",
" 7.000000 | \n",
" 7.000000 | \n",
" 6.000000 | \n",
" 6.000000 | \n",
" 7.000000 | \n",
" 7.000000 | \n",
" 8.000000 | \n",
" 7.000000 | \n",
" 8.500000 | \n",
" 60806.500000 | \n",
" 0.500000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 9.000000 | \n",
" 9.000000 | \n",
" 9.000000 | \n",
" 9.000000 | \n",
" 9.000000 | \n",
" 8.000000 | \n",
" 10.000000 | \n",
" 10.000000 | \n",
" 10.000000 | \n",
" 10.000000 | \n",
" 443956.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 \\\n",
"count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n",
"mean 6.473684 5.789474 5.789474 4.789474 4.631579 4.789474 \n",
"std 1.540866 1.781976 1.812884 2.016018 2.113726 2.393949 \n",
"min 4.000000 3.000000 1.000000 1.000000 1.000000 1.000000 \n",
"25% 5.500000 4.500000 5.000000 4.000000 3.000000 3.500000 \n",
"50% 7.000000 6.000000 6.000000 5.000000 5.000000 5.000000 \n",
"75% 8.000000 7.000000 7.000000 6.000000 6.000000 7.000000 \n",
"max 9.000000 9.000000 9.000000 9.000000 9.000000 8.000000 \n",
"\n",
" Q8 Q9 Q10 Q11 tsRel Q6_Reading \\\n",
"count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n",
"mean 5.736842 6.842105 6.000000 7.789474 72315.052632 0.263158 \n",
"std 2.256893 1.708253 1.795055 1.618605 92341.691330 0.452414 \n",
"min 1.000000 4.000000 2.000000 3.000000 0.000000 0.000000 \n",
"25% 4.500000 5.500000 5.000000 7.000000 60741.500000 0.000000 \n",
"50% 6.000000 7.000000 6.000000 8.000000 60773.000000 0.000000 \n",
"75% 7.000000 8.000000 7.000000 8.500000 60806.500000 0.500000 \n",
"max 10.000000 10.000000 10.000000 10.000000 443956.000000 1.000000 \n",
"\n",
" Q6_Running Q6_Watching a movie \n",
"count 19.000000 19.000000 \n",
"mean 0.210526 0.526316 \n",
"std 0.418854 0.512989 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 1.000000 \n",
"75% 0.000000 1.000000 \n",
"max 1.000000 1.000000 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's normalize the data"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"df2_norm = (df2-df2.min())/(df2.max()-df2.min())"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.8 | \n",
" 0.333333 | \n",
" 0.375 | \n",
" 0.625 | \n",
" 0.750 | \n",
" 0.571429 | \n",
" 0.222222 | \n",
" 0.166667 | \n",
" 0.625 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.8 | \n",
" 0.833333 | \n",
" 0.500 | \n",
" 0.500 | \n",
" 0.625 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.333333 | \n",
" 0.625 | \n",
" 0.714286 | \n",
" 0.000619 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.625 | \n",
" 0.500 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.571429 | \n",
" 0.040418 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.2 | \n",
" 0.000000 | \n",
" 0.625 | \n",
" 0.375 | \n",
" 0.375 | \n",
" 0.285714 | \n",
" 0.777778 | \n",
" 0.666667 | \n",
" 0.375 | \n",
" 1.000000 | \n",
" 0.136721 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.500 | \n",
" 0.375 | \n",
" 0.250 | \n",
" 0.428571 | \n",
" 0.444444 | \n",
" 0.000000 | \n",
" 1.000 | \n",
" 0.714286 | \n",
" 0.136793 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n",
"0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n",
"1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n",
"2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n",
"3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n",
"4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n",
"\n",
" Q11 tsRel Q6_Reading Q6_Running Q6_Watching a movie \n",
"0 0.000000 0.000000 0.0 1.0 0.0 \n",
"1 0.714286 0.000619 1.0 0.0 0.0 \n",
"2 0.571429 0.040418 0.0 0.0 1.0 \n",
"3 1.000000 0.136721 0.0 0.0 1.0 \n",
"4 0.714286 0.136793 1.0 0.0 0.0 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2_norm.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
" 19.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.494737 | \n",
" 0.464912 | \n",
" 0.598684 | \n",
" 0.473684 | \n",
" 0.453947 | \n",
" 0.541353 | \n",
" 0.526316 | \n",
" 0.473684 | \n",
" 0.500000 | \n",
" 0.684211 | \n",
" 0.162888 | \n",
" 0.263158 | \n",
" 0.210526 | \n",
" 0.526316 | \n",
"
\n",
" \n",
" std | \n",
" 0.308173 | \n",
" 0.296996 | \n",
" 0.226611 | \n",
" 0.252002 | \n",
" 0.264216 | \n",
" 0.341993 | \n",
" 0.250766 | \n",
" 0.284709 | \n",
" 0.224382 | \n",
" 0.231229 | \n",
" 0.207997 | \n",
" 0.452414 | \n",
" 0.418854 | \n",
" 0.512989 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.300000 | \n",
" 0.250000 | \n",
" 0.500000 | \n",
" 0.375000 | \n",
" 0.250000 | \n",
" 0.357143 | \n",
" 0.388889 | \n",
" 0.250000 | \n",
" 0.375000 | \n",
" 0.571429 | \n",
" 0.136819 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.600000 | \n",
" 0.500000 | \n",
" 0.625000 | \n",
" 0.500000 | \n",
" 0.500000 | \n",
" 0.571429 | \n",
" 0.555556 | \n",
" 0.500000 | \n",
" 0.500000 | \n",
" 0.714286 | \n",
" 0.136890 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.800000 | \n",
" 0.666667 | \n",
" 0.750000 | \n",
" 0.625000 | \n",
" 0.625000 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.666667 | \n",
" 0.625000 | \n",
" 0.785714 | \n",
" 0.136965 | \n",
" 0.500000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 \\\n",
"count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n",
"mean 0.494737 0.464912 0.598684 0.473684 0.453947 0.541353 \n",
"std 0.308173 0.296996 0.226611 0.252002 0.264216 0.341993 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.300000 0.250000 0.500000 0.375000 0.250000 0.357143 \n",
"50% 0.600000 0.500000 0.625000 0.500000 0.500000 0.571429 \n",
"75% 0.800000 0.666667 0.750000 0.625000 0.625000 0.857143 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" Q8 Q9 Q10 Q11 tsRel Q6_Reading \\\n",
"count 19.000000 19.000000 19.000000 19.000000 19.000000 19.000000 \n",
"mean 0.526316 0.473684 0.500000 0.684211 0.162888 0.263158 \n",
"std 0.250766 0.284709 0.224382 0.231229 0.207997 0.452414 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.388889 0.250000 0.375000 0.571429 0.136819 0.000000 \n",
"50% 0.555556 0.500000 0.500000 0.714286 0.136890 0.000000 \n",
"75% 0.666667 0.666667 0.625000 0.785714 0.136965 0.500000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" Q6_Running Q6_Watching a movie \n",
"count 19.000000 19.000000 \n",
"mean 0.210526 0.526316 \n",
"std 0.418854 0.512989 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 0.000000 \n",
"50% 0.000000 1.000000 \n",
"75% 0.000000 1.000000 \n",
"max 1.000000 1.000000 "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2_norm.describe()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.31431827 0.29012089 0.15348486 0.07788586 0.04541328]\n",
"0.8812231522564273\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"mdlPCA = PCA(n_components=5)\n",
"XPCA = mdlPCA.fit_transform(df2_norm)\n",
"\n",
"print(mdlPCA.explained_variance_ratio_)\n",
"print(np.sum(mdlPCA.explained_variance_ratio_))\n",
"\n",
"plt.plot(np.arange(0,mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.28489905 -0.3141605 -0.26107415 -0.25405393 -0.27500032 -0.36871538\n",
" -0.17301488 0.12875075 0.16485422 -0.03296608 -0.13459097 0.4957779\n",
" -0.16340968 -0.33236822]\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"print(mdlPCA.components_[0,:])\n",
"plt.plot(np.arange(0,mdlPCA.components_[0,:].shape[0]), mdlPCA.components_[0,:], '-ro')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Remove tsRel column"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.8 | \n",
" 0.333333 | \n",
" 0.375 | \n",
" 0.625 | \n",
" 0.750 | \n",
" 0.571429 | \n",
" 0.222222 | \n",
" 0.166667 | \n",
" 0.625 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.8 | \n",
" 0.833333 | \n",
" 0.500 | \n",
" 0.500 | \n",
" 0.625 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.333333 | \n",
" 0.625 | \n",
" 0.714286 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.625 | \n",
" 0.500 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.571429 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.2 | \n",
" 0.000000 | \n",
" 0.625 | \n",
" 0.375 | \n",
" 0.375 | \n",
" 0.285714 | \n",
" 0.777778 | \n",
" 0.666667 | \n",
" 0.375 | \n",
" 1.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.500 | \n",
" 0.375 | \n",
" 0.250 | \n",
" 0.428571 | \n",
" 0.444444 | \n",
" 0.000000 | \n",
" 1.000 | \n",
" 0.714286 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n",
"0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n",
"1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n",
"2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n",
"3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n",
"4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n",
"\n",
" Q11 Q6_Reading Q6_Running Q6_Watching a movie \n",
"0 0.000000 0.0 1.0 0.0 \n",
"1 0.714286 1.0 0.0 0.0 \n",
"2 0.571429 0.0 0.0 1.0 \n",
"3 1.000000 0.0 0.0 1.0 \n",
"4 0.714286 1.0 0.0 0.0 "
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2_noTs = df2_norm[df2_norm.columns[df2_norm.columns.str.contains('tsRel')==False]]\n",
"df2_noTs.head()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.8 | \n",
" 0.333333 | \n",
" 0.375 | \n",
" 0.625 | \n",
" 0.750 | \n",
" 0.571429 | \n",
" 0.222222 | \n",
" 0.166667 | \n",
" 0.625 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.8 | \n",
" 0.833333 | \n",
" 0.500 | \n",
" 0.500 | \n",
" 0.625 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.333333 | \n",
" 0.625 | \n",
" 0.714286 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.625 | \n",
" 0.500 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.571429 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.2 | \n",
" 0.000000 | \n",
" 0.625 | \n",
" 0.375 | \n",
" 0.375 | \n",
" 0.285714 | \n",
" 0.777778 | \n",
" 0.666667 | \n",
" 0.375 | \n",
" 1.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.500 | \n",
" 0.375 | \n",
" 0.250 | \n",
" 0.428571 | \n",
" 0.444444 | \n",
" 0.000000 | \n",
" 1.000 | \n",
" 0.714286 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n",
"0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n",
"1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n",
"2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n",
"3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n",
"4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n",
"\n",
" Q11 Q6_Reading Q6_Running Q6_Watching a movie \n",
"0 0.000000 0.0 1.0 0.0 \n",
"1 0.714286 1.0 0.0 0.0 \n",
"2 0.571429 0.0 0.0 1.0 \n",
"3 1.000000 0.0 0.0 1.0 \n",
"4 0.714286 1.0 0.0 0.0 "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2_noTs = df2_norm.drop(columns=['tsRel'])\n",
"df2_noTs.head()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.31894549 0.29885772 0.1547158 0.07966809 0.04678632]\n",
"0.898973407637008\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"mdlPCA = PCA(n_components=5)\n",
"XPCA = mdlPCA.fit_transform(df2_noTs)\n",
"\n",
"print(mdlPCA.explained_variance_ratio_)\n",
"print(np.sum(mdlPCA.explained_variance_ratio_))\n",
"\n",
"plt.plot(np.arange(0,mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.30244657 -0.32214438 -0.26410732 -0.25227651 -0.27659662 -0.3869891\n",
" -0.17634825 0.12991123 0.1677439 -0.02453888 0.49300779 -0.20299601\n",
" -0.29001178]\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"print(mdlPCA.components_[0,:])\n",
"plt.plot(np.arange(0,mdlPCA.components_[0,:].shape[0]), mdlPCA.components_[0,:], '-ro')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Alternative approach: Keep tsRel, but exclude outliers"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" tsRel | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 0.25 | \n",
" 0.0 | \n",
" 0.714286 | \n",
" 0.500000 | \n",
" 0.500000 | \n",
" 0.285714 | \n",
" 0.777778 | \n",
" 0.666667 | \n",
" 0.375 | \n",
" 1.00 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.50 | \n",
" 0.6 | \n",
" 0.571429 | \n",
" 0.500000 | \n",
" 0.333333 | \n",
" 0.428571 | \n",
" 0.444444 | \n",
" 0.000000 | \n",
" 1.000 | \n",
" 0.50 | \n",
" 0.132780 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 1.00 | \n",
" 0.8 | \n",
" 1.000000 | \n",
" 0.333333 | \n",
" 0.333333 | \n",
" 1.000000 | \n",
" 0.333333 | \n",
" 1.000000 | \n",
" 0.000 | \n",
" 0.50 | \n",
" 0.228216 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
" 0.750 | \n",
" 0.50 | \n",
" 0.257261 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.75 | \n",
" 0.0 | \n",
" 0.857143 | \n",
" 0.833333 | \n",
" 0.666667 | \n",
" 0.428571 | \n",
" 0.555556 | \n",
" 0.666667 | \n",
" 0.500 | \n",
" 0.75 | \n",
" 0.269710 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 \\\n",
"3 0.25 0.0 0.714286 0.500000 0.500000 0.285714 0.777778 0.666667 \n",
"4 0.50 0.6 0.571429 0.500000 0.333333 0.428571 0.444444 0.000000 \n",
"5 1.00 0.8 1.000000 0.333333 0.333333 1.000000 0.333333 1.000000 \n",
"6 0.00 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 \n",
"7 0.75 0.0 0.857143 0.833333 0.666667 0.428571 0.555556 0.666667 \n",
"\n",
" Q10 Q11 tsRel Q6_Reading Q6_Running Q6_Watching a movie \n",
"3 0.375 1.00 0.000000 0.0 0.0 1.0 \n",
"4 1.000 0.50 0.132780 1.0 0.0 0.0 \n",
"5 0.000 0.50 0.228216 0.0 1.0 0.0 \n",
"6 0.750 0.50 0.257261 1.0 0.0 0.0 \n",
"7 0.500 0.75 0.269710 1.0 0.0 0.0 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2Fil = df2[np.logical_and(df2.tsRel>60000, df2.tsRel<61000)]\n",
"df2Fil_norm = (df2Fil-df2Fil.min())/(df2Fil.max()-df2Fil.min())\n",
"df2Fil_norm.head()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.34631596 0.27017825 0.14469026 0.07712654 0.05616314]\n",
"0.8944741480656595\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAfj0lEQVR4nO3de3xV1Zn/8c9DABHQYSoRkADxgkVQUcwg9Ya2lQFveB80tbVqY1RqHceptrZURlGrtXeoUnVaFUUs6qAV4dfWFlsvJWgUUHEQBSIqiFakiEh5fn+sk+GQnJgdOCfrXL7v1yuv5OxL8mS/8OvOOms929wdEREpXh1iFyAiIrmloBcRKXIKehGRIqegFxEpcgp6EZEi1zF2AZn07NnTKysrY5chIlIwFixY8K67l2fal5dBX1lZSV1dXewyREQKhpktb2mfhm5ERIqcgl5EpMgp6EVEilyioDez0Wa2xMyWmtlVGfaPNbMXzazezOrM7Ii0fW+Y2cLGfdksXkREWtfqm7FmVgZMBo4FGoD5ZjbL3V9KO+z3wCx3dzM7EJgBDErbf4y7v5vFukVEJKEkd/TDgaXuvszdNwHTgbHpB7j7et/aHa0b0P6d0qZNg8pK6NAhfJ42rd1LEBHJR0mCvi+wMu11Q2rbNszsFDN7BfgtcF7aLgfmmtkCM6tp6YeYWU1q2KduzZo1yapvNG0a1NTA8uXgHj7X1CjsRURIFvSWYVuzO3Z3f8jdBwEnA9em7Trc3YcBY4BLzOyoTD/E3ae6e5W7V5WXZ5zz37Krr4YNG7bdtmFD2C4iUuKSBH0D0C/tdQWwqqWD3X0esLeZ9Uy9XpX6vBp4iDAUlF0rVrRtu4hICUkS9POBgWa2p5l1BsYBs9IPMLN9zMxSXw8DOgNrzaybme2S2t4NGAUsyuYvAED//pm39+6d9R8lIlJoWg16d98MjAfmAC8DM9x9sZnVmllt6rDTgEVmVk+YofNvqTdnewF/NrMXgL8Cv3X3x7P+W0yaBF27Nt++ejXcfDP84x9Z/5EiIoXC8vFRglVVVd7mXjfTpoUx+RUrwh3+lVfC3Lnw8MNw+OHwq1/BPvvkpF4RkdjMbIG7V2XaVzwrY6ur4Y03YMuW8Pmii+DBB+Guu2DRIhg6FH7xizArR0SkhBRP0GdiBuecE4L+iCPg4ovhX/8VVq5s/VwRkSJR3EHfqKICHn883NE/9RQccEC409fdvYiUgNIIegh397W18MILIei/8hU49dTwhq2ISBErnaBvtPfe8Mc/wg9+ALNnw5AhYSxfRKRIlV7QA5SVwX/8ByxYEGbonHZaGMt///3YlYmIZF1pBn2jIUPgmWfgmmtg+nTYf3+YMyd2VSIiWVXaQQ/QqRN873sh8Hv0gNGjw1j++vWxKxMRyQoFfaNDDglDOVdcAVOnwoEHwpNPxq5KRGSHKejTdekSWibMmxdm6YwcGYJ/48bYlYmIbDcFfSZHHBGmYdbWwi23wLBh0NaWDCIieUJB35Lu3WHKlPDm7Lp1MGJEGMv/5JPYlYmItImCvjWjRoUWCmefDf/1X3DooeG1iEiBUNAn0aNHaJnw4IPQ0BDeuL3pJrU/FpGCoKBvi1NOCXfzxx8f2iCPHAlLl8auSkTkUyno22r33WHmTLj77q3tj6dMUYM0EclbCvrtYQZf+lII+iOPhEsuUftjEclbCvodUVERGqPdeqvaH4tI3lLQ7ygzuPDC5u2P33kndmUiIoCCPnuatj/ef/8wli8iEpmCPpsa2x8/9xwMGACnnx7G8tX+WEQiUtDnwuDB8PTTMHEi3H9/uLt//PHYVYlIiVLQ50qnTjBhwtb2x2PGhN45H34YuzIRKTEK+lxrbH/8n/8Z2h8PHRq6Y4qItBMFfXvo0iW0TGhsf3z00WEs/6OPYlcmIiUgUdCb2WgzW2JmS83sqgz7x5rZi2ZWb2Z1ZnZE0nNLSm
P744sugh/+MNztq/2xiORYq0FvZmXAZGAMMBg4y8wGNzns98BQdz8IOA+4vQ3nlpbu3WHy5Obtjzdtil2ZiBSpJHf0w4Gl7r7M3TcB04Gx6Qe4+3r3/1sO2g3wpOeWrMb2x9XVof3xiBFqfywiOZEk6PsC6U1cGlLbtmFmp5jZK8BvCXf1ic9NnV+TGvapW7NmTZLaC1+PHvDrX8NDD8Gbb6r9sYjkRJKgtwzbmjVzcfeH3H0QcDJwbVvOTZ0/1d2r3L2qvLw8QVlF5OSTw938CSeE9sdHHaX2xyKSNUmCvgHol/a6AljV0sHuPg/Y28x6tvXcklZeDr/5DdxzD7z00tb2x1u2xK5MRApckqCfDww0sz3NrDMwDpiVfoCZ7WNmlvp6GNAZWJvkXEljFsbs1f5YRLKo1aB3983AeGAO8DIww90Xm1mtmdWmDjsNWGRm9YRZNv/mQcZzc/GLFJW+fUNjtNtuC60U9t8/jOWr/bGIbAfzPAyPqqoqr9P88mDZMjj3XHjySRg7NoR/r16xqxKRPGNmC9y9KtM+rYzNd3vtBU88AbfcEhqjqf2xiLSRgr4QlJXB5Zdv2/64ulrtj0UkEQV9IUlvfzxjhtofi0giCvpC09j++Nln4Z//ObQ/vvBCtT8WkRYp6AvVsGGhIdo3vwm//KXaH4tIixT0haxLF/j+98OMnA4dQvvjyy9X+2MR2YaCvhgcfvjW9sc/+lG4258/P3ZVIpInFPTFolu30P547lxYvx4+97kwlq/2xyIlT0FfbI49FhYuhC99Ca69Vu2PRURBX5R69IBf/Urtj0UEUNAXt8b2xyeeqPbHIiVMQV/sysvhgQdg2rSt7Y8nTw7tkCsrw2ydysqwX0SKUsfYBUg7MIOzz4aRI+GCC2D8+BDwjb3uly+HmprwdXV1vDpFJCd0R19K+vaFxx6Dz3ym+QNNNmyAq6+OU5eI5JSCvtSYtdwMbcWK9q1FRNqFgr4U9e/ftu0iUtAU9KVo0iTo2nXbbWahfYKIFB0FfSmqroapU0NvezPo0yf0zbn1VnjvvdjViUiWKehLVXU1vPFGeFN21arwJu1rr4XHFW7cGLs6EckiBb0ERx8Nd98Nf/5zaJ+gVbQiRUNBL1udeWbofjlzJlx2GeThg+NFpO20YEq2ddll0NAQHkber194sImIFDQFvTR3002hGdqVV4ZFVlotK1LQFPTSXIcOofvl22/DV78KvXrBF78YuyoR2U4ao5fMdtoptDkeNAhOPRXq62NXJCLbKVHQm9loM1tiZkvN7KoM+6vN7MXUx1NmNjRt3xtmttDM6s2sLpvFS4716AGzZ4fPY8aE6ZgiUnBaDXozKwMmA2OAwcBZZja4yWGvAyPd/UDgWmBqk/3HuPtB7l6VhZqlPfXtG8J+40YYPRrWro1dkYi0UZI7+uHAUndf5u6bgOnA2PQD3P0pd2/slPUMUJHdMiWqIUNg1qxwR3/SSfDRR7ErEpE2SBL0fYGVaa8bUttacj4wO+21A3PNbIGZ1bR0kpnVmFmdmdWtWbMmQVnSro48Mjys5OmnQ297LagSKRhJgt4ybMu4ksbMjiEE/ZVpmw9392GEoZ9LzOyoTOe6+1R3r3L3qvLy8gRlSbs7/XT48Y/h4Yfh0ku1oEqkQCSZXtkA9Et7XQGsanqQmR0I3A6Mcff/G8h191Wpz6vN7CHCUNC8HSlaIrr00rCg6uabw4Kqq5q9Ny8ieSbJHf18YKCZ7WlmnYFxwKz0A8ysP/AgcI67v5q2vZuZ7dL4NTAKWJSt4iWSG28Mwzff+hbcdVfsakSkFa3e0bv7ZjMbD8wByoA73X2xmdWm9t8KTAB2A6aYGcDm1AybXsBDqW0dgXvd/fGc/CbSfjp0gP/+b3jnHTj/fOjdG0aNil2ViLTAPA/HWauqqryuTlPu8966dXDUUaG98Z/+BMOGxa5IpGSZ2YKWprBrZaxsv1133fqw8eOOg9dfj12RiGSgoJcds8
ce8PjjsGlTWFD17ruxKxKRJhT0suP22y8sqFq+PCyo2rAhdkUikkZBL9lxxBFw773wzDNw1lmweXPsikQkRUEv2XPqqfDTn4a7+/HjtaBKJE+oH71k1/jx4aElN94YFlRdfXXsikRKnoJesu/660PYf+c7ofvluefGrkikpCnoJfvM4Pbb4a234IILwhOqxoyJXZVIydIYveRG584wcyYccACccQZoAZxINAp6yZ3GBVU9e8Lxx8OyZbErEilJCnrJrT59woKqzZvDgio9a0Ck3SnoJfcGDYJHHoGVK+GEE+Dvf49dkUhJUdBL+zjsMLjvvjBWP26cFlSJtCMFvbSfk0+Gn/8cHn0ULr5YC6pE2ommV0r7uuii8ISq66+HigqYMCF2RSJFT0Ev7e+660LYf+97YUHV+efHrkikqCnopf01Lqh65x248MIwM+e442JXJVK0NEYvcXTqBA88AEOHhgVV8+fHrkikaCnoJZ5ddoHf/ja0SDj+eFi6NHZFIkVJQS9x9e4dFlRt2RIWVK1eHbsikaKjoJf49t03TLlctSrc2a9fH7sikaKioJf8MGIETJ8Ozz0HZ54Jn3wSuyKRoqGgl/xx0kkwZQrMng21tVpQJZIlml4p+eXCC8NDS669Njyh6pprYlckUvAU9JJ/Jk4MC6omTgwLqr72tdgViRS0REM3ZjbazJaY2VIzuyrD/mozezH18ZSZDU16rkgzZnDbbeGpVLW14Y1aEdlurQa9mZUBk4ExwGDgLDMb3OSw14GR7n4gcC0wtQ3nijTXqRPMmAEHHxzenH322dgViRSsJHf0w4Gl7r7M3TcB04Gx6Qe4+1Pu/n7q5TNARdJzRVrUvXtYUNWnT+hj/+qrsSsSKUhJgr4vsDLtdUNqW0vOB2a39VwzqzGzOjOrW6OnEEmjXr3CgioIC6reeSduPSIFKEnQW4ZtGee9mdkxhKC/sq3nuvtUd69y96ry8vIEZUnJGDgw3Nm/844WVIlshyRB3wD0S3tdAaxqepCZHQjcDox197VtOVekVcOHhzH7+vrQBE0LqkQSSxL084GBZranmXUGxgGz0g8ws/7Ag8A57v5qW84VSez44+HWW8NQTk2NFlSJJNTqPHp332xm44E5QBlwp7svNrPa1P5bgQnAbsAUMwPYnBqGyXhujn4XKQUXXBAWVF1zTXhC1bXXxq5IJO+Z5+FdUVVVldfV1cUuQ/KVe7ijv/12+MUvwlx7kRJnZgvcvSrTPq2MlcJjFgJ+1Sq45JIw/XKsZu2KtERNzaQwdewY3pw95BAYNw6efjp2RSJ5S0Evhatbt9AeoaICTjwRliyJXZFIXlLQS2HbffcwC6dDh7Cg6u23Y1ckkncU9FL49t4bHnsM1qyB446DDz+MXZFIXlHQS3GoqoIHHoAXX4TTT4dNm2JXJJI3FPRSPMaMgalTYe7cMN8+D6cOi8Sg6ZVSXM47LyyomjAhvEl7/fWxKxKJTkEvxec73wlPqLrhhhD2F18cuyKRqBT0UnzMYPJkeOstGD8+LKg65ZTYVYlEozF6KU4dO8L06aHr5dlnw1/+ErsikWgU9FK8unYNC6r69QsLql5+OXZFIlEo6KW49ewZFlR16hQWVK3S4xCk9CjopfjttVdYULV2bVhQtW5d7IpE2pWCXkrDIYfAb34DixfDqadqQZWUFAW9lI7Ro0MP+9//Psy337IldkUi7ULTK6W0fOUrYUHV1VeHOfY33hi7IpGcU9BL6fnWt8KCqu9/H/r2ha9/PXZFIjmloJfSYwY/+1mYgfONb8Aee8Bpp8WuSiRnNEYvpamsDO69F0aMgOpqePLJ2BWJ5IyCXkpX167wyCNQWQknnQQvvRS7IpGcUNBLadttt7CgqkuXMCvnzTdjVySSdQp6kcrKsKDq/ffDgqoPPohdkUhWKehFAA4+GB58MAzfnHIKfPxx7IpEskZBL9Lo2GPhzjvhiSfgq1/VgiopGomC3sxGm9kSM1tqZl
dl2D/IzJ42s4/N7Iom+94ws4VmVm9mddkqXCQnzjknPLDkvvvgyitjVyOSFa3OozezMmAycCzQAMw3s1nunj5F4T3gUuDkFr7NMe7+7o4WK9IurrwyLKj6wQ/CgqrLLotdkcgOSXJHPxxY6u7L3H0TMB0Ym36Au6929/nAJzmoUaR9mcFPfhLG6i+/HGbMiF2RyA5JEvR9gZVprxtS25JyYK6ZLTCzmpYOMrMaM6szs7o1a9a04duL5EBZGUybBocdFoZz/vSn2BWJbLckQW8Ztnkbfsbh7j4MGANcYmZHZTrI3ae6e5W7V5WXl7fh24vkyM47w6xZoZ/9mDGhVUKHDmE65rRpsasTSSxJ0DcA/dJeVwCJH9Pj7qtSn1cDDxGGgkQKw2c+A7W18NFH4WHj7rB8OdTUKOylYCQJ+vnAQDPb08w6A+OAWUm+uZl1M7NdGr8GRgGLtrdYkSh+9KPm2zZsCK2ORQpAq7Nu3H2zmY0H5gBlwJ3uvtjMalP7bzWz3kAdsCuwxcwuAwYDPYGHzKzxZ93r7o/n5lcRyZEVK9q2XSTPJGpT7O6PAY812XZr2tdvE4Z0mloHDN2RAkWi698/DNc05R6mYn73u9C9e/vXJZKQVsaKtGbSpNDpMt3OO8PIkXDTTbDffjBzZgh+kTykoBdpTXU1TJ0KAwaEOfYDBsAvfwl//CP85S+hA+bpp4eZOf/7v7GrFWnGPA/vQqqqqryuTt0SpEBs3gxTpoQhnI0b4ZvfDI8rbPpXgEgOmdkCd6/KtE939CI7qmNHuPRSeOUVOOMMuO46GDIkPNREJA8o6EWypU8fuOee0P2ya9fw1KqTToLXX49dmZQ4Bb1Ith19NNTXw803wx/+AIMHh7t89biXSBT0IrnQqRNccUUYzjnxxDB+v//+MGdO7MqkBCnoRXKpoiJ0v5wzJ8zYGT06zNBZubL1c0WyREEv0h5GjYKFC8MQzmOPhbn3N90EmzbFrkxKgIJepL3stFPoj/PSS/CFL4RVtQcdFN68FckhBb1Ie6ushP/5nzD9cuNG+Pzn4eyzQ3dMkRxQ0IvEcsIJsHgxTJgADz4In/0s/PjHYQGWSBYp6EVi2nlnmDgRFi2Cww+Hf/93OOSQ0FpBJEsU9CL5YJ99wpu0M2fC++/DEUfAuefC6tWxK5MioKAXyRdmcOqp8PLL4Y3aadPCcM6UKfCPf8SuTgqYgl4k33TrBjfeCC++CMOGwSWXwKGHwl//GrsyKVAKepF8td9+8LvfwX33wapVMGIEXHghrF0buzIpMAp6kXxmBuPGhVYKl10Gd9wRhnPuuAO2bIldnRQIBb1IIdh1V/jhD+H558Od/gUXhFk6zz8fuzIpAAp6kUJywAEwbx78+tewbBlUVcHXvw5/+1vsyiSPKehFCo0ZfPnLsGQJXHRRmJUzaBDcfbeeWysZKehFClWPHvDzn8P8+eE5tl/+cuiFv2hR7MokzyjoRQrdsGHw9NPhAeaLFoVGaVdcAR9+GLsyyRMKepFi0KEDfO1r8OqrcN55cMstYTjn/vs1nCMKepGisttu4c7+mWegV68wNXPUqDCeLyUrUdCb2WgzW2JmS83sqgz7B5nZ02b2sZld0ZZzRSQHDj00jN03juEfcAB8+9vw97/HrkwiaDXozawMmAyMAQYDZ5nZ4CaHvQdcCvxgO84VkVwoKwvtE5YsgbPOghtuCA8qf/hhDeeUmCR39MOBpe6+zN03AdOBsekHuPtqd58PfNLWc0Ukx3r1CvPu580LC69OOSX0wn/ttdiVSTtJEvR9gfQnGTektiWR+FwzqzGzOjOrW7NmTcJvLyKJHXkkPPdcWGE7bx4MGQLXXAMffRS7MsmxJEFvGbYl/bsv8bnuPtXdq9y9qry8POG3F5E26dQpPNxkyZJwZz9xIuy/f+iFL0UrSdA3AP3SXlcAqxJ+/x05V0RyZY89QlfM3/0OOneG448Pwb98eezKJAeSBP18YKCZ7WlmnY
FxwKyE339HzhWRXPvCF+CFF8IbtXPnhoZpN9wAmzbFrkyyqNWgd/fNwHhgDvAyMMPdF5tZrZnVAphZbzNrAC4HvmNmDWa2a0vn5uqXEZHt0LkzXHVVeLLV6NFhGuaBB4a7fSkK5nk4zaqqqsrr6upilyFSmmbPDh0xX3sNzjwzvHnbN+n8C4nFzBa4e1WmfVoZKyLbGjMm9MyZOBFmzQqtFG65BT5pOntaCoWCXkSa69IFJkyAxYth5MjQJO3gg8O0TCk4CnoRadlee8Ejj4TVtOvXh9A/5xx4++3YlUkbKOhF5NOZwdix8NJL4Y3a++8Pz6392c9g8+bY1UkCCnoRSaZrV5g0KYzfH3ooXHop/Mu/hF74ktcU9CLSNvvuC3PmwIwZsGYNHHYYnH8+vPtu7MqkBQp6EWk7MzjjjDD3/oor4K67wv8AbrsNtmyJXZ00oaAXke23yy5w881QXx8WWdXWwogRcN11UFkZnnxVWQnTpsWutKQp6EVkxw0ZAk88AffcExqmffe7oW+Oe/hcU6Owj0hBLyLZYQbV1fBP/9R834YN4SEojz4KDQ168Ek76xi7ABEpMg0Nmbd/8AGceGL4erfd4KCDtv347GdDG2XJOgW9iGRX//6Z2x336wfTp8Pzz4cx/fr68Ezbjz8O+3faKfTGTw//Aw8MT8WSHaKgF5HsmjQpjMlv2LB1W9euof3xYYeFj0abN4cx/cbgr68Pq3DvuGPrMXvv3fzuv2/fMFQkiah7pYhk37RpcPXVsGJFuMOfNCmM3yfhDqtWbRv+9fWwdOnWY3r2zDz007F0710/rXulgl5ECsO6dbBw4dbgf/75sEo3fejngAOaD/3sskvcutuJgl5EitMnnzQf+qmvh7Vrtx6zzz7N7/732KPohn4U9CJSOtzhzTebh/9rr209pgiHfj4t6Av3txIRycQMKirCxwknbN2+bh28+OK24f/Tn259Pm6XLpmHfrp3j/N7ZJHu6EWkdDUO/aRP+ayvh/feC/vNMg/99OmTd0M/GroREUnKPSz6ajr0s2zZ1mPKy5uH/777Rh360dCNiEhSZmFxV79+W1fyQljZ23To5yc/KYihH93Ri4hsr08+gVde2Tb8n38e3n8/7DeDgQOb3/337r3t0M+OrDug8Udp6EZEpH24w8qVzYd+Xn996zG777419NevhzvvhI0bt+7v2hWmTm1T2CvoRURi+9vfmg/9LFoU/irIZMAAeOONxN9eY/QiIrH16AFHHRU+Gm3aFMb2M91wr1iRtR+dqB+9mY02syVmttTMrsqw38zsp6n9L5rZsLR9b5jZQjOrNzPdpouINOrcOYzJZ9LS9u3QatCbWRkwGRgDDAbOMrPBTQ4bAwxMfdQAv2iy/xh3P6ilPytERErWpElhTD5d165he5YkuaMfDix192XuvgmYDoxtcsxY4C4PngF6mFmfrFUpIlKsqqvDG68DBoSZOAMGtPmN2NYkGaPvC6xMe90AHJrgmL7AW4ADc83MgdvcfWqmH2JmNYS/BuifxT9ZRETyXnV1VoO9qSR39JnW+TZ95+DTjjnc3YcRhncuMbOjMhyLu0919yp3ryovL09QloiIJJEk6BuAfmmvK4BVSY9x98bPq4GHCENBIiLSTpIE/XxgoJntaWadgXHArCbHzAK+nJp9MwL4wN3fMrNuZrYLgJl1A0YBi7JYv4iItKLVMXp332xm44E5QBlwp7svNrPa1P5bgceA44ClwAbgq6nTewEPWVjq2xG4190fz/pvISIiLdLKWBGRIlBwLRDMbA2wfDtP7wm8m8VyskV1tY3qahvV1TbFWNcAd884kyUvg35HmFldPi7MUl1to7raRnW1TanVlagFgoiIFC4FvYhIkSvGoM+48jYPqK62UV1to7rapqTqKroxehER2VYx3tGLiEgaBb2ISJEryKDfkQehRK7raDP7IPUQlnozm9BOdd1pZqvNLGP7iYjXq7W6Yl2vfmb2hJm9bG
aLzewbGY5p92uWsK52v2Zm1sXM/mpmL6TqmpjhmBjXK0ldUf6NpX52mZk9b2aPZtiX3evl7gX1QWjD8BqwF9AZeAEY3OSY44DZhK6aI4Bn86Suo4FHI1yzo4BhwKIW9rf79UpYV6zr1QcYlvp6F+DVPPk3lqSudr9mqWvQPfV1J+BZYEQeXK8kdUX5N5b62ZcD92b6+dm+XoV4R5+vD0JJUlcU7j4PeO9TDony4JgEdUXh7m+5+3Oprz8EXiY8XyFdu1+zhHW1u9Q1WJ962Sn10XSWR4zrlaSuKMysAjgeuL2FQ7J6vQox6Ft6yElbj4lRF8DnUn9KzjazITmuKakY1yupqNfLzCqBgwl3g+miXrNPqQsiXLPUMEQ9sBr4f+6eF9crQV0Q59/Yj4FvAlta2J/V61WIQb+jD0LJlSQ/8zlCP4qhwM+Ah3NcU1IxrlcSUa+XmXUHZgKXufu6prsznNIu16yVuqJcM3f/h7sfRHgWxXAz27/JIVGuV4K62v16mdkJwGp3X/Bph2XYtt3XqxCDfocehBKzLndf1/inpLs/BnQys545riuJGNerVTGvl5l1IoTpNHd/MMMhUa5Za3XF/jfm7n8D/giMbrIr6r+xluqKdL0OB04yszcIQ7yfN7N7mhyT1etViEG/3Q9CiV2XmfU2C835zWw44fqvzXFdScS4Xq2Kdb1SP/MO4GV3/2ELh7X7NUtSV4xrZmblZtYj9fXOwBeBV5ocFuN6tVpXjOvl7t9y9wp3ryTkxB/c/UtNDsvq9UrycPC84jv2IJTYdZ0OXGRmm4GPgHGeeos9l8zsPsLsgp5m1gB8j/DGVLTrlbCuKNeLcMd1DrAwNb4L8G2gf1ptMa5ZkrpiXLM+wK/NrIwQlDPc/dHY/00mrCvWv7Fmcnm91AJBRKTIFeLQjYiItIGCXkSkyCnoRUSKnIJeRKTIKehFRIqcgl5EpMgp6EVEitz/B0aaKn8aJddmAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"mdlPCA = PCA(n_components=5)\n",
"XPCA = mdlPCA.fit_transform(df2Fil_norm)\n",
"\n",
"print(mdlPCA.explained_variance_ratio_)\n",
"print(np.sum(mdlPCA.explained_variance_ratio_))\n",
"\n",
"plt.plot(np.arange(0,mdlPCA.explained_variance_ratio_.shape[0]), mdlPCA.explained_variance_ratio_, '-ro')"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.43656834 -0.36520933 -0.30003171 -0.22317841 -0.26032745 -0.38821589\n",
" -0.21312891 0.05576004 0.17168435 -0.07017853 -0.05979342 0.27181642\n",
" -0.3812503 0.10943387]\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD6CAYAAACiefy7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZgU5bX48e9hF1EBQQRZplFciCvibtyJuOI1i0zQeN24XvUXjTGGSJYbvbgkMdHkmhiuGzeZaFziQBRXbNwXQNEMooDIMoIyggIGWYY5vz9Od2iGHuierq6q7j6f55mnu6trqg7LnKk+71vnFVXFOedc+WsTdQDOOefC4QnfOecqhCd855yrEJ7wnXOuQnjCd865CuEJ3znnKkQgCV9EhovI+yIyT0TGZHl/hIi8IyIzRWS6iBwdxHmdc87lTgqdhy8ibYE5wDCgHpgGVKvquxn7dAH+qaoqIvsDD6rq3ts6do8ePbSqqqqg+JxzrpLMmDHjU1Xtme29dgEc/1BgnqrOBxCRB4ARwL8Svqp+kbH/9kBOv2WqqqqYPn16ACE651xlEJGFLb0XRElnN2Bxxuv61LbmQfybiLwHPA5c2NLBRGR0quwzvaGhIYDwnHPOQTAJX7Js2+IKXlUfTZVxzgJuaOlgqjpeVYeq6tCePbN+KnHOOdcKQST8eqBfxuu+wJKWdlbVF4DdRaRHAOd2zjmXoyAS/jRgkIgkRKQDMBKYlLmDiOwhIpJ6PgToACwP4NzOOedyVPCgrao2isgVwFNAW+AeVZ0lIpem3r8T+DrwHRHZAHwJnKPeptM550IVyDx8VZ2sqnuq6u6qOi617c5UskdVb1HVr6jqgap6hKq+FMR5nXNuCzU1UFUFbdrYY01N1BHFRhDTMp1zLh5qamD0aFizxl4vXGivAUaNii6umPDWCs658jF27KZkn7ZmjW13nvCdc2Vk0aL8tlcYT/jOufLRv39+2yuMJ3znXPkYNw62227zbZ0723bnCd85V0ZGjYLvfnfT6512gvHjfcA2xWfpOOfKS5s20K4ddOsGw4d7ss/gCd85V16mToVDDoH27eHDD6OOJla8pOOcKx9ffAHTpsHxx0Mi4Qm/GU/4zrny8dJL0NgIxx1nCX/JEli3LuqoYsMTvnOufCSTVso56ihL+Kp2t60DPOE758rJ1Klw2GE2FTORsG1e1vkXT/jOufKwahXMmGH1e7DGaQALFkQVUex4wnfOlYcXX4SNG61+D9Cnj8/UacYTvnOuPCST0KEDHHGEvW7bFgYM8ISfwRO+c648JJOW7DNbK/jUzM14wnfOlb7PP4e33tpUv0/zhL8ZT/jOudL3wgs2BTNbwv/0U7shy3nCd86VgWQSOnWyKZmZfGrmZgJJ+CIyXETeF5F5IjImy/ujROSd1NcrInJAEOd1zjnAEv6RR0LHjptv94S/mYITvoi0Be4ATgEGA9UiMrjZbh8Cx6rq/sANwPhCz+uccwCsWAHvvLNlOQc2JXyfiw8Ec4V/KDBPVeer6nrgAWBE5g6q+oqqfpZ6+RrQN4DzOuccPP989vo9QI8esP32foWfEkTC3w1YnPG6PrWtJRcBT7T0poiMFpHpIjK9oaEhgPCcc2UtmbRWCoccsuV7InbHrSd8IJiEL1m2adYdRY7HEv4PWzqYqo5X1aGqOrRnz54BhOecK2vJpDVL69Ah+/s+NfNfgkj49UC/jNd9gSXNdxKR/YG7gBGqujyA8zrnKl1DA9TVZS/npKUTvma9Dq0oQST8acAgEUmISAdgJDApcwcR6Q/8DThPVecEcE7nnLP6PWw74a9ebYO7Fa7gJQ5VtVFErgCeAtoC96jqLBG5NPX+ncBPgZ2B34sIQKOqDi303M65CpdMQpcucPDBLe+TOTVz553DiSumApmHr6qTVXVPVd1dVceltt2ZSvao6sWq2k1VD0x9ebJ3rlA1NTYg2a
aNPdbURB1R+JJJOPpo64rZEp+L/y9+p61zpaimBkaPttWc0qs6jR5dWUn/449h9uytl3PAE34GT/jOlaKxY2HNms23rVlj2ytFLvV7gB13hO7d/eYrPOE7V5oWLcpvezlKJi2ZH3TQtvf1qZmAJ3znSlO/ftm39+8fbhxRSibhq1+FdjnMPfGbrwBP+M6VpiFDttzWuTOMGxd+LFFYsgTmzNl2OSctkbCSTlNTUcOKO0/4zpWaqVNh4kS7uh0wYNP2226DUaMiCytUyaQ95pPw162zgd4K5gnfuVKyfDmcey7ssQdMnmxXrW+8Ye+1bRtpaKGaOhW6doUDcuy07jN1AE/4zpUOVbj4Yli2DB54wG44Ahg6FHbfHe6/P9r4wpRMwrHH5v5LzhM+4AnfudLxxz9CbS3cdNPmNXwRGDkSnnuuMkoWixfDBx/Accfl/j1VVfboCd85F3uzZsH3vgcnn2yPzVVX24Dkww+HH1vY8q3fgy1/2Lu3J/yoA3DObcOXX9oV/I47woQJ1kqhua98BfbbrzLKOlOnWk+c/fbL7/vSM3UqmCd85+LuBz+wFsATJkCvXi3vV10Nr7xibRbKWbp+n+0X39b4zVee8J2LtUmT4I47rIwzfPjW9z3nHHt84IHixxWVBQvsK5/6fVpVldX/GxuDjamEeMJ3Lq4++gguvNBaB9x007b3HzgQDjusvMs6ranfpyUSsHGjJf0K5QnfuTjauBHOO8/q9/ffDx075vZ91dXw9tvWRbIcJZPQs6eNWeTLp2Z6wnculn7xC0tuv/sd7LVX7t/3rW9ZbbscyzqqNmB73HE2FTVfnvA94TsXO6+9Bj/5idXkL7ggv+/t3dsS4v33l98arvPnWzmmNeUcsIZzbdt6wnfOxcTKlfDtb0PfvnDnna27kh05EubOhTffDD6+KKXr960ZsAXrqtmvX7wTfpFXMfOE71xcqMJll1lP+7/8xXrFtMbXv25L/pXb4G0yCbvuCnvv3fpjxHlqZgirmAWS8EVkuIi8LyLzRGRMlvf3FpFXRWSdiFwTxDmdKzt/+pMl+p/9DI48svXH6d7d7sj961/Lpx1wofX7tDjffBXCKmYFJ3wRaQvcAZwCDAaqRWRws91WAN8FflXo+ZwrS3PnwuWXwzHHwHXXFX686mqor4eXXy78WHEwd671wG9t/T4tkYClS232U9yEsIpZEFf4hwLzVHW+qq4HHgBGZO6gqstUdRqwIYDzOVde1q+3BN2+Pfz5z8G0OT7zTNhuu/Ip6xRav09LN1GL493ILa1WFuAqZkEk/N2AzDsZ6lPbWkVERovIdBGZ3tDQUHBwzsXej38MM2bA3Xe3vHRhvrp0gTPOgIcegg1lcJ2VTEKfPjBoUGHHifPUzHHj7Jd0poBXMQsi4WcrqLV6PpiqjlfVoao6tGfPngWE5VwJeOYZ+OUv4dJL4d/+LdhjV1fDp5/ClCnBHjds6fr98ccXVr+HeCf8UaPgiivsuYitZjZ+fKCrmOWw+u821QOZlyV9gSUBHNe58rZsGXznOzB4MNx6a/DHP+UU2GknK+tsqw9PnL33HnzySeH1e7BZPh07xjPhA+yyiz02NFhH0IAFcYU/DRgkIgkR6QCMBCYFcFznypeq3VT12Wd2V2znzsGfo2NHOPtsePRRWLs2+OOHJaj6PWya3x7XhF9XZ7+UipDsIYCEr6qNwBXAU8Bs4EFVnSUil4rIpQAisquI1ANXAz8WkXoR2bHQcztXsn77W1uT9le/yr+vez6qq2H1ajtXqUombWxj4MBgjhfnufizZsG++xbt8IHMw1fVyaq6p6rurqrjUtvuVNU7U88/VtW+qrqjqnZNPV8VxLmdKzkzZ8K119qg6uWXF/dcxx9vZYJSna3T1BRc/T4trgm/qak0Er5zLkf//Ke1PujRA+65J7gk1pJ27eCb34THHoNVJXiN9e67NvAcRP0+LZGwUtrKlcEdMwgffmj3B3jCd65MXHUVzJ
ljd9X26BHOOaurrYY/cWI45wtSkPX7tPRMnbjdcVtXZ4+e8J0rAw8/DHfdBWPGwAknhHfeI46wm3dKsWVyMmmDrOkbpoKQPlbcyjrphD+4eaOC4HjCdy4MCxfCJZfYilQ//3m4527TxspITz8Ny5eHe+5CNDXB888HW86B+M7FnzXL5t7vsEPRTuEJ37liqqmxH+KqKqsZn3OOtVAIW3W1reX68MPhn7u13nkHVqwIPuF3725JNW4Jv66uqOUc8ITvXPGk292mm1+pWhuFgHuc5+SAA2zlrFKarTN1qj0GWb8HGyiP20ydDRvsBjNP+M6VqBDa3eZMxK7yX3jBFkcvBckk7L57cP2FMsUt4c+da0m/NWv15sETvnPFEkK727xUV9unjAcfjOb8+di4sTj1+7R0wo/LMpCzZtmjX+E7V6JCaHeblz33hCFDSqOsM3OmjXkUM+GvWWM9a+Kgrs4G1wtZzSsHnvCdK5Yf/3jLbQG3u81bdTVMmwbz5kUXQy7S9ftiJnyIz1z8ujrYY48t2yMHzBO+c8XSpYs99upVtHa3efvWt+wx7nPyk0kbZO7duzjHj9vUzBBm6IAnfOeKp7bWkv1HH9mc8gULok32YOWko4+2sk5c6tfNNTba4HLQs3Myxenmq7Vr7RNXkQdswRO+c8Wxbp11qDzzzGCWLAxSdbX1qEnf2Rk3b75pHT6LVc4B+/TVo0c8Ev5779kFgV/hO1eikklLWmedFXUkW/rmN+2XUFwHb4vRPyebuEzNDKGHTponfOeKobbWriLD7JmTq5494aSTrI4fx7LO1KnWT6ZXr+KeJ04Jv337wtfrzYEnfOeC1tRknSmHD4dOnaKOJrvqakt2r78edSSb27ABXnyx+Ff3YAl/4UKb8x+lujoboA6h5YYnfOeC9sYb8PHH8SznpJ11li2BGLeyzvTptmZAMev3aYmE/YJZEvES3EVe9CSTJ3zngjZxoi08cuqpUUfSsp12svgefDD6K9xMYdXvIR5TM1evttlbnvCdK1G1tZawunWLOpKtq662TyLPPx91JJtMnWpr/IaxOEwcbr569117LKWELyLDReR9EZknImOyvC8i8tvU+++IyJAgzutc7Lz3nn3FuZyTdvrpNrAcl7LO+vXw8svhlHPA7kkQifYKPz1DJ4Q5+BBAwheRtsAdwCnAYKBaRJov2XIKMCj1NRr4Q6HndS6W0ssInnlmtHHkYrvt7BfTI49Yso3aG29Yf5swyjlgYxh9+kSb8GfNsn+H9KeNIgviCv9QYJ6qzlfV9cADwIhm+4wA/k/Na0BXESnSPdPORai2FoYOLU5L32IYOdIW9H7qqagjsfq9CBx7bHjnjHpqZl2dTUEN6ea8IBL+bsDijNf1qW357gOAiIwWkekiMr0hLp3snMvF0qXw2mulUc5JGzbMVoCKQ2+dZNIWaunePbxzxiHhh1S/h2ASvmTZ1vxujlz2sY2q41V1qKoO7dmzZ8HBOReaSZPscUTzD7gx1qEDfOMbVopqvlhLmNauhVdfDa9+n5ZIQH19NCWt5cvtIqHEEn49kPn5tS/QfGJrLvs4V9pqa22FppAG4AJTXW1z3//+9+hieP11S/ph1e/TEgm72ziKRWnSi56E+P8liIQ/DRgkIgkR6QCMBCY122cS8J3UbJ3DgZWqujSAczsXD6tWwXPPWTlHsn2gjbGvftUGL6OcrZNM2gIgxxwT7nmjnIsf0ipXmQpO+KraCFwBPAXMBh5U1VkicqmIXJrabTIwH5gH/C9wWaHndS5WnnzSygKlVL9Pa9vW+uQ/8QR8/nk0MSSTcNBB0LVruOeNMuHX1cGOO0LfvqGdMpB5+Ko6WVX3VNXdVXVcatudqnpn6rmq6uWp9/dT1elBnNe52KittaZkRxwRdSStU11tv7D+9rfwz/3llzbYHXb9HmC33ayHTRQ3X6UHbEP8ROh32jpXqPXr4fHH49n7PleHHGLjD1HM1nn1Vfs7jCLht2
1rN2CFfYWvagk/5PEeT/jOFWrqVKvhl2I5J03E5uRPmQKffBLuuZNJS7xHHx3uedOqqsJP+J98AitWhFq/B0/4zhWutha23x5OPDHqSApTXW2tnR96KNzzJpNw8MFWz45CFHPxQ1z0JJMnfOcKke59f/LJdot8KfvKV6xxWZizdf75T2upEEU5Jy2RgGXLLJaweMJ3rgTNmGH91Eu5nJNp5Eh45RVbGCQMr7xiPemjTvgQ7sBtXZ0N8u+yS3jnxBO+c4WprbX682mnRR1JMEaOtMe//jWc8yWTtnbAUUeFc75sopiaGcGALXjCd64wtbXW7CvM/i/FNHAgHHZYeGWdZNJmCHXpEs75sgk74auGuspVJk/4zrXWnDm2gEW5lHPSqqth5kzr619Mq1fDtGnRlnPAyiqdO4eX8Bctgi++8ITvXElJ974vpWZpufjWt2yaZjGv8mtqYI89bHnFu+6y11ERsamZYdXwIxqwBU/4zrVebS0MGWI37pST3r2tidn991v5IWg1NTB6tM2MAXscPTrapB/m1MyQV7nK5Anfudb4+GO7Q7Tcyjlp1dUwdy689VZhx/n8c3j7bfs0dPvtcPXVcMklW7ZiXrMGxo4t7FyFCPPmq1mzrKVD2H2DgHahn9G5cvD3v9vVb7mVc9K+/nW49FKrr69ebZ9ixo2DUaM27aNqq2UtWGDTOBcs2PSVfr1y5ebH7dzZeudkE0WL4rREwmL97LPiLz4f8qInmTzhO9caEydakthvv6gjKY4nnrDa9qpV9nrhQrjgApgwwdaCTSf11as3/74uXexquarK2i4PGLDp9YAB0KOH/b1lm+cfZWksc6ZOMRP+xo020H/CCcU7x1Z4wncuX6tXw7PPwmWXlV7v+1yNHWvJKdOGDfbn3n9/m755wgmbJ/OqKkuW2/o7GTfOavaZZZ3OnW17VDIT/pAhxTvPBx/AunWRLZLjCd+5fD31lP3Qlmv9HrZeXpk5s7Bjp8tCY8faebKVi8IW1lz8CGfogCd85/JXW2uliSOPjDqS4unfv7hll1Gjok3wzXXtal/FTvjpVa4GDy7ueVrgs3Scy8eGDfDYY3DGGdYSoFyNG2dllkxRl12KLYypmXV1Vg7bfvvinqcFnvCdy8fzz9tsjnIu54BdfY8fb7V5EXscPz5eV+VBSySKf/NVhDN0wBO+c/mprbUr3WHDoo6k+EaNsgTY1GSP5ZzsYVPCL8bNZmCres2ZE9mALRSY8EWku4g8IyJzU49Z5zOJyD0iskxE6go5n3ORUrXpmF/7Wun3vndbSiTsHoFirfg1Zw40Npb0Ff4YYIqqDgKmpF5ncx8wvMBzORetN9+E+vryL+dUqqoqeyxWHT/iGTpQeMIfAUxIPZ8AZP1JUNUXgBUFnsu5aNXWQps2cPrpUUfiiqHYUzPr6mzthL32Ks7xc1Bowu+lqksBUo8FL98iIqNFZLqITG9oaCj0cM4Fp7YWjjkGdt456khcMYRxhT9okN2pHJFtzisTkWeBXbO8VZROR6o6HhgPMHTo0CKNnjiXp3nz7Af2ttuijsQVS+fO0KtXcRP+QQcV59g52mbCV9WTWnpPRD4Rkd6qulREegPLAo3Oubgo1973bnPFmou/Zg3Mnw/nnRf8sfNQaElnEnB+6vn5wMQCj+dcPNXWwoEHbvrY78pTsRL+7Nk2yyvCAVsoPOHfDAwTkbnAsNRrRKSPiExO7yQi9wOvAnuJSL2IXFTgeZ0Lz7Jl8PLLPjunEiQSsHixTZ8MUgxm6ECBvXRUdTlwYpbtS4BTM15XF3Ie5yL12GN2deYJv/wlEpbsP/rI7i4OSl0ddOgAu+8e3DFbwe+0dW5bamvth3///aOOxBVbsaZmzpoF++wTef8lT/jObc0XX8DTT9vVfbn2vnebFGtqZsQ9dNI84Tu3NU8/Xf69790m/fvbzXVBJvyVK21cwBO+czFXWwvdu8PRR0cdiQtD+/bQt2+wCT/dAz/Cpmlpnv
Cda0ml9L53mwt6amZMZuiAJ3znWvbii/DZZ17OqTRBJ/xZs2zBkyBn/bSSJ3znWlJba22Qv/a1qCNxYUokYMkSWLs2mOPV1Vk5p0306Tb6CJyLo8ze982X+nPlLT01c2sLuecjJjN0wBO+c9nNnGk/8N47p/IEORe/ocHu1I7BgC14wncuO+99X7mCTPjpGTp+he9cjNXW2lTMnj2jjsSFrXdva4MQRMKP0Qwd8ITv3Jbmz4d33vHZOZWqTRubURNUwu/WzX6JxIAnfOea8973LqipmekZOjFpy+EJ37nmamutUdrAgVFH4qISRMJXtRp+TMo54Anfuc19+im89JKXcypdIgHLl8Pq1a0/xpIl8PnnnvCdi63HHoOmJk/4lS6ImToxG7AFT/jOba621jomHnhg1JG4KKUT/oIFrT9GOuHHZA4+eMJ3bpM1a6wd8ogRsRlkcxEJ6gq/Vy/o0SOYmALgCd+5tKefhi+/9HKOg513hi5dCkv4MRuwhQITvoh0F5FnRGRu6rFbln36iUhSRGaLyCwRubKQczpXNLW1Nmf6q1+NOhIXNRFb/aq1Cb+pqfwSPjAGmKKqg4ApqdfNNQLfV9V9gMOBy0VkcIHndS5YjY3w979bK4X27aOOxsVBIVMzFyywEmGZJfwRwITU8wnAFp+FVXWpqr6Zer4amA3sVuB5nQvWSy/BihVeznGbpBO+av7fG8MBWyg84fdS1aVgiR3YZWs7i0gVcBDw+lb2GS0i00VkekNDQ4HhOZejiROhUyc4+eSoI3FxkUjYIvbLl+f/vTFa1jDTNhO+iDwrInVZvvK671xEugCPAFep6qqW9lPV8ao6VFWH9vTGVa7Yamqsb8ptt1ndtrY26ohcXBQyU6euzqb37rhjsDEVaJsLdarqSS29JyKfiEhvVV0qIr2BZS3s1x5L9jWq+rdWR+tckGpqYPRoq7WCzdAZPdqejxoVXVwuHjIT/iGH5Pe9MVr0JFOhJZ1JwPmp5+cDE5vvICIC3A3MVtVfF3g+11o1NTbroE0be6ypiTqi6I0duynZp61ZY9uda+3NVxs2wHvvxa6cA4Un/JuBYSIyFxiWeo2I9BGRyal9jgLOA04QkZmpr1MLPK/LR/pKduFCG4BauNBeV3rSb2kJu6CWtnOlbYcdbD5+viWdefNg/fpYXuFvs6SzNaq6HDgxy/YlwKmp5y8BfttilLZ2JVvJpYv+/e2XX7btzkHrpmbGbJWrTH6nbSXwK9nsfvrTLbd17gzjxoUfi4un1tx8VVdnEwD22acoIRXCE34l2Hnn7NvbtIEbb2zdtLNy8Nln9rjrrvYDOmAAjB9f2Z963OYSCavhNzXl/j11dbDHHrDddkULq7U84Ze7O+6wHu9tmv1Td+xoVyBjx0K/fvCf/wnvvx9NjFFYuxZ+9Ss48URYutR+oBcs8GTvNpdIWD1+6dLcvye9ylUMecIvV6rws5/BFVfAmWfCXXfZFWz6Svbuu+Ef/7C1W6ur4d57Ye+9rbXAlCmtu7uwlNx3H3z8MVx3XdSRuDjLdy7+2rU2aBvD+j14wi9PGzfC5ZfD9dfDBRfAI4/YY/qjaeaV7H77WfJftAj+67/gjTfgpJOsH/x998G6ddH9OYplwwa45RY4/HA4/vioo3Fxlm/Cf/99+/nzhO9CsW4dfPvb8Ic/wLXXWjJvl8NkrF12sU8EixbZ9zQ12S+JAQPghhugnNpc3H+//dIbO9b73rutGzDAHnNN+DFc5SqTJ/xysnq1lWQefBB++Uu7is03oXXqBBdeaKWeZ56BIUNsNkv//jZ3/913ixN7WJqa4KabbJHy006LOhoXd506QZ8+ud98VVdnF1iDBhU1rNbyhF8uGhpsADKZtHr8NdcUdjwRK+1MnmxJ/jvfgT/9yQajhg+3xUJKsc7/6KN2F+R11/nVvctNPnPx6+pgr72gQ4fixtRKnvDLwaJFtmjHP/5hCe3f/z3Y4++zD/zxj7B4Mfz3f8
Pbb1tXyf32s8Hg++4rjbYNqjbHftAg+MY3oo7GlYp8En4MFz3J5Am/1L37Lhx5pM04efppOOOM4p2rRw+rey9YABMm2EfXSy6xWn8ptG146il46y0YMwbato06GlcqqqrsYmfDhq3v98UX9ovBE74ritdeg6OPtlkBL7wQ3tJ8HTtaieett2ywt7m4NiAbN87uOTj33KgjcaUkkbCxn8WLt75fenwrpnPwwRN+6XrySavZd+8OL79sg5BhE2l59k7c2ja88IKtanXttbGtr7qYynVqZsxn6IAn/NL0l79Y6WbPPS3ZDxwYXSwtNRqLWwOycePs08hFF0UdiSs1uSb8WbNsVk+UP4/b4Am/1Pzud3bT1FFHwdSp0KtXtPGMG2cNx5r7j/8IP5aWTJ9u4xtXXx3L/iYu5vr2tTGfXK7wBw+O9fiQJ/xSoWrz4b/7XVto+8knYaedoo7KfvmMH7+pbUPfvtZH/JFHoLEx6ujMjTdC167WL8i5fLVrZ59Yc0n4MS7ngCf80rBxoyWrG26wksRDD9lHx7gYNWpT24bFi+Gee2DGDLj11qgjs4/Zjz5qvyhjtr6oKyHprpktWbECliyJ9YAteMKPv3XrYORImwc/Zgz87//m1iohSt/4Bpx9trVqeO+9aGO5+WbYfntL+M611rbm4sd40ZNMnvDjbPVqOPVUePhhu1q+6abSuTv0jjustn/hhfYJJQrz51vfnEsvbXlNAOdykUjYvS5ffpn9fU/4riANDdbJ8fnn7Sanq6+OOqL87Lor3H47vPoq/M//RBPDLbfYANr3vx/N+V35qKqyx5bKOnV1NnbVr19YEbVKQQlfRLqLyDMiMjf12C3LPp1E5A0ReVtEZonIzws5Z1mrqdnUoqBPH2thUFtrNzmVonPPtU8o111nV9th+ugja/lw4YXQu3e453blZ1tTM9OLnsT8E3ihV/hjgCmqOgiYknrd3DrgBFU9ADgQGC4ihxd43vJTU2MtCdItChob7ep05cqoI2s9ERt7aNcOLr443GZrt95qpaRrrw3vnK58bS3hq5bEDB0oPOGPACaknk8Azmq+g5ovUi/bp75KsM1ikY0day0JMq1bF88WBfno29eWEkwmbcA5DJ9+ar6uCsoAAAx5SURBVL9oRo3a9IPqXCF23dVmxmVL+MuW2brQFZDwe6nqUoDUY5bGKiAibUVkJrAMeEZVX2/pgCIyWkSmi8j0hnJadGNrmprsyj6buLUoaI2LL7Y2ENdcs+1+JEG4/XYbXBuT7QOnc60gYuXWbAm/BFoqpG0z4YvIsyJSl+VrRK4nUdWNqnog0Bc4VERa/JtR1fGqOlRVh/bs2TPXU5SuFSts0ZKWxK1FQWuI2NX9xo12B24xSzsrV9rdyGefbW2dnQtKS1Mzyynhq+pJqrpvlq+JwCci0hsg9bhsG8f6HJgKDA8g9tL35ptw8MHw7LPWw755i4LOna11QTlIJGxO/BNP2EIqxfL731vSL/VSmIuflm6+qquzab/ZOsfGTKElnUnA+ann5wMTm+8gIj1FpGvq+XbASUDEd+PEwN13Wx/7xkZ48UVbpSqzRcGAAfY6vdh4Obj8cusBdOWVsHRp8MdfswZ+8xs45RQ46KDgj+8qWyIBn3225USK9IBtzGfoQOEJ/2ZgmIjMBYalXiMifURkcmqf3kBSRN4BpmE1/McKPG/pWrvWatoXX2z96998Ew47zN7LbFGwYEF5JXuw6ab33GN/B5ddFnxp56677P6F664L9rjOQfaZOqqxX+UqU0EJX1WXq+qJqjoo9bgitX2Jqp6aev6Oqh6kqvunSkHXBxF4SfrwQ7vCvftuKzk8+SRUwjhFpj33hOuvt/sLHnoouOOuX28Ltx9zjC0K41zQ0jdfZSb8xYvtjvhKSPguD088YfX6Dz6ASZNsbdgYt1Etqu99Dw45BK64ouUFVPL1f/8H9fVeu3fFk+0KPz1gG/OmaWme8Itt40ZrInbaaTbjZs
aM4q47WwratbPSzuefWz2/UI2NNiB88MEwbFjhx3Mum27drOOqJ3yX1fLlNuXy+uutPcIrr8Duu0cdVTzsuy/85CfW3GziFmP9+XnoIfvkNHZsSQycuRIlsuXUzFmzrA1K9+7RxZUHT/jFMn26XXE+9xzceafNwsm2MlQlGzMGDjjAev1/9lnrjtHUZAucDB4MI3K+NcS51mme8EukpUKaJ/ygqdpNRkcdZc9fesluNvIrzy21b2+lnWXLWt/R8rHH7IfuRz+yWUDOFVN6Lr6qlWvffbdkyjngCT9YX35pK1KNHg3HHWf1+kMOiTqqeBsyBH74Q/sE9NRT+X2vqt2YNnCgLRLjXLElEna/R0ODdYBdu9av8CvS/Pl2VX/vvVabnjwZevSIOqrS8JOfWBuESy6BVaty/74pU+CNN+wXRtxXAXPlIXOmToksepLJE34QHn/c6vUffmglhuuvr9wpl63RqZOVdurr82t4duONNmB2/vnb3te5IGQm/PQMncGDo4snT57wC7FxI/z0pzYTp6rKSjinnRZ1VKXp8MNtfv4f/mCrfG3Lq69ay+VrroGOHYsfn3NgLU9gU8JPJKBLl2hjyoMn/HxkrkjVrx8ceCDccANccIFNuRw4MOoIS9sNN9i01Ysu2nJtgObGjbOGVaNHhxObc2DJvWfPTQm/hAZswRN+7pqvSFVfb//gF11krRK22y7qCEtf587WD+eDD6yu35KZM62MdtVVsP324cXnHNhV/Zw58P77JVW/B0/4ucu2IhVYa2Ofchmc446zefm/+Q289lr2fW66ye54vOKKUENzDrCE/8ordoe3J/wy1dLKU+WwIlXc3HKLlcwuvNCmvWV6/327s/byy6Fr12jic5UtkYANG+y5J/wytGaNzSTJphxWpIqbHXawtQBmz7a6fqZbbrF/i6uuiiY255ZlrPN0xhlW7i0RnvC3ZelSOPZYu6mqffvN3yunFani5uSTbTD8lltszQCw8ZM//cnm65fA6kKuDNXUbJ7gFy+2sb0SSfqe8Lfm7bfh0EPtSnPSJLupqpxXpIqbW2+1xH7WWfb3XVVlddM99og6Mlepxo6Fdes237ZmTcm05fbbE1vy+ON2u/5OO1k/nAMPtO2e4MPTrRtUV8Ovf7359jFjrDuh/1u4sJX4WJ5f4TenCr/9LZx5pq3O9MYbm5K9C98jj2y5rYSuqFyZaWnMrkTG8jzhZ2pstKl+V15pCf+FF+zWfRedEr+icmVm3Lgt25yX0FheQQlfRLqLyDMiMjf12G0r+7YVkbdEJJ4LmK9aZSPuv/89/OAHdmXpN/VEr8SvqFyZGTXKxu5KdCyv0Cv8McAUVR0ETEm9bsmVwOwCz1ccCxdap8tnn7V/vF/8wnurx0WJX1G5MjRqlPXEb2qyxxJJ9lB4wh8BTEg9nwCclW0nEekLnAbcVeD5gvf66zYTZ/FiePJJm/Ln4qPEr6ici5NCZ+n0UtWlAKq6VERamhx9G3AtsMO2Digio4HRAP2L/bH9wQettW6fPjB1qvVkd/EzapQneOcCsM0rfBF5VkTqsnzltICoiJwOLFPVGbnsr6rjVXWoqg7t2bNnLt+SP1XrpX7OOdbH/rXXPNk758reNq/wVfWklt4TkU9EpHfq6r43sCzLbkcBZ4rIqUAnYEcR+bOqntvqqAuxbp2tMTthAnz729bpsqW2Cc45V0YKreFPAtLLDZ0PTGy+g6r+SFX7qmoVMBJ4LrJkv3w5fO1rlux//nP485892TvnKkahCf9mYJiIzAWGpV4jIn1EZHKhwQVqzhxbVen11+Evf7GVqrytsXOughQ0aKuqy4ETs2xfApyaZftUYGoh52yVqVPh7LNtoevnnoMjjww9BOeci1r5Tza/914r4+y6q13de7J3zlWo8kv4mevO7rSTLaJx3HG2Qk16xXnnnKtA5dUtM73ubHopwlWrrIxz7rm+OpJzruKV1xV+tnVnGxttgNY55ypceSV876
zonHMtKq+E750VnXOuReWV8L2zonPOtai8Er53VnTOuRaV1ywd8M6KzjnXgvK6wnfOOdciT/jOOVchPOE751yF8ITvnHMVwhO+c85VCFHVqGNokYg0AAtb+e09gE8DDCdMpRp7qcYNHntUPPbgDVDVrOvDxjrhF0JEpqvq0KjjaI1Sjb1U4waPPSoee7i8pOOccxXCE75zzlWIck7446MOoAClGnupxg0ee1Q89hCVbQ3fOefc5sr5Ct8551wGT/jOOVchyi7hi8hwEXlfROaJyJio48mViPQTkaSIzBaRWSJyZdQx5UtE2orIWyLyWNSx5ENEuorIwyLyXurv/4ioY8qViHwv9f+lTkTuF5FOUcfUEhG5R0SWiUhdxrbuIvKMiMxNPXaLMsZsWoj7l6n/L++IyKMiUhKLZpdVwheRtsAdwCnAYKBaRAZHG1XOGoHvq+o+wOHA5SUUe9qVwOyog2iF24EnVXVv4ABK5M8gIrsB3wWGquq+QFtgZLRRbdV9wPBm28YAU1R1EDAl9Tpu7mPLuJ8B9lXV/YE5wI/CDqo1yirhA4cC81R1vqquBx4ARkQcU05Udamqvpl6vhpLOrtFG1XuRKQvcBpwV9Sx5ENEdgSOAe4GUNX1qvp5tFHlpR2wnYi0AzoDSyKOp0Wq+gKwotnmEcCE1PMJwFmhBpWDbHGr6tOq2ph6+RrQN/TAWqHcEv5uwOKM1/WUUNJME5Eq4CDg9WgjycttwLVAU9SB5Gkg0ADcmypH3SUi20cdVC5U9SPgV8AiYCmwUlWfjjaqvPVS1aVgFz3ALhHH0xoXAk9EHUQuyi3hS5ZtJTXvVES6AI8AV6nqqqjjyYWInA4sU9UZUcfSCu2AIcAfVPUg4J/Es6ywhVS9ewSQAPoA24vIudFGVVlEZCxWjq2JOpZclFvCrwf6ZbzuS4w/4jYnIu2xZF+jqn+LOp48HAWcKSILsDLaCSLy52hDylk9UK+q6U9TD2O/AErBScCHqtqgqhuAvwFHRhxTvj4Rkd4AqcdlEceTMxE5HzgdGKUlckNTuSX8acAgEUmISAdsAGtSxDHlREQEqyPPVtVfRx1PPlT1R6raV1WrsL/z51S1JK40VfVjYLGI7JXadCLwboQh5WMRcLiIdE79/zmREhlwzjAJOD/1/HxgYoSx5ExEhgM/BM5U1TVRx5Orskr4qUGUK4CnsP/4D6rqrGijytlRwHnY1fHM1NepUQdVIf4fUCMi7wAHAjdGHE9OUp9KHgbeBP6B/TzH9nZ/EbkfeBXYS0TqReQi4GZgmIjMBYalXsdKC3H/D7AD8EzqZ/XOSIPMkbdWcM65ClFWV/jOOeda5gnfOecqhCd855yrEJ7wnXOuQnjCd865CuEJ3znnKoQnfOecqxD/Hycd3W9YmWUBAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"print(mdlPCA.components_[0,:])\n",
"plt.plot(np.arange(0,mdlPCA.components_[0,:].shape[0]), mdlPCA.components_[0,:], '-ro')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predictive models\n",
"\n",
"#### Can we predict motivation?\n",
"#### Can we predict stress?"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Q1 | \n",
" Q2 | \n",
" Q3 | \n",
" Q4 | \n",
" Q5 | \n",
" Q7 | \n",
" Q8 | \n",
" Q9 | \n",
" Q10 | \n",
" Q11 | \n",
" Q6_Reading | \n",
" Q6_Running | \n",
" Q6_Watching a movie | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.8 | \n",
" 0.333333 | \n",
" 0.375 | \n",
" 0.625 | \n",
" 0.750 | \n",
" 0.571429 | \n",
" 0.222222 | \n",
" 0.166667 | \n",
" 0.625 | \n",
" 0.000000 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.8 | \n",
" 0.833333 | \n",
" 0.500 | \n",
" 0.500 | \n",
" 0.625 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.333333 | \n",
" 0.625 | \n",
" 0.714286 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.625 | \n",
" 0.500 | \n",
" 0.857143 | \n",
" 0.666667 | \n",
" 0.500000 | \n",
" 0.625 | \n",
" 0.571429 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.2 | \n",
" 0.000000 | \n",
" 0.625 | \n",
" 0.375 | \n",
" 0.375 | \n",
" 0.285714 | \n",
" 0.777778 | \n",
" 0.666667 | \n",
" 0.375 | \n",
" 1.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.4 | \n",
" 0.500000 | \n",
" 0.500 | \n",
" 0.375 | \n",
" 0.250 | \n",
" 0.428571 | \n",
" 0.444444 | \n",
" 0.000000 | \n",
" 1.000 | \n",
" 0.714286 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Q1 Q2 Q3 Q4 Q5 Q7 Q8 Q9 Q10 \\\n",
"0 0.8 0.333333 0.375 0.625 0.750 0.571429 0.222222 0.166667 0.625 \n",
"1 0.8 0.833333 0.500 0.500 0.625 0.857143 0.666667 0.333333 0.625 \n",
"2 0.4 0.500000 0.625 0.625 0.500 0.857143 0.666667 0.500000 0.625 \n",
"3 0.2 0.000000 0.625 0.375 0.375 0.285714 0.777778 0.666667 0.375 \n",
"4 0.4 0.500000 0.500 0.375 0.250 0.428571 0.444444 0.000000 1.000 \n",
"\n",
" Q11 Q6_Reading Q6_Running Q6_Watching a movie \n",
"0 0.000000 0.0 1.0 0.0 \n",
"1 0.714286 1.0 0.0 0.0 \n",
"2 0.571429 0.0 0.0 1.0 \n",
"3 1.000000 0.0 0.0 1.0 \n",
"4 0.714286 1.0 0.0 0.0 "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = df2.drop(columns=['tsRel'])\n",
"df3_norm = (df3-df3.min())/(df3.max()-df3.min())\n",
"df3_norm.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Select X and y\n",
""
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
":1: FutureWarning: The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead\n",
" colX = df3.columns[pd.np.r_[0:8,10:13]]\n"
]
},
{
"data": {
"text/plain": [
"['Q1',\n",
" 'Q2',\n",
" 'Q3',\n",
" 'Q4',\n",
" 'Q5',\n",
" 'Q7',\n",
" 'Q8',\n",
" 'Q9',\n",
" 'Q6_Reading',\n",
" 'Q6_Running',\n",
" 'Q6_Watching a movie']"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"colX = df3.columns[pd.np.r_[0:8,10:13]]\n",
"colX.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Q11']"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"colY = ['Q10'] # level of stress;\n",
"colY = ['Q11'] # level of motivation;\n",
"\n",
"colY"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(19, 11)\n"
]
},
{
"data": {
"text/plain": [
"array([[ 8, 5, 4, 6, 7, 5, 3, 5, 0, 1, 0],\n",
" [ 8, 8, 5, 5, 6, 7, 7, 6, 1, 0, 0],\n",
" [ 6, 6, 6, 6, 5, 7, 7, 7, 0, 0, 1],\n",
" [ 5, 3, 6, 4, 4, 3, 8, 8, 0, 0, 1],\n",
" [ 6, 6, 5, 4, 3, 4, 5, 4, 1, 0, 0],\n",
" [ 8, 7, 8, 3, 3, 8, 4, 10, 0, 1, 0],\n",
" [ 4, 3, 1, 1, 1, 1, 1, 10, 1, 0, 0],\n",
" [ 7, 3, 7, 6, 5, 4, 6, 8, 1, 0, 0],\n",
" [ 5, 5, 5, 4, 4, 4, 4, 5, 0, 0, 1],\n",
" [ 6, 6, 6, 6, 6, 4, 6, 6, 0, 0, 1],\n",
" [ 4, 4, 4, 5, 3, 5, 2, 7, 0, 0, 1],\n",
" [ 7, 7, 7, 2, 2, 7, 6, 7, 0, 1, 0],\n",
" [ 8, 8, 8, 6, 6, 8, 7, 8, 0, 0, 1],\n",
" [ 4, 4, 4, 1, 1, 1, 5, 7, 0, 0, 1],\n",
" [ 8, 7, 7, 7, 7, 7, 10, 5, 0, 1, 0],\n",
" [ 7, 7, 6, 6, 6, 6, 6, 6, 0, 0, 1],\n",
" [ 7, 6, 6, 5, 5, 1, 7, 7, 1, 0, 0],\n",
" [ 6, 6, 6, 5, 5, 2, 9, 9, 0, 0, 1],\n",
" [ 9, 9, 9, 9, 9, 7, 6, 5, 0, 0, 1]])"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = np.array(df3[colX])\n",
"print(X.shape)\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(19, 1)\n"
]
},
{
"data": {
"text/plain": [
"array([[ 3],\n",
" [ 8],\n",
" [ 7],\n",
" [10],\n",
" [ 8],\n",
" [ 8],\n",
" [ 8],\n",
" [ 9],\n",
" [ 7],\n",
" [ 6],\n",
" [ 7],\n",
" [ 8],\n",
" [ 8],\n",
" [ 8],\n",
" [10],\n",
" [ 7],\n",
" [ 7],\n",
" [ 9],\n",
" [10]])"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = np.array(df3[colY])\n",
"print(y.shape)\n",
"y"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Normalize data"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"#scaler = StandardScaler()\n",
"scaler = MinMaxScaler()\n",
"\n",
"scaler.fit(X)\n",
"Xnorm = scaler.transform(X)\n",
"Xnorm.max(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.svm import SVR\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Cross validation with leave one out"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TRAIN: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [0]\n",
"TRAIN: [ 0 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [1]\n",
"TRAIN: [ 0 1 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [2]\n",
"TRAIN: [ 0 1 2 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [3]\n",
"TRAIN: [ 0 1 2 3 5 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [4]\n",
"TRAIN: [ 0 1 2 3 4 6 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [5]\n",
"TRAIN: [ 0 1 2 3 4 5 7 8 9 10 11 12 13 14 15 16 17 18] TEST: [6]\n",
"TRAIN: [ 0 1 2 3 4 5 6 8 9 10 11 12 13 14 15 16 17 18] TEST: [7]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 9 10 11 12 13 14 15 16 17 18] TEST: [8]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 10 11 12 13 14 15 16 17 18] TEST: [9]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 11 12 13 14 15 16 17 18] TEST: [10]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 12 13 14 15 16 17 18] TEST: [11]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 17 18] TEST: [12]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 14 15 16 17 18] TEST: [13]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 15 16 17 18] TEST: [14]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 16 17 18] TEST: [15]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18] TEST: [16]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18] TEST: [17]\n",
"TRAIN: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17] TEST: [18]\n"
]
}
],
"source": [
"from sklearn.model_selection import LeaveOneOut\n",
"\n",
"# List the row indices that fall into the training and test set of each fold.\n",
"loo = LeaveOneOut()\n",
"for train_index, test_index in loo.split(Xnorm):\n",
"    fold_report = 'TRAIN: ' + str(train_index) + ' TEST: ' + str(test_index)\n",
"    print(fold_report)\n"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Y : [[3]] , pred as: [6.71770473]\n",
"Y : [[8]] , pred as: [8.44358673]\n",
"Y : [[7]] , pred as: [8.57175498]\n",
"Y : [[10]] , pred as: [7.62213136]\n",
"Y : [[8]] , pred as: [6.70446542]\n",
"Y : [[8]] , pred as: [8.9269527]\n",
"Y : [[8]] , pred as: [5.14777884]\n",
"Y : [[9]] , pred as: [8.02397316]\n",
"Y : [[7]] , pred as: [6.22595715]\n",
"Y : [[6]] , pred as: [7.94496256]\n",
"Y : [[7]] , pred as: [6.21621235]\n",
"Y : [[8]] , pred as: [7.00076291]\n",
"Y : [[8]] , pred as: [9.45348764]\n",
"Y : [[8]] , pred as: [6.27163448]\n",
"Y : [[10]] , pred as: [6.519619]\n",
"Y : [[7]] , pred as: [8.10618284]\n",
"Y : [[7]] , pred as: [8.58400087]\n",
"Y : [[9]] , pred as: [7.92971992]\n",
"Y : [[10]] , pred as: [6.78948034]\n"
]
}
],
"source": [
"import warnings\n",
"# NOTE(review): this silences every warning for the rest of the kernel session.\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.svm import LinearSVR\n",
"\n",
"loo = LeaveOneOut()\n",
"\n",
"# One held-out prediction per sample, kept as a column vector (n_samples, 1)\n",
"# so it lines up with y in the cells below.\n",
"predAll = np.zeros([y.shape[0], 1])\n",
"\n",
"# Iterate over the raw features X (not Xnorm): the scaler is fitted inside\n",
"# each fold so the held-out sample never influences the scaling parameters.\n",
"for train_index, test_index in loo.split(X):\n",
"\n",
"    y_train, y_test = y[train_index], y[test_index]\n",
"\n",
"    # Learn min/max from the training rows only, then apply to both splits.\n",
"    fold_scaler = MinMaxScaler().fit(X[train_index])\n",
"    X_train = fold_scaler.transform(X[train_index])\n",
"    X_test = fold_scaler.transform(X[test_index])\n",
"\n",
"    regr = LinearSVR(random_state=0, tol=1e-5)\n",
"\n",
"    # Train the model; ravel() hands sklearn the 1-D target it expects\n",
"    # instead of a column vector.\n",
"    regr.fit(X_train, y_train.ravel())\n",
"\n",
"    ypred = regr.predict(X_test)  # Apply the model to the held-out sample\n",
"\n",
"    # test_index holds the single left-out row, so no manual counter is needed.\n",
"    predAll[test_index] = ypred\n",
"\n",
"    print('Y : ' + str(y_test) + ' , pred as: ' + str(ypred))\n"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1. , 0.01086243],\n",
" [0.01086243, 1. ]])"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Correlation between the true targets and the leave-one-out predictions.\n",
"np.corrcoef(y.ravel(), predAll.ravel())"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPxUlEQVR4nO3dXWxkZ33H8e8/XqPMRkVOE4Pw0nRBQg5NULKphXhRo0KgBgphiVopSFQUVSwXiLcLV+xNEypVFC0XcIW0glKk0rRkWRboRUxEW3pFKidmtVkSC/EWMgvEiJgCGYFj/r3weLPe2OsZe86c5+x8P9LK9uPx+Cdr5zfnPOec50RmIkkq1xV1B5AkXZpFLUmFs6glqXAWtSQVzqKWpMLtq+JJr7322jx48GAVTy1Jl6UHH3zwZ5k5udX3KinqgwcPsrCwUMVTS9JlKSJ+uN33nPqQpMJZ1JJUOItakgpnUUtS4SxqSSpcJWd9SCrPqcU2x+aXOLfSYWqixdzsNIcPHag7lnpgUUsj4NRim6Mnz9BZXQOgvdLh6MkzAJZ1Azj1IY2AY/NL50t6Q2d1jWPzSzUlUj8samkEnFvp9DWusljU0giYmmj1Na6yWNTSCJibnaY1PrZprDU+xtzsdE2J1A8PJkojYOOAoWd9NJNFLY2Iw4cOWMwN5dSHJBXOopakwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXAWtSQVzqKWpMJZ1JJUOItakgpnUUtS4Xoq6oj4QEQ8HBFnI+KDVYeSJD1jx6KOiBuBdwMvB24C3hwRL6k6mCRpXS9b1C8FvpmZT2Xm08A3gLdVG0uStKGXGwc8DPxDRFwDdIA3AQsXPygijgBHAK677rpBZpQ0AKcW297hpaF2LOrMfCQiPgbcD/wKOA08vcXjjgPHAWZmZnLAOSXtwanFNkdPnqGzugZAe6XD0ZNnACzrBujpYGJmfiYzb8nMW4GfA9+pNpakQTo2v3S+pDd0Vtc4Nr9UUyL1o6d7JkbE8zLziYi4DrgDeGW1sSQN0rmVTl/jKkuvN7f9YneOehV4b2Y+WWEmSQM2NdGivUUpT020akijfvU69fEnmflHmXlTZn696lCSBmtudprW+Nimsdb4GHOz0zUlUj963aKW1GAbBww966OZLGppRBw+dMBibijX+pCkwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXAWtSQVzgtepD1wjWcNg0Ut7dKpxTZzJ06zura+/Hp7pcPcidOAazxrsJz6kHbpI189e76kN6yuJR/56tmaEulyZVFLu/TkU6t9jUu7ZVFLUuEsammXJlrjfY1Lu2VRS7t09+03MH5FbBobvyK4+/Ybakqky5VnfUi71LTF+D2VsLksamkPmrIY/6nFNkdPnjl/J/L2SoejJ88AnkrYBE59SCPg2PzS+ZLe0Fld49j8Uk2J1A+LWhoB57a4A/mlxlUWi1oaAVMTrb7GVRaLWhoBc7PTtMbHNo21xseYm52uKZH64cFEaQQ07QwVbWZRSyOiKWeo6Nmc+pCkwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXCeRy3tgUuHahgsammXXDpUw+LUh7RLLh2qYbGopV1y6VANi1Mf0i5NTbRob1HKLh06GM7/P6OnLeqI+FBEnI2IhyPinoi4supgUulcOrQ6G/P/7ZUOyTPz/6cW23VHq8WORR0RB4D3AzOZeSMwBtxZdTCpdIcPHeCjd7yMAxMtAjgw0eKjd7xsZLf6Bsn5/816nfrYB7QiYhXYD5yrLpJGWdN2d106tBrO/2+24xZ1ZraBjwOPAT8GfpGZX7v4cRFxJCIWImJheXl58El12XN3Vxu8ddhmvUx9XA28FXgRMAVcFRHvuPhxmXk8M2cyc2ZycnLwSXXZc3dXG15z/dYdst345a6Xg4mvA76fmcuZuQqcBF5VbS
yNInd3teG/Ht16r3y78ctdL0X9GPCKiNgfEQHcBjxSbSyNInd3tcE37c16maN+ADgBPASc6f7M8YpzaQR5ups2+Ka9WU/nUWfmXZl5fWbemJl/lZm/qTqYRo+nu2mDb9qbeWWiiuLpboJnFrVq0qmaVbKoJRXJN+1nuCiTJBXOopakwlnUklQ4i1qSCmdRS1LhLGpJKpyn50nSHlW9PK9FLUl7MIy70Tv1IUl7MIzleS1qSdqDYaz0Z1FL0h4MY6U/i1qS9mAYK/15MFGS9mAYK/1Z1JK0R1Wv9OfUhyQVzqKWpMJZ1JJUOItakgpnUUtS4SxqSSqcRS1JhbOoJalwFrUkFc6ilqTCWdSSVDiLWpIKZ1FLUuEsakkqnEUtSYWzqCWpcBa1JBXOopakwlnUklQ4i1qSCmdRS1LhdrwLeURMA/9+wdCLgb/LzE9UlkrSwJ1abHNsfolzKx2mJlrMzU5XeudsDc6ORZ2ZS8DNABExBrSBL1WcS9IAnVpsc/TkGTqrawC0VzocPXkGwLJugH6nPm4DvpuZP6wijKRqHJtfOl/SGzqraxybX6opkfrRb1HfCdyz1Tci4khELETEwvLy8t6TSRqYcyudvsZVlp6LOiKeA9wO3LvV9zPzeGbOZObM5OTkoPJJGoCpiVZf4ypLP1vUbwQeysyfVhVGUjXmZqdpjY9tGmuNjzE3O11TIvVjx4OJF3g720x7SCrbxgFDz/popp6KOiL2A68H3lNtHElVOXzogMXcUD0VdWY+BVxTcRZJ0ha8MlGSCtfPHLUkaQtVX/VpUUvSHgzjqk+nPiRpD4Zx1WcxW9QuGCNVy9dYNYZx1WcRRe2CMVK1fI1VZ2qiRXuLUh7kVZ9FTH24YIxULV9j1RnGVZ9FbFG7YIxULV9j1RnGVZ9FFPUwdh2kUeZrrFpVX/VZxNSHC8ZI1fI11mxFbFG7YIxULV9jzRaZOfAnnZmZyYWFhYE/ryRdriLiwcyc2ep7RUx9SJK2V8TUhyQ1mWt9SFLBXOtDkgo3jIuJLGpJ2oNhXExkUUvSHgzjDu8WtSTtwcis9SFJTTUya31IUpONxFofkqTtWdSSVDiLWpIK5xy1NCK8Z2JzWdTSCPCeic1mUUt70JSt1Etd5lxiXm1mUUu7dGqxzdy9p1n93fqa7u2VDnP3ngbK20r1nonN5sFEaZfu/srZ8yW9YfV3yd1fOVtTou0N4zJnVceilnZppbPa13idvGdiszn1IY0A75nYbBa1tEtX7x/nyaeevfV89f7xGtLsrOrLnFUdpz6kXbrrLTcwPhabxsbHgrveckNNiXS5cota2iWnEzQsFrW0B04naBic+pCkwlnUklS4noo6IiYi4kREPBoRj0TEK6sOJkla1+sc9SeB+zLzLyLiOcD+CjNJki6wY1FHxHOBW4G/BsjM3wK/rTaWJGlDL1MfLwaWgc9GxGJEfDoirrr4QRFxJCIWImJheXl54EElaVT1UtT7gFuAT2XmIeDXwIcvflBmHs/MmcycmZycHHBMSRpdvRT148DjmflA9+sTrBe3JGkIdizqzPwJ8KOI2Fhm6zbg25WmkiSd1+tZH+8DPt894+N7wLuqiyRJulBPRZ2Z3wJmKs4iSdqCVyZKUuEsakkqnEUtSYWzqCWpcBa1JBXOopakwlnUklQ4i1qSCmdRS1LhLGpJKpxFLUmFs6glqXAWtSQVzqKWpMJZ1JJUOItakgpnUUtS4SxqSSqcRS1JhbOoJalwFrUkFc6ilqTCWdSSVDiLWpIKZ1FLUuEsakkqnEUtSYWzqCWpcBa1JBXOopakwu2rO4Ck4Ti12ObY/BLnVjpMTbSYm53m8KEDdcdSDyxqaQScWmxz9OQZOqtrALRXOhw9eQbAsm4Apz6kEXBsful8SW/orK5xbH6ppkTqh0UtjYBzK52+xlUWi1oaAVMTrb7GVRaLWhoBc7PTtMbHNo21xseYm52uKZH64cFEaQRsHDD0rI9m6qmoI+
IHwC+BNeDpzJypMpSkwTt86IDF3FD9bFG/JjN/VlkSSdKWnKOWpML1WtQJfC0iHoyII1s9ICKORMRCRCwsLy8PLqEkjbhei/rVmXkL8EbgvRFx68UPyMzjmTmTmTOTk5MDDSlJo6ynos7Mc92PTwBfAl5eZShJ0jMiMy/9gIirgCsy85fdz+8H/j4z77vEzywDP9xlpmuBphy0bFJWaFbeJmWFZuVtUlZoVt69ZP3DzNxyOqKXsz6eD3wpIjYe/6+XKmmA7X5ZLyJioSmn/zUpKzQrb5OyQrPyNikrNCtvVVl3LOrM/B5w06B/sSSpN56eJ0mFK7Goj9cdoA9NygrNytukrNCsvE3KCs3KW0nWHQ8mSpLqVeIWtSTpAha1JBWuiKKOiCsj4n8j4nREnI2Ij9SdqRcRMRYRixHxH3VnuZSI+EFEnImIb0XEQt15dhIRExFxIiIejYhHIuKVdWfaSkRMd/+mG//+LyI+WHeuS4mID3VfYw9HxD0RcWXdmbYTER/o5jxb4t81Iv4pIp6IiIcvGPv9iLg/Ir7T/Xj1IH5XEUUN/AZ4bWbeBNwMvCEiXlFzpl58AHik7hA9ek1m3tyQ81E/CdyXmdezfmpokX/jzFzq/k1vBv4YeIr1K3eLFBEHgPcDM5l5IzAG3Flvqq1FxI3Au1m/Cvom4M0R8ZJ6Uz3LPwNvuGjsw8DXM/MlwNe7X+9ZEUWd637V/XK8+6/oo5wR8ULgz4FP153lchIRzwVuBT4DkJm/zcyVelP15Dbgu5m52ytyh2Uf0IqIfcB+4FzNebbzUuCbmflUZj4NfAN4W82ZNsnM/wF+ftHwW4HPdT//HHB4EL+riKKG89MI3wKeAO7PzAfqzrSDTwB/C/yu7iA92HH1w4K8GFgGPtudVvp0d+mC0t0J3FN3iEvJzDbwceAx4MfALzLza/Wm2tbDwK0RcU1E7AfeBPxBzZl68fzM/DFA9+PzBvGkxRR1Zq51dyFfCLy8u+tTpIh4M/BEZj5Yd5Ye7bj6YUH2AbcAn8rMQ8CvGdDuY1Ui4jnA7cC9dWe5lO586VuBFwFTwFUR8Y56U20tMx8BPsb62kL3AaeBp2sNVaNiinpDdzf3v3n23E9JXg3c3r1F2b8Br42If6k30vYatvrh48DjF+xRnWC9uEv2RuChzPxp3UF28Drg+5m5nJmrwEngVTVn2lZmfiYzb8nMW1mfYvhO3Zl68NOIeAFA9+MTg3jSIoo6IiYjYqL7eYv1/1CP1ptqe5l5NDNfmJkHWd/l/c/MLHLLJCKuiojf2/gc+DPWdyuLlJk/AX4UERu3x74N+HaNkXrxdgqf9uh6DHhFROyP9VXWbqPQA7UAEfG87sfrgDtoxt/4K8A7u5+/E/jyIJ60lLuQvwD4XESMsf7m8YXMLPqUtwbpe/XDArwP+Hx3SuF7wLtqzrOt7vzp64H31J1lJ5n5QEScAB5ifRphkbIvz/5iRFwDrALvzcwn6w50oYi4B/hT4NqIeBy4C/hH4AsR8TesvzH+5UB+l5eQS1LZipj6kCRtz6KWpMJZ1JJUOItakgpnUUtS4SxqSSqcRS1Jhft/4YHCuW/gYY8AAAAASUVORK5CYII=\n",
"text/plain": [
"