{ "cells": [ { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# importing packages/modules\n", "\n", "import pandas as pd\n", "from sklearn.manifold import TSNE\n", "from sklearn.feature_extraction import DictVectorizer\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", "from sklearn.decomposition import PCA\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import Pipeline\n", "\n", "sns.set_style('darkgrid')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Objective: Use unsupervised learning to create distinct clusters that represent the most abundant OTUs (operational taxonomic units)/identities of bacteria found in a mouse (*Mus musculus*), based on the Schloss lab MiSeq standard operating procedure (SOP)." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# import the csv file into jupyter\n", "\n", "df = pd.read_csv(\"0.16.cons.taxonomy.csv\", delimiter=\";\",index_col=0, usecols=[\"otu\",\"size\",\"identity\",\"phylum\",\"class\",\"order\",\"family\",\"genus\",\"genus_0\",\"genus_1\"])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sizeidentityphylumclassorderfamilygenusgenus_0genus_1
otu
Otu00161864Bacteria(100)Bacteroidetes(100)Bacteroidia(100)Bacteroidales(100)Porphyromonadaceae(100)Barnesiella(96)Barnesiella_unclassified(96)Barnesiella_unclassified(96)
Otu00223360Bacteria(100)Firmicutes(100)Clostridia(100)Clostridiales(100)Lachnospiraceae(89)Lachnospiraceae_unclassified(61)Lachnospiraceae_unclassified(61)Lachnospiraceae_unclassified(61)
Otu0036679Bacteria(100)Firmicutes(100)Bacilli(100)Lactobacillales(100)Lactobacillaceae(95)Lactobacillus(95)Lactobacillus_unclassified(95)Lactobacillus_unclassified(95)
Otu0046584Bacteria(100)Bacteroidetes(100)Bacteroidia(100)Bacteroidales(100)Bacteroidaceae(100)Bacteroides(100)Bacteroides_unclassified(100)Bacteroides_unclassified(100)
Otu0055376Bacteria(100)Bacteroidetes(100)Bacteroidia(100)Bacteroidales(100)Rikenellaceae(100)Alistipes(100)Alistipes_unclassified(100)Alistipes_unclassified(100)
\n", "
" ], "text/plain": [ " size identity phylum class \\\n", "otu \n", "Otu001 61864 Bacteria(100) Bacteroidetes(100) Bacteroidia(100) \n", "Otu002 23360 Bacteria(100) Firmicutes(100) Clostridia(100) \n", "Otu003 6679 Bacteria(100) Firmicutes(100) Bacilli(100) \n", "Otu004 6584 Bacteria(100) Bacteroidetes(100) Bacteroidia(100) \n", "Otu005 5376 Bacteria(100) Bacteroidetes(100) Bacteroidia(100) \n", "\n", " order family \\\n", "otu \n", "Otu001 Bacteroidales(100) Porphyromonadaceae(100) \n", "Otu002 Clostridiales(100) Lachnospiraceae(89) \n", "Otu003 Lactobacillales(100) Lactobacillaceae(95) \n", "Otu004 Bacteroidales(100) Bacteroidaceae(100) \n", "Otu005 Bacteroidales(100) Rikenellaceae(100) \n", "\n", " genus genus_0 \\\n", "otu \n", "Otu001 Barnesiella(96) Barnesiella_unclassified(96) \n", "Otu002 Lachnospiraceae_unclassified(61) Lachnospiraceae_unclassified(61) \n", "Otu003 Lactobacillus(95) Lactobacillus_unclassified(95) \n", "Otu004 Bacteroides(100) Bacteroides_unclassified(100) \n", "Otu005 Alistipes(100) Alistipes_unclassified(100) \n", "\n", " genus_1 \n", "otu \n", "Otu001 Barnesiella_unclassified(96) \n", "Otu002 Lachnospiraceae_unclassified(61) \n", "Otu003 Lactobacillus_unclassified(95) \n", "Otu004 Bacteroides_unclassified(100) \n", "Otu005 Alistipes_unclassified(100) " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# look at the first couple of rows\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 106 entries, Otu001 to Otu106\n", "Data columns (total 9 columns):\n", "size 106 non-null int64\n", "identity 106 non-null object\n", "phylum 106 non-null object\n", "class 106 non-null object\n", "order 106 non-null object\n", "family 106 non-null object\n", "genus 106 non-null object\n", "genus_0 106 non-null object\n", "genus_1 106 non-null object\n", "dtypes: int64(1), 
object(8)\n", "memory usage: 8.3+ KB\n" ] } ], "source": [ "# give a concise summary of the dataset\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "100" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(df['genus_0'] == df['genus_1']) \n", "\n", "# There are 6 instances (106 rows - 100 matches) where the classification differs, i.e. where the columns genus_0 and \n", "# genus_1 are not the same; thus, I will not drop either column" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# changing the type of the column to int32 to reduce memory usage \n", "# the other columns are of the right type\n", "\n", "df['size'] = df['size'].astype('int32')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "They are 0 missing values in this data set.\n" ] } ], "source": [ "# check for NaNs or missing values\n", "# NOTE: .any() reduces per column, so this counts the columns that contain at least one NaN, not individual missing cells\n", "\n", "missing = sum(pd.isnull(df).any())\n", "\n", "msg = 'They are {} missing values in this data set.'\n", "\n", "print(msg.format(missing))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Firmicutes(100) 54\n", "Proteobacteria(100) 14\n", "TM7(100) 12\n", "Bacteria_unclassified(100) 10\n", "Bacteroidetes(100) 7\n", "Deinococcus-Thermus(100) 4\n", "Actinobacteria(100) 4\n", "Verrucomicrobia(100) 1\n", "Name: phylum, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# subset the dataframe, taking only the phylum column\n", "phylum = df['phylum']\n", "\n", "# counting each category \n", "phylum.value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We have a lot of Firmicutes and Proteobacteria. 
Usually it's **Firmicutes** and **Bacteroidetes** that dominate in healthy humans (debatable; it depends on a lot of factors). Let's change the strings in the columns to numbers using a cool trick with DictVectorizer. For unsupervised learning techniques to work, you need a matrix: rows and columns of numbers." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Make a boolean mask for categorical columns\n", "df_mask = (df.dtypes == object)\n", "\n", "# Get list of categorical column names\n", "df_mask_columns = df.columns[df_mask].tolist()\n", "\n", "# convert the dataframe to a list of per-row dictionaries \n", "df_dict = df.to_dict(\"records\")\n", "\n", "# We don't want sparse matrices\n", "dv = DictVectorizer(sparse = False)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'family=Cryomorphaceae(100)': 31, 'phylum=Firmicutes(100)': 235, 'class=Epsilonproteobacteria(100)': 9, 'genus_0=Anaerovorax_unclassified(86)': 111, 'family=Bacteria_unclassified(100)': 21, 'genus=Bacillus(100)': 67, 'class=Bacteroidia(100)': 5, 'order=Rhodobacterales(100)': 227, 'genus_1=Bacteroides_unclassified(100)': 167, 'order=Coriobacteridae(100)': 216, 'genus_1=Ruminococcaceae_unclassified(100)': 194, 'genus_0=Ruminococcaceae_unclassified(96)': 145, 'genus=Stenotrophomonas(100)': 99, 'genus=Clostridiales_unclassified(69)': 77, 'genus=Rhodobacter(100)': 95, 'genus=Staphylococcus(100)': 98, 'genus_0=Clostridiales_unclassified(100)': 122, 'family=Rikenellaceae(100)': 48, 'family=Bacillales_unclassified(100)': 19, 'class=Clostridia(100)': 7, 'genus_1=Staphylococcus_unclassified(100)': 196, 'family=Ruminococcaceae(100)': 49, 'order=Verrucomicrobiales(100)': 229, 'genus=Coprobacillus(100)': 78, 'family=Clostridia_unclassified(100)': 25, 'identity=Bacteria(100)': 205, 'genus_1=Clostridiales_unclassified(69)': 174, 'genus=Streptococcus(100)': 100, 
'class=TM7_genera_incertae_sedis(100)': 14, 'genus_1=Aeromonas_unclassified(100)': 158, 'family=Moraxellaceae(100)': 42, 'genus=Gammaproteobacteria_unclassified(100)': 86, 'order=Rhizobiales(100)': 226, 'genus_1=Escherichia/Shigella_unclassified(100)': 180, 'family=Deinococcaceae(100)': 32, 'genus_0=Pseudomonadaceae_unclassified(70)': 141, 'class=Betaproteobacteria(100)': 6, 'genus=Lactobacillus(95)': 89, 'genus_0=Ruminococcaceae_unclassified(100)': 144, 'genus_0=Akkermansia_unclassified(100)': 109, 'genus_1=Bacteroidetes_unclassified(100)': 168, 'genus_1=Clostridium_unclassified(100)': 175, 'genus_0=Actinomycetaceae(53)': 107, 'genus_0=Staphylococcus_unclassified(100)': 146, 'family=Clostridiaceae(100)': 26, 'genus_1=Akkermansia_unclassified(100)': 159, 'genus_1=Clostridia_unclassified(100)': 171, 'genus_0=Porphyromonas_unclassified(100)': 140, 'phylum=Bacteria_unclassified(100)': 232, 'genus_0=Deinococcus_unclassified(100)': 129, 'family=Rhodobacteraceae(100)': 47, 'family=Bacillaceae(100)': 18, 'genus_1=Pseudomonadaceae_unclassified(70)': 191, 'genus=Actinomycineae(53)': 59, 'genus=Helicobacter(100)': 87, 'order=Flavobacteriales(100)': 221, 'genus_1=Clostridiales_unclassified(67)': 173, 'genus_0=Bacillales_unclassified(100)': 113, 'genus_1=Bacilli_unclassified(100)': 164, 'genus=unclassified_Clostridiales_unclassified(93)': 104, 'genus_0=Clostridiales_unclassified(69)': 124, 'genus_1=Fluviicola_unclassified(100)': 182, 'family=Ruminococcaceae(96)': 50, 'genus_0=Streptococcus_unclassified(100)': 148, 'family=Pseudomonadaceae(100)': 45, 'genus=Coriobacterineae(100)': 79, 'class=Bacilli(100)': 2, 'family=Erysipelotrichaceae(100)': 34, 'genus_0=Turicibacter_unclassified(100)': 150, 'family=Incertae_Sedis_XIII(86)': 38, 'order=Actinobacteridae(100)': 206, 'genus_1=Coprobacillus_unclassified(100)': 176, 'genus_0=Escherichia/Shigella_unclassified(100)': 131, 'family=unclassified_Clostridiales(63)': 56, 'genus=Bacillales_unclassified(100)': 65, 
'genus_1=Cryomorphaceae_unclassified(100)': 177, 'genus=unclassified_Ruminococcaceae(62)': 105, 'phylum=Bacteroidetes(100)': 233, 'genus_0=TM7_genera_incertae_sedis_unclassified(100)': 149, 'family=Aeromonadaceae(100)': 17, 'phylum=Verrucomicrobia(100)': 238, 'order=Bacteroidetes_unclassified(100)': 212, 'family=Bacteroidaceae(100)': 22, 'class=Verrucomicrobiae(100)': 15, 'family=Helicobacteraceae(100)': 37, 'genus_0=Listeria_unclassified(100)': 138, 'genus_0=Rhodobacter_unclassified(100)': 143, 'genus=Bacteria_unclassified(100)': 68, 'order=TM7_genera_incertae_sedis_unclassified(100)': 228, 'genus_1=Lactobacillus_unclassified(95)': 186, 'genus_1=TM7_genera_incertae_sedis_unclassified(100)': 199, 'genus_1=Rhizobiales_unclassified(100)': 192, 'family=Bacteroidetes_unclassified(100)': 23, 'family=Neisseriaceae(100)': 43, 'order=Campylobacterales(100)': 213, 'order=Neisseriales(100)': 224, 'genus_1=Bacillus_unclassified(100)': 165, 'genus_1=Clostridiales_unclassified(100)': 172, 'class=Deinococci(100)': 8, 'order=Bacillales(100)': 208, 'genus=Aeromonas(100)': 60, 'genus_1=unclassified_Clostridiales_unclassified(63)': 202, 'order=Aeromonadales(100)': 207, 'family=Bifidobacteriales(100)': 24, 'family=Actinomycetales(54)': 16, 'genus_1=Deinococcus_unclassified(100)': 178, 'genus_1=Bacteria_unclassified(100)': 166, 'phylum=Actinobacteria(100)': 231, 'genus_0=Bacteria_unclassified(100)': 116, 'genus_0=Aeromonas_unclassified(100)': 108, 'genus_0=Bifidobacterium(100)': 120, 'genus_0=unclassified_Clostridiaceae_1(95)': 151, 'genus=Ruminococcaceae_unclassified(100)': 96, 'genus_0=Erysipelotrichaceae_unclassified(100)': 130, 'family=Xanthomonadaceae(100)': 55, 'order=Pseudomonadales(100)': 225, 'genus_0=Rhizobiales_unclassified(100)': 142, 'genus_1=Bacillales_unclassified(100)': 163, 'genus_1=Bifidobacterium_unclassified(100)': 170, 'genus_0=Cryomorphaceae_unclassified(100)': 128, 'order=Xanthomonadales(100)': 230, 'class=Flavobacteria(100)': 12, 'size': 239, 
'genus_0=Alistipes_unclassified(100)': 110, 'order=Bacteria_unclassified(100)': 210, 'order=Clostridiales(100)': 215, 'order=Clostridia_unclassified(100)': 214, 'genus_1=Turicibacter_unclassified(100)': 200, 'genus=Fluviicola(100)': 85, 'genus_0=Clostridia_unclassified(100)': 121, 'order=Deinococcales(100)': 217, 'phylum=TM7(100)': 237, 'genus_0=Clostridium(100)': 125, 'genus_1=Olsenella(100)': 189, 'genus_1=Firmicutes_unclassified(100)': 181, 'genus_0=Stenotrophomonas_unclassified(100)': 147, 'genus_1=Alistipes_unclassified(100)': 160, 'class=Firmicutes_unclassified(100)': 11, 'order=Gammaproteobacteria_unclassified(100)': 222, 'family=Staphylococcaceae(100)': 51, 'genus=Clostridia_unclassified(100)': 73, 'class=Gammaproteobacteria(100)': 13, 'family=Bacilli_unclassified(100)': 20, 'genus=Clostridiales_unclassified(67)': 76, 'genus=Alistipes(100)': 62, 'genus_1=Barnesiella_unclassified(96)': 169, 'order=Lactobacillales(100)': 223, 'genus_1=Rhodobacter_unclassified(100)': 193, 'genus_0=Coriobacteriaceae(100)': 127, 'class=Alphaproteobacteria(100)': 1, 'genus=Bacteroidetes_unclassified(100)': 70, 'phylum=Proteobacteria(100)': 236, 'class=Erysipelotrichi(100)': 10, 'genus_1=Anaerovorax_unclassified(86)': 161, 'genus_1=Actinomyces(53)': 156, 'genus=Bacilli_unclassified(100)': 66, 'class=Bacteria_unclassified(100)': 3, 'genus=Lachnospiraceae_unclassified(61)': 88, 'genus_1=Stenotrophomonas_unclassified(100)': 197, 'genus_0=Bacillus_unclassified(100)': 115, 'genus_0=Bacteroidetes_unclassified(100)': 118, 'genus_0=Lachnospiraceae_unclassified(61)': 136, 'genus=Turicibacter(100)': 102, 'order=Bacteroidales(100)': 211, 'family=TM7_genera_incertae_sedis_unclassified(100)': 53, 'genus_0=Bacteroides_unclassified(100)': 117, 'class=Actinobacteria(100)': 0, 'genus_0=Firmicutes_unclassified(100)': 132, 'genus_0=Clostridiales_unclassified(67)': 123, 'genus_0=Coprobacillus_unclassified(100)': 126, 'genus=Listeria(100)': 90, 'genus=Firmicutes_unclassified(100)': 84, 
'genus_1=Helicobacter_unclassified(100)': 184, 'genus_1=Neisseria_unclassified(100)': 188, 'genus_1=Acinetobacter_unclassified(100)': 155, 'genus=Barnesiella(96)': 71, 'genus=Anaerovorax(86)': 63, 'genus=Pseudomonadaceae_unclassified(70)': 93, 'genus_0=Bacilli_unclassified(100)': 114, 'order=Firmicutes_unclassified(100)': 220, 'genus=Porphyromonas(100)': 92, 'genus_0=Helicobacter_unclassified(100)': 135, 'order=Enterobacteriales(100)': 218, 'family=Clostridiales_unclassified(67)': 28, 'genus_1=unclassified_Clostridiales_unclassified(93)': 203, 'family=Lactobacillaceae(95)': 40, 'order=Bacilli_unclassified(100)': 209, 'genus_0=Acinetobacter_unclassified(100)': 106, 'class=Bacteroidetes_unclassified(100)': 4, 'family=Streptococcaceae(100)': 52, 'genus_0=Fluviicola_unclassified(100)': 133, 'genus_1=unclassified_Clostridiaceae_1_unclassified(95)': 201, 'genus=Bacteroides(100)': 69, 'family=Clostridiales_unclassified(69)': 29, 'genus_0=unclassified_Ruminococcaceae_unclassified(62)': 154, 'family=Verrucomicrobiaceae(100)': 54, 'genus=Clostridiales_unclassified(100)': 75, 'genus_0=Neisseria_unclassified(100)': 139, 'genus=Acinetobacter(100)': 58, 'genus=Deinococcus(100)': 81, 'genus=Erysipelotrichaceae_unclassified(100)': 82, 'family=Lachnospiraceae(89)': 39, 'genus=Akkermansia(100)': 61, 'genus_1=Ruminococcaceae_unclassified(96)': 195, 'family=Coriobacteriales(100)': 30, 'genus_1=Listeria_unclassified(100)': 187, 'genus_1=Porphyromonas_unclassified(100)': 190, 'genus_0=Bacillaceae_unclassified(100)': 112, 'genus_0=unclassified_Clostridiales_unclassified(93)': 153, 'phylum=Deinococcus-Thermus(100)': 234, 'genus_1=unclassified_Ruminococcaceae_unclassified(62)': 204, 'family=Enterobacteriaceae(100)': 33, 'genus_1=Bacillaceae_unclassified(100)': 162, 'family=Clostridiales_unclassified(100)': 27, 'genus_1=Erysipelotrichaceae_unclassified(100)': 179, 'genus=Escherichia/Shigella(100)': 83, 'order=Erysipelotrichales(100)': 219, 'genus=Cryomorphaceae_unclassified(100)': 80, 
'genus=Bacillaceae_unclassified(100)': 64, 'family=Firmicutes_unclassified(100)': 35, 'genus_0=Barnesiella_unclassified(96)': 119, 'genus_0=unclassified_Clostridiales_unclassified(63)': 152, 'family=Listeriaceae(100)': 41, 'genus_1=Adlercreutzia(64)': 157, 'genus_1=Lachnospiraceae_unclassified(61)': 185, 'genus=unclassified_Clostridiales_unclassified(63)': 103, 'genus_1=Streptococcus_unclassified(100)': 198, 'family=unclassified_Clostridiales(93)': 57, 'genus_1=Gammaproteobacteria_unclassified(100)': 183, 'genus_0=Gammaproteobacteria_unclassified(100)': 134, 'genus=Bifidobacteriaceae(100)': 72, 'genus=Ruminococcaceae_unclassified(96)': 97, 'family=Gammaproteobacteria_unclassified(100)': 36, 'genus=Clostridiaceae_1(100)': 74, 'genus=TM7_genera_incertae_sedis_unclassified(100)': 101, 'family=Porphyromonadaceae(100)': 44, 'genus=Rhizobiales_unclassified(100)': 94, 'family=Rhizobiales_unclassified(100)': 46, 'genus=Neisseria(100)': 91, 'genus_0=Lactobacillus_unclassified(95)': 137}\n" ] } ], "source": [ "# Apply fit_transform to our dataset\n", "df_encoded = dv.fit_transform(df_dict)\n", "\n", "# New column names\n", "print(dv.vocabulary_)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(106, 240)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# rows and number of column names in the matrix\n", "# 106 rows and 240 columns\n", "df_encoded.shape" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# t-SNE\n", "# find a two dimensional representation of the data that preserves the distances between points as best as possible\n", "# preserves information by finding the closest neighbours (points)\n", "\n", "# instantiate model\n", "tsne = TSNE(random_state=42)\n", "\n", "# fit and transform the data\n", "otu_tsne = tsne.fit_transform(df_encoded)" ] }, { "cell_type": "code", 
"execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAD1CAYAAAB0gc+GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAH9VJREFUeJzt3XtsVOfdJ/DveMaewdh4SOTXNnkrbsUJyq6dBpWETQwJlBD8iqzIBSgtKm+0RcW0peCAiJsYZ6EGRN2XqhK3SIlQRUvqbVlg3zopNCkXq0tQFIOojJ1AYFOwXdIY4wGPx3PZP5wZPONzzsw5c+7n+/nLPj6eeZ6ZOb95zu+5uWKxWAxERGR6OUYXgIiIMsOATURkEQzYREQWwYBNRGQRDNhERBbBgE1EZBEeJf80NDSETZs24fr168jJycGWLVswdepUtctGREQjKArYJ0+eRDgcxqFDh9Da2opdu3bhV7/6VdI5N2/2q1JAsyko8CIQGDS6GJpzQj2dUEeA9bSa4uJC0b8pSolMnjwZkUgE0WgUgUAAHo+iuG9JHo/b6CLowgn1dEIdAdbTThRF2vz8fFy/fh0LFy5Eb28v9u7dO+qcggKvLV9AtzsHfn++0cXQnBPq6YQ6AqynnbiUTE3ftm0b8vLyUFtbi66uLnzve9/DsWPH4PV6E+fYNSXi9+fj1q27RhdDc06opxPqCLCeViOVElHUwh43bhxyc3MBAEVFRQiHw4hEIspKR0REGVEUsFeuXIm6ujosX74cQ0NDWLduHfLz7X0rQkRkNEUBe+zYsfjlL3+pdlmIiEiCc4Z3EFlMS3sPdp++ip7+QZQUelFTNQkLp5dInvPElPFovdKb9D/fnjXZoBqQ2hiwiUyopb0HjX/6BMFwFADQ3T+Ixj99AgCJoC10zu/PdyceI/4/Y/O9mD3Rr3MNSAucmk5kQrtPX00E4rhgOIrdp69KnpMqGI6i6XinFkUkA7CFTWRCPf3CM/ZGHhc7J1VXX1BRGTJJyZC+2MImMqGSQm/a42LnpCor8sl+/ni6pbt/EDHcS6+0tPfIfixSDwM2kQnVVE2Cz5N8efo8OaipmiR5TiqfJwe188tlP38mKRnSH1MiRCYUTz1IpSSEzhEaJfJc5QTZMwAzScmQ/hiwiUxq4fSStDnjTM5RoqTQi26B4JxpGoa0wZQIEY2SSUqG9McWNhGNkklKhvTHgE1EgrRKt5ByDNhEFrPk7Q/x2Zf3xlZPvs+H3/37TANLRHphDpvIQlKDNQB89mUQS97+0KASkZ4YsIksJDVYpztO9sKATURkEQzYREQWwYBNZCGT7xNeF0TsONmL4oC9b98+LF26FM8//zyam5vVLBMRifjdv88cFZw5SsQ5FA3rO3v2LD7++GP89re/xcDAAN566y21y0VEIpwQnLm0qzBFAfvMmTMoLy/HmjVrEAgEsHHjRrXLRZTAi9dZMtltx6kUBeze3l7cuHEDe/fuxd///nesXr0a7777LlwuV+KcggIvPB63agU1C7c7B36//XeIN6KeR8/fwNY/tqP37hAAwD/Gg+r/WoY/fHwdwaERF+/x4W2vnquckNXz8b00n83H/obffPj5qOPBcBR7W69J7k9ppXoqpShg+/1+TJkyBXl5eZgyZQq8Xi++/PJL3H///YlzAgF7LsPo9+fLXqrSivSuZ0t7D7a824mhaCxx7NZAWPjiHYpi53sdWe9TyPfSXLaf6EzakzJVV19Qsh5WqWc6xcWFon9T1Ok4Y8YMnD59GrFYDD09PRgYGIDfz00+Sbndp68mBet0uC6z/Ry+IB6sAS7tCihsYT/99NM4d+4cXnzxRcRiMdTX18Pttl/6g/QjNwDHAHyz6RSKfB7Uzp
3q+NymHUh9X3Np12GKF39iRyOpSWzB/HT6gmFseXd4V3AGbWvLcYkH7bpnpvH9BSfOkEnUVE1Cbo5r1HGPC3ihshSlErfDQ9EY9xq0gcUVpYLHX6gsZbD+CpdXJVOIX5BN719GXzAMABjndeOVeV9P/O2bTadE/585bevb9K3hzYIPX+hGNDbc4l5cUZo4TgzYpKN046mzWTCfHVKZMfuY9k3fKmeAlsCATbpQYzJEkc+TaH2nYodUepyQYn3MYZMudp++mggUccFwVFbuuXbuVME8N3OcmVHjPSBjsYVNuhDLMcvJPXNj2Oyo8R6QsRiwSRdiw/bk5p65Maxyar0HZBymREixlvYeLNp/FjObTmHR/rNoae8RPbemahJ8nuSPGydD6IvvgbrkfP7V4m5oaGjQ4oHv3g1p8bCG8/lyEQwOGV0MzaWrZ7wD69ZXnYCBUAR//awXZUVeTCsuGHX+tOIClBV50d4dwJ1QBKWFXqw3eIai095LM74HatLz/RT6/H/wyT/x5l+v4cu7g3hyyv1pHkHc2LHidzxMiZAiUh1YYgGA6Qzj8T1Qh9DnPy6+gJUWwxOZEiFF2IFFTpbuc55uISulGLBJEbGOKnZgkROk+5zLWHhSFqZESJGaqklJkzAAdmDJUfO7Npz7/Hbi929+bRx2L3nEwBKRHEKf/5EEpguogi1sUmTh9BLUPTMNpYVeuACUFnq5olqGUoM1AJz7/DZqftdmUIlIrvjnf0yucAgVW8gqW2xhk2LswFImNVinO07mFP/8bz/RqduCVQzYRERZ0HPBKqZEiIgsIquA/c9//hNz5szB5cuX1SoPacyI2VmU7JtfGyfrOFGc4oA9NDSE+vp6+Hw+NctDGorPzuruH0QM95bXZNDW1+4lj4wKzhwlQplQnMPesWMHli1bhv3796tZHtKQktmJpA2nBGezb5hgNYoC9h/+8Afcd999qKqqEg3YBQVeeDz220nd7c6B359vdDEUkZqdmFonK9czU06oI6BtPY+ev4Gm453o6guirMiH2vnleK5yQuJvjcc/QXBoxIYJxz/B2Hxv4hw1OeH9dMViMdlzcr7zne/A5XLB5XKhvb0dkyZNwp49e1BcXJw45+bNflULahZ+fz5u3bprdDEUWbT/rODymqWFXhxb9VjSMSvXM1NOqCOgXT1Td7ABhidPxcfji33einwejMl1q97qtsv7WVxcKPo3RS3sgwcPJn5esWIFGhoakoI1mRNnJ1I2UtMbd0NhyRSb2B1dXzCc2OqN25TJw2F9DsLZiaRUS3sPtrzbmdRhfXswInhuPFBnuq4MtynLXNYTZ37961+rUQ7SCWcnkhJN71/GUIYrGsUDdbr1NkbiKo+Z4UxHIkpLbLf6VCNTbEJ7cN4NhQVb5lzlMTMM2ERf4RC07JQKvGapd3Qt7T34ny0dCKc01p+YMl6vYloaAzY5jlBgBpB0+87OsGTjvG7RnDUgPNIobuTrPc7nQUQgs3LkQjcqHyiy/Gut9Zc+Ox3JdqSm34vN9mx6/7LoiAcCXpn3dXgk1ngWy0Gnvt59wTCEMuHhGCz/Wusxk5gBm2wl3UUjNttTLEfLzrBhC6eXoH7hg6IL84vloKX2Pkxl9ddaaiaxWhiwyVbSXTRygwI7w+5ZOL0EDQsfhM+THDakxvLLeb2t/lrrsc8pc9hkGZnkB9NdNCWFXsHZd+O8boQiMU4qSkNo5IdUnlbs9U7lccHyr7VYXdX8ImLAJktInQYt1imY7qIRm+35yryvA8g8EDmZnLH8Qq93bo4Lnhxg4KuhIuO8brwy7+uWf631mEnMgG0jdh6WlulKg+kumnQtRLu8XmYht0VuZXrUVdHiT5ng4k/ZkxOA0y3EI5fZFtKZ2XRKcHSBC8CHtbOTjmX6upmtjlphPa1F9cWfKHNKW72ZpgDi7L7WtZz8IKffk10xYGtIbtAdSW4A1qOH2khcaTBZvCEw8ktM6x27yXgM2BrKptUrNw
Dr0UNtJCflQtMRSn8BQDQG/P58NwBoGrTt3FdidgzYGsqm1Ss3AKvZAm1p78He1mvo6gua6oJkqmNYuskohy90axaws7lrpOxx4owKxKZCiwXXTFq9NVWTZE1QUGut6/gFeaMvyI16TSrdF36Gq6AqosdsPhLHFnaWpFoc2bR6laQA1GiB2r3z0g7STUYRmz6uBrv3lZidooA9NDSEuro6XL9+HaFQCKtXr8a8efPULpvqtMi9SQW4+OplSp/TiBQAL0jzS7cxwOKKUs2e2+59JWanKGAfPXoUfr8fO3fuRG9vLxYvXmz6gK1V7i1dgLNa3pUXpPmNvPvSe5RIurtGs3ZImrVccikK2M8++ywWLFiQ+N3tdqtWoGxIvSla3erbLcBx+Jw1GNUQkErVmbVDcvuJzsToGcA85VIiq5mOgUAAq1evxpIlS7Bo0aKkvw0MhODx6BfIj56/gZ8euYjg0IhAk5uDn/33/4LnKieg/PV3RWfKdW55NuPncbtzEInce450z2tFR8/fQNOJTnTdCqKsyIfa+eWWrYuU1PfSrvSq55yf/wU3+oKjjk8o8uHkK09p/vxC9Tx6/gZe+V8XBK99vcolV26ueNxU3OnY1dWFNWvWYPny5aOCNQAEAvrmPHe+15EUNAEgOBTFzvc6MHuiX7IlLGc6a+r019kT/aibP21Ui2P2RL9lp8nOnujHc7VPJZXfqnWRYpepzOnoVc8ugWAdP67H8wvVc+d7HYLBWs9yyaX61PQvvvgCL7/8Murr6zFr1izFBVNTulyy2K3+E1PGY9H+s1nltqyWpybSghnTg1Kd5VZMWyoah713717cvn0bu3fvxooVK7BixQoEg8LfrnpJN+ZZaJzyvz38L/jPv/1D0y19iJxC7twBPUgFZSv2y9hmtT4lq9Ut2n9WsEUgtaEob6PtQ6iOQh3X56/34fCFbkRj1lyvw6wrTKpN7P0UGgL5QqV530PLrda3/USn7AtEyUQTjjmmkYRGOWz+Y3IOVK/1Oswm02vSbOlBu61BY7qAnToER84FIvZhEfvWN2POjYwjNPRT7PZTy/U6zKSlvQeN73UiGLn3SljtS8tsXyLZMN1aIocvdMs6no7ULtpmzLmRceTcWWm5XodZJO44IsKVVXpNknKmC9hiF4LSCyTdhBk1Fkwie5BzZ6Xleh1mkW5VQCd8aZmN6VIiOS7hD4LSC8RuU8dJO0JDP10QTotouV6HWaS743DCl5bZmK6FLXYhKL1AslnilJxF6I7rjeoH8UJlaSI45bjMPcJATemuESd8aZmNKYf1yR0lIjWUyO6b02rFCfV0Qh0B5fUUGxLnAvC8Cb+07PJ+WmpYX0t7D1qv9CIWG27hpBuCk27BGbsN6yHSC68d8zFVC1uqNQwIf3CUTH7Jhl2+xdNxQj2dUEeA9bQay7SwxUZ0/PzPnyIUiQm2ojn5hYicwlSdjmJB9vZgRHRoHjsVicgpTBWw5QbZnv5BTn4hIscwVcAWC75FPuHMTUmhl5NfiMgxTJXDFuuVBiC5bRUnvxCRE5gqYAPSwZfDi4jIyUwXsMWwFU1ETmeqHDYREYlT3MKORqNoaGhAR0cH8vLysHXrVkycOFHNsokyclcLIiKjKG5hnzhxAqFQCO+88w5qa2uxfft2NcslSmp9ayIiO1McsD/66CNUVVUBAB555BFcvHhRtUJJkVrfmojIzhSnRAKBAAoKChK/u91uhMNheDzDD1lQ4IXH486+hCmkpqL7/fmqP18qtztHl+cxmhPq6YQ6AqynnSgO2AUFBbhz507i92g0mgjWABAIaLOWh9Q+jHos/GKXBWbScUI9nVBHgPW0GqnFnxSnRB599FGcOnUKANDW1obycn3WxuVUdCJyKsUt7Pnz56O1tRXLli1DLBZDY2OjmuUSxTV6icipTLUethq0HvJnl9uudJxQTyfUEWA9rcYy62FnK93uM0REVmarmY4c8kdEdmargM3dZ4jIzmwVsL
n7DBHZma0CNof8EZGd2arTkUP+iMjObBWwAa6bTUT2ZauUCBGRnTFgExFZBAM2EZFFMGATEVkEAzYRkUUwYBMRWQQDNhGRRTBgExFZBAM2EZFFMGATEVmEoqnp/f392LBhAwKBAIaGhrBp0yZ84xvfULtsREQ0gqKA/fbbb+Pxxx/HypUrceXKFdTW1uLw4cNql42IiEZQFLBXrlyJvLw8AEAkEoHXy/WmiYi0lnYT3ubmZhw4cCDpWGNjIyoqKnDz5k18//vfR11dHWbOnJl0zsBACB6PW/0SG8ztzkEkEk1/osU5oZ5OqCPAelpNbq543FS8a3pHRwfWr1+PjRs3Ys6cOaP+btSu6Vqzy87M6Tihnk6oI8B6Wo3qu6Z/+umnWLt2LXbt2oWHHnpIccGIiChzigJ2U1MTQqEQfvaznwEACgoKsGfPHlULRkREyRQFbAZnIiL9ceIMEZFFMGATEVkEAzYRkUUwYBMRWQQDNhGRRTBgExFZBAM2EZFFKBqHbUct7T3YffoqevoHUVLoRU3VJCycXmJ0sYiIEhiwMRysG//0CYLh4YVjuvsH0finTwCAQZuITIMpEQC7T19NBOu4YDiK3aevGlMgIiIBbGED6OkflHWcyCyYynMWtrABlBQKb8AwzsfvMzKveCqvu38QMdxL5bW09xhdNNIIAzaAmqpJyM1xjTp+ZzDMDz+ZFlN5zsOAjeGOxTG5o1+KcAxoev+yASUiSo+pPOdhwP5K/2BE8HhfkK1sMiexVJ7YcbI+BuyvSH3IeYtJZlRTNQk+T/Il7PPkoKZqkjEFIs1lFbAvX76MGTNmYHDQ+rdgUh9y3mKSWXlHBOxxXjfqnpnGUSI2pngYRCAQwI4dO5CXl6dmeUyJt5hkNqmTvQAgFFG0nzZZiKIWdiwWw+uvv47169djzJgxapfJEFJpD95iktlwhIgzpW1hNzc348CBA0nHJkyYgOrqalvtmC6V9uAtJpkNR4g4kysWi8m+j5o/fz5KS0sBAG1tbaioqMDBgweTzhkYCMHjcatTSh3M+flfcKMvOOr4hCIfTr7yVOJ3tzsHkUh01Hl244R6WrmOmX5eAWvXUw671DM3VzxuKgrYI82dOxctLS3wepPzvDdv9mfzsLoTygn6PDmjOnH+4/RnOHTuc0RjQI4LWFxRik3fKjeiyJry+/Nx69Zdo4uhKSvXMdPPK2Dtesphl3oWFxeK/s3xc69HrsVQ6HXD6/HgdjAsuC7D9hOd+P357sTv0RgSv9sxaJN5xT+XXEfEWbJuYYuxQgtbTisFAB77xSlEBV6tHBdwdv1sLYuqO7u0VqQ4oY4A62k1Ui1sR0+ckdvTLhSspY4TEanJ0QFbbk+7wPpQkseJiNTk6IAtdy2GxRWlso4TEanJ0QFbaC0GYHhd4UX7z45a9GnTt8qxfObXEi3qHBfwQqU9R4kQkfk4epTIyJ727pQ0iNi+jm8sehjrqibrV0iiFNtPdOLwhW7bDy2l0RzdwgaGg/GxVY+hVCANwqm+ZDbxoaXxju740NLtJzqNLRjpwvEBO45TfckKDl/olnWc7MXRKZGRSgq9o9Ii8eOZ4GaopKX454tDS52NLeyvZLMYPDdDJS2N/HyJ4dBSZ2DA/srC6SWoe2YaSgu9cAEoLfRmvBg8l7okLQl9vlJxaKkzMCUywsLpJYrSGMx/k5akPkccJeIsDNgi5OSks81/E0kR+3yVFnpxbNVjBpSIjMKUiAC5OWmh/Hdujgt3Q2HMbDolOAmHKFPcbJfiGLAFyM1Jp+a/i3wexGIx3B6MsBOSspZN/wrZC1MiApTkpEfmvxftP4u+YDjp7/GAz4uMxEil4ZT2r5C9sIUtQO6iUKnYCUlycWgoZYIBW0C2OcNsAz45D4eGUiYUpUQikQi2bduGixcvIhQK4Uc/+hGefvpptctmGLHtl4DhzU+7+oKiI0da2ntwNxROfUh2EpEk3pVRJhQF7CNHjiAcDuPQoUPo6e
lBS0uL2uUyXGrOMHU7MaHV/IS2HAOGOyFr505lDpJEcWgoZUJRSuTMmTMoLS3FqlWr8Nprr2Hu3Llql8t0MrllFZuRNibXzWBNkjh0jzKRtoXd3NyMAwcOJB0bP348vF4v9u3bh3PnzuHVV1/FwYMHk84pKPDC43GrW1oDSd2y+v35GZ9jFW53juXKLJeZ6vjtWZMxNt+LpuOd6OoLoqzIh9r55XiuckLWj22memrJCfVMG7BfeuklvPTSS0nH1q1bh6eeegoulwszZ87E1atXR/1fIGCv3JvULWt8p+ZMzrEKu+xALcVsdZw90Y/Z/2Nm0jE1yme2emrFLvVUfdf0GTNm4OTJkwCAS5cuoaysTFnJLCSTW1be1hKRlhR1Oi5ZsgSbN2/GkiVLEIvF8MYbb6hdLtOJ56D3tl4THSUiNrqE+WsiUoMrFotpsvT5zZv9Wjys4exy25WOE+rphDoCrKfVqJ4SISIi/XEtEZ1wCzEiyhYDtg4ymXRDRJQOA7YOpCbdpAvYbJkTURwDtg6UrhPBljkRjcRORx0oXb2PK7gR0UgM2DpQOqGGK7gR0UhMiehA6YSaTFdwY56byBnYwtbJwuklOLbqMbxR/SAAYPMfO9JuzptJy5w7lWivpb0Hi/af5YbKZDi2sHUktxMxk5Z5NiNQKL3tJzrx+/Pdid/Z8UtGYsDWkZLgmm7zVea5tdPS3pMUrOP4hUhGYcDWkRbBVSzPPc7nwaL9Z5nXzoLUaBx+IZIRmMPWkdTwPqV5UqE8d26OC3cGw8xrZ0kqKHPrLjICA7aOxDoRn5gyXnHH4cLpJah7ZhpKC71wASgt9GJMbg7CKWswcvy2fFJBmWuckxEYsHUkFFzrnpmG1iu9WU2QiY9A+bB2No6tegz9gxHB83gbL4/QFywAvFBZyvQSGYI5bJ0JdSJu/mOH4LkjA6ycsdbcgVsd3JCCzIYB2wTSBVi5wwFrqiYlnQ9wqzKl0o3SIdKTooDd39+PdevWYWBgALm5udi5cyeKi4vVLptjpAuwcocD6t0yFGv9cwYmkbrcDQ0NDXL/6dChQ/D5fPjFL36BUCiE999/H08++WTSOXfvhtQqo6n4fLkIBodUfcxpxQUoK/KivTuAO6EISgu9WD93aiK47frLFcH/uxOK4Pv/baLoYy6f8a/41/E+nPt/t/B//taDoxe7MT4/F9OKC9KWKdN6xlv/t4JhAEAgFMFfP+vFPwJBvPV/Px91vKzIm9Hz60GL99KMWE9rGTtWPHWpqIVdXl6OK1eGg0ggEIDHw8xKtqRuvZXmpPVYnlWs9X/4QjeiIiNV2MomUiZtpG1ubsaBAweSjtXX16O1tRXV1dXo6+vDwYMHR/1fQYEXHo9bvZKahNudA78/X9fn3LDgQfz0yEUEh0akTHJzsGHBg5Jl2dt6TTCY7m29hm/Pmiz5nJnWU2zkSWqwHnl+Nq/f0fM30HS8E119QZQV+VA7vxzPVU5Q9FhGvJdGYD3tQ9Gu6T/84Q/x5JNPYtmyZbh06RI2bNiAY8eOJZ3DXdPVpSQfPLPpFITeXBeAD2tnS/5vpvVctP+sYOs/xyUctEsLvTi26rG0jztSvO5Cz+Pz5KDumWmKWu122WU7HdbTWqR2TVeUyxg3bhwKC4cf9P7778edO3eUlYwypmS0QrbD+zL5khDrMP23h/8F//m3f2Q9UiU1rZOKaRZyEkUBe+3atXjttdfwm9/8BuFwGFu2bFG7XKSCbIb3HT1/I6P8t9SIlMoHirIeJSKUI0/FCUHkFIoCdklJCd588021y0Iqy2Z4X9PxzqyXbRW6K5Cb2skkGHNCEDkFh3fYnNKJH119QcHjqQFUzkgUJaNWxNI6cZwQRE7CtURIUFmRT/B4amtWzkbBSjYVFlvPA7i3Fgvz1+QUbGGToNr55fjp/76YNv8tZ41vJeuBcz0PonsYsEnQc5UTcOfuYNpAKWckitJRK1zPg2gYAzaJyiRQyhmJwkWpiL
LDgE1ZkZOyYHqDKDuKZjpmgjMdrc0J9XRCHQHW02qkZjpylAgRkUUwYBMRWQQDNhGRRTBgExFZBAM2EZFFaDZKhIiI1MUWNhGRRTBgExFZBAM2EZFFMGDLEIlEsHXrVixbtgzPP/88PvjgA6OLpKnLly9jxowZGBy0544u/f39+MEPfoDvfve7WLp0KT7++GOji6SqaDSK+vp6LF26FCtWrMC1a9eMLpImhoaGsGHDBixfvhwvvvgi/vznPxtdJM1wLREZjhw5gnA4jEOHDqGnpwctLS1GF0kzgUAAO3bsQF5entFF0czbb7+Nxx9/HCtXrsSVK1dQW1uLw4cPG10s1Zw4cQKhUAjvvPMO2trasH37duzZs8foYqnu6NGj8Pv92LlzJ3p7e7F48WLMmzfP6GJpggFbhjNnzqC8vByrVq1CLBbD66+/bnSRNBGv2/r161FTU2N0cTSzcuXKxBdSJBKB12uvrcY++ugjVFVVAQAeeeQRXLx40eASaePZZ5/FggULEr+73W4DS6MtBmwRzc3NOHDgQNKx8ePHw+v1Yt++fTh37hxeffVVHDx40KASqkOonhMmTEB1dTUeeughg0qlPqF6NjY2oqKiAjdv3sSGDRtQV1dnUOm0EQgEUFBQkPjd7XYjHA7D47HXZT927FgAw/X98Y9/jJ/85CcGl0g7HIctw7p165K+zZ944gm0trYaXCr1zZ8/H6WlpQCAtrY2VFRUWP6LSUxHRwfWr1+PjRs3Ys6cOUYXR1Xbtm1DZWUlqqurAQCzZ8/GqVOnDC6VNrq6urBmzZpEHtuu7PVVq7EZM2bg5MmTWLBgAS5duoSysjKji6SJ48ePJ36eO3cu3nrrLQNLo51PP/0Ua9euxa5du2x1NxH36KOP4oMPPkB1dTXa2tpQXl5udJE08cUXX+Dll19GfX09Zs2aZXRxNMUWtgyhUAibN2/G5cuXEYvF0NDQgIcfftjoYmlq7ty5aGlpsV1+FwBWr16Njo4OPPDAAwCAgoICW3XKRaNRNDQ0oLOzE7FYDI2NjZg6darRxVLd1q1b0dLSgilTpiSOvfnmm/D5hDeStjIGbCIii+A4bCIii2DAJiKyCAZsIiKLYMAmIrIIBmwiIotgwCYisggGbCIii2DAJiKyiP8P9tEj6f352eAAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plot \n", "xs = otu_tsne[:,0]\n", "ys = otu_tsne[:,1]\n", "plt.scatter(xs, ys)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The clusters are not so distinct maybe changing the *learning rate* parameter could help. Let's apply another technique and see how this gones. Prinicipal Component analysis(PCoA), however, Principal Coordinate analysis would be more in this case." 
] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# PCA projects the data onto orthogonal components ordered by explained variance; constant (zero-variance) features contribute nothing\n", "# Another cool thing about PCA is that you can use it for feature extraction\n", "\n", "stdscale = StandardScaler() # use a standard scaler so that each feature (column) has a mean of 0 and unit variance\n", "df_scaled = stdscale.fit_transform(df_encoded) # apply it to our dataset" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(106, 106)\n" ] } ], "source": [ "# call PCA\n", "model_pca = PCA()\n", "transformed = model_pca.fit_transform(df_scaled) #fit\n", "print(transformed.shape) # look at the dimensions of the matrix" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWwAAAD0CAYAAAC/3RwjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFGZJREFUeJzt3X+M1PWdx/HX9/udXzvMssP+4oeIsFEOvZYiXIm2h7S1CJVKYiqC2L1Sq4kcF+1BoI2pG5IqgRgTc8kBVauHPdoSol64cDX1R0+rNdbyoy0npQXqDwTdn7O7w87uzM73e38MXXeBZWQY+H4/7PORGJnv/Nj3fqJPv37nO9+xPM/zBAAIPNvvAQAAnw7BBgBDEGwAMATBBgBDEGwAMATBBgBDhC7UC7e0dF+ol/ZVIhFVOt3n9xiBxhoVxxoVN1LXqK6uctj72MM+R6GQ4/cIgccaFccaFccanY5gA4AhCDYAGIJgA4AhCDYAGIJgA4AhCDaAoXbtVPz2RUp86QuK375I2rXT74lwEsEG8IldOxXf+JCsTJ+8iZfLyvQpvvEhoh0QBBvAgPjWJ2VVjZHq66VoRKqvl1U1RvGtT/o9GkSwAQxiN7dKVcmhG6uShe3wHcEGMMCtr5U6U0M3dqYK2+E7gg1gQM+37pbX2SE1N0t9Wam5WV5nh3q+dbffo0EX8OJPAAy0cJF6VDiWbR/9QG59rXru/YG0cJHfk0EEG8CpFi5SD4EOJA6JAIAhSt7D/tGPfqRXXnlFuVxOd9xxhxYvXlzOuQAApygp2G+99Zb27t2rn/3sZ8pkMnrqqafKPRcA4BQlBfv111/X1KlTtXLlSqXTaa1du7bccwEATlFSsDs6OnTs2DFt2bJFR48e1YoVK/TCCy/IsqxyzwcAOKmkYCeTSTU0NCgSiaihoUHRaFTt7e2qqakZeEwiEb0kv+LHcWwlk3G/xwg01qg41qg41uh0JQV71qxZeuaZZ/Ttb39bzc3NymQySiaHfpz1Uv3yzGQyrlSqx+8xAo01Ko41Km6krtHZvoS3pGB/+ctf1ttvv63bbrtNnuepqalJjnPp7U0DQJCUfFofbzQCwMXFB2cAwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMcV7Bbmtr09y5c3X48OFyzQMAGEbJwc7lcmpqalIsFivnPACAYZQc7I0bN2rp0qWqr68v5zwAgGGESnnSc889p+rqas2ZM0ePP/74GR+TSEQVCjnnNVwQOY6tZDLu9xiBxhoVxxoVxxqdzvI8zzvXJ915552yLEuWZenAgQOaPHmyNm/erLq6uoHHtLR0l3XQoEgm40qlevweI9BYo+JYo+JG6hrV1VUOe19Je9jbtm0b+HNjY6PWrVs3JNYAgPLjtD4AMERJe9iD/eQnPynHHACAItjDBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDEGwAMATBBgBDhEp5Ui6X0wMPPKAPP/xQ2WxWK1as0I033lju2QAAg5QU7J07dyqZTOqRRx5RR0eHbr31VoINABdYScFesGCB5s+fP3DbcZyyDQQAODPL8zyv1Cen02mtWLFCt99+u2655ZYh92UyWYVCl17IHcdWPu/6PUagsUbFsUbFjdQ1CoeH72ZJe9iSdPz4ca1cuVLLli07LdaSlE73lfrSgZZMxpVK9fg9RqCxRsWxRsWN1DWqq6sc9r6Sgt3a2qq77rpLTU1Nuv7660seDADw6ZV0Wt+WLVvU1dWlTZs
2qbGxUY2Njert7S33bACAQc7rGPbZtLR0X4iX9d1I/d+0c3HJrFEmI6utVXZfr9xoTF5NrVRRUZaXvmTW6AIaqWt0tkMifHAGOJNMRvbR92W5rrz4KFmuK/vo+1Im4/dkGMEINnAGVlurFI1JkYhkWYW/R2OF7YBPCDZwBnZfrxQOD90YDhe2Az4h2MAZuNGYlMsN3ZjLFbYDPiHYwBl4NbVSX6+UzUqeV/h7X29hO+CTkj84A1yyjh+Ts2eP7Obj8pywvKuuknv5JHkTJ5XtLBGgFAQbGOz4MUVe/IXcyqS8yydJ3Wk5f/mT8pOnnD3WB95R+JcvyG7+WG79WOVuWiBdfc3FmxsjAsEGBnH27JFbmZSqqgobqqrkntyeXzjhkwcODrQ8hVpblG+4Ut6kSbJSKVU886Qy/3Q30UZZEWzgb44fU/jVV+Q5IalqtPJXXiVV10iVCdn7/yjrmVbZrS1ys1mF3zui/OQGeZMmKfzC/8jq7lb+iiskJyzV1CkvKfzLF5Qj2Cgj3nQEpE8OhURjUiwq5XIK73lbam+TDh9S6P/+KKuvV97EyxT54+/lfHis8DwnLMtzpaqkwu8c+OT1kknZzR/787vgkkWwAX1yKMSd/jnZfRnJsuTGYnL+8HtFXntVymQUevMNhX7zuqzOTrlVVQodOiRJ8kaNlizJ6h50OYZUSm79WJ9+G1yqCDYgye5olyoTUk2Nctd+Xl4oLCvTJ6/5I1ntbVIiIY0ZI+X6pfZ26USX1FMIdP/VV8vq7JRn21I+J7W1yEm1Fd54BMqIY9iAJDcSkr1vr2zbkRuvkHvNNXLzrpzf/Fr5yy6X5YQkJySNSshtmCL7TwfkTf27QqDDYeWnTFF/3VjZ778vt34sbzjigiDYQEe7rPQJhf58UApHZMUr5H3wnryJk+SNSsi9YrLCv3tbniTFKqREpbyaGuWmTv0k0P+6lkDjgiPYGJkGXTpVv3tbdke7+qd/Ts6x41Jnh+wTrvpmzJI9qlJWX69ys6+Xc/Ad2al2uZaj3PyF6l+1xu/fAiMMwcbIc/LSqYrG5MVHKXTkkKxwWF5NjfLjTp5rnUnLaW9T7qavKfbsz5Wvqlb+i/+ofGeXnM529d/6DX9/B4xIvOmIEefUS6dajiPPCclKDzrLw5UsudKUBvV+Y6m8aEzW0Q/lRWPq/cZSaUqDf78ARiz2sDHi2H298uKjBm7np1yl0MF35HqulKyWentld3Yp99m/LzxgSoP6CTQCgD1sjDinXjrV/exnlR87TpItdaSk/n7lJ02UO2OWf0MCZ8AeNkYcr6ZW1tH3CzfCYSkeV37mrMKx7WxWbuVouVdeKY2p9ndQ4BQEGyNPRYXciZMKZ4n0nCh8we41nyls93s24CwINkamigp5Ey9X3u85gHPAMWwAMATBBgBDEGwAMETJx7Bd19W6det08OBBRSIRPfTQQ7riiivKORsAYJCS97BfeuklZbNZbd++XatXr9aGDRvKORcA4BQlB3v37t2aM2eOJGnGjBnav39/2YYCAJyu5EMi6XRaiURi4LbjOOrv71coVHjJRCKqUMg5/wkDxnFsJZNxv8cINNaoONaoONbodCUHO5FI6MSJEwO3XdcdiLUkpdN95zdZQCWTcaVSPX6PEWisUXGsUXEjdY3q6iqHva/kQyIzZ87Ua6+9Jknat2+fpk6dWupLAQA+hZL3sOfNm6c33nhDS5culed5Wr9+fTnnunAGXbjejcbk1dRKFRV+TwUARVme53kX4oVbWrqLP+hiG3TheoXDhSu29fXKnTjp00X7+DEl/7xfJz74SO6YauVnzpTGT7jwcxtmpP6v7LlgjYobqWt0QQ6JmOjUC9crEpGiscL2Yo4fU+TFX0i9ffLGjZWV7SvcPn7swg8OALrUL/504B2Fd+2U/cEHspKj1d/bq8ju38l+710pm5U7ukre9BnKzbtJ/UvuPOtetrNnj9zKpDQmKaX7pKoquSe35xeylw3
gwrt0g/3Wm4r/+7/J6+2VlUjIPfRnRfbulh0OS/2u5OVlf/yRvD/sVaSjTW5NjdyvLhg22nZHu7xxY4durEzI/uhjrvgG4KIwK9h/PaLQf/+Xwr/9razOlNzLJij3xRuU/+pNQ48ld7Qr9p9b5XmSXZmQZ9kK/eUvkm1LmYxUEZdicSmbldXVLc/NK/Lqq+q79h/kTbz8jD/aHVMtqzstjR50Xmh3Wi4XuQdwkQTvGPaunYrfvkiJL31B8dsXSbt2Frb/9Yhi//GEIm/9Rk6qXVbIlvPuu4r878uK7Pj5kGPJ9qFDsk6kZUdC8iJRKRaTclkN/LquK3mSQiEp3y/bcqS2dtl9vcOOlZ85U3Z3qvAVUm5e6uyU3Z0qvPEIABdBsIK9a6fiGx+SlemTN/FyWZk+xTc+JO3aqdCvX5O6umTlXXnJKqm6RqqultXVLau1Vc6ePQMvY3d3ya0dKy+TkayTG6MxqT8nWXZhTzvkSK4nRaKFbxmpqS58199wxk9Qdt7XpFhU1kcfy4tEC7c5SwTARRKoQyLxrU/Kqhoj1dcXNtTXyzq5vf+6OYVjz/35wuEMSQpHZWcyyudysjvaB44lu5Wj5V17rULvH5HX1l54g3D8eFmpDlmxaCHcnV1Svl/u+AlSNKbs3LmFc7LPZvwE6eorlRuBpxoB8F+g9rDt5lapKjl0Y1VSdnOr3No6KXRyz7j/5Mfec31yQ2EpHB5yLNm98kpZdXXqXXiLvMQo6eOPpLo69d5xp9zJDdKoROE548fLmzlLmX/+l7O+4QgAQRCoPWy3vlZWZ+qTPWxJ6kzJra9V/5wbFDp8UF7zx7JTnfLCJ6RMr7zLLpNXWzv0WPKYavXP+rzsQ2OUqxsrL+9J48fJrR+rHj7ZCMBQgQp2z7fuLhzDlgp72p0peZ0d6rn3B9KUBvUuv2foWSKTJ5/5LBFJGlMt9/Oz+RZsAJeMQAVbCxepR4Vj1vbRD+TW1xZivXBR4f4pDeq/b5X6fR0SAPwRrGBLhWj/LdAAgAGBetMRADA8gg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGCIkq7W193drTVr1iidTiuXy+n73/++rr322nLPBgAYpKRgP/3007ruuuu0fPlyHTlyRKtXr9bzzz9f7tkAAIOUFOzly5crEolIkvL5vKLRaFmHAgCczvI8zzvbA3bs2KGtW7cO2bZ+/XpNnz5dLS0tuueee/TAAw9o9uzZQx6TyWQVCjnln9hnjmMrn+eLx86GNSqONSpupK5RODx8N4sGezgHDx7UqlWrtHbtWs2dO/e0+1taukt52cBLJuNKpXr8HiPQWKPiWKPiRuoa1dVVDntfSYdEDh06pPvvv1+PPfaYpk2bVvJgAIBPr6RgP/roo8pms3r44YclSYlEQps3by7rYACAoUoKNnEGgIuPD84AgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCHOK9iHDx/WrFmz1NfXV655AADDKDnY6XRaGzduVCQSKec8AIBhlBRsz/P04IMPatWqVaqoqCj3TACAMwgVe8COHTu0devWIdsmTJigm2++WdOmTRv2eYlEVKGQc/4TBozj2Eom436PEWisUXGsUXGs0eksz/O8c33SvHnzNG7cOEnSvn37NH36dG3btm3IY1pausszYcAkk3GlUj1+jxForFFxrFFxI3WN6uoqh72v6B72mbz44osDf/7KV76ip556qpSXAQCcA07rAwBDlLSHPdgrr7xSjjkAAEWwhw0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0AhiDYAGAIgg0
A5bD9p4rfdIMSsz6j+E03SNt/WvYfQbAB4Hxt/6niG34ouzcrjZsguzer+IYflj3aBBsAzlP8x1tkV1ZJdbVSLCLV1cqurFL8x1vK+nMINgCcJ7utXaocPXRj5ejC9nL+nLK+GgCMQG5NtdTdNXRjd1dhexkRbAA4Tz3fuVdud6fU0ir1ZqWWVrndner5zr1l/Tnn/SW8ADDiLVmmHp08lv3RMbk11epZ+aC0ZFlZfwzBBoByWLJMPWUO9Kk4JAIAhiDYAGAIgg0AhiDYAGAIgg0AhrA8z/P8HgIAUBx72ABgCIINAIYg2ABgCIJ9Drq7u3Xvvffqm9/8ppYsWaK9e/f6PVJguK6rpqYmLVmyRI2NjXrvvff8Hilwcrmc1qxZo2XLlum2227Tyy+/7PdIgdXW1qa5c+fq8OHDfo8SKHw0/Rw8/fTTuu6667R8+XIdOXJEq1ev1vPPP+/3WIHw0ksvKZvNavv27dq3b582bNigzZs3+z1WoOzcuVPJZFKPPPKIOjo6dOutt+rGG2/0e6zAyeVyampqUiwW83uUwCHY52D58uWKRCKSpHw+r2g06vNEwbF7927NmTNHkjRjxgzt37/f54mCZ8GCBZo/f/7AbcdxfJwmuDZu3KilS5fq8ccf93uUwOGQyDB27Nihr3/960P+evfddxWLxdTS0qI1a9Zo1apVfo8ZGOl0WolEYuC24zjq7+/3caLgGTVqlBKJhNLptO677z5997vf9XukwHnuuedUXV098B9/DMUe9jAWL16sxYsXn7b94MGDWrVqldauXavZs2f7MFkwJRIJnThxYuC267oKhfjH61THjx/XypUrtWzZMt1yyy1+jxM4zz77rCzL0ptvvqkDBw7oe9/7njZv3qy6ujq/RwsE/o06B4cOHdL999+vxx57TNOmTfN7nECZOXOmfvWrX+nmm2/Wvn37NHXqVL9HCpzW1lbdddddampq0vXXX+/3OIG0bdu2gT83NjZq3bp1xHoQgn0OHn30UWWzWT388MOSCnuVvLFWMG/ePL3xxhtaunSpPM/T+vXr/R4pcLZs2aKuri5t2rRJmzZtkiQ98cQTvLmGT42PpgOAIXjTEQAMQbABwBAEGwAMQbABwBAEGwAMQbABwBAEGwAMQbABwBD/DwBt+rBLqZOLAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# draw a scatter plot of the first two columns, change the transparency of the points and draw the points in red\n", "plt.scatter(transformed[:,0], transformed[:,1], alpha = 0.1, color = 'red')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Text(0.5,1,'Finding the ideal number of n_components parameter for PCA')" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXsAAAEPCAYAAACjjWTcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XdcU1f/B/APSRiyQVARCjIEt4i7bq2CA6EuXFQftXW21arFhRMVZ1Xqz/G0daBP1WodraNuES0OFBEciCICIiA7zCT3/P7wMY9UIIgkNyTf9+vl62VI7r2fc5N8OZx777k6jDEGQgghGk3AdwBCCCHKR8WeEEK0ABV7QgjRAlTsCSFEC1CxJ4QQLUDFnhBCtECtKPZubm7w9vaGj4+P/N/ChQsBAD4+PsjLy/ug9Q0aNAg3btxAWloaRo4cWWM5f/vtN+zfvx8AEBISguXLl3/U+hYuXIjr16+/9/P79++jd+/e1V5vcnIy2rRp8zHRqsXf3x9nzpxRybZkMhmmTp0KT09P7Nu3TyXbrM2SkpLw9ddf8x2jxi1atAgxMTEftY7U1FQMGjQIPj4+uHv3brXWkZycjKZNm5apYYMHD8bhw4flrykpKcGmTZvg6+sLHx8feHt7Y+fOnfjn2fGhoaFwc3NDVFTUB2UQVSs5D/bs2QNLS8v3fn78+PFqr7N+/fo4cODAx8QqIzIyEo0bN66x9a1cubLG1qVt0tLSEB4ejqioKAiFQr7jqL2XL18iISGB7xg17vr16/Dz8/uoddy4cQNWVlbYvXv3R63HwMCgTL1KS0vDoEGD0KJFC7i5uWHatGlwdHTEwYMHoa+vj+zsbEyePBmFhYWYOXOmfLkDBw7A29sbe/bsgbu7e5W3X2uKfUXc3Nzw999/4/Llyzh37hwEAgESExNhYGCANWvWwNnZGfHx8ViwYAGKiorg5OSEwsJCAG9+23p7e+Pu3bsICQlBSkoKMjIykJKSgvr162PdunWoV68eoqOjsXTpUkgkEtjb2+Ply5eYN28eOnbsKM9x7tw5XLx4EdeuXYOBgQEA4NmzZ/D390dGRgasrKywceNG1KtXD2lpaVi+fDlSU1MhkUgwcOBATJky5b22+fv7Y8yYMfDy8sJ//vMf7NmzB8bGxnB1dS3zum3btuHs2bPgOA62trZYsmQJ6tevj6ioKKxbtw6lpaXIyMjAp59+ilWrVlW4L5OTkzF+/Hj06NED9+7dQ15eHubOnYu+ffsiJCQE2dnZWLx4MQCUeezv74/mzZsjKioKWVlZGDFiBF6/fo2bN2+iqKgImzZtgpubm3w/7dy5E8XFxfD29sbUqVMBAHfu3MH69etRVFQEgUCAGTNmoFevXvj9999x+PBhFBUVwdjYGKGhoWUy3759G2vXrkVRURF0dXUxc+ZMeHh4YNKkSZBKpRgyZAhCQkJgb28vX2bevHkwNjbG48eP8erVK7i5uWHNmjUwMjKq9LO2Y8cOHD16FCKRCA4ODggODoaJiQm2bt2KkydPQigUwtHREYGBgbC2tq7yfvH390ezZs0QGRmJ7Oxs+Pj44JtvvgEAnD9/Hj/++CM4joORkRHmz5+PVq1aVfp5rejzVdH727t3byxatAhpaWmYOHEiduzYgRUrVuDOnTvQ1dWFnZ0dVq9e/d7+qSz39u3bceHCBRQXF6OoqAgBAQHyz1FUVBTS09Ph5uaGefPmYfHixcjMzERGRgZsbW2xadMm1K1bF71798agQYMQERGB3NxcTJo0CXfu3EFsbCxEIhG2bduG+vXrV9jeH374Aenp6ZgzZw7Wrl0LJycnrFy5EnFxcZBIJOjcuTO+//57iEQitGjRAn369MGjR4+wfv16tGzZEgAQERGBTZs2IT8/H/7+/ggNDcXBgwcRGhoKgUAAKysrBAYGwtHREfPmzUNOTg6SkpLQs2dPzJ07t9LPU/369eHg4IDnz58jLy8Pz549w86
dO+WdEwsLC6xduxYpKSnyZW7cuIHc3Fz59zI1NRU2NjaVbkeO1QKurq5s0KBBbPDgwfJ/r1+/lj+XmZnJjhw5wtq2bctSU1MZY4wtX76cff/994wxxnx8fNihQ4cYY4zdvn2bubm5sYiICJaUlMTc3d0ZY4xt2bKF9enTh+Xn5zPGGJs8eTLbvHkzk0gkrHv37uzy5cuMMcb+/vtv+fL/FBAQwH766Sf5+nr37s0yMzMZY4xNnTqV/fjjj4wxxvz9/dmFCxcYY4wVFxczf39/dvLkyffWN3bsWHb69Gn24MED1rlzZ5aens4YYywwMJD16tWLMcbY0aNH2cyZM5lEImGMMXbgwAE2adIkxhhjs2bNkucUi8WsY8eO7P79+2Xa/a6kpCTm6urKLl68yBhj7MyZM6xnz57y9ixbtkz+2ncfjx07ls2YMYMxxlhUVBRzdXWVt2/lypVs0aJF8tdNnjyZSSQSlp+fz7y8vNjly5dZTk4O69evH0tKSmKMMfbq1SvWvXt3lpKSwo4cOcLat28vf1/elZWVxTp37syioqIYY4zFxcWxDh06sBcvXlTYxrfvk5+fHyspKWGlpaXM19eXHT58uNzXvnX+/HnWr18/lpOTwxhjbNWqVez//u//2OHDh5mfnx8rKCiQ75cJEyZ88H758ssvWWlpKcvNzWWenp7s4sWLLD4+nn366afsxYsXjDHGrl+/zrp06cLy8/Mr/LwyVvHnq7L3NyIigg0cOJAxxtitW7eYl5cX4ziOMcbY2rVrWWRk5Hv7pKLcycnJzN/fnxUVFTHGGPvzzz/ZoEGD5PvH09NT/nndvXs327FjB2OMMY7j2KRJk9jPP//MGGOsV69ebNWqVYwxxk6ePMmaNGnCHj58yBhjbNq0aWzbtm2VtvftOqKjoxljjM2bN4/t3buXMcaYVCplc+bMYTt37mSMvakjR48eLfe9P3LkCPvqq6/k78Fnn30m/14fOXKE9e/fn3EcxwICAti4cePKXUd5n8c7d+6w9u3bs5cvX7Kff/6ZffPNN+Uu+65vvvmGBQcHM8YY+/LLL9natWsVLvNWrenZVzSM867mzZujQYMGAIBmzZrh3LlzyM7OxuPHj+Hr6wsAaNu2bYVDLR06dICxsbF8+dzcXMTFxQEAevToAQDo1KlTlYdqunTpIs/cpEkTZGVlobCwELdu3UJubi42b94MACgsLMSjR48wYMCActfz999/o0uXLrC2tgYA+Pn5ITw8HABw6dIl3L9/H0OHDgUAcByHoqIiAEBwcDDCwsKwfft2PHv2DCUlJSgsLIS5uXmFmXV1deVtbdasGXJycqrU1r59+wIAPvnkEwBAt27dAAD29va4efOm/HXDhg2DSCSCsbExPD095cckMjIyMH36dPnrdHR08PjxYwBv/np7+768Kzo6Gvb29mjdujUAoHHjxvDw8MDNmzfL/NVVnm7dukFPTw8A4Orqitzc3Epf//fff8PLywtmZmYAgPnz5wMAvv32WwwZMgSGhoYAgC+++ALbt29HaWnpB+0XPz8/6OrqQldXF15eXggPD4eTkxM6deokX7Zz586wtLSUj0GX93mt7PPVqlWrKr2/rq6uEAqFGD58OLp27QpPT0+0atWq3P1SXu5evXph7dq1+OOPP5CYmIh79+6hoKBAvoy7uztEojelZ9y4cbh9+zZ27dqF58+f48mTJ/L3EwD69esn339WVlZo0qSJfP8pau8/v0+XL1/G/fv35ePkxcXFZZ5v165duW1819WrVzFgwAD593rIkCFYuXIlkpOTAbypLxUpLi6Gj48PgDfHlCwsLLBu3TrY2NhAIBC8Nzb/TxkZGbhw4QKOHDkCAPD19cXSpUsxffp0+eevMrWm2FfF2+ET4E2xeHfnvfv/tx+0qiwvFArfexOqOgb87nbero/jODDGcODAAdSpUwcAkJWVBX19/UrX9W6Gd7fPcRwmTZqE0aNHAwBKS0vlhWvs2LFwc3N
Dt27d0L9/f9y7d0/hB0pXVxcCgUCe+Z/535JIJGWWe1s4311Ped7NzhiDSCSCTCaDs7MzfvvtN/lzaWlpsLS0xB9//FHhB1kmk5XJ+HadUqm0siYCqPyzUlHud7eVl5eHvLw8cBxX5uccx5XZflX3y7ufFcYYBALBe+t++9zb9ZfXhso+X9nZ2RW+v+8yNTXF8ePHcefOHURERGDmzJmYOHEixowZU6XcsbGxmDZtGsaPH48uXbqgffv2WLZsmfx1776f69atQ3R0NIYOHYqOHTtCKpWWeS/e3X/l7bsP+T5xHIfNmzfD2dkZwJv38N19UJWCyXHcez979z2pbB3/HLN/V+vWrbFnzx7IZLIy35Ho6GiEhoZi3bp1OHToEADIhz45joNYLMbRo0fLfW/+qVacjfMxLCws0Lx5c3khiY2NlffWq8LZ2Rl6enoICwsD8Gbnx8XFlftFEQqFCguNsbEx3N3dsWvXLgBvPnCjRo3ChQsXKlymS5cuuHbtGl69egUAOHr0qPy5rl274vDhwxCLxQCAzZs34/vvv0deXh7u37+POXPmoF+/fnj16hVevHhR7oe1KiwsLBAbGwvGGMRiMS5dulSt9Rw7dgyMMeTm5uL06dPo1q0b3N3dkZiYiFu3bgEAHj58CE9PT6SlpVW6Lnd3dzx79gzR0dEAgCdPnuDWrVvo0KFDtbJV5tNPP8W5c+fk+zkkJAS7d+9Gt27dcOTIEflxoNDQULRv3/69Iq/IiRMnwHGcfL/07t0bnTt3Rnh4OJKSkgC8+esiNTW1TM/3n6rz+QLefHbf/gK/dOkSxo8fjzZt2uDrr7+Gr69vhWe0lJf71q1baNGiBf71r3+hQ4cOuHDhAmQyWbnLh4eHY9y4cfD19UXdunVx/fr1Cl9bnfa++53s2rUrdu/eDcYYSktLMXXq1A8+U6tbt244deoUsrKyAABHjhyBubk5HBwcPmg9/9SmTRs4OTlh9erVKCkpAQC8fv0aQUFBsLOzg0wmw2+//YZly5bh4sWLuHjxIi5fvozJkydj7969CjsrgIb17CuyceNGzJ8/HwcOHIC9vT2cnJyqvKxIJEJISAiWLFmCjRs3olGjRrCysirTq3qre/fuCA4OVrjO9evXY8WKFfD29kZpaSkGDRqEwYMHV/h6Nzc3zJ07F+PGjYORkVGZP6mHDx+OtLQ0jBgxAjo6OrCxsUFwcDBMTU3x1Vdf4fPPP4ehoSHq168PDw8PJCYmyocFPsTgwYNx9epV9OvXD/Xr10eHDh2q9AH7JxMTEwwZMgTFxcUYO3YsOnXqBADYsmUL1q5di5KSEjDGsHbtWtjZ2ZUZ6vgnS0tLbN68GStWrEBxcTF0dHSwevVqODo6yv+srik9evRAfHw8Ro0aBQBwcXHBihUrYGhoiNTUVAwfPhwcx8HBwQHr16//4PUXFxdj2LBhKCgowOjRo9G5c2cAwJIlSzBjxgzIZDIYGBhg+/btMDExqXRdFX2+KtsnLi4u0NfXx7Bhw3Dw4EGEhYVh0KBBMDQ0hJmZGVasWFHl3I0bN8bZs2fRv39/cByHXr16ITc3V/6L8l3Tp0/H2rVrsXnzZujq6sLDwwMvXrz4gD1X+fepb9++mDt3LpYuXYqFCxdi5cqV8Pb2hkQiwaeffopJkyZ90La6dOmC8ePHY9y4ceA4DpaWltixY4f8r6WPsWXLFvzwww8YMmQIhEIhOI6Dr68vJk6ciAsXLoDjOHh7e5dZZvz48di7dy+uXLmCnj17Vrp+HVadb6yWWbNmDSZOnAgrKyukpqbCx8cH58+fh6mpKd/RiAZ496yr2qS25tZWWtGz/1i2trYYP348RCIRGGMICgqiQq+BRo8eXeZA4rv2799f7kFiQmoL6tkTQogW0PgDtIQQQqjYE0KIVqBiTwghWkBtD9BmZORXe1ljY32IxSU1mEY9UTs/ju+xN1dYHvM9VePrrg56PzULH+20tq74tFyN7NmLRNoxyyG
1U7NQOzWLurVTbXv2hCjbrLaVz0pIiCahYk+0Vo9PevEdgRCV0chhHEKq4v7raNx/Hc13DEJUgnr2RGsFhs8DoD4HaAlRJurZE0KIFlBasb937x78/f3f+/nFixcxdOhQ+Pn5yednJoQQolxKGcb597//jRMnTshvJvCWRCLB6tWrcfjwYdSpUwejRo1Cr1695HdgIkTVGABOTaaH4jimNlmUidpZOR1UfGOZj6GUYm9vb4+QkBB8//33ZX7+9OlT2Nvby2/t1rZtW9y+fRv9+/dXRgxC3lMskeHv59m4+jQT0S/zIJFx6LjxKt+xCJHr7lwXG3yb1/h6lVLsPT09y71RglgsLnPjBSMjo3JvaAC8ufqsuhclCIUCmJsrvsVYbUftrDrGGE5Ep2LtX4+Rnl8CEwMR+tp9C1vzOnA0damhpB9HoKOjFT1eamfl3O3MlPK9VunZOMbGxmXmCy8oKKjwrjsfc5mxubkhcnIKq718bUHtrJrnmYVYdS4Od1Py0LS+MRb3c0XbT8wgEqrX+Qn0fmqWj2lndZerbLoElRZ7Z2dnJCYmIicnB4aGhrh9+zYmTpyoyghEy0Qm5WDO8VgIdXSwoG9j+LRsAMF/x0Nvpt4AAHSw6chnREJUQiXF/o8//kBhYSH8/Pwwb948TJw4EYwxDB06FPXr11dFBKKFLsRlIPDUI9iaGWDL0JawMS173+BVN5YBoPPsiXZQWrG3s7OTn1r57k1ye/fujd69eytrs4QAAA7dTcH6i0/RsqEpNvo2h1kdXb4jEcIruoKWaBTGGLaGP8eem0no4VwXQQObwEBXvWYfJIQPVOyJxpDKOASdjcPJB+kY2toGc3u7QCio+fOVCamNqNgTjVBQKsW8Px4i4nk2pnRxwISO9kq5MIWQ2oqKPan1MgtKMetoDOLSxVjUrzF8WtpUabkVXYOVnIwQ9UHFntRqSdlF+PrIfbwuKMV63+bo6lS3ysu2tGqlxGSEqBcq9qTWin2Vj1m/x4BjDNtHtEILG9MPWv5K0iUAdBMToh2o2JNa6VpCFuadeABLQ11sGdoSDpYffnn5D5HrAFCxJ9qBij2pdf6MfYWgv+LgYm2MTUNawMpIj+9IhKg9Kvak1mCMYc/NJGwNf44O9uZYM7gZjPXpI0xIVdA3hdQKHGPYeOkpDt59Cc8m1lji5QZdNZvIjBB1RsWeqD0Zx7DybBxOxKRhdFtbfNvDST6ZGSGkaqjYE7Um4xjmH72PEzFpmNTJHl996lBjF0ut77G5RtZDSG1AxZ6oLY4xLDvzGKcfpmPypw6Y1NmhRtfvYtG4RtdHiDqjYk/U1o5rz3H6YTpm9WmM0e5Vuyr2Q/z1/DQAwLMR3RaTaD4q9kQtnXqQhl9uJMG3ZQNM7eGE3NyiGt/GtqgQAFTsiXag0xmI2rmXkougs3Fo+4kZAvq40IRmhNQAKvZEraTmFWPu8QdoYKKPYO9manefWEJqK/omEbVRUCrFd0djIeE4bPy8Bczp7lKE1Bgq9kQtyDiGRScfISGzAMHezdCoGnPdEEIqRgdoiVoICUtA+LMsBPRxQUcHC5Vsc2ufnSrZDiHqgIo94d3x+6nYH5kMvzYNMcy9ocq2a2tip7JtEcI3GsYhvIpMysHq8/Ho1MgCM3s6q3Tbx54cwbEnR1S6TUL4Qj17wpuk7CIEnHgAe/M6WD2oKUQqvjn47tifAQC+jYeqdLuE8IF69oQXecUSzDoaAwDY+HlzmqqYECWjbxhROamMw/w/HiIltxhbh7eEnXkdviMRovGoZ09UbsOlp7j5IgfzP2sMDztzvuMQohWo2BOVOnT3JQ7fS8XYdnYY3LIB33EI0Ro0jENUJuJ5FjZeikdXJ0vM6ObIdxz87BnKdwRCVIaKPVGJhMxCzPvjIZysjBA0sAmEKj7zpjx169TlOwIhKkPDOETpcorenHmjLxJgg29zGOmpRx/jwKP9OPBoP98xCFEJ9fjWEY0lkXH4/sQDpIt
LsH1Ea9iYGvAdSe5toR/ZZAzPSQhRPurZE6VhjCH4/BPcTc5FoKcrWjU05TsSIVqLij1Rmv2RKTgRk4YJnezRv2l9vuMQotWUUuw5jsPixYvh5+cHf39/JCYmlnn+559/xpAhQzB06FCcO3dOGREIz8KeZmLLlWfo42qFyZ/W7I3CCSEfTilj9ufPn0dpaSkOHjyIqKgoBAcHY9u2bQCAvLw8hIaG4uzZsygqKoKvry/69u2rjBiEJ08yxAg8+QhN6htjqZcbBHRbQUJ4p5RiHxkZiW7dugEA3N3dERMTI3+uTp06aNiwIYqKilBUVET3F9UwmQWl+O5oLIz0hVjv0xwGukK+I1XoPwMP8x2BEJVRSrEXi8UwNjaWPxYKhZBKpRCJ3mzOxsYGAwcOhEwmw+TJk8tdh7GxPkSi6hUKoVAAc3PNv9ORurWzRCLD/EPRyCmW4NeJHeFqa1Yj61VWO82hPvsOUL/3U1monfxQSrE3NjZGQUGB/DHHcfJCHxYWhvT0dFy4cAEAMHHiRHh4eKBVq1Zl1iEWl1R7++bmhsjJKaz28rWFOrWTMYbFpx/jblIO1ng3hZ2Rbo1lU1Y7f4n5NwBgQosva3zd1aFO76cyUTuVx9rapMLnlHKA1sPDA2FhYQCAqKgouLq6yp8zMzODgYEB9PT0oK+vDxMTE+Tl5SkjBlGhXTeScOZhOqZ2aYTertZ8x6mSE/FHcSL+KN8xCFEJpfTs+/bti2vXrmHkyJFgjGHVqlXYtWsX7O3t0adPH1y/fh0jRoyAQCCAh4cHunTpoowYREUuxGVg27Xn6N+0Hv7V8RO+4xBCyqHDGGN8hyhPRkZ+tZelPxNV52FaPr48cA+u1sbYNqIV9EU1/8eistrpe2wAAOCY76kaX3d1qMP7qQrUTuVR+TAO0Q7p+SWYfSwWFnV0sc6nmVIKPSGkZtDcOKRaiiUyzDkei4ISGX4e5Y66Rnp8RyKEVIKKPflgHGNYduYxHqWJscG3OVysjfiOVC3qMnxDiCrQ393kg+28nojzca/xTQ8ndHOmOeEJqQ2o2JMPcuZhOn6OeAGfFg0wpq0t33E+yta7W7D17ha+YxCiElTsSZXdf5mHFX89hoedGQI+c6n1U12cSzyDc4ln+I5BiEooLPbp6emIj49HQkICFixYgIcPH6oiF1Ezr/KKMed4LOqZ6GPN4GbQFVI/gZDaROE3NiAgAK9fv8YPP/yALl26YNWqVarIRdRIYakM3x2LRYmUw0bfFjCvo8t3JELIB1JY7KVSKdq3b4+8vDwMHDgQHMepIhdRExxjCDz1CE9fF2C1d1M41lWfiZ0IIVWn8NRLiUSC1atXo127doiIiIBMJlNFLqImtl59jrCnmZjb2xmdG1nyHadGGYjU5364hCibwmIfHByMa9euYfjw4Th//jzWrVunilxEDfwZ+wp7byVhaGsbDHdvyHecGndg0O98RyBEZRQO4+zduxdjxoyBnp4eBgwYgJCQEFXkIjyLSs7FyrNP0N7eHHN6Odf6M28I0XYV9uz379+Pbdu2IScnB2fPnpX/3NnZWSXBCH9e5hZj7okHaGhmgGDvphBp6Jk3G26vAQDMbhfAcxJClK/CYj9mzBiMGTMG27dvx5QpU1SZifCooFSK747FQMYxbPBtDlMDzT3z5mryFQBU7Il2UDhmP3bsWJw6dQqlpaXyn/n6+io1FOGHjGMIPPkIzzMLsXlISzSypDNvCNEUCov9tGnTUK9ePdjY2AAAjd1qsG3XnuPqsyzM7e2Mjo0s+I5DCKlBCos9Ywzr169XRRbCo1MP0rDnZhKGtNLMM28I0XYKj7y5ubnh3r17KC0tlf8jmiUmNQ8rz8ah7SdmmNtbe868sTCwhIWBZl07QEhFFPbsb968iYsXL8of6+jo4MKFC0oNRVQnPb8Ec48/gJWxPoIHNdPYM2/Ks8trH98RCFEZhcX+xIkTAICcnByYmZlpTa9PGxRLZJh74gEKS2U
IGdYS5oaae+YNIdpOYbG/desWli1bBplMBi8vLzRs2BDDhw9XRTaiRIwxrDr3BA9e5WO9TzO4WNXOu019jKC/lwIAFnVeymsOQlRB4d/smzZtwr59+2BlZYUpU6bg119/VUUuomT7bifj9MN0TOnigB4uVnzH4cXttJu4nXaT7xiEqITCYi8QCGBubg4dHR3o6+vDyEj7eoCa5lpCFkLCEvCZqxUmdLTnOw4hRAUUFnt7e3ts2LABOTk52LlzJxo2pNPyarPnWYVYdPIhGlsbYbGXGx2DIURLKCz2y5YtQ8OGDdG2bVsYGhpixYoVqshFlEBcIsXc47EQCQRY79scdXSFfEcihKiIwgO0QqEQzZs3h4uLCwDg3r17aN++vdKDkZrFMYZlZx4jKbsIPw5rBRtTmsvdxoj+SiXaQ2GxnzFjBrKzs2FjYwPGGHR0dKjY10J7bibhcnwmZvV0Qjt7c77jqIVtfX/iOwIhKqOw2GdmZuLAgQOqyEKUJOJ5FraFP4dnE2uM8rDlOw4hhAcKx+wdHR2RlpamiixECVLzirHo5CM4WxlhYT9XOiD7jkXhAVgUTtMbE+2gsGd/584d9OrVCxYWFvJCER4ervRg5OOVSDkEnHgAKcewZnAzOiD7DzGv7/MdgRCVUVjs//rrL1XkIEqw4VI8HqaJsd6nGewt6vAdhxDCI4XDOI8fP8bQoUPRtWtX+Pr64sGDB6rIRT7S2UfpOBr9CuM6fKK1V8gSQv5HYc8+KCgIK1euRJMmTfDw4UMsW7ZM4QFbjuOwdOlSPH78GHp6eggKCoKDg4P8+StXrmDr1q0AgGbNmmHJkiU0llyDXheUYu2FeLSwMcGULo34jkMIUQMKe/aMMTRp0gQA0LRpU4hECn8/4Pz58ygtLcXBgwcxe/ZsBAcHy58Ti8VYt24dtm/fjkOHDsHW1hbZ2dkf0QTyLsYY1px/giKJDEs83SAS0C/Rijibu8DZ3IXvGISohMLKLRKJcOnSJbRr1w63bt2Cnp6ewpVGRkaiW7duAAB3d3fExMTIn7t79y5cXV2xZs0aJCUlYfjw4bC0pBtI1JS/HmXgcnwmvunuiEZ16R6yldnQcwvfEQhRGYXFfuWA7vFqAAAfVUlEQVTKlVizZg02btwIJyenKk2XIBaLYWxsLH8sFAohlUohEomQnZ2NGzdu4NixYzA0NMSYMWPg7u4OR0fHj2sJwWtxCdZdjEdLG1OMbmvHdxxCiBpRWOxtbW0xZcoUJCQkwMXFBba2ii/KMTY2RkFBgfwxx3Hy4R9zc3O0bNkS1tbWAIB27drh4cOH7xV7Y2N9iETVO1VQKBTA3Fzze7XvtpMxhoA/H6JEymH98Faoa6k5s5Mq6/2cemoKAGDbgO01vu7q0MbPrSZTt3YqLPY//PADbty4gVatWiE0NBSfffYZJk2aVOkyHh4euHTpEgYMGICoqCi4urrKn2vRogXi4uKQlZUFU1NT3Lt3DyNGjHhvHWJxSTWa84a5uSFycgqrvXxt8W47T8am4eLjDMzq6QRLXYFGtV9Z7+fD9EcAoDb7Shs/t5qMj3ZaW5tU+JzCYn/16lUcPnwYAoEAMpkMfn5+Cot93759ce3aNYwcOfLNHZFWrcKuXbtgb2+PPn36YPbs2fJ1eHl5lfllQD5cen4JNlx6itYNTeHXhqZDIIS8T2Gxb9CgAQoKCmBiYgKpVAorK8XnbAsEAixfvrzMz5ydneX/HzhwIAYOHFiNuOSf3t5esFTGYbGXG4R09g0hpBwKi316ejo8PT3RpEkTxMfHQ1dXFyNHjgQAmiBNDfwZm4ZrCVn4rpczXSVLCKmQwmK/efNmVeQg1ZCaW4wNl56ija0p/NrQ3OwfqoVVS74jEKIyVZri+OTJkygp+d8B06VLlyozE6kCxhgWHo+BjGNY7OUGAV2B/MGCuq7hOwIhKqOw2AcEBODLL7+EqampKvKQKjp+/xWuxr/G3N4usDOn4RtCSOUUFns
HBwcMGTJEFVlIFb3KK8amK8/Q0dESw9xt+I5Ta0099+aMMLpjFdEGCou9p6cnZs2aVeZsmhkzZig1FKkYYwxBZ+PAMYbVn7cAnXxTfakFL/mOQIjKKJwI7T//+Q+aNm0KKysr+T/Cn6P3X+FGYg6+7eGETyzU5+o8Qoh6U9izNzMzw1dffaWKLESBl7nF2Hz5Gdrbm2NIKxq+IYRUncJib2FhgcWLF6NZs2byOef9/PyUHoyU9Xb4BgACPelesoSQD1OlA7QA8Pr1a6WHIRX7PToVt17kYH7fxrAxNeA7jkZoV78D3xEIURmFxX7GjBm4fPkynjx5AkdHR3z22WeqyEXe8TK3GFuuJKCjgzk+b9mA7zgaY1HnpXxHIERlFB6g3bBhA37//Xfo6uri2LFjWLOGLkRRJY4xrDgbBx0dYFE/Gr4hhFSPwp79rVu35HPgjBs3rtzpiIny/H4vFbdf5GBB38ZoQMM3NepfZ8YCAHZ57eM5CSHKp7DYS6VScBwHgUAAxhj1LFXoZW4xtoQ9Q0cHc/jS8E2Nyy7O4jsCISqjsNgPGDAAo0aNQuvWrREdHY0BAwaoIpfWezt8I9DRoeEbQshHU1jsJ0yYgK5du+LZs2cYOnQo3NzcVJFL69HwDSGkJik8QHvo0CEcPnwYXl5eWLNmDY4dO6aKXFotJbcIW8KeoZODBQ3fEEJqhMJi/+uvv2L27NkAgB07duDXX39Veiht9ubiqScQ6OhgYb/GNHyjRN3seqCbXQ++YxCiEgqHcQQCAfT19QEAurq6VHyU7Oj9V7j934unaPhGuWa3C+A7AiEqo7DY9+nTB6NHj0arVq0QGxuL3r17qyKXVnqVV4wtV97MfUMXTxFCapLCYj9t2jT06tULCQkJ8PX1RZMmTVSRS+u8vXE4xxgN36jIyD/f3KfhwKDfeU5CiPIpLPYA0LRpUzRt2lTZWbTan7Fp+Pt5Nub2doatGd15ShWKpcV8RyBEZRQeoCXKlyEuwQ+Xn8Hd1hTD3OnG4YSQmkfFnmeMMQSfj0epjEOgJ904nBCiHBUO4/j5+b03bvx2uoS3c+WQj3f2UQbCnmbi2x5OsLeg4RtCiHJUWOw3btyoyhxaKbOgFOsuxqN5AxOM8rDlO47W6evgxXcEQlSmwmJva/um+CQmJuLMmTOQSCQAgPT0dCxfvlw16TTc+otPUSiRYbGXK4R053CVm97mG74jEKIyCsfsAwLeXHhy584dJCcnIycnR+mhtMHlJ69xPi4Dkzo5wKmuEd9xCCEaTmGxNzAwwOTJk1G/fn0EBwfT7QlrQH6xFGsuxKOxtRG+aG/Hdxyt5XtsAHyP0SyuRDsoLPaMMWRkZKCgoACFhYXIzc1VRS6NtjnsGbILSxHo6QqRkE6IIoQon8JKM2PGDJw7dw4+Pj7o06cPunfvropcGisyKQfH77/CmHafoGl9E77jEEK0hMIraNu3bw9nZ2ckJSXh9OnTMDc3V0UujSTjGDZeegobU3182dme7ziEEC2isGe/f/9+jBw5Ejt37oSfnx+OHz+uilwa6dSDNMRlFGBGN0cY6Ar5jkMI0SIKe/a//fYb/vjjD+jr66OoqAhjx46Fj49PpctwHIelS5fi8ePH0NPTQ1BQEBwcHN57zVdffYU+ffpg1KhRH9eKWqBIIsP/hT9HCxsT9HWz5jsOATDY5XO+IxCiMgqLfd26dSEUvumFGhgYVGkY5/z58ygtLcXBgwcRFRWF4OBgbNu2rcxrNm3apFUHe/fdSsbrglIEezelGS3VxIQWX/IdgRCVUVjsGWPw9fVFmzZt8ODBA0ilUvmdqzZs2FDuMpGRkejWrRsAwN3dHTExMWWeP3PmDHR0dLTmYG+GuAR7byWhj6sVWtua8R2H/FehpBAAYKhryHMSQpRPYbGfMmWK/P/e3t5VWqlYLIaxsbH8sVAohFQqhUgkQlxcHP78809s2bIFW7durUbk2mf7teeQcgwzujnyHYW8Y/T
JYQCAY76neE5CiPJVWOwvXbqEXr164dmzZ+8NO/j5+VW6UmNjYxQUFMgfcxwHkejNpo4dO4a0tDSMGzcOKSkp0NXVha2t7Xu9fGNjfYhE1TuIKRQKYG6uHr21h6l5+CM2Df/q3AgtGtWt0XWrUzuVSVntFInenJ+gLvuQ3k/Nom7trLDYv50WoTpXzHp4eODSpUsYMGAAoqKi4OrqKn/u+++/l/8/JCQEVlZW5Q7niMUlH7zdt8zNDZGTU1jt5WsKYwxBfz6Aib4Io91tajyTurRT2ZTVTqmUAwC12Yf0fmoWPtppbV3xtTsVnnr5+edvzlTw9vZGo0aNMGPGDBQXF8PX11fhBvv27Qs9PT2MHDkSq1evxvz587Fr1y5cuHChGvFrr+sJ2bj5IgcTO9nDrI4u33EIIVpM4Zh9QEAAZs2aBQDo0aMHFi5ciD179lS6jEAgeG9mTGdn5/de9/XXX39I1lpFyjFsDnuGT8wNMJzuPkUI4VmV7kHbsWNHAG+upuU4TqmBNMXx+6lIyCzEmsHNoEvz36ilkU3G8B2BEJVRWOxNTU1x8OBBuLu7Izo6GkZGNB2vIuISKXZcS0QbW1P0cqnZg7Kk5lCxJ9pEYZczODgY8fHxWLduHZ4+fYpVq1apIlettudmErKLJPi2pzNdQKXGMosykVmUyXcMQlRCYc/e0tISU6ZMQUnJm7NjiouLlR6qNnuVV4xf76TAs4k1mjegWS3V2cS//AHQefZEOygs9kuXLkVYWBjq1atHNxyvgv8Lfw7GGKbTBVSEEDWisNhHR0fj/PnzEAjoIKMiD9PycfphOsZ1+AQ2pgZ8xyGEEDmFFdzBwUE+hEMqxhjD5ivPYF5HF+M7fMJ3HEIIKUNhzz41NRW9evWST1FMwzjlu/osC5FJuZjb2wXG+lU6o5UQQlRGYVWqaGZL8j9SjiEk7BnsLepgSKsGfMchVTS++US+IxCiMhUW+99++w3Dhw/HgQMH3jt98LvvvlN6sNrk+P1UPM8qwnqfZnQD8VrEt/FQviMQojIVFvsGDd70UB0cHOQ3LyHvKyiVYuf1NxdQdXemC6hqk5T8ZACArYkdz0kIUb4Ki/3bm4+cOnUKv/zyi8oC1TZ7byUjq1CCjb7N6QKqWmb6ha8A0Hn2RDsoHLM3MTHBhQsX0KhRI/npl46OdA45AKTnl2D/7WT0c7NGcxtTvuMQQkiFFBb7rKws7N69W/5YR0cHe/fuVWamWmP7tefgGMO0bo34jkIIIZWqtNiLxWLs3LkTderUUVWeWuNJhhh/xqZhdFs72JrR/iGEqLcKTx3Zt28fBg8eDB8fH1y9elWVmWqFLWEJMDEQYUInuoCKEKL+KuzZ//nnnzhz5gzEYjG+//57+QFbAtxIzEbE82zM7OEEUwO6A1VtNdVdc2+eQ8g/VVjs9fT0oKenB0tLS0gkElVmUmscYwgJS4CNqT7dgaqW82zUn+8IhKhMla4AYowpO0etcfZRBh6nizGlSyPoiegCqtosPvsJ4rOf8B2DEJWosGcfHx+P2bNngzEm//9b2jqFQqmUw7bwBDS2NoJX03p8xyEfac6VbwHQefZEO1RY7Ddt2iT//8iRI1USRt0diU7Fy7wSbBnaGAK6gIoQUotUWOw7dOigyhxqr6BUil8iXqC9vTk6OVjwHYcQQj4IDTpX0X8iU5BTJMH0bo40LQIhpNahYl8FOYUS7L+djJ4udem+soSQWonuslEFe24lobBUhqldG/EdhdSgWW3n8h2BEJWhYq9Aen4Jfot6iQHN6sGprhHfcUgN6vFJL74jEKIyNIyjwM8RLyDjGL781IHvKKSG3X8djfuvo/mOQYhKUM++Esk5RTge8wqft2xAk51poMDweQDoPHuiHahnX4mfI15AqAP8q6M931EIIeSjULGvQGJWIU49SMPQ1g1Rz0Sf7ziEEPJRqNhX4N9/J0JPKMC4DjSFMSGk9qNiX46nrwtw9lEGRrRpiLpGenz
HIYSQj0YHaMvxS8QL1NEVwr8d9eo12YKOS/iOQIjKKKXYcxyHpUuX4vHjx9DT00NQUBAcHP536uLu3btx8uRJAECPHj0wY8YMZcSolhfZRTgfl4Gx7exgbkg3JtFkHWw68h2BEJVRyjDO+fPnUVpaioMHD2L27NkIDg6WP5eUlIQTJ07gwIEDOHjwIMLDw/Ho0SNlxKiWvTeToCsUYHRbO76jECW7mXoDN1Nv8B2DEJVQSs8+MjJSfhtDd3d3xMTEyJ9r0KABfvrpJwiFQgCAVCqFvr56nO3yKq8YJx+k4fNWNjRWrwVW3VgGgM6zJ9pBKcVeLBbD2NhY/lgoFEIqlUIkEkFXVxeWlpZgjGHt2rVo1qwZHB0d31uHsbE+RCJhtbYvFApgbm74wcuFXEsEAEzv0xjm5up/EVV121nbKKudov/eaUxd9iG9n5pF3dqplGJvbGyMgoIC+WOO4yAS/W9TJSUlWLBgAYyMjLBkSfkHycTikmpv39zcEDk5hR+0TFZhKQ7eToJX03owAvvg5flQnXbWRspqp1TKAYDa7EN6PzULH+20tq54Vl6ljNl7eHggLCwMABAVFQVXV1f5c4wxTJs2DW5ubli+fLl8OIdvv0amoFTK0Xn1hBCNpJSefd++fXHt2jWMHDkSjDGsWrUKu3btgr29PTiOw82bN1FaWoqrV68CAL777ju0adNGGVGqRFwixW9RL9Hb1QqNLNXnzy5CCKkpSin2AoEAy5cvL/MzZ2dn+f/v37+vjM1W2+GolygolWE89eq1yoquwYpfRIiG0PqLqoolMvx6JwWdHCzQpD7dhUqbtLRqxXcEQlRG66dL+CM2DVmFEozvSL16bXMl6RKuJF3iOwYhKqHVPXspx7DvVhJa2pjAw86M7zhExX6IXAeA7lhFtINW9+zPPU7Hy7wSjOtgDx0dHb7jEEKI0mhtsWeMIfRWMhzrGqKbsyXfcQghRKm0tthff56NJxkF8G9nBwH16gkhGk5ri/3em0moZ6wHr6b1+I5CCCFKp5UHaGNS83AnORczezhBV6i1v++03voem/mOQIjKaGWx33MzCSb6Ivi2asB3FMIjF4vGfEcgRGW0rlv7PKsQV+IzMdzdBkZ6Wvm7jvzXX89P46/np/mOQYhKaF212387GbpCHYxoY8t3FMKzbVEhAADPRv15TkKI8mlVzz6zoBSnHqRhYPP6dHMSQohW0apif+huCiQyhjF0y0FCiJbRmmJfWCrD4Xup6OFSFw40jTEhRMtoTbE/EfMKecVS+LenCc8IIdpHKw7QSjmGXyOT0bqhKVo1NOU7DlETW/vs5DsCISqjFcX+YlwGXuaVYFZPZ8UvJlrD1oSO3RDtofHDOIwx7I9Mgb1FHXR3qct3HKJGjj05gmNPjvAdgxCV0Phify8lDw9e5WOUhy1NeEbK2B37M3bH/sx3DEJUQuOL/f7IZJgZiDCoeX2+oxBCCG80uti/yC7ClfhMDG1tAwNdId9xCCGENxpd7H+NTIZIqIPhNDUCIUTLaWyxzy2S4M/YNHg2qQcrmhqBEKLlNPbUy+P3X6FYymGUB/XqSfl+9gzlOwIhKqORxV4q43Ao6iXafmIG13rGfMchaqpuHToVl2gPjRzGOfcwHWn5JdSrJ5U68Gg/Djzaz3cMQlRCI4v93ohENDQzQFcn6rmRilGxJ9pE44r9o7R83E7Mhl+bhhAK6CIqQggBNLDYH7iTAiM9IQa3oPvLEkLIWxpX7B+miTGqgz2M9TXy2DMhhFSLxlXEXaPbwMbaGLm5RXxHIYQQtaFxxd5QTwgdmvCMVMF/Bh7mOwIhKqNxxZ6QqjLUpdtTEu2hlDF7juOwePFi+Pn5wd/fH4mJiWWeP3ToEIYMGYIRI0bg0qVLyohAiEK/xPwbv8T8m+8YhKiEUnr258+fR2lpKQ4ePIioqCgEBwdj27ZtAICMjAyEhobiyJEjKCkpwejRo9GlSxfo6dH8NUS1TsQfBQBMaPE
lz0kIUT6l9OwjIyPRrVs3AIC7uztiYmLkz0VHR6NNmzbQ09ODiYkJ7O3t8ejRI2XEIIQQ8l9K6dmLxWIYG/9vThqhUAipVAqRSASxWAwTExP5c0ZGRhCLxe+tw9hYHyJR9eagFwoFMDfX/PFYaufHEYne9HXUZR/S+6lZ1K2dSin2xsbGKCgokD/mOA4ikajc5woKCsoU/7fE4pJqb9/c3BA5OYXVXr62oHZ+HKmUAwC12Yf0fmoWPtppbf1+LX1LKcM4Hh4eCAsLAwBERUXB1dVV/lyrVq0QGRmJkpIS5Ofn4+nTp2WeJ4QQUvN0GGOsplfKcRyWLl2KuLg4MMawatUqhIWFwd7eHn369MGhQ4dw8OBBMMYwefJkeHp61nQEQggh71BKsSeEEKJeNG5uHEIIIe+jYk8IIVpAY6ZLeHuc4PHjx9DT00NQUBAcHBz4jlVjJBIJFixYgJSUFJSWlmLq1KlwcXHBvHnzoKOjg8aNG2PJkiUQCGr/7+/MzEwMGTIEv/zyC0QikUa2EQB27NiBixcvQiKRYNSoUejQoYPGtVUikWDevHlISUmBQCDAihUrNOo9vXfvHtavX4/Q0FAkJiaW264ff/wRly9fhkgkwoIFC9CqVSt+wjIN8ddff7GAgADGGGN3795lU6ZM4TlRzTp8+DALCgpijDGWlZXFevTowSZPnswiIiIYY4wFBgays2fP8hmxRpSWlrJp06axfv36sfj4eI1sI2OMRUREsMmTJzOZTMbEYjHbsmWLRrb13Llz7JtvvmGMMRYeHs5mzJihMe3cuXMnGzRoEBs+fDhjjJXbrpiYGObv7884jmMpKSlsyJAhvOWtnb9Oy1HZVbuawMvLC99++638sVAoRGxsLDp06AAA6N69O65fv85XvBqzZs0ajBw5EvXq1QMAjWwjAISHh8PV1RXTp0/HlClT0LNnT41sq6OjI2QyGTiOg1gshkgk0ph22tvbIyQkRP64vHZFRkaia9eu0NHRQcOGDSGTyZCVlcVLXo0p9hVdtaspjIyMYGxsDLFYjG+++QYzZ84EY0w+nbORkRHy8/N5Tvlxfv/9d1haWsp/aQPQuDa+lZ2djZiYGGzevBnLli3DnDlzNLKthoaGSElJQf/+/REYGAh/f3+Naaenp6f8YlGg/M/qP+sSn+3VmDH7yq7a1RSpqamYPn06Ro8eDW9vb6xbt07+XEFBAUxNTXlM9/GOHDkCHR0d/P3333j48CECAgLK9II0oY1vmZubw8nJCXp6enBycoK+vj5evXolf15T2rp792507doVs2fPRmpqKsaNGweJRCJ/XlPaCaDMcYe37arqjAGqoDE9+8qu2tUEr1+/xoQJEzB37lwMGzYMANCsWTPcuHEDABAWFoZ27drxGfGj7d+/H/v27UNoaCiaNm2KNWvWoHv37hrVxrfatm2Lq1evgjGGtLQ0FBUVoXPnzhrXVlNTU3lxMzMzg1Qq1bjP7VvltcvDwwPh4eHgOA4vX74Ex3GwtLTkJZ/GXFRV3lW7zs7OfMeqMUFBQTh9+jScnJzkP1u4cCGCgoIgkUjg5OSEoKAgCIXVmzxO3fj7+2Pp0qUQCAQIDAzUyDauXbsWN27cAGMMs2bNgp2dnca1taCgAAsWLEBGRgYkEgm++OILtGjRQmPamZycjO+++w6HDh1CQkJCue0KCQlBWFgYOI7D/PnzefvlpjHFnhBCSMU0ZhiHEEJIxajYE0KIFqBiTwghWoCKPSGEaAEq9oQQogWo2BOlu3HjBtq1a4fU1FT5z9avX4/ff/+92utMTk7GiBEjaiLee2QyGSZOnIhRo0YhNzdXKdtQtVu3buHRo0d8xyA8omJPVEJXVxfz589HbTjTNyMjA9nZ2fj1119hZmbGd5waceTIEaSnp/Mdg/BIs+YTIGqrU6dO4DgO+/fvx9ixY+U/f/eiFAAYMWIENm7ciKNHjyIxMRHZ2dnIzc3F6NGjcfbsWSQkJGDNmjWwsrJ
CVlYWpkyZgqysLPTo0QPTp09HamoqAgMDUVJSAn19faxYsQIymQxTp06Fubk5unfvji+//FK+/RMnTmDPnj3Q09NDo0aNsHz5cgQGBuL58+dYvHgxli9fLn/tvXv3sHLlSjDGUL9+faxfvx7Pnj3DihUrIBQK5dvjOA6zZs2CjY0NkpOTMXDgQDx58gQPHjxAz5498d1338Hf3x+Ojo5ISEgAYww//PADrK2tERwcjMjISADAoEGDMG7cOMybNw96enpISUlBeno6goOD0bx5c5w+fRq7d++GQCBA27ZtMWfOHISEhCA5ORmZmZl4+fIl5s+fDwsLC1y9ehWxsbFwcXHBli1b8OLFC5SUlGDixIkYMGCAij4FhFc8zLRJtExERASbOXMmy8rKYn369GEJCQls3bp17MiRIywpKUk+RSxjjA0fPpwlJSWxLVu2sIULFzLGGNuxY4d8mty3Uz0nJSWxzp07s7y8PCaVSpmfnx97+PAh+/bbb9nly5cZY4xdv36dfffddywpKYl17NiRlZSUlMmVlZXFPvvsM5afn88YY2zlypUsNDT0vUxveXt7s/j4eMYYY/v27WMxMTHs888/Zw8ePGCMvZnO9+uvv5ZvLy8vj6Wnp7OWLVuy7OxsVlxczDp37swYY2zs2LHs6NGj8nWtWLGCXbx4kU2fPp1xHMdKS0vZsGHD2KNHj1hAQADbtm0bY4yxgwcPssDAQJadnc369+/PCgsLGWOMzZkzh4WHh7MtW7awRYsWMcbeTCk8YcIExhhjAQEB7MqVKyw/P5/17NmTZWZmsszMTHbixImPem9J7UE9e6IyFhYWWLBgAebNmwcPD49yX8PeGeZp1qwZAMDExAQuLi4A3syvUlJSAgBo0qSJfN6Vli1bIiEhAXFxcdixYwd++uknMMagq6sLALCzs4Oenl6ZbSUlJcHFxUU+K2H79u0RHh6Onj17lpstMzNTPgXHmDFjAADp6elo2rSpfPkNGzYAAD755BOYmJhAT08PVlZWMDc3BwD5rIjAm792gDfzOl28eBENGjRAu3btoKOjA11dXbRu3RpPnz4FAPk2GjRogDt37uDFixfIysrCV199BeDNtARJSUnvvba0tLRMG4yNjREYGIjAwECIxWIMHjy43LYSzUNj9kSlevfuDUdHRxw9ehQAoK+vj8zMTMhkMuTl5SE5OVn+2ncLY3mePn2KgoICSKVSREdHo3HjxnBycsKcOXMQGhqKZcuWwdPTEwDKvROSnZ0dnj59isLCQgDAzZs34ejoWOH26tWrh+fPnwMAdu7ciXPnzqFevXryA5+3bt1Co0aNqpQdgPyeC3fu3IGLiwucnZ3lQzgSiQR3796V323tn+uzs7ODjY0NfvnlF4SGhmLs2LFo3bp1hdvW0dEBYwzp6emIjY3F1q1bsXPnTqxbt06jpgInFaOePVG5hQsXIiIiAgBgbW2NLl26YNiwYbC3t/+gW0mamZlh1qxZyMrKwoABA+Di4oKAgAAsXboUJSUlKC4uxsKFCytc3tLSEl9//TW++OILCAQC2NvbY86cOcjIyCj39cuWLcOCBQsgEAhgbW2N8ePHw9bWFitWrABjDEKhEKtWrapy/qNHj2L37t2oU6cO1q5dCwsLC9y8eRN+fn6QSCTw8vJC8+bNK8w+fvx4+Pv7QyaTwdbWFv37969wW61bt8b69euxadMmZGRkwNfXF4aGhpgwYYLGTQVOykcToRHCg7ezemrSzKxEvdEwDiGEaAHq2RNCiBagnj0hhGgBKvaEEKIFqNgTQogWoGJPCCFagIo9IYRoASr2hBCiBf4fAdsuyKTgIaAAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot explained total variance of principal 
components against number of components\n", "# make a vertical line to ensure you are extracting the right point on the graph\n", "\n", "plt.plot(np.cumsum(model_pca.explained_variance_ratio_))\n", "plt.axvline(x=48, color='green', linestyle='--')\n", "plt.xlabel('Number of components')\n", "plt.ylabel('Principal components')\n", "plt.title('Finding the ideal number of n_components parameter for PCA')" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(106, 106)\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAD0CAYAAAC/3RwjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFJ9JREFUeJzt3X2MVPW9x/HPeZjHnVlmH3kSIahIqLWCjdEol1sNV0P0JhqoSLsJtWlSQlIbCLQx6YY/KpEYE/8CahONNqQxVExMc9OkikmVNLZBuFev3m14UoHVfWZndufhzJlz/1hddoFlZBic+THvV2KyO3Nm9ru/wNvDmTNnrCAIAgEA6p5d6wEAAN8MwQYAQxBsADAEwQYAQxBsADAEwQYAQ7jX6on7+9PX6qlrKpGIKJPJ13qMusYalccaldeoa9TRkZzxPvawr5DrOrUeoe6xRuWxRuWxRhcj2ABgCIINAIYg2ABgCIINAIYg2ABgiGt2Wh8AMx3s6dOBI2c0mPXUFgvpseXzdf+tnbUeC2IPG8AUB3v6tPfQSeVKvuY0OcqVfO09dFIHe/pqPRpEsAFMceDIGTVHHbU3RRWJRNTeFFVz1NGBI2dqPRpEsAFMMZj1lHStabclXUuDWa9GE2Eqgg1gUlsspHRx+odQpYuB2mKhGk2EqQg2gEmPLZ+v0ZyvgbGc8vm8BsZyGs35emz5/FqPBnGWCIApvj4b5MCRM/pibOIskQ333shZInWCYAOY5v5bOwl0neKQCAAYouI97N/97nc6ePCgPM/TE088oXXr1lVzLgDABSoK9vvvv68jR47oj3/8o7LZrF566aVqzwUAuEBFwX7vvfe0ZMkSbd68WZlMRtu3b6/2XACAC1QU7OHhYZ09e1Z79+7V6dOntWnTJv3lL3+RZVnlHwwAqEhFwU6lUlq8eLHC4bAWL16sSCSioaEhtbW1TW6TSESuy4/4cRxbqVS81mPUNdaoPNaoPNboYhUF+84779Srr76qn/zkJ+rr61M2m1UqlZq2zfX64ZmpVFwjI+O1HqOusUblsUblNeoaXe5DeCsK9g9+8AP985//1Nq1axUEgbq7u+U419/eNADUk4pP6+OFRgD4dvHGGQAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEM
QbAAwBMEGAENcVbAHBwe1atUqHT9+vFrzAABmUHGwPc9Td3e3otFoNecBAMyg4mDv2rVL69evV2dnZzXnAQDMwK3kQQcOHFBra6tWrlypF1988ZLbJBIRua5zVcPVI8exlUrFaz1GXWONymONymONLmYFQRBc6YN+9KMfybIsWZalTz75RIsWLdKePXvU0dExuU1/f7qqg9aLVCqukZHxWo9R11ij8lij8hp1jTo6kjPeV9Ee9r59+ya/7urq0o4dO6bFGgBQfZzWBwCGqGgPe6o//OEP1ZgDAFAGe9gAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAi3kgd5nqenn35aZ86cUaFQ0KZNm/TAAw9UezYAwBQVBfvNN99UKpXSc889p+HhYT366KMEGwCusYqC/dBDD+nBBx+c/N5xnKoNBAC4NCsIgqDSB2cyGW3atEk//OEP9cgjj0y7L5styHWvv5A7ji3fL9V6jLrGGpXHGpXXqGsUCs3czYr2sCWpt7dXmzdv1oYNGy6KtSRlMvlKn7qupVJxjYyM13qMusYalccaldeoa9TRkZzxvoqCPTAwoCeffFLd3d265557Kh4MAPDNVXRa3969ezU6Oqrdu3erq6tLXV1dyuVy1Z4NADDFVR3Dvpz+/vS1eNqaa9R/pl2J62WNcp6vkaynfLGkiGsrFQspepnji1fielmja6lR1+hyh0R44wxwCTnP1xejOZWCQLGQrVIQ6IvRnHKeX+vR0MAINnAJI1lPYddWyLFlWZZCjq2wa2sk69V6NDQwgg1cQr5Ykmtb025zbUv5YuOdZob6QbCBS4i4toql6S/vFEuBIi5/ZVA7/OkDLiEVC6lQLMnzSwqCQJ5fUqFYUioWqvVoaGAVv3EGuF71pXP6uDet/rG8XMvSwtYmzUtFNac5WrWzRIBKEGxgir50Tu8eH1Qy4mhuMqJxz9fJoTHd0HL5WB/rz+i9YwMazHpqi4V0383turkj8S1OjkZAsIEpPu5NKxlxlIyGJUlJx5FU0Me9aXUmo5PbTQ10qVTS0FhRi9pimtsUUtor6U//fUZrvzefaKOqCDbwlb50Tu+fGpJtSYmYq0WtTUrFwoqHHP2rP6PhowUNZT0Vi0V9fi6vBamJQL97YkCZvK/5qYjceEQtXx3mfu/YAMFGVfGiI6Dzh0LCIVth15JflD7sHdVItqBPh8f1ry/HlPdLmt0U0sdfjunL0axKktxQSL5sJSKujg2cf1deMmRrkHO2UWUEG9D5QyFLO5Mq+JKsQFHb0ie953T402HlfF9HTo/o8NlzyhSKSkZcfT40EehEyFFgSeOF4uTzpb2S2jijBFVGsAFJIzlP8ZCjVDys2+Y2y7Vt5Txfg2MFDWc9Nbm2miOuip50LlvQWKGo8cLE29QXdyY0lvdlSSp6nobH8xrOebrv5vba/lK47nAMG5AUcqT/68vItmxFXVs3dTYpKMX1wWcjmt1syXEtuY6jhCPdkIzpxNC4Fre5KnqeQralG1riaou76h2bOEuEFxxxLRBsNLxz2YLG80WdHBiT69iKONKZc67mNEcVi7ialwzrwy8zkqSIY6sp6igVc7WwPT4Z6J/es4hA45oj2GhIUy+d+uHZczqX97Wks0l9mYLS+aJyRU/L5iaVCDvK+yV9b35KJwbHNJrzZVmWVt3crp/eu7jWvwYaDMFGw/n60qlh11YsZOuzoXG5tqWWWTHNbo5LksY8TyPjBf3bzR36r4+/1KyoqxXzm5XxSjqXK+o/ls2u8W+BRsSLjmg4F1461bYsOY6lsSlneViBZMnWgpa41iybrYhj68sxTxHH1ppls7WgJV7D3wCNij1sNJx8saRY6Py+ysK2uI73ZRTI0qxISQW/pHT
e1y2dE8ekF7TECTTqAnvYaDgXXjp1SUdCbYmIrCDQaM5TsRRoTjKq78yZ+aOagFpgDxsNJxUL6YvRiQ+Ndm1LsbCj2+Y2K18sySsFSoQd3dga06xYuMaTAtMRbDScaMjRnOaoRrKest7EB+ze0png0qmoewQbDSkacjSHQMMwHMMGAEMQbAAwBMEGAENUfAy7VCppx44d6unpUTgc1m9/+1stXLiwmrMBAKaoeA/7rbfeUqFQ0GuvvaatW7fq2WefreZcAIALVBzsw4cPa+XKlZKkO+64Qx999FHVhgIAXKziQyKZTEaJxPnLSTqOo2KxKNedeMpEIiLXvf5Om3IcW6kUb1O+HNaoPNaoPNboYhUHO5FIaGxsbPL7Uqk0GWtJymTyVzdZnUql4hoZGS+/YQNjjcpjjcpr1DXq6Jj5kggVHxJZsWKF/va3v0mSjh49qiVLllT6VACAb6DiPezVq1fr0KFDWr9+vYIg0M6dO6s51zUz9cL1EddWKhbiLckAjGAFQRCU3+zK9fenr8XTXpWpF653bUvFUqBCsaQ5zdFvFO2+dE4nRwvqHcwoFQ1p2dykOpPRb2FyszTqP2WvBGtUXqOu0TU5JGKiCy9cH3JshV1bI1mv7GP70jm9e3xQnu+rPR5Swff17vFB9aVz38LkAHCdX/zpWH9G7/T0qTedn/hsvqKvj3rP6fRQQX4gJaPSzR3NWnVLmx6+bd5l97I/7k0rGXE0KxZWxg+UdBxJBX3cm2YvG8C34roN9tHPR/TqPz5T0fcVcS2dGijof/tyCkkqSgokDeSkoH9UmUJBqVhY993UPmO0R3Ke2uOhabfFQ44GxsvvnQNANRgV7M+Hx/V2T58+PDuq0WxBncmIvr+wVfcubpu2l3suW9Ab/3NW0sRlNB3L1qmRUVmS8pKiksJhS54fKJOTSknpH6eGdNu8WTNecjMVDWnc8zVrym3jnq9UNHTJ7QGg2uou2Ad7+nTgyBkNZj21xUJ6bPl83X9rpz4fHtefPvhcn47klMkV5Ni2zozkVAyGdC7rac135kxG+7OhrHJeUSFHCrmOXNuWV5Ssr35GSZIUyLEkTxMH8s/lisoXSzPOtWxuUu8eH1Q8W5B8X+Oer3Te18qbUtd2QQDgK3UV7IM9fdp76KSao47mNDlKF33tPXRSknQu6yld8OUXAzVHIgqHbGW9osZyRY2MTT+WnCn4ao2H1Tua19fv5Ym60rnCxNeWJNuy5auksC3JsjQr6irizvwabGcyqpU3tU2cJTKeVSoa0sqbUhy/BvCtqatgHzhyRs1RR+1NExGMRCQppwNHzmj5whYFQaCSFchxJvaVQ46tfNGTVwo0kjt/LDkRdrRsblKfj+Y0mvWUDNlqS4Y1MlhQRBPHsNP5knxJs+OWoiFHdy1qVSp2+cMbncmolixobchTjQDUXl2d1jeY9ZR0rWm3JV1Lg1lPrbGQLMuSHVjy/YlTxz2/JNeyFbKtaceSb2yNqSUe0b/f0q64a2tg3FNbLKL/XNau+a0hxVwpJGl2zNayuSk98f0bL/uCIwDUg7raw26LhZQu+l/tWU9IFwO1xUL6/sIWnRrMaMi1NJrLy/Fs5b2SZs+KKtUU1rK55082nxUL67vzmjVrKKSOpoiCktSeiKg9EeadjQCMVVfBfmz5/K+OWeeUdC2li4FGc7423HujFrTEtXbFgmlnicxPRS95loj0VbTnh2vziwDANVBXwb7/1k5JE8eyvxibOEtkw703Tt6+oCWujXcvquGEAFA7dRVsaSLaXwcaAHBeXb3oCACYGcEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwBMEGAEMQbAAwREVX60un09q2bZsymYw8z9Ovf/1rLV++vNqzAQCmqCjYL7/8su6++25t3LhRJ06c0NatW/XGG29UezYAwBQVBXvjxo0Khyc+zcX3fUWmfqYXAOCasIIgCC63wf79+/X
KK69Mu23nzp26/fbb1d/fr5/97Gd6+umnddddd03bJpstyHWvv89OdBxbvl+q9Rh1jTUqjzUqr1HXKHSZz5wtG+yZ9PT0aMuWLdq+fbtWrVp10f39/elKnrbupVJxjYyM13qMusYalccaldeoa9TRkZzxvooOiRw7dkxPPfWUXnjhBS1durTiwQAA31xFwX7++edVKBT0zDPPSJISiYT27NlT1cEAANNVFGziDADfPt44AwCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGINgAYAiCDQCGuKpgHz9+XHfeeafy+Xy15gEAzKDiYGcyGe3atUvhcLia8wAAZlBRsIMg0G9+8xtt2bJFsVis2jMBAC7BLbfB/v379corr0y7bd68eVqzZo2WLl064+MSiYhc17n6CeuM49hKpeK1HqOusUblsUblsUYXs4IgCK70QatXr9acOXMkSUePHtXtt9+uffv2Tdumvz9dnQnrTCoV18jIeK3HqGusUXmsUXmNukYdHckZ7yu7h30pf/3rXye/vv/++/XSSy9V8jQAgCvAaX0AYIiK9rCnOnjwYDXmAACUwR42ABiCYAOAIQg2ABiCYAOAIQg2ABiCYAOAIQg2ABiCYAOAIQg2ABiCYAOAIQg2ABiCYAOAIQg2ABiCYAOAIa768qoAAOnPH57V/g9OazhXVEvU1boVN+jh786r6s9gDxsArtKfPzyrPe+dVM731R6zlfN97XnvpP784dmq/hyCDQBXaf8Hp5WI2GpriikaiaqtKaZExNb+D05X9ecQbAC4SsO5oppca9ptTa6l4Vyxqj+HYAPAVWqJuhorBtNuGysGaolW92VCgg0AV2ndihuUyZc0OJZVLp/T4FhWmXxJ61bcUNWfw1kiAHCVvj4bZP8HpzWQnThLpOu+hVU/S4RgA0AVPPzdeVUP9IU4JAIAhiDYAGAIgg0AhiDYAGAIgg0AhrCCIAjKbwYAqDX2sAHAEAQbAAxBsAHAEAT7CqTTaf385z/Xj3/8Yz3++OM6cuRIrUeqG6VSSd3d3Xr88cfV1dWlTz/9tNYj1R3P87Rt2zZt2LBBa9eu1dtvv13rkerW4OCgVq1apePHj9d6lLrCW9OvwMsvv6y7775bGzdu1IkTJ7R161a98cYbtR6rLrz11lsqFAp67bXXdPToUT377LPas2dPrceqK2+++aZSqZSee+45DQ8P69FHH9UDDzxQ67Hqjud56u7uVjQarfUodYdgX4GNGzcqHA5LknzfVyQSqfFE9ePw4cNauXKlJOmOO+7QRx99VOOJ6s9DDz2kBx98cPJ7x3FqOE392rVrl9avX68XX3yx1qPUHQ6JzGD//v16+OGHp/136tQpRaNR9ff3a9u2bdqyZUutx6wbmUxGiURi8nvHcVQsVvfi7aZrampSIpFQJpPRL37xC/3yl7+s9Uh158CBA2ptbZ38nz+mYw97BuvWrdO6desuur2np0dbtmzR9u3bddddd9VgsvqUSCQ0NjY2+X2pVJLr8sfrQr29vdq8ebM2bNigRx55pNbj1J3XX39dlmXp73//uz755BP96le/0p49e9TR0VHr0eoCf6OuwLFjx/TUU0/phRde0NKlS2s9Tl1ZsWKF3nnnHa1Zs0ZHjx7VkiVLaj1S3RkYGNCTTz6p7u5u3XPPPbUepy7t27dv8uuuri7t2LGDWE9BsK/A888/r0KhoGeeeUbSxF4lL6xNWL16tQ4dOqT169crCALt3Lmz1iPVnb1792p0dFS7d+/W7t27JUm///3veXEN3xhvTQcAQ/CiIwAYgmADgCEINgAYgmADgCEINgAYgmADgCEINgAYgmADgCH+HyZpNigcSSQQAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] 
}, "metadata": {}, "output_type": "display_data" } ], "source": [ "# new fit\n", "# Let's try adjusting the number of components and see what we'll get.\n", "# Fit the freshly configured `pca` estimator: calling `model_pca` here would\n", "# ignore n_components=48 and simply refit the model from the earlier cell.\n", "pca = PCA(n_components=48)\n", "transformed = pca.fit_transform(df_scaled)\n", "print(transformed.shape)\n", "plt.scatter(transformed[:,0], transformed[:,1], alpha = 0.1)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Firmicutes(100) 54\n", "Proteobacteria(100) 14\n", "TM7(100) 12\n", "Bacteria_unclassified(100) 10\n", "Bacteroidetes(100) 7\n", "Deinococcus-Thermus(100) 4\n", "Actinobacteria(100) 4\n", "Verrucomicrobia(100) 1\n", "Name: phylum, dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "phylum.value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "From the results, I suspect that the points between 0 and 2 on the *x-axis* and 0 and 2 on the *y-axis* could represent the bacteria in the phylum **Firmicutes**, while those to the left could be the **Proteobacteria**. The isolated points could be **TM7** and **Bacteria_unclassified**. I've partially achieved my objective with this dataset. Later on, I'll run PCoA on this dataset and compare the results. In addition, if you choose another part of the matrix you'll get a different result. " ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda env:py35]", "language": "python", "name": "conda-env-py35-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 2 }