{ "cells": [ { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# importing packages/modules\n", "\n", "import pandas as pd\n", "from sklearn.manifold import TSNE\n", "from sklearn.feature_extraction import DictVectorizer\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", "from sklearn.decomposition import PCA\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import Pipeline\n", "\n", "sns.set_style('darkgrid')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Objective: Use unsupervised learning to create distinct clusters representing the most abundant OTUs (operational taxonomic units)/identities of bacteria found in a mouse (*Mus musculus*), based on the Schloss lab MiSeq standard operating procedure (SOP)." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# import the csv file into jupyter\n", "\n", "df = pd.read_csv(\"0.16.cons.taxonomy.csv\", delimiter=\";\",index_col=0, usecols=[\"otu\",\"size\",\"identity\",\"phylum\",\"class\",\"order\",\"family\",\"genus\",\"genus_0\",\"genus_1\"])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | size | \n", "identity | \n", "phylum | \n", "class | \n", "order | \n", "family | \n", "genus | \n", "genus_0 | \n", "genus_1 | \n", "
---|---|---|---|---|---|---|---|---|---|
otu | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
Otu001 | \n", "61864 | \n", "Bacteria(100) | \n", "Bacteroidetes(100) | \n", "Bacteroidia(100) | \n", "Bacteroidales(100) | \n", "Porphyromonadaceae(100) | \n", "Barnesiella(96) | \n", "Barnesiella_unclassified(96) | \n", "Barnesiella_unclassified(96) | \n", "
Otu002 | \n", "23360 | \n", "Bacteria(100) | \n", "Firmicutes(100) | \n", "Clostridia(100) | \n", "Clostridiales(100) | \n", "Lachnospiraceae(89) | \n", "Lachnospiraceae_unclassified(61) | \n", "Lachnospiraceae_unclassified(61) | \n", "Lachnospiraceae_unclassified(61) | \n", "
Otu003 | \n", "6679 | \n", "Bacteria(100) | \n", "Firmicutes(100) | \n", "Bacilli(100) | \n", "Lactobacillales(100) | \n", "Lactobacillaceae(95) | \n", "Lactobacillus(95) | \n", "Lactobacillus_unclassified(95) | \n", "Lactobacillus_unclassified(95) | \n", "
Otu004 | \n", "6584 | \n", "Bacteria(100) | \n", "Bacteroidetes(100) | \n", "Bacteroidia(100) | \n", "Bacteroidales(100) | \n", "Bacteroidaceae(100) | \n", "Bacteroides(100) | \n", "Bacteroides_unclassified(100) | \n", "Bacteroides_unclassified(100) | \n", "
Otu005 | \n", "5376 | \n", "Bacteria(100) | \n", "Bacteroidetes(100) | \n", "Bacteroidia(100) | \n", "Bacteroidales(100) | \n", "Rikenellaceae(100) | \n", "Alistipes(100) | \n", "Alistipes_unclassified(100) | \n", "Alistipes_unclassified(100) | \n", "