{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import os\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.cluster import AgglomerativeClustering\n",
    "from sklearn.metrics.cluster import adjusted_rand_score\n",
    "from sklearn.metrics.cluster import adjusted_mutual_info_score\n",
    "from sklearn.metrics.cluster import homogeneity_score\n",
    "import rpy2.robjects as robjects\n",
    "from rpy2.robjects import pandas2ri"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n",
    "                                   'AMI_Louvain','AMI_kmeans','AMI_HC',\n",
    "                                   'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "workdir = './output/'\n",
    "path_fm = os.path.join(workdir,'feature_matrices/')\n",
    "path_clusters = os.path.join(workdir,'clusters/')\n",
    "path_metrics = os.path.join(workdir,'metrics/')\n",
    "os.system('mkdir -p '+path_clusters)\n",
    "os.system('mkdir -p '+path_metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n",
    "num_clusters = len(np.unique(metadata['label']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n",
    "len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['FM_Control_BMnoisyp4.rds',\n",
       " 'FM_BROCKMAN_BMnoisyp4.rds',\n",
       " 'FM_Cusanovich2018_BMnoisyp4.rds',\n",
       " 'FM_cisTopic_BMnoisyp4.rds',\n",
       " 'FM_chromVAR_BMnoisyp4_kmers.rds',\n",
       " 'FM_chromVAR_BMnoisyp4_motifs.rds',\n",
       " 'FM_chromVAR_BMnoisyp4_kmers_pca.rds',\n",
       " 'FM_chromVAR_BMnoisyp4_motifs_pca.rds',\n",
       " 'FM_GeneScoring_BMnoisyp4.rds',\n",
       " 'FM_GeneScoring_BMnoisyp4_pca.rds',\n",
       " 'FM_Cicero_BMnoisyp4.rds',\n",
       " 'FM_Cicero_BMnoisyp4_pca.rds',\n",
       " 'FM_SnapATAC_BMnoisyp4.rds',\n",
       " 'FM_Scasat_BMnoisyp4.rds',\n",
       " 'FM_scABC_BMnoisyp4.rds',\n",
       " 'FM_SCRAT_BMnoisyp4.rds',\n",
       " 'FM_SCRAT_BMnoisyp4_pca.rds']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n",
    "    this_step = 0\n",
    "    this_min = float(range_min)\n",
    "    this_max = float(range_max)\n",
    "    while this_step < max_steps:\n",
    "        print('step ' + str(this_step))\n",
    "        this_resolution = this_min + ((this_max-this_min)/2)\n",
    "        sc.tl.louvain(adata,resolution=this_resolution)\n",
    "        this_clusters = adata.obs['louvain'].nunique()\n",
    "        \n",
    "        print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n",
    "        \n",
    "        if this_clusters > n_cluster:\n",
    "            this_max = this_resolution\n",
    "        elif this_clusters < n_cluster:\n",
    "            this_min = this_resolution\n",
    "        else:\n",
    "            return(this_resolution, adata)\n",
    "        this_step += 1\n",
    "    \n",
    "    print('Cannot find the number of clusters')\n",
    "    print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Control\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 8 at resolution 1.5\n",
      "step 1\n",
      "got 5 at resolution 0.75\n",
      "step 2\n",
      "got 6 at resolution 1.125\n",
      "BROCKMAN\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 11 at resolution 1.5\n",
      "step 1\n",
      "got 5 at resolution 0.75\n",
      "step 2\n",
      "got 8 at resolution 1.125\n",
      "step 3\n",
      "got 7 at resolution 0.9375\n",
      "step 4\n",
      "got 6 at resolution 0.84375\n",
      "Cusanovich2018\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "cisTopic\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "chromVAR_kmers\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "chromVAR_motifs\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 4 at resolution 1.125\n",
      "step 3\n",
      "got 5 at resolution 1.3125\n",
      "step 4\n",
      "got 6 at resolution 1.40625\n",
      "chromVAR_kmers_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "chromVAR_motifs_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "GeneScoring\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 35 at resolution 1.5\n",
      "step 1\n",
      "got 2 at resolution 0.75\n",
      "step 2\n",
      "got 15 at resolution 1.125\n",
      "step 3\n",
      "got 6 at resolution 0.9375\n",
      "GeneScoring_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 12 at resolution 1.5\n",
      "step 1\n",
      "got 9 at resolution 0.75\n",
      "step 2\n",
      "got 6 at resolution 0.375\n",
      "Cicero\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 34 at resolution 1.5\n",
      "step 1\n",
      "got 2 at resolution 0.75\n",
      "step 2\n",
      "got 17 at resolution 1.125\n",
      "step 3\n",
      "got 5 at resolution 0.9375\n",
      "step 4\n",
      "got 12 at resolution 1.03125\n",
      "step 5\n",
      "got 8 at resolution 0.984375\n",
      "step 6\n",
      "got 5 at resolution 0.9609375\n",
      "step 7\n",
      "got 8 at resolution 0.97265625\n",
      "step 8\n",
      "got 7 at resolution 0.966796875\n",
      "step 9\n",
      "got 7 at resolution 0.9638671875\n",
      "step 10\n",
      "got 7 at resolution 0.96240234375\n",
      "step 11\n",
      "got 6 at resolution 0.961669921875\n",
      "Cicero_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 11 at resolution 1.5\n",
      "step 1\n",
      "got 7 at resolution 0.75\n",
      "step 2\n",
      "got 3 at resolution 0.375\n",
      "step 3\n",
      "got 5 at resolution 0.5625\n",
      "step 4\n",
      "got 5 at resolution 0.65625\n",
      "step 5\n",
      "got 6 at resolution 0.703125\n",
      "SnapATAC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "Scasat\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 5 at resolution 0.75\n",
      "step 2\n",
      "got 5 at resolution 1.125\n",
      "step 3\n",
      "got 6 at resolution 1.3125\n",
      "scABC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 21 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 8 at resolution 1.125\n",
      "step 3\n",
      "got 4 at resolution 0.9375\n",
      "step 4\n",
      "got 4 at resolution 1.03125\n",
      "step 5\n",
      "got 6 at resolution 1.078125\n",
      "SCRAT\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 8 at resolution 1.5\n",
      "step 1\n",
      "got 5 at resolution 0.75\n",
      "step 2\n",
      "got 6 at resolution 1.125\n",
      "SCRAT_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 10 at resolution 1.5\n",
      "step 1\n",
      "got 5 at resolution 0.75\n",
      "step 2\n",
      "got 8 at resolution 1.125\n",
      "step 3\n",
      "got 6 at resolution 0.9375\n"
     ]
    }
   ],
   "source": [
    "for file in files:\n",
    "    file_split = file.split('_')\n",
    "    method = file_split[1]\n",
    "    dataset = file_split[2].split('.')[0]\n",
    "    if(len(file_split)>3):\n",
    "        method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n",
    "    print(method)\n",
    "\n",
    "    pandas2ri.activate()\n",
    "    readRDS = robjects.r['readRDS']\n",
    "    df_rds = readRDS(os.path.join(path_fm,file))\n",
    "    fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n",
    "    fm_mat.columns = metadata.index\n",
    "    \n",
    "    adata = sc.AnnData(fm_mat.T)\n",
    "    adata.var_names_make_unique()\n",
    "    adata.obs = metadata.loc[adata.obs.index,]\n",
    "    df_metrics.loc[method,] = \"\"\n",
    "    #Louvain\n",
    "    sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n",
    "#     sc.tl.louvain(adata)\n",
    "    getNClusters(adata,n_cluster=num_clusters)\n",
    "    #kmeans\n",
    "    kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n",
    "    adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n",
    "    #hierachical clustering\n",
    "    hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n",
    "    adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n",
    "    #clustering metrics\n",
    "    \n",
    "    #adjusted rank index\n",
    "    ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n",
    "    ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n",
    "    ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n",
    "    #adjusted mutual information\n",
    "    ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n",
    "    ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic')   \n",
    "    ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n",
    "    #homogeneity\n",
    "    homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n",
    "    homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n",
    "    homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n",
    "\n",
    "    df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n",
    "    df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n",
    "    df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc] \n",
    "    adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_metrics.to_csv(path_metrics+'clustering_scores.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ARI_Louvain</th>\n",
       "      <th>ARI_kmeans</th>\n",
       "      <th>ARI_HC</th>\n",
       "      <th>AMI_Louvain</th>\n",
       "      <th>AMI_kmeans</th>\n",
       "      <th>AMI_HC</th>\n",
       "      <th>Homogeneity_Louvain</th>\n",
       "      <th>Homogeneity_kmeans</th>\n",
       "      <th>Homogeneity_HC</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Control</th>\n",
       "      <td>0.779759</td>\n",
       "      <td>0.780828</td>\n",
       "      <td>0.790929</td>\n",
       "      <td>0.848678</td>\n",
       "      <td>0.845885</td>\n",
       "      <td>0.846218</td>\n",
       "      <td>0.84847</td>\n",
       "      <td>0.846676</td>\n",
       "      <td>0.846452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BROCKMAN</th>\n",
       "      <td>0.561328</td>\n",
       "      <td>0.555279</td>\n",
       "      <td>0.602035</td>\n",
       "      <td>0.685324</td>\n",
       "      <td>0.680493</td>\n",
       "      <td>0.717159</td>\n",
       "      <td>0.682041</td>\n",
       "      <td>0.681051</td>\n",
       "      <td>0.709716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cusanovich2018</th>\n",
       "      <td>0.992017</td>\n",
       "      <td>0.768445</td>\n",
       "      <td>0.978179</td>\n",
       "      <td>0.988867</td>\n",
       "      <td>0.891575</td>\n",
       "      <td>0.971112</td>\n",
       "      <td>0.98893</td>\n",
       "      <td>0.863883</td>\n",
       "      <td>0.971277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cisTopic</th>\n",
       "      <td>0.953195</td>\n",
       "      <td>0.947545</td>\n",
       "      <td>0.917894</td>\n",
       "      <td>0.948382</td>\n",
       "      <td>0.94389</td>\n",
       "      <td>0.918501</td>\n",
       "      <td>0.94868</td>\n",
       "      <td>0.944194</td>\n",
       "      <td>0.918896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_kmers</th>\n",
       "      <td>0.495589</td>\n",
       "      <td>0.497999</td>\n",
       "      <td>0.531637</td>\n",
       "      <td>0.604726</td>\n",
       "      <td>0.636153</td>\n",
       "      <td>0.651479</td>\n",
       "      <td>0.587439</td>\n",
       "      <td>0.626889</td>\n",
       "      <td>0.641111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_motifs</th>\n",
       "      <td>0.398901</td>\n",
       "      <td>0.410407</td>\n",
       "      <td>0.316593</td>\n",
       "      <td>0.553781</td>\n",
       "      <td>0.567098</td>\n",
       "      <td>0.513624</td>\n",
       "      <td>0.555722</td>\n",
       "      <td>0.569391</td>\n",
       "      <td>0.503448</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_kmers_pca</th>\n",
       "      <td>0.601049</td>\n",
       "      <td>0.596801</td>\n",
       "      <td>0.558717</td>\n",
       "      <td>0.720589</td>\n",
       "      <td>0.700458</td>\n",
       "      <td>0.668644</td>\n",
       "      <td>0.712038</td>\n",
       "      <td>0.701502</td>\n",
       "      <td>0.666186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_motifs_pca</th>\n",
       "      <td>0.400536</td>\n",
       "      <td>0.345739</td>\n",
       "      <td>0.393151</td>\n",
       "      <td>0.553189</td>\n",
       "      <td>0.574613</td>\n",
       "      <td>0.532386</td>\n",
       "      <td>0.555077</td>\n",
       "      <td>0.561501</td>\n",
       "      <td>0.532387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GeneScoring</th>\n",
       "      <td>0.00806609</td>\n",
       "      <td>0.35607</td>\n",
       "      <td>0.246211</td>\n",
       "      <td>0.00920484</td>\n",
       "      <td>0.436762</td>\n",
       "      <td>0.324887</td>\n",
       "      <td>0.0148086</td>\n",
       "      <td>0.409791</td>\n",
       "      <td>0.291086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GeneScoring_pca</th>\n",
       "      <td>0.323092</td>\n",
       "      <td>0.357345</td>\n",
       "      <td>0.362712</td>\n",
       "      <td>0.417752</td>\n",
       "      <td>0.438917</td>\n",
       "      <td>0.439756</td>\n",
       "      <td>0.40738</td>\n",
       "      <td>0.428261</td>\n",
       "      <td>0.427891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cicero</th>\n",
       "      <td>0.0878461</td>\n",
       "      <td>0.473173</td>\n",
       "      <td>0.420783</td>\n",
       "      <td>0.0992193</td>\n",
       "      <td>0.680707</td>\n",
       "      <td>0.619445</td>\n",
       "      <td>0.100287</td>\n",
       "      <td>0.559189</td>\n",
       "      <td>0.530166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cicero_pca</th>\n",
       "      <td>0.530382</td>\n",
       "      <td>0.514069</td>\n",
       "      <td>0.448058</td>\n",
       "      <td>0.676972</td>\n",
       "      <td>0.628329</td>\n",
       "      <td>0.583012</td>\n",
       "      <td>0.642584</td>\n",
       "      <td>0.62543</td>\n",
       "      <td>0.574815</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SnapATAC</th>\n",
       "      <td>0.996003</td>\n",
       "      <td>0.997997</td>\n",
       "      <td>0.986067</td>\n",
       "      <td>0.994755</td>\n",
       "      <td>0.997053</td>\n",
       "      <td>0.981576</td>\n",
       "      <td>0.994782</td>\n",
       "      <td>0.99707</td>\n",
       "      <td>0.981671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Scasat</th>\n",
       "      <td>0.859124</td>\n",
       "      <td>0.79491</td>\n",
       "      <td>0.801662</td>\n",
       "      <td>0.886804</td>\n",
       "      <td>0.851331</td>\n",
       "      <td>0.853164</td>\n",
       "      <td>0.887068</td>\n",
       "      <td>0.851743</td>\n",
       "      <td>0.853761</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scABC</th>\n",
       "      <td>0.338829</td>\n",
       "      <td>0.421418</td>\n",
       "      <td>0.605121</td>\n",
       "      <td>0.441173</td>\n",
       "      <td>0.620527</td>\n",
       "      <td>0.687807</td>\n",
       "      <td>0.3912</td>\n",
       "      <td>0.600356</td>\n",
       "      <td>0.687662</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SCRAT</th>\n",
       "      <td>0.494443</td>\n",
       "      <td>0.500276</td>\n",
       "      <td>0.516734</td>\n",
       "      <td>0.647751</td>\n",
       "      <td>0.626947</td>\n",
       "      <td>0.636607</td>\n",
       "      <td>0.639919</td>\n",
       "      <td>0.62832</td>\n",
       "      <td>0.634003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SCRAT_pca</th>\n",
       "      <td>0.494298</td>\n",
       "      <td>0.494937</td>\n",
       "      <td>0.496254</td>\n",
       "      <td>0.6513</td>\n",
       "      <td>0.62783</td>\n",
       "      <td>0.643916</td>\n",
       "      <td>0.639984</td>\n",
       "      <td>0.628537</td>\n",
       "      <td>0.641292</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    ARI_Louvain ARI_kmeans    ARI_HC AMI_Louvain AMI_kmeans  \\\n",
       "Control                0.779759   0.780828  0.790929    0.848678   0.845885   \n",
       "BROCKMAN               0.561328   0.555279  0.602035    0.685324   0.680493   \n",
       "Cusanovich2018         0.992017   0.768445  0.978179    0.988867   0.891575   \n",
       "cisTopic               0.953195   0.947545  0.917894    0.948382    0.94389   \n",
       "chromVAR_kmers         0.495589   0.497999  0.531637    0.604726   0.636153   \n",
       "chromVAR_motifs        0.398901   0.410407  0.316593    0.553781   0.567098   \n",
       "chromVAR_kmers_pca     0.601049   0.596801  0.558717    0.720589   0.700458   \n",
       "chromVAR_motifs_pca    0.400536   0.345739  0.393151    0.553189   0.574613   \n",
       "GeneScoring          0.00806609    0.35607  0.246211  0.00920484   0.436762   \n",
       "GeneScoring_pca        0.323092   0.357345  0.362712    0.417752   0.438917   \n",
       "Cicero                0.0878461   0.473173  0.420783   0.0992193   0.680707   \n",
       "Cicero_pca             0.530382   0.514069  0.448058    0.676972   0.628329   \n",
       "SnapATAC               0.996003   0.997997  0.986067    0.994755   0.997053   \n",
       "Scasat                 0.859124    0.79491  0.801662    0.886804   0.851331   \n",
       "scABC                  0.338829   0.421418  0.605121    0.441173   0.620527   \n",
       "SCRAT                  0.494443   0.500276  0.516734    0.647751   0.626947   \n",
       "SCRAT_pca              0.494298   0.494937  0.496254      0.6513    0.62783   \n",
       "\n",
       "                       AMI_HC Homogeneity_Louvain Homogeneity_kmeans  \\\n",
       "Control              0.846218             0.84847           0.846676   \n",
       "BROCKMAN             0.717159            0.682041           0.681051   \n",
       "Cusanovich2018       0.971112             0.98893           0.863883   \n",
       "cisTopic             0.918501             0.94868           0.944194   \n",
       "chromVAR_kmers       0.651479            0.587439           0.626889   \n",
       "chromVAR_motifs      0.513624            0.555722           0.569391   \n",
       "chromVAR_kmers_pca   0.668644            0.712038           0.701502   \n",
       "chromVAR_motifs_pca  0.532386            0.555077           0.561501   \n",
       "GeneScoring          0.324887           0.0148086           0.409791   \n",
       "GeneScoring_pca      0.439756             0.40738           0.428261   \n",
       "Cicero               0.619445            0.100287           0.559189   \n",
       "Cicero_pca           0.583012            0.642584            0.62543   \n",
       "SnapATAC             0.981576            0.994782            0.99707   \n",
       "Scasat               0.853164            0.887068           0.851743   \n",
       "scABC                0.687807              0.3912           0.600356   \n",
       "SCRAT                0.636607            0.639919            0.62832   \n",
       "SCRAT_pca            0.643916            0.639984           0.628537   \n",
       "\n",
       "                    Homogeneity_HC  \n",
       "Control                   0.846452  \n",
       "BROCKMAN                  0.709716  \n",
       "Cusanovich2018            0.971277  \n",
       "cisTopic                  0.918896  \n",
       "chromVAR_kmers            0.641111  \n",
       "chromVAR_motifs           0.503448  \n",
       "chromVAR_kmers_pca        0.666186  \n",
       "chromVAR_motifs_pca       0.532387  \n",
       "GeneScoring               0.291086  \n",
       "GeneScoring_pca           0.427891  \n",
       "Cicero                    0.530166  \n",
       "Cicero_pca                0.574815  \n",
       "SnapATAC                  0.981671  \n",
       "Scasat                    0.853761  \n",
       "scABC                     0.687662  \n",
       "SCRAT                     0.634003  \n",
       "SCRAT_pca                 0.641292  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:ATACseq_clustering]",
   "language": "python",
   "name": "conda-env-ATACseq_clustering-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}