{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import os\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.cluster import AgglomerativeClustering\n",
    "from sklearn.metrics.cluster import adjusted_rand_score\n",
    "from sklearn.metrics.cluster import adjusted_mutual_info_score\n",
    "from sklearn.metrics.cluster import homogeneity_score\n",
    "import rpy2.robjects as robjects\n",
    "from rpy2.robjects import pandas2ri"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n",
    "                                   'AMI_Louvain','AMI_kmeans','AMI_HC',\n",
    "                                   'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "workdir = './output/'\n",
    "path_fm = os.path.join(workdir,'feature_matrices/')\n",
    "path_clusters = os.path.join(workdir,'clusters/')\n",
    "path_metrics = os.path.join(workdir,'metrics/')\n",
    "os.system('mkdir -p '+path_clusters)\n",
    "os.system('mkdir -p '+path_metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    }
   ],
   "source": [
    "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n",
    "num_clusters = len(np.unique(metadata['label']))\n",
    "print(num_clusters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n",
    "len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['FM_ChromVAR_buenrostro2018bulkpeaks_kmers.rds',\n",
       " 'FM_ChromVAR_buenrostro2018bulkpeaks_motifs.rds',\n",
       " 'FM_cisTopic_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_Cusanovich2018_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_Control_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_GeneScoring_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_Scasat_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_scABC_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_Cicero_buenrostro2018bulkpeaks.rds',\n",
       " 'FM_ChromVAR_buenrostro2018bulkpeaks_kmers_pca.rds',\n",
       " 'FM_ChromVAR_buenrostro2018bulkpeaks_motifs_pca.rds',\n",
       " 'FM_GeneScoring_buenrostro2018bulkpeaks_pca.rds',\n",
       " 'FM_Cicero_buenrostro2018bulkpeaks_pca.rds']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n",
    "    this_step = 0\n",
    "    this_min = float(range_min)\n",
    "    this_max = float(range_max)\n",
    "    while this_step < max_steps:\n",
    "        print('step ' + str(this_step))\n",
    "        this_resolution = this_min + ((this_max-this_min)/2)\n",
    "        sc.tl.louvain(adata,resolution=this_resolution)\n",
    "        this_clusters = adata.obs['louvain'].nunique()\n",
    "        \n",
    "        print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n",
    "        \n",
    "        if this_clusters > n_cluster:\n",
    "            this_max = this_resolution\n",
    "        elif this_clusters < n_cluster:\n",
    "            this_min = this_resolution\n",
    "        else:\n",
    "            return(this_resolution, adata)\n",
    "        this_step += 1\n",
    "    \n",
    "    print('Cannot find the number of clusters')\n",
    "    print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ChromVAR_kmers\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 9 at resolution 1.5\n",
      "step 1\n",
      "got 14 at resolution 2.25\n",
      "step 2\n",
      "got 10 at resolution 1.875\n",
      "ChromVAR_motifs\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 10 at resolution 1.5\n",
      "cisTopic\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 18 at resolution 1.5\n",
      "step 1\n",
      "got 13 at resolution 0.75\n",
      "step 2\n",
      "got 9 at resolution 0.375\n",
      "step 3\n",
      "got 13 at resolution 0.5625\n",
      "step 4\n",
      "got 10 at resolution 0.46875\n",
      "Cusanovich2018\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 15 at resolution 1.5\n",
      "step 1\n",
      "got 11 at resolution 0.75\n",
      "step 2\n",
      "got 7 at resolution 0.375\n",
      "step 3\n",
      "got 10 at resolution 0.5625\n",
      "Control\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 19 at resolution 1.5\n",
      "step 1\n",
      "got 11 at resolution 0.75\n",
      "step 2\n",
      "got 8 at resolution 0.375\n",
      "step 3\n",
      "got 10 at resolution 0.5625\n",
      "GeneScoring\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 8 at resolution 1.5\n",
      "step 1\n",
      "got 33 at resolution 2.25\n",
      "step 2\n",
      "got 21 at resolution 1.875\n",
      "step 3\n",
      "got 13 at resolution 1.6875\n",
      "step 4\n",
      "got 13 at resolution 1.59375\n",
      "step 5\n",
      "got 11 at resolution 1.546875\n",
      "step 6\n",
      "got 9 at resolution 1.5234375\n",
      "step 7\n",
      "got 9 at resolution 1.53515625\n",
      "step 8\n",
      "got 10 at resolution 1.541015625\n",
      "Scasat\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 21 at resolution 1.5\n",
      "step 1\n",
      "got 14 at resolution 0.75\n",
      "step 2\n",
      "got 9 at resolution 0.375\n",
      "step 3\n",
      "got 11 at resolution 0.5625\n",
      "step 4\n",
      "got 11 at resolution 0.46875\n",
      "step 5\n",
      "got 10 at resolution 0.421875\n",
      "scABC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 62 at resolution 1.5\n",
      "step 1\n",
      "got 2 at resolution 0.75\n",
      "step 2\n",
      "got 11 at resolution 1.125\n",
      "step 3\n",
      "got 4 at resolution 0.9375\n",
      "step 4\n",
      "got 7 at resolution 1.03125\n",
      "step 5\n",
      "got 10 at resolution 1.078125\n",
      "Cicero\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 89 at resolution 1.5\n",
      "step 1\n",
      "got 1 at resolution 0.75\n",
      "step 2\n",
      "got 15 at resolution 1.125\n",
      "step 3\n",
      "got 7 at resolution 0.9375\n",
      "step 4\n",
      "got 12 at resolution 1.03125\n",
      "step 5\n",
      "got 11 at resolution 0.984375\n",
      "step 6\n",
      "got 9 at resolution 0.9609375\n",
      "step 7\n",
      "got 8 at resolution 0.97265625\n",
      "step 8\n",
      "got 10 at resolution 0.978515625\n",
      "ChromVAR_kmers_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 12 at resolution 1.5\n",
      "step 1\n",
      "got 7 at resolution 0.75\n",
      "step 2\n",
      "got 10 at resolution 1.125\n",
      "ChromVAR_motifs_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 12 at resolution 1.5\n",
      "step 1\n",
      "got 7 at resolution 0.75\n",
      "step 2\n",
      "got 8 at resolution 1.125\n",
      "step 3\n",
      "got 9 at resolution 1.3125\n",
      "step 4\n",
      "got 10 at resolution 1.40625\n",
      "GeneScoring_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 14 at resolution 1.5\n",
      "step 1\n",
      "got 8 at resolution 0.75\n",
      "step 2\n",
      "got 12 at resolution 1.125\n",
      "step 3\n",
      "got 9 at resolution 0.9375\n",
      "step 4\n",
      "got 10 at resolution 1.03125\n",
      "Cicero_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 9 at resolution 1.5\n",
      "step 1\n",
      "got 19 at resolution 2.25\n",
      "step 2\n",
      "got 11 at resolution 1.875\n",
      "step 3\n",
      "got 9 at resolution 1.6875\n",
      "step 4\n",
      "got 12 at resolution 1.78125\n",
      "step 5\n",
      "got 12 at resolution 1.734375\n",
      "step 6\n",
      "got 11 at resolution 1.7109375\n",
      "step 7\n",
      "got 10 at resolution 1.69921875\n"
     ]
    }
   ],
   "source": [
    "for file in files:\n",
    "    file_split = file.split('_')\n",
    "    method = file_split[1]\n",
    "    dataset = file_split[2].split('.')[0]\n",
    "    if(len(file_split)>3):\n",
    "        method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n",
    "    print(method)\n",
    "\n",
    "    pandas2ri.activate()\n",
    "    readRDS = robjects.r['readRDS']\n",
    "    df_rds = readRDS(os.path.join(path_fm,file))\n",
    "    fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n",
    "    fm_mat.fillna(0,inplace=True)\n",
    "    fm_mat.columns = metadata.index\n",
    "    \n",
    "    adata = sc.AnnData(fm_mat.T)\n",
    "    adata.var_names_make_unique()\n",
    "    adata.obs = metadata.loc[adata.obs.index,]\n",
    "    df_metrics.loc[method,] = \"\"\n",
    "    #Louvain\n",
    "    sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n",
    "#     sc.tl.louvain(adata)\n",
    "    getNClusters(adata,n_cluster=num_clusters)\n",
    "    #kmeans\n",
    "    kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n",
    "    adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n",
    "    #hierachical clustering\n",
    "    hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n",
    "    adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n",
    "    #clustering metrics\n",
    "    \n",
    "    #adjusted rank index\n",
    "    ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n",
    "    ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n",
    "    ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n",
    "    #adjusted mutual information\n",
    "    ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n",
    "    ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic')   \n",
    "    ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n",
    "    #homogeneity\n",
    "    homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n",
    "    homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n",
    "    homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n",
    "\n",
    "    df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n",
    "    df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n",
    "    df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc] \n",
    "    adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_metrics.to_csv(path_metrics+'clustering_scores.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ARI_Louvain</th>\n",
       "      <th>ARI_kmeans</th>\n",
       "      <th>ARI_HC</th>\n",
       "      <th>AMI_Louvain</th>\n",
       "      <th>AMI_kmeans</th>\n",
       "      <th>AMI_HC</th>\n",
       "      <th>Homogeneity_Louvain</th>\n",
       "      <th>Homogeneity_kmeans</th>\n",
       "      <th>Homogeneity_HC</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>ChromVAR_kmers</th>\n",
       "      <td>0.393487</td>\n",
       "      <td>0.260785</td>\n",
       "      <td>0.225031</td>\n",
       "      <td>0.524566</td>\n",
       "      <td>0.413008</td>\n",
       "      <td>0.356731</td>\n",
       "      <td>0.543092</td>\n",
       "      <td>0.370503</td>\n",
       "      <td>0.301594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ChromVAR_motifs</th>\n",
       "      <td>0.319824</td>\n",
       "      <td>0.209636</td>\n",
       "      <td>0.323051</td>\n",
       "      <td>0.510618</td>\n",
       "      <td>0.413654</td>\n",
       "      <td>0.464172</td>\n",
       "      <td>0.534893</td>\n",
       "      <td>0.429699</td>\n",
       "      <td>0.437689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cisTopic</th>\n",
       "      <td>0.551506</td>\n",
       "      <td>0.351849</td>\n",
       "      <td>0.383048</td>\n",
       "      <td>0.661205</td>\n",
       "      <td>0.555908</td>\n",
       "      <td>0.573953</td>\n",
       "      <td>0.674538</td>\n",
       "      <td>0.585062</td>\n",
       "      <td>0.59729</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cusanovich2018</th>\n",
       "      <td>0.490138</td>\n",
       "      <td>-0.00194164</td>\n",
       "      <td>-0.00194164</td>\n",
       "      <td>0.636935</td>\n",
       "      <td>-0.00278841</td>\n",
       "      <td>-0.00278841</td>\n",
       "      <td>0.635598</td>\n",
       "      <td>0.00302188</td>\n",
       "      <td>0.00302188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Control</th>\n",
       "      <td>0.183302</td>\n",
       "      <td>0.0229697</td>\n",
       "      <td>0.0397529</td>\n",
       "      <td>0.363688</td>\n",
       "      <td>0.0314137</td>\n",
       "      <td>0.0673721</td>\n",
       "      <td>0.377179</td>\n",
       "      <td>0.0328729</td>\n",
       "      <td>0.0616285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GeneScoring</th>\n",
       "      <td>0.0403609</td>\n",
       "      <td>0.0179964</td>\n",
       "      <td>0.0255461</td>\n",
       "      <td>0.111789</td>\n",
       "      <td>0.0318159</td>\n",
       "      <td>0.0369079</td>\n",
       "      <td>0.11883</td>\n",
       "      <td>0.0346644</td>\n",
       "      <td>0.0399724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Scasat</th>\n",
       "      <td>0.305007</td>\n",
       "      <td>0.161367</td>\n",
       "      <td>0.161388</td>\n",
       "      <td>0.517868</td>\n",
       "      <td>0.302171</td>\n",
       "      <td>0.378108</td>\n",
       "      <td>0.531279</td>\n",
       "      <td>0.320602</td>\n",
       "      <td>0.383529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scABC</th>\n",
       "      <td>0.020894</td>\n",
       "      <td>0.0109238</td>\n",
       "      <td>0.0327437</td>\n",
       "      <td>0.0737247</td>\n",
       "      <td>0.0190839</td>\n",
       "      <td>0.0682762</td>\n",
       "      <td>0.0780852</td>\n",
       "      <td>0.0196556</td>\n",
       "      <td>0.0635191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cicero</th>\n",
       "      <td>0.033752</td>\n",
       "      <td>-0.00369183</td>\n",
       "      <td>-0.00194164</td>\n",
       "      <td>0.091265</td>\n",
       "      <td>0.0020334</td>\n",
       "      <td>-0.00278841</td>\n",
       "      <td>0.095604</td>\n",
       "      <td>0.00611831</td>\n",
       "      <td>0.00302188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ChromVAR_kmers_pca</th>\n",
       "      <td>0.431822</td>\n",
       "      <td>0.249719</td>\n",
       "      <td>0.238284</td>\n",
       "      <td>0.554208</td>\n",
       "      <td>0.413693</td>\n",
       "      <td>0.370762</td>\n",
       "      <td>0.57097</td>\n",
       "      <td>0.386348</td>\n",
       "      <td>0.313476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ChromVAR_motifs_pca</th>\n",
       "      <td>0.278085</td>\n",
       "      <td>0.20965</td>\n",
       "      <td>0.277897</td>\n",
       "      <td>0.496071</td>\n",
       "      <td>0.392932</td>\n",
       "      <td>0.429734</td>\n",
       "      <td>0.516014</td>\n",
       "      <td>0.387038</td>\n",
       "      <td>0.41759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GeneScoring_pca</th>\n",
       "      <td>0.0231933</td>\n",
       "      <td>0.0252487</td>\n",
       "      <td>0.0252567</td>\n",
       "      <td>0.0785274</td>\n",
       "      <td>0.0365736</td>\n",
       "      <td>0.0399461</td>\n",
       "      <td>0.0913253</td>\n",
       "      <td>0.0397867</td>\n",
       "      <td>0.0429546</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cicero_pca</th>\n",
       "      <td>0.139162</td>\n",
       "      <td>-0.00194164</td>\n",
       "      <td>-0.00194164</td>\n",
       "      <td>0.243913</td>\n",
       "      <td>-0.00278841</td>\n",
       "      <td>-0.00278841</td>\n",
       "      <td>0.255627</td>\n",
       "      <td>0.00302188</td>\n",
       "      <td>0.00302188</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    ARI_Louvain  ARI_kmeans      ARI_HC AMI_Louvain  \\\n",
       "ChromVAR_kmers         0.393487    0.260785    0.225031    0.524566   \n",
       "ChromVAR_motifs        0.319824    0.209636    0.323051    0.510618   \n",
       "cisTopic               0.551506    0.351849    0.383048    0.661205   \n",
       "Cusanovich2018         0.490138 -0.00194164 -0.00194164    0.636935   \n",
       "Control                0.183302   0.0229697   0.0397529    0.363688   \n",
       "GeneScoring           0.0403609   0.0179964   0.0255461    0.111789   \n",
       "Scasat                 0.305007    0.161367    0.161388    0.517868   \n",
       "scABC                  0.020894   0.0109238   0.0327437   0.0737247   \n",
       "Cicero                 0.033752 -0.00369183 -0.00194164    0.091265   \n",
       "ChromVAR_kmers_pca     0.431822    0.249719    0.238284    0.554208   \n",
       "ChromVAR_motifs_pca    0.278085     0.20965    0.277897    0.496071   \n",
       "GeneScoring_pca       0.0231933   0.0252487   0.0252567   0.0785274   \n",
       "Cicero_pca             0.139162 -0.00194164 -0.00194164    0.243913   \n",
       "\n",
       "                     AMI_kmeans      AMI_HC Homogeneity_Louvain  \\\n",
       "ChromVAR_kmers         0.413008    0.356731            0.543092   \n",
       "ChromVAR_motifs        0.413654    0.464172            0.534893   \n",
       "cisTopic               0.555908    0.573953            0.674538   \n",
       "Cusanovich2018      -0.00278841 -0.00278841            0.635598   \n",
       "Control               0.0314137   0.0673721            0.377179   \n",
       "GeneScoring           0.0318159   0.0369079             0.11883   \n",
       "Scasat                 0.302171    0.378108            0.531279   \n",
       "scABC                 0.0190839   0.0682762           0.0780852   \n",
       "Cicero                0.0020334 -0.00278841            0.095604   \n",
       "ChromVAR_kmers_pca     0.413693    0.370762             0.57097   \n",
       "ChromVAR_motifs_pca    0.392932    0.429734            0.516014   \n",
       "GeneScoring_pca       0.0365736   0.0399461           0.0913253   \n",
       "Cicero_pca          -0.00278841 -0.00278841            0.255627   \n",
       "\n",
       "                    Homogeneity_kmeans Homogeneity_HC  \n",
       "ChromVAR_kmers                0.370503       0.301594  \n",
       "ChromVAR_motifs               0.429699       0.437689  \n",
       "cisTopic                      0.585062        0.59729  \n",
       "Cusanovich2018              0.00302188     0.00302188  \n",
       "Control                      0.0328729      0.0616285  \n",
       "GeneScoring                  0.0346644      0.0399724  \n",
       "Scasat                        0.320602       0.383529  \n",
       "scABC                        0.0196556      0.0635191  \n",
       "Cicero                      0.00611831     0.00302188  \n",
       "ChromVAR_kmers_pca            0.386348       0.313476  \n",
       "ChromVAR_motifs_pca           0.387038        0.41759  \n",
       "GeneScoring_pca              0.0397867      0.0429546  \n",
       "Cicero_pca                  0.00302188     0.00302188  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:ATACseq_clustering]",
   "language": "python",
   "name": "conda-env-ATACseq_clustering-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}