{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import os\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.cluster import AgglomerativeClustering\n",
    "from sklearn.metrics.cluster import adjusted_rand_score\n",
    "from sklearn.metrics.cluster import adjusted_mutual_info_score\n",
    "from sklearn.metrics.cluster import homogeneity_score\n",
    "import rpy2.robjects as robjects\n",
    "from rpy2.robjects import pandas2ri"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n",
    "                                   'AMI_Louvain','AMI_kmeans','AMI_HC',\n",
    "                                   'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "workdir = './output/'\n",
    "path_fm = os.path.join(workdir,'feature_matrices/')\n",
    "path_clusters = os.path.join(workdir,'clusters/')\n",
    "path_metrics = os.path.join(workdir,'metrics/')\n",
    "os.system('mkdir -p '+path_clusters)\n",
    "os.system('mkdir -p '+path_metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Annotation table indexed by its first column; the number of distinct\n",
    "# values in its 'label' column is the target cluster count used below.\n",
    "metadata = pd.read_csv('./input/metadata.tsv', sep='\\t', index_col=0)\n",
    "num_clusters = np.unique(metadata['label']).size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Feature-matrix files are recognised by their 'FM' file-name prefix.\n",
    "files = list(filter(lambda fname: fname.startswith('FM'), os.listdir(path_fm)))\n",
    "len(files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['FM_Control_BMcov1000.rds',\n",
       " 'FM_BROCKMAN_BMcov1000.rds',\n",
       " 'FM_Cusanovich2018_BMcov1000.rds',\n",
       " 'FM_cisTopic_BMcov1000.rds',\n",
       " 'FM_chromVAR_BMcov1000_kmers.rds',\n",
       " 'FM_chromVAR_BMcov1000_motifs.rds',\n",
       " 'FM_chromVAR_BMcov1000_kmers_pca.rds',\n",
       " 'FM_chromVAR_BMcov1000_motifs_pca.rds',\n",
       " 'FM_GeneScoring_BMcov1000.rds',\n",
       " 'FM_GeneScoring_BMcov1000_pca.rds',\n",
       " 'FM_Cicero_BMcov1000.rds',\n",
       " 'FM_Cicero_BMcov1000_pca.rds',\n",
       " 'FM_SnapATAC_BMcov1000.rds',\n",
       " 'FM_Scasat_BMcov1000.rds',\n",
       " 'FM_scABC_BMcov1000.rds',\n",
       " 'FM_SCRAT_BMcov1000.rds',\n",
       " 'FM_SCRAT_BMcov1000_pca.rds']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the feature-matrix files that the clustering loop below iterates over.\n",
    "files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n",
    "    this_step = 0\n",
    "    this_min = float(range_min)\n",
    "    this_max = float(range_max)\n",
    "    while this_step < max_steps:\n",
    "        print('step ' + str(this_step))\n",
    "        this_resolution = this_min + ((this_max-this_min)/2)\n",
    "        sc.tl.louvain(adata,resolution=this_resolution)\n",
    "        this_clusters = adata.obs['louvain'].nunique()\n",
    "        \n",
    "        print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n",
    "        \n",
    "        if this_clusters > n_cluster:\n",
    "            this_max = this_resolution\n",
    "        elif this_clusters < n_cluster:\n",
    "            this_min = this_resolution\n",
    "        else:\n",
    "            return(this_resolution, adata)\n",
    "        this_step += 1\n",
    "    \n",
    "    print('Cannot find the number of clusters')\n",
    "    print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Control\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 8 at resolution 1.5\n",
      "step 1\n",
      "got 4 at resolution 0.75\n",
      "step 2\n",
      "got 4 at resolution 1.125\n",
      "step 3\n",
      "got 6 at resolution 1.3125\n",
      "BROCKMAN\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 10 at resolution 1.5\n",
      "step 1\n",
      "got 6 at resolution 0.75\n",
      "Cusanovich2018\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "cisTopic\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 8 at resolution 1.5\n",
      "step 1\n",
      "got 4 at resolution 0.75\n",
      "step 2\n",
      "got 6 at resolution 1.125\n",
      "chromVAR_kmers\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 4 at resolution 1.125\n",
      "step 3\n",
      "got 4 at resolution 1.3125\n",
      "step 4\n",
      "got 4 at resolution 1.40625\n",
      "step 5\n",
      "got 5 at resolution 1.453125\n",
      "step 6\n",
      "got 4 at resolution 1.4765625\n",
      "step 7\n",
      "got 5 at resolution 1.48828125\n",
      "step 8\n",
      "got 5 at resolution 1.494140625\n",
      "step 9\n",
      "got 5 at resolution 1.4970703125\n",
      "step 10\n",
      "got 5 at resolution 1.49853515625\n",
      "step 11\n",
      "got 7 at resolution 1.499267578125\n",
      "step 12\n",
      "got 5 at resolution 1.4989013671875\n",
      "step 13\n",
      "got 7 at resolution 1.49908447265625\n",
      "step 14\n",
      "got 5 at resolution 1.498992919921875\n",
      "step 15\n",
      "got 7 at resolution 1.4990386962890625\n",
      "step 16\n",
      "got 7 at resolution 1.4990158081054688\n",
      "step 17\n",
      "got 7 at resolution 1.4990043640136719\n",
      "step 18\n",
      "got 7 at resolution 1.4989986419677734\n",
      "step 19\n",
      "got 5 at resolution 1.4989957809448242\n",
      "Cannot find the number of clusters\n",
      "Clustering solution from last iteration is used:5 at resolution 1.4989957809448242\n",
      "chromVAR_motifs\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 4 at resolution 1.125\n",
      "step 3\n",
      "got 7 at resolution 1.3125\n",
      "step 4\n",
      "got 5 at resolution 1.21875\n",
      "step 5\n",
      "got 6 at resolution 1.265625\n",
      "chromVAR_kmers_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 4 at resolution 1.125\n",
      "step 3\n",
      "got 4 at resolution 1.3125\n",
      "step 4\n",
      "got 6 at resolution 1.40625\n",
      "chromVAR_motifs_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 8 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 5 at resolution 1.125\n",
      "step 3\n",
      "got 7 at resolution 1.3125\n",
      "step 4\n",
      "got 6 at resolution 1.21875\n",
      "GeneScoring\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 30 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 14 at resolution 1.125\n",
      "step 3\n",
      "got 5 at resolution 0.9375\n",
      "step 4\n",
      "got 9 at resolution 1.03125\n",
      "step 5\n",
      "got 7 at resolution 0.984375\n",
      "step 6\n",
      "got 7 at resolution 0.9609375\n",
      "step 7\n",
      "got 7 at resolution 0.94921875\n",
      "step 8\n",
      "got 6 at resolution 0.943359375\n",
      "GeneScoring_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 13 at resolution 1.5\n",
      "step 1\n",
      "got 11 at resolution 0.75\n",
      "step 2\n",
      "got 7 at resolution 0.375\n",
      "step 3\n",
      "got 7 at resolution 0.1875\n",
      "step 4\n",
      "got 4 at resolution 0.09375\n",
      "step 5\n",
      "got 5 at resolution 0.140625\n",
      "step 6\n",
      "got 6 at resolution 0.1640625\n",
      "Cicero\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 37 at resolution 1.5\n",
      "step 1\n",
      "got 1 at resolution 0.75\n",
      "step 2\n",
      "got 19 at resolution 1.125\n",
      "step 3\n",
      "got 8 at resolution 0.9375\n",
      "step 4\n",
      "got 2 at resolution 0.84375\n",
      "step 5\n",
      "got 4 at resolution 0.890625\n",
      "step 6\n",
      "got 4 at resolution 0.9140625\n",
      "step 7\n",
      "got 6 at resolution 0.92578125\n",
      "Cicero_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 12 at resolution 1.5\n",
      "step 1\n",
      "got 3 at resolution 0.75\n",
      "step 2\n",
      "got 8 at resolution 1.125\n",
      "step 3\n",
      "got 5 at resolution 0.9375\n",
      "step 4\n",
      "got 7 at resolution 1.03125\n",
      "step 5\n",
      "got 6 at resolution 0.984375\n",
      "SnapATAC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 6 at resolution 1.5\n",
      "Scasat\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 4 at resolution 0.75\n",
      "step 2\n",
      "got 4 at resolution 1.125\n",
      "step 3\n",
      "got 6 at resolution 1.3125\n",
      "scABC\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 29 at resolution 1.5\n",
      "step 1\n",
      "got 2 at resolution 0.75\n",
      "step 2\n",
      "got 10 at resolution 1.125\n",
      "step 3\n",
      "got 3 at resolution 0.9375\n",
      "step 4\n",
      "got 5 at resolution 1.03125\n",
      "step 5\n",
      "got 8 at resolution 1.078125\n",
      "step 6\n",
      "got 8 at resolution 1.0546875\n",
      "step 7\n",
      "got 6 at resolution 1.04296875\n",
      "SCRAT\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 7 at resolution 1.5\n",
      "step 1\n",
      "got 4 at resolution 0.75\n",
      "step 2\n",
      "got 5 at resolution 1.125\n",
      "step 3\n",
      "got 5 at resolution 1.3125\n",
      "step 4\n",
      "got 7 at resolution 1.40625\n",
      "step 5\n",
      "got 6 at resolution 1.359375\n",
      "SCRAT_pca\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
      "  res = PandasDataFrame.from_items(items)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0\n",
      "got 10 at resolution 1.5\n",
      "step 1\n",
      "got 6 at resolution 0.75\n"
     ]
    }
   ],
   "source": [
    "# The R <-> pandas conversion and the readRDS handle do not depend on the\n",
    "# file being processed, so they are set up once instead of on every iteration.\n",
    "pandas2ri.activate()\n",
    "readRDS = robjects.r['readRDS']\n",
    "\n",
    "# (metric prefix of the df_metrics column, scoring function(true, predicted))\n",
    "scorers = [\n",
    "    ('ARI', adjusted_rand_score),  # adjusted Rand index\n",
    "    ('AMI', lambda truth, pred: adjusted_mutual_info_score(truth, pred, average_method='arithmetic')),\n",
    "    ('Homogeneity', homogeneity_score),\n",
    "]\n",
    "# (clustering suffix of the df_metrics column, adata.obs column with the labels)\n",
    "clusterings = [('Louvain', 'louvain'), ('kmeans', 'kmeans'), ('HC', 'hc')]\n",
    "\n",
    "for file in files:\n",
    "    # File names are split on '_': part 1 is the method, part 2 the dataset,\n",
    "    # and any further parts (e.g. 'kmers', 'pca') are appended to the method name.\n",
    "    file_split = file.split('_')\n",
    "    method = file_split[1]\n",
    "    dataset = file_split[2].split('.')[0]\n",
    "    if(len(file_split)>3):\n",
    "        method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n",
    "    print(method)\n",
    "\n",
    "    # Load the feature matrix from the .rds file and convert it to pandas.\n",
    "    df_rds = readRDS(os.path.join(path_fm,file))\n",
    "    fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n",
    "    # NOTE(review): assumes the matrix columns are in the same order as the\n",
    "    # rows of metadata - confirm against the scripts that write the matrices.\n",
    "    fm_mat.columns = metadata.index\n",
    "\n",
    "    # Transpose so that the metadata entries become the AnnData observations.\n",
    "    adata = sc.AnnData(fm_mat.T)\n",
    "    adata.var_names_make_unique()\n",
    "    adata.obs = metadata.loc[adata.obs.index,]\n",
    "\n",
    "    # Louvain: search for a resolution giving num_clusters clusters;\n",
    "    # the labels end up in adata.obs['louvain'].\n",
    "    sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n",
    "    getNClusters(adata,n_cluster=num_clusters)\n",
    "    # k-means\n",
    "    kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n",
    "    adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n",
    "    # hierarchical clustering\n",
    "    hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n",
    "    adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n",
    "\n",
    "    # Clustering metrics against the reference labels. Each score is written\n",
    "    # straight into its cell, which also creates the row for a new method, so\n",
    "    # no placeholder row of empty strings is needed.\n",
    "    for metric_name, scorer in scorers:\n",
    "        for suffix, obs_key in clusterings:\n",
    "            df_metrics.loc[method, metric_name + '_' + suffix] = scorer(adata.obs['label'], adata.obs[obs_key])\n",
    "\n",
    "    adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the score table next to the other outputs.\n",
    "df_metrics.to_csv(os.path.join(path_metrics, 'clustering_scores.csv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ARI_Louvain</th>\n",
       "      <th>ARI_kmeans</th>\n",
       "      <th>ARI_HC</th>\n",
       "      <th>AMI_Louvain</th>\n",
       "      <th>AMI_kmeans</th>\n",
       "      <th>AMI_HC</th>\n",
       "      <th>Homogeneity_Louvain</th>\n",
       "      <th>Homogeneity_kmeans</th>\n",
       "      <th>Homogeneity_HC</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Control</th>\n",
       "      <td>0.5928</td>\n",
       "      <td>0.569088</td>\n",
       "      <td>0.588754</td>\n",
       "      <td>0.714715</td>\n",
       "      <td>0.718895</td>\n",
       "      <td>0.723575</td>\n",
       "      <td>0.695935</td>\n",
       "      <td>0.701095</td>\n",
       "      <td>0.699836</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BROCKMAN</th>\n",
       "      <td>0.54574</td>\n",
       "      <td>0.500141</td>\n",
       "      <td>0.499691</td>\n",
       "      <td>0.682654</td>\n",
       "      <td>0.642841</td>\n",
       "      <td>0.65918</td>\n",
       "      <td>0.67829</td>\n",
       "      <td>0.641288</td>\n",
       "      <td>0.645486</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cusanovich2018</th>\n",
       "      <td>0.943846</td>\n",
       "      <td>0.745106</td>\n",
       "      <td>0.758834</td>\n",
       "      <td>0.941653</td>\n",
       "      <td>0.851342</td>\n",
       "      <td>0.854749</td>\n",
       "      <td>0.941908</td>\n",
       "      <td>0.825158</td>\n",
       "      <td>0.813651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cisTopic</th>\n",
       "      <td>0.530953</td>\n",
       "      <td>0.529333</td>\n",
       "      <td>0.49712</td>\n",
       "      <td>0.645415</td>\n",
       "      <td>0.643513</td>\n",
       "      <td>0.629724</td>\n",
       "      <td>0.639389</td>\n",
       "      <td>0.644356</td>\n",
       "      <td>0.619491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_kmers</th>\n",
       "      <td>0.467463</td>\n",
       "      <td>0.430748</td>\n",
       "      <td>0.417099</td>\n",
       "      <td>0.610438</td>\n",
       "      <td>0.577779</td>\n",
       "      <td>0.540338</td>\n",
       "      <td>0.555603</td>\n",
       "      <td>0.577083</td>\n",
       "      <td>0.537387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_motifs</th>\n",
       "      <td>0.3906</td>\n",
       "      <td>0.369871</td>\n",
       "      <td>0.290375</td>\n",
       "      <td>0.520555</td>\n",
       "      <td>0.510661</td>\n",
       "      <td>0.43916</td>\n",
       "      <td>0.519439</td>\n",
       "      <td>0.512255</td>\n",
       "      <td>0.432029</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_kmers_pca</th>\n",
       "      <td>0.458582</td>\n",
       "      <td>0.492114</td>\n",
       "      <td>0.436538</td>\n",
       "      <td>0.612727</td>\n",
       "      <td>0.622779</td>\n",
       "      <td>0.568588</td>\n",
       "      <td>0.602843</td>\n",
       "      <td>0.624776</td>\n",
       "      <td>0.563519</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chromVAR_motifs_pca</th>\n",
       "      <td>0.390955</td>\n",
       "      <td>0.367727</td>\n",
       "      <td>0.298085</td>\n",
       "      <td>0.524297</td>\n",
       "      <td>0.48803</td>\n",
       "      <td>0.427891</td>\n",
       "      <td>0.522027</td>\n",
       "      <td>0.489588</td>\n",
       "      <td>0.417594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GeneScoring</th>\n",
       "      <td>0.00267149</td>\n",
       "      <td>0.111998</td>\n",
       "      <td>0.103837</td>\n",
       "      <td>0.00570611</td>\n",
       "      <td>0.20693</td>\n",
       "      <td>0.179649</td>\n",
       "      <td>0.011055</td>\n",
       "      <td>0.1803</td>\n",
       "      <td>0.149194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GeneScoring_pca</th>\n",
       "      <td>0.09707</td>\n",
       "      <td>0.135782</td>\n",
       "      <td>0.106418</td>\n",
       "      <td>0.117303</td>\n",
       "      <td>0.222123</td>\n",
       "      <td>0.200712</td>\n",
       "      <td>0.113637</td>\n",
       "      <td>0.205834</td>\n",
       "      <td>0.179901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cicero</th>\n",
       "      <td>0.00687868</td>\n",
       "      <td>0.142307</td>\n",
       "      <td>0.397534</td>\n",
       "      <td>0.0111747</td>\n",
       "      <td>0.389042</td>\n",
       "      <td>0.589552</td>\n",
       "      <td>0.0168549</td>\n",
       "      <td>0.249484</td>\n",
       "      <td>0.48291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cicero_pca</th>\n",
       "      <td>0.448862</td>\n",
       "      <td>0.443354</td>\n",
       "      <td>0.423914</td>\n",
       "      <td>0.534053</td>\n",
       "      <td>0.566373</td>\n",
       "      <td>0.562634</td>\n",
       "      <td>0.522034</td>\n",
       "      <td>0.547409</td>\n",
       "      <td>0.50774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SnapATAC</th>\n",
       "      <td>0.804994</td>\n",
       "      <td>0.860076</td>\n",
       "      <td>0.770361</td>\n",
       "      <td>0.84069</td>\n",
       "      <td>0.874407</td>\n",
       "      <td>0.816767</td>\n",
       "      <td>0.841453</td>\n",
       "      <td>0.875022</td>\n",
       "      <td>0.816121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Scasat</th>\n",
       "      <td>0.613989</td>\n",
       "      <td>0.634201</td>\n",
       "      <td>0.590497</td>\n",
       "      <td>0.724743</td>\n",
       "      <td>0.74057</td>\n",
       "      <td>0.708525</td>\n",
       "      <td>0.715335</td>\n",
       "      <td>0.741861</td>\n",
       "      <td>0.705696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scABC</th>\n",
       "      <td>0.20426</td>\n",
       "      <td>0.448982</td>\n",
       "      <td>0.465818</td>\n",
       "      <td>0.247493</td>\n",
       "      <td>0.583171</td>\n",
       "      <td>0.568088</td>\n",
       "      <td>0.229539</td>\n",
       "      <td>0.529003</td>\n",
       "      <td>0.531906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SCRAT</th>\n",
       "      <td>0.444511</td>\n",
       "      <td>0.448028</td>\n",
       "      <td>0.457745</td>\n",
       "      <td>0.610931</td>\n",
       "      <td>0.581328</td>\n",
       "      <td>0.59356</td>\n",
       "      <td>0.596725</td>\n",
       "      <td>0.582187</td>\n",
       "      <td>0.592494</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SCRAT_pca</th>\n",
       "      <td>0.469748</td>\n",
       "      <td>0.405095</td>\n",
       "      <td>0.461812</td>\n",
       "      <td>0.606473</td>\n",
       "      <td>0.585137</td>\n",
       "      <td>0.597519</td>\n",
       "      <td>0.601129</td>\n",
       "      <td>0.571914</td>\n",
       "      <td>0.591323</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    ARI_Louvain ARI_kmeans    ARI_HC AMI_Louvain AMI_kmeans  \\\n",
       "Control                  0.5928   0.569088  0.588754    0.714715   0.718895   \n",
       "BROCKMAN                0.54574   0.500141  0.499691    0.682654   0.642841   \n",
       "Cusanovich2018         0.943846   0.745106  0.758834    0.941653   0.851342   \n",
       "cisTopic               0.530953   0.529333   0.49712    0.645415   0.643513   \n",
       "chromVAR_kmers         0.467463   0.430748  0.417099    0.610438   0.577779   \n",
       "chromVAR_motifs          0.3906   0.369871  0.290375    0.520555   0.510661   \n",
       "chromVAR_kmers_pca     0.458582   0.492114  0.436538    0.612727   0.622779   \n",
       "chromVAR_motifs_pca    0.390955   0.367727  0.298085    0.524297    0.48803   \n",
       "GeneScoring          0.00267149   0.111998  0.103837  0.00570611    0.20693   \n",
       "GeneScoring_pca         0.09707   0.135782  0.106418    0.117303   0.222123   \n",
       "Cicero               0.00687868   0.142307  0.397534   0.0111747   0.389042   \n",
       "Cicero_pca             0.448862   0.443354  0.423914    0.534053   0.566373   \n",
       "SnapATAC               0.804994   0.860076  0.770361     0.84069   0.874407   \n",
       "Scasat                 0.613989   0.634201  0.590497    0.724743    0.74057   \n",
       "scABC                   0.20426   0.448982  0.465818    0.247493   0.583171   \n",
       "SCRAT                  0.444511   0.448028  0.457745    0.610931   0.581328   \n",
       "SCRAT_pca              0.469748   0.405095  0.461812    0.606473   0.585137   \n",
       "\n",
       "                       AMI_HC Homogeneity_Louvain Homogeneity_kmeans  \\\n",
       "Control              0.723575            0.695935           0.701095   \n",
       "BROCKMAN              0.65918             0.67829           0.641288   \n",
       "Cusanovich2018       0.854749            0.941908           0.825158   \n",
       "cisTopic             0.629724            0.639389           0.644356   \n",
       "chromVAR_kmers       0.540338            0.555603           0.577083   \n",
       "chromVAR_motifs       0.43916            0.519439           0.512255   \n",
       "chromVAR_kmers_pca   0.568588            0.602843           0.624776   \n",
       "chromVAR_motifs_pca  0.427891            0.522027           0.489588   \n",
       "GeneScoring          0.179649            0.011055             0.1803   \n",
       "GeneScoring_pca      0.200712            0.113637           0.205834   \n",
       "Cicero               0.589552           0.0168549           0.249484   \n",
       "Cicero_pca           0.562634            0.522034           0.547409   \n",
       "SnapATAC             0.816767            0.841453           0.875022   \n",
       "Scasat               0.708525            0.715335           0.741861   \n",
       "scABC                0.568088            0.229539           0.529003   \n",
       "SCRAT                 0.59356            0.596725           0.582187   \n",
       "SCRAT_pca            0.597519            0.601129           0.571914   \n",
       "\n",
       "                    Homogeneity_HC  \n",
       "Control                   0.699836  \n",
       "BROCKMAN                  0.645486  \n",
       "Cusanovich2018            0.813651  \n",
       "cisTopic                  0.619491  \n",
       "chromVAR_kmers            0.537387  \n",
       "chromVAR_motifs           0.432029  \n",
       "chromVAR_kmers_pca        0.563519  \n",
       "chromVAR_motifs_pca       0.417594  \n",
       "GeneScoring               0.149194  \n",
       "GeneScoring_pca           0.179901  \n",
       "Cicero                     0.48291  \n",
       "Cicero_pca                 0.50774  \n",
       "SnapATAC                  0.816121  \n",
       "Scasat                    0.705696  \n",
       "scABC                     0.531906  \n",
       "SCRAT                     0.592494  \n",
       "SCRAT_pca                 0.591323  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the table of clustering scores (one row per feature-matrix method).\n",
    "df_metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:ATACseq_clustering]",
   "language": "python",
   "name": "conda-env-ATACseq_clustering-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}