{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scanpy as sc\n", "import os\n", "from sklearn.cluster import KMeans\n", "from sklearn.cluster import AgglomerativeClustering\n", "from sklearn.metrics.cluster import adjusted_rand_score\n", "from sklearn.metrics.cluster import adjusted_mutual_info_score\n", "from sklearn.metrics.cluster import homogeneity_score\n", "import rpy2.robjects as robjects\n", "from rpy2.robjects import pandas2ri" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n", " 'AMI_Louvain','AMI_kmeans','AMI_HC',\n", " 'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "workdir = './output/'\n", "path_fm = os.path.join(workdir,'feature_matrices/')\n", "path_clusters = os.path.join(workdir,'clusters/')\n", "path_metrics = os.path.join(workdir,'metrics/')\n", "os.system('mkdir -p '+path_clusters)\n", "os.system('mkdir -p '+path_metrics)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n", "num_clusters = len(np.unique(metadata['label']))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n", "len(files)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['FM_Control_BMcov1000.rds',\n", " 'FM_BROCKMAN_BMcov1000.rds',\n", " 'FM_Cusanovich2018_BMcov1000.rds',\n", " 'FM_cisTopic_BMcov1000.rds',\n", " 'FM_chromVAR_BMcov1000_kmers.rds',\n", " 'FM_chromVAR_BMcov1000_motifs.rds',\n", " 'FM_chromVAR_BMcov1000_kmers_pca.rds',\n", " 'FM_chromVAR_BMcov1000_motifs_pca.rds',\n", " 'FM_GeneScoring_BMcov1000.rds',\n", " 'FM_GeneScoring_BMcov1000_pca.rds',\n", " 'FM_Cicero_BMcov1000.rds',\n", " 'FM_Cicero_BMcov1000_pca.rds',\n", " 'FM_SnapATAC_BMcov1000.rds',\n", " 'FM_Scasat_BMcov1000.rds',\n", " 'FM_scABC_BMcov1000.rds',\n", " 'FM_SCRAT_BMcov1000.rds',\n", " 'FM_SCRAT_BMcov1000_pca.rds']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n", " this_step = 0\n", " this_min = float(range_min)\n", " this_max = float(range_max)\n", " while this_step < max_steps:\n", " print('step ' + str(this_step))\n", " this_resolution = this_min + ((this_max-this_min)/2)\n", " sc.tl.louvain(adata,resolution=this_resolution)\n", " this_clusters = adata.obs['louvain'].nunique()\n", " \n", " print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n", " \n", " if this_clusters > n_cluster:\n", " this_max = this_resolution\n", " elif this_clusters < n_cluster:\n", " this_min = this_resolution\n", " else:\n", " return(this_resolution, adata)\n", " this_step += 1\n", " \n", " print('Cannot find the number of clusters')\n", " print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Control\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 4 at resolution 0.75\n", "step 2\n", "got 4 at resolution 1.125\n", "step 3\n", "got 6 at resolution 1.3125\n", "BROCKMAN\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 6 at resolution 0.75\n", "Cusanovich2018\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "cisTopic\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 4 at resolution 0.75\n", "step 2\n", "got 6 at resolution 1.125\n", "chromVAR_kmers\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 4 at resolution 1.125\n", "step 3\n", "got 4 at resolution 1.3125\n", "step 4\n", "got 4 at resolution 1.40625\n", "step 5\n", "got 5 at resolution 1.453125\n", "step 6\n", "got 4 at resolution 1.4765625\n", "step 7\n", "got 5 at resolution 1.48828125\n", "step 8\n", "got 5 at resolution 1.494140625\n", "step 9\n", "got 5 at resolution 1.4970703125\n", "step 10\n", "got 5 at resolution 1.49853515625\n", "step 11\n", "got 7 at resolution 1.499267578125\n", "step 12\n", "got 5 at resolution 1.4989013671875\n", "step 13\n", "got 7 at resolution 1.49908447265625\n", "step 14\n", "got 5 at resolution 1.498992919921875\n", "step 15\n", "got 7 at resolution 1.4990386962890625\n", "step 16\n", "got 7 at resolution 1.4990158081054688\n", "step 17\n", "got 7 at resolution 1.4990043640136719\n", "step 18\n", "got 7 at resolution 1.4989986419677734\n", "step 19\n", "got 5 at resolution 1.4989957809448242\n", "Cannot find the number of clusters\n", "Clustering solution from last iteration is used:5 at resolution 1.4989957809448242\n", "chromVAR_motifs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 4 at resolution 1.125\n", "step 3\n", "got 7 at resolution 1.3125\n", "step 4\n", "got 5 at resolution 1.21875\n", "step 5\n", "got 6 at resolution 1.265625\n", "chromVAR_kmers_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 4 at resolution 1.125\n", "step 3\n", "got 4 at resolution 1.3125\n", "step 4\n", "got 6 at resolution 1.40625\n", "chromVAR_motifs_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 5 at resolution 1.125\n", "step 3\n", "got 7 at resolution 1.3125\n", "step 4\n", "got 6 at resolution 1.21875\n", "GeneScoring\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 30 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 14 at resolution 1.125\n", "step 3\n", "got 5 at resolution 0.9375\n", "step 4\n", "got 9 at resolution 1.03125\n", "step 5\n", "got 7 at resolution 0.984375\n", "step 6\n", "got 7 at resolution 0.9609375\n", "step 7\n", "got 7 at resolution 0.94921875\n", "step 8\n", "got 6 at resolution 0.943359375\n", "GeneScoring_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 13 at resolution 1.5\n", "step 1\n", "got 11 at resolution 0.75\n", "step 2\n", "got 7 at resolution 0.375\n", "step 3\n", "got 7 at resolution 0.1875\n", "step 4\n", "got 4 at resolution 0.09375\n", "step 5\n", "got 5 at resolution 0.140625\n", "step 6\n", "got 6 at resolution 0.1640625\n", "Cicero\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 37 at resolution 1.5\n", "step 1\n", "got 1 at resolution 0.75\n", "step 2\n", "got 19 at resolution 1.125\n", "step 3\n", "got 8 at resolution 0.9375\n", "step 4\n", "got 2 at resolution 0.84375\n", "step 5\n", "got 4 at resolution 0.890625\n", "step 6\n", "got 4 at resolution 0.9140625\n", "step 7\n", "got 6 at resolution 0.92578125\n", "Cicero_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 12 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 8 at resolution 1.125\n", "step 3\n", "got 5 at resolution 0.9375\n", "step 4\n", "got 7 at resolution 1.03125\n", "step 5\n", "got 6 at resolution 0.984375\n", "SnapATAC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "Scasat\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 4 at resolution 0.75\n", "step 2\n", "got 4 at resolution 1.125\n", "step 3\n", "got 6 at resolution 1.3125\n", "scABC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 29 at resolution 1.5\n", "step 1\n", "got 2 at resolution 0.75\n", "step 2\n", "got 10 at resolution 1.125\n", "step 3\n", "got 3 at resolution 0.9375\n", "step 4\n", "got 5 at resolution 1.03125\n", "step 5\n", "got 8 at resolution 1.078125\n", "step 6\n", "got 8 at resolution 1.0546875\n", "step 7\n", "got 6 at resolution 1.04296875\n", "SCRAT\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 4 at resolution 0.75\n", "step 2\n", "got 5 at resolution 1.125\n", "step 3\n", "got 5 at resolution 1.3125\n", "step 4\n", "got 7 at resolution 1.40625\n", "step 5\n", "got 6 at resolution 1.359375\n", "SCRAT_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 6 at resolution 0.75\n" ] } ], "source": [ "for file in files:\n", " file_split = file.split('_')\n", " method = file_split[1]\n", " dataset = file_split[2].split('.')[0]\n", " if(len(file_split)>3):\n", " method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n", " print(method)\n", "\n", " pandas2ri.activate()\n", " readRDS = robjects.r['readRDS']\n", " df_rds = readRDS(os.path.join(path_fm,file))\n", " fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n", " fm_mat.columns = metadata.index\n", " \n", " adata = sc.AnnData(fm_mat.T)\n", " adata.var_names_make_unique()\n", " adata.obs = metadata.loc[adata.obs.index,]\n", " df_metrics.loc[method,] = \"\"\n", " #Louvain\n", " sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n", "# sc.tl.louvain(adata)\n", " getNClusters(adata,n_cluster=num_clusters)\n", " #kmeans\n", " kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n", " adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n", " #hierachical clustering\n", " hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n", " adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n", " #clustering metrics\n", " \n", " #adjusted rank index\n", " ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n", " ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n", " ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n", " #adjusted mutual information\n", " ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n", " ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic') \n", " ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n", " #homogeneity\n", " homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n", " homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n", " homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n", "\n", " df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n", " df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n", " df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc] \n", " adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df_metrics.to_csv(path_metrics+'clustering_scores.csv')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ARI_LouvainARI_kmeansARI_HCAMI_LouvainAMI_kmeansAMI_HCHomogeneity_LouvainHomogeneity_kmeansHomogeneity_HC
Control0.59280.5690880.5887540.7147150.7188950.7235750.6959350.7010950.699836
BROCKMAN0.545740.5001410.4996910.6826540.6428410.659180.678290.6412880.645486
Cusanovich20180.9438460.7451060.7588340.9416530.8513420.8547490.9419080.8251580.813651
cisTopic0.5309530.5293330.497120.6454150.6435130.6297240.6393890.6443560.619491
chromVAR_kmers0.4674630.4307480.4170990.6104380.5777790.5403380.5556030.5770830.537387
chromVAR_motifs0.39060.3698710.2903750.5205550.5106610.439160.5194390.5122550.432029
chromVAR_kmers_pca0.4585820.4921140.4365380.6127270.6227790.5685880.6028430.6247760.563519
chromVAR_motifs_pca0.3909550.3677270.2980850.5242970.488030.4278910.5220270.4895880.417594
GeneScoring0.002671490.1119980.1038370.005706110.206930.1796490.0110550.18030.149194
GeneScoring_pca0.097070.1357820.1064180.1173030.2221230.2007120.1136370.2058340.179901
Cicero0.006878680.1423070.3975340.01117470.3890420.5895520.01685490.2494840.48291
Cicero_pca0.4488620.4433540.4239140.5340530.5663730.5626340.5220340.5474090.50774
SnapATAC0.8049940.8600760.7703610.840690.8744070.8167670.8414530.8750220.816121
Scasat0.6139890.6342010.5904970.7247430.740570.7085250.7153350.7418610.705696
scABC0.204260.4489820.4658180.2474930.5831710.5680880.2295390.5290030.531906
SCRAT0.4445110.4480280.4577450.6109310.5813280.593560.5967250.5821870.592494
SCRAT_pca0.4697480.4050950.4618120.6064730.5851370.5975190.6011290.5719140.591323
\n", "
" ], "text/plain": [ " ARI_Louvain ARI_kmeans ARI_HC AMI_Louvain AMI_kmeans \\\n", "Control 0.5928 0.569088 0.588754 0.714715 0.718895 \n", "BROCKMAN 0.54574 0.500141 0.499691 0.682654 0.642841 \n", "Cusanovich2018 0.943846 0.745106 0.758834 0.941653 0.851342 \n", "cisTopic 0.530953 0.529333 0.49712 0.645415 0.643513 \n", "chromVAR_kmers 0.467463 0.430748 0.417099 0.610438 0.577779 \n", "chromVAR_motifs 0.3906 0.369871 0.290375 0.520555 0.510661 \n", "chromVAR_kmers_pca 0.458582 0.492114 0.436538 0.612727 0.622779 \n", "chromVAR_motifs_pca 0.390955 0.367727 0.298085 0.524297 0.48803 \n", "GeneScoring 0.00267149 0.111998 0.103837 0.00570611 0.20693 \n", "GeneScoring_pca 0.09707 0.135782 0.106418 0.117303 0.222123 \n", "Cicero 0.00687868 0.142307 0.397534 0.0111747 0.389042 \n", "Cicero_pca 0.448862 0.443354 0.423914 0.534053 0.566373 \n", "SnapATAC 0.804994 0.860076 0.770361 0.84069 0.874407 \n", "Scasat 0.613989 0.634201 0.590497 0.724743 0.74057 \n", "scABC 0.20426 0.448982 0.465818 0.247493 0.583171 \n", "SCRAT 0.444511 0.448028 0.457745 0.610931 0.581328 \n", "SCRAT_pca 0.469748 0.405095 0.461812 0.606473 0.585137 \n", "\n", " AMI_HC Homogeneity_Louvain Homogeneity_kmeans \\\n", "Control 0.723575 0.695935 0.701095 \n", "BROCKMAN 0.65918 0.67829 0.641288 \n", "Cusanovich2018 0.854749 0.941908 0.825158 \n", "cisTopic 0.629724 0.639389 0.644356 \n", "chromVAR_kmers 0.540338 0.555603 0.577083 \n", "chromVAR_motifs 0.43916 0.519439 0.512255 \n", "chromVAR_kmers_pca 0.568588 0.602843 0.624776 \n", "chromVAR_motifs_pca 0.427891 0.522027 0.489588 \n", "GeneScoring 0.179649 0.011055 0.1803 \n", "GeneScoring_pca 0.200712 0.113637 0.205834 \n", "Cicero 0.589552 0.0168549 0.249484 \n", "Cicero_pca 0.562634 0.522034 0.547409 \n", "SnapATAC 0.816767 0.841453 0.875022 \n", "Scasat 0.708525 0.715335 0.741861 \n", "scABC 0.568088 0.229539 0.529003 \n", "SCRAT 0.59356 0.596725 0.582187 \n", "SCRAT_pca 0.597519 0.601129 0.571914 \n", "\n", " Homogeneity_HC \n", "Control 0.699836 \n", "BROCKMAN 0.645486 \n", "Cusanovich2018 0.813651 \n", "cisTopic 0.619491 \n", "chromVAR_kmers 0.537387 \n", "chromVAR_motifs 0.432029 \n", "chromVAR_kmers_pca 0.563519 \n", "chromVAR_motifs_pca 0.417594 \n", "GeneScoring 0.149194 \n", "GeneScoring_pca 0.179901 \n", "Cicero 0.48291 \n", "Cicero_pca 0.50774 \n", "SnapATAC 0.816121 \n", "Scasat 0.705696 \n", "scABC 0.531906 \n", "SCRAT 0.592494 \n", "SCRAT_pca 0.591323 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:ATACseq_clustering]", "language": "python", "name": "conda-env-ATACseq_clustering-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }