{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scanpy as sc\n", "import os\n", "from sklearn.cluster import KMeans\n", "from sklearn.cluster import AgglomerativeClustering\n", "from sklearn.metrics.cluster import adjusted_rand_score\n", "from sklearn.metrics.cluster import adjusted_mutual_info_score\n", "from sklearn.metrics.cluster import homogeneity_score\n", "import rpy2.robjects as robjects\n", "from rpy2.robjects import pandas2ri" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n", " 'AMI_Louvain','AMI_kmeans','AMI_HC',\n", " 'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "workdir = './output/'\n", "path_fm = os.path.join(workdir,'feature_matrices/')\n", "path_clusters = os.path.join(workdir,'clusters/')\n", "path_metrics = os.path.join(workdir,'metrics/')\n", "os.system('mkdir -p '+path_clusters)\n", "os.system('mkdir -p '+path_metrics)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n", "num_clusters = len(np.unique(metadata['label']))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n", "len(files)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['FM_Control_BMnoisyp4.rds',\n", " 'FM_BROCKMAN_BMnoisyp4.rds',\n", " 'FM_Cusanovich2018_BMnoisyp4.rds',\n", " 'FM_cisTopic_BMnoisyp4.rds',\n", " 'FM_chromVAR_BMnoisyp4_kmers.rds',\n", " 'FM_chromVAR_BMnoisyp4_motifs.rds',\n", " 'FM_chromVAR_BMnoisyp4_kmers_pca.rds',\n", " 'FM_chromVAR_BMnoisyp4_motifs_pca.rds',\n", " 'FM_GeneScoring_BMnoisyp4.rds',\n", " 'FM_GeneScoring_BMnoisyp4_pca.rds',\n", " 'FM_Cicero_BMnoisyp4.rds',\n", " 'FM_Cicero_BMnoisyp4_pca.rds',\n", " 'FM_SnapATAC_BMnoisyp4.rds',\n", " 'FM_Scasat_BMnoisyp4.rds',\n", " 'FM_scABC_BMnoisyp4.rds',\n", " 'FM_SCRAT_BMnoisyp4.rds',\n", " 'FM_SCRAT_BMnoisyp4_pca.rds']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n", " this_step = 0\n", " this_min = float(range_min)\n", " this_max = float(range_max)\n", " while this_step < max_steps:\n", " print('step ' + str(this_step))\n", " this_resolution = this_min + ((this_max-this_min)/2)\n", " sc.tl.louvain(adata,resolution=this_resolution)\n", " this_clusters = adata.obs['louvain'].nunique()\n", " \n", " print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n", " \n", " if this_clusters > n_cluster:\n", " this_max = this_resolution\n", " elif this_clusters < n_cluster:\n", " this_min = this_resolution\n", " else:\n", " return(this_resolution, adata)\n", " this_step += 1\n", " \n", " print('Cannot find the number of clusters')\n", " print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Control\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 6 at resolution 1.125\n", "BROCKMAN\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 11 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 8 at resolution 1.125\n", "step 3\n", "got 7 at resolution 0.9375\n", "step 4\n", "got 6 at resolution 0.84375\n", "Cusanovich2018\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "cisTopic\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "chromVAR_kmers\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "chromVAR_motifs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 4 at resolution 1.125\n", "step 3\n", "got 5 at resolution 1.3125\n", "step 4\n", "got 6 at resolution 1.40625\n", "chromVAR_kmers_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "chromVAR_motifs_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "GeneScoring\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 35 at resolution 1.5\n", "step 1\n", "got 2 at resolution 0.75\n", "step 2\n", "got 15 at resolution 1.125\n", "step 3\n", "got 6 at resolution 0.9375\n", "GeneScoring_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 12 at resolution 1.5\n", "step 1\n", "got 9 at resolution 0.75\n", "step 2\n", "got 6 at resolution 0.375\n", "Cicero\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 34 at resolution 1.5\n", "step 1\n", "got 2 at resolution 0.75\n", "step 2\n", "got 17 at resolution 1.125\n", "step 3\n", "got 5 at resolution 0.9375\n", "step 4\n", "got 12 at resolution 1.03125\n", "step 5\n", "got 8 at resolution 0.984375\n", "step 6\n", "got 5 at resolution 0.9609375\n", "step 7\n", "got 8 at resolution 0.97265625\n", "step 8\n", "got 7 at resolution 0.966796875\n", "step 9\n", "got 7 at resolution 0.9638671875\n", "step 10\n", "got 7 at resolution 0.96240234375\n", "step 11\n", "got 6 at resolution 0.961669921875\n", "Cicero_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 11 at resolution 1.5\n", "step 1\n", "got 7 at resolution 0.75\n", "step 2\n", "got 3 at resolution 0.375\n", "step 3\n", "got 5 at resolution 0.5625\n", "step 4\n", "got 5 at resolution 0.65625\n", "step 5\n", "got 6 at resolution 0.703125\n", "SnapATAC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "Scasat\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 5 at resolution 1.125\n", "step 3\n", "got 6 at resolution 1.3125\n", "scABC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 21 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 8 at resolution 1.125\n", "step 3\n", "got 4 at resolution 0.9375\n", "step 4\n", "got 4 at resolution 1.03125\n", "step 5\n", "got 6 at resolution 1.078125\n", "SCRAT\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 6 at resolution 1.125\n", "SCRAT_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 8 at resolution 1.125\n", "step 3\n", "got 6 at resolution 0.9375\n" ] } ], "source": [ "for file in files:\n", " file_split = file.split('_')\n", " method = file_split[1]\n", " dataset = file_split[2].split('.')[0]\n", " if(len(file_split)>3):\n", " method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n", " print(method)\n", "\n", " pandas2ri.activate()\n", " readRDS = robjects.r['readRDS']\n", " df_rds = readRDS(os.path.join(path_fm,file))\n", " fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n", " fm_mat.columns = metadata.index\n", " \n", " adata = sc.AnnData(fm_mat.T)\n", " adata.var_names_make_unique()\n", " adata.obs = metadata.loc[adata.obs.index,]\n", " df_metrics.loc[method,] = \"\"\n", " #Louvain\n", " sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n", "# sc.tl.louvain(adata)\n", " getNClusters(adata,n_cluster=num_clusters)\n", " #kmeans\n", " kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n", " adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n", " #hierachical clustering\n", " hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n", " adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n", " #clustering metrics\n", " \n", " #adjusted rank index\n", " ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n", " ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n", " ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n", " #adjusted mutual information\n", " ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n", " ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic') \n", " ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n", " #homogeneity\n", " homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n", " homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n", " homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n", "\n", " df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n", " df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n", " df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc] \n", " adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df_metrics.to_csv(path_metrics+'clustering_scores.csv')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ARI_LouvainARI_kmeansARI_HCAMI_LouvainAMI_kmeansAMI_HCHomogeneity_LouvainHomogeneity_kmeansHomogeneity_HC
Control0.7797590.7808280.7909290.8486780.8458850.8462180.848470.8466760.846452
BROCKMAN0.5613280.5552790.6020350.6853240.6804930.7171590.6820410.6810510.709716
Cusanovich20180.9920170.7684450.9781790.9888670.8915750.9711120.988930.8638830.971277
cisTopic0.9531950.9475450.9178940.9483820.943890.9185010.948680.9441940.918896
chromVAR_kmers0.4955890.4979990.5316370.6047260.6361530.6514790.5874390.6268890.641111
chromVAR_motifs0.3989010.4104070.3165930.5537810.5670980.5136240.5557220.5693910.503448
chromVAR_kmers_pca0.6010490.5968010.5587170.7205890.7004580.6686440.7120380.7015020.666186
chromVAR_motifs_pca0.4005360.3457390.3931510.5531890.5746130.5323860.5550770.5615010.532387
GeneScoring0.008066090.356070.2462110.009204840.4367620.3248870.01480860.4097910.291086
GeneScoring_pca0.3230920.3573450.3627120.4177520.4389170.4397560.407380.4282610.427891
Cicero0.08784610.4731730.4207830.09921930.6807070.6194450.1002870.5591890.530166
Cicero_pca0.5303820.5140690.4480580.6769720.6283290.5830120.6425840.625430.574815
SnapATAC0.9960030.9979970.9860670.9947550.9970530.9815760.9947820.997070.981671
Scasat0.8591240.794910.8016620.8868040.8513310.8531640.8870680.8517430.853761
scABC0.3388290.4214180.6051210.4411730.6205270.6878070.39120.6003560.687662
SCRAT0.4944430.5002760.5167340.6477510.6269470.6366070.6399190.628320.634003
SCRAT_pca0.4942980.4949370.4962540.65130.627830.6439160.6399840.6285370.641292
\n", "
" ], "text/plain": [ " ARI_Louvain ARI_kmeans ARI_HC AMI_Louvain AMI_kmeans \\\n", "Control 0.779759 0.780828 0.790929 0.848678 0.845885 \n", "BROCKMAN 0.561328 0.555279 0.602035 0.685324 0.680493 \n", "Cusanovich2018 0.992017 0.768445 0.978179 0.988867 0.891575 \n", "cisTopic 0.953195 0.947545 0.917894 0.948382 0.94389 \n", "chromVAR_kmers 0.495589 0.497999 0.531637 0.604726 0.636153 \n", "chromVAR_motifs 0.398901 0.410407 0.316593 0.553781 0.567098 \n", "chromVAR_kmers_pca 0.601049 0.596801 0.558717 0.720589 0.700458 \n", "chromVAR_motifs_pca 0.400536 0.345739 0.393151 0.553189 0.574613 \n", "GeneScoring 0.00806609 0.35607 0.246211 0.00920484 0.436762 \n", "GeneScoring_pca 0.323092 0.357345 0.362712 0.417752 0.438917 \n", "Cicero 0.0878461 0.473173 0.420783 0.0992193 0.680707 \n", "Cicero_pca 0.530382 0.514069 0.448058 0.676972 0.628329 \n", "SnapATAC 0.996003 0.997997 0.986067 0.994755 0.997053 \n", "Scasat 0.859124 0.79491 0.801662 0.886804 0.851331 \n", "scABC 0.338829 0.421418 0.605121 0.441173 0.620527 \n", "SCRAT 0.494443 0.500276 0.516734 0.647751 0.626947 \n", "SCRAT_pca 0.494298 0.494937 0.496254 0.6513 0.62783 \n", "\n", " AMI_HC Homogeneity_Louvain Homogeneity_kmeans \\\n", "Control 0.846218 0.84847 0.846676 \n", "BROCKMAN 0.717159 0.682041 0.681051 \n", "Cusanovich2018 0.971112 0.98893 0.863883 \n", "cisTopic 0.918501 0.94868 0.944194 \n", "chromVAR_kmers 0.651479 0.587439 0.626889 \n", "chromVAR_motifs 0.513624 0.555722 0.569391 \n", "chromVAR_kmers_pca 0.668644 0.712038 0.701502 \n", "chromVAR_motifs_pca 0.532386 0.555077 0.561501 \n", "GeneScoring 0.324887 0.0148086 0.409791 \n", "GeneScoring_pca 0.439756 0.40738 0.428261 \n", "Cicero 0.619445 0.100287 0.559189 \n", "Cicero_pca 0.583012 0.642584 0.62543 \n", "SnapATAC 0.981576 0.994782 0.99707 \n", "Scasat 0.853164 0.887068 0.851743 \n", "scABC 0.687807 0.3912 0.600356 \n", "SCRAT 0.636607 0.639919 0.62832 \n", "SCRAT_pca 0.643916 0.639984 0.628537 \n", "\n", " Homogeneity_HC \n", "Control 0.846452 \n", "BROCKMAN 0.709716 \n", "Cusanovich2018 0.971277 \n", "cisTopic 0.918896 \n", "chromVAR_kmers 0.641111 \n", "chromVAR_motifs 0.503448 \n", "chromVAR_kmers_pca 0.666186 \n", "chromVAR_motifs_pca 0.532387 \n", "GeneScoring 0.291086 \n", "GeneScoring_pca 0.427891 \n", "Cicero 0.530166 \n", "Cicero_pca 0.574815 \n", "SnapATAC 0.981671 \n", "Scasat 0.853761 \n", "scABC 0.687662 \n", "SCRAT 0.634003 \n", "SCRAT_pca 0.641292 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:ATACseq_clustering]", "language": "python", "name": "conda-env-ATACseq_clustering-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }