{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "import pandas as pd\n",
  "import numpy as np\n",
  "import scanpy as sc\n",
  "import os\n",
  "from sklearn.cluster import KMeans\n",
  "from sklearn.cluster import AgglomerativeClustering\n",
  "from sklearn.metrics.cluster import adjusted_rand_score\n",
  "from sklearn.metrics.cluster import adjusted_mutual_info_score\n",
  "from sklearn.metrics.cluster import homogeneity_score\n",
  "import rpy2.robjects as robjects\n",
  "from rpy2.robjects import pandas2ri" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
  "# Score table: one row is added per feature matrix, one column per (metric, clustering) pair.\n",
  "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n",
  "                                   'AMI_Louvain','AMI_kmeans','AMI_HC',\n",
  "                                   'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
  "# Input/output folders, all relative to the working directory of the notebook.\n",
  "workdir = './output/'\n",
  "path_fm = os.path.join(workdir,'feature_matrices/')\n",
  "path_clusters = os.path.join(workdir,'clusters/')\n",
  "path_metrics = os.path.join(workdir,'metrics/')\n",
  "# Create the output folders in-process instead of shelling out to mkdir;\n",
  "# exist_ok=True keeps the cell safe to re-run.\n",
  "os.makedirs(path_clusters, exist_ok=True)\n",
  "os.makedirs(path_metrics, exist_ok=True)" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
  "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n",
  "# Target cluster count for every algorithm = number of distinct values in the 'label' column.\n",
  "num_clusters = len(np.unique(metadata['label']))" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Feature matrices to evaluate: every entry of path_fm whose name starts with 'FM'.\n",
  "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n",
  "len(files)" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['FM_Control_Erynoisyp2.rds',\n", " 'FM_BROCKMAN_Erynoisyp2.rds',\n", "
'FM_Cusanovich2018_Erynoisyp2.rds',\n", " 'FM_cisTopic_Erynoisyp2.rds',\n", " 'FM_chromVAR_Erynoisyp2_kmers.rds',\n", " 'FM_chromVAR_Erynoisyp2_motifs.rds',\n", " 'FM_chromVAR_Erynoisyp2_kmers_pca.rds',\n", " 'FM_chromVAR_Erynoisyp2_motifs_pca.rds',\n", " 'FM_GeneScoring_Erynoisyp2.rds',\n", " 'FM_GeneScoring_Erynoisyp2_pca.rds',\n", " 'FM_Cicero_Erynoisyp2.rds',\n", " 'FM_Cicero_Erynoisyp2_pca.rds',\n", " 'FM_SnapATAC_Erynoisyp2.rds',\n", " 'FM_Scasat_Erynoisyp2.rds',\n", " 'FM_scABC_Erynoisyp2.rds',\n", " 'FM_SCRAT_Erynoisyp2.rds',\n", " 'FM_SCRAT_Erynoisyp2_pca.rds']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
  "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n",
  "    \"\"\"Bisect the Louvain resolution until `n_cluster` clusters are found.\n",
  "\n",
  "    Each step runs `sc.tl.louvain` at the midpoint of the current interval and\n",
  "    counts the distinct values of `adata.obs['louvain']`. Too many clusters\n",
  "    lowers the upper bound, too few raises the lower bound. The labels of the\n",
  "    most recent run stay in `adata.obs['louvain']`.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    adata : object handed to `sc.tl.louvain`; it is modified in place.\n",
  "        NOTE(review): presumably needs a neighbors graph computed beforehand\n",
  "        (the caller in this notebook runs `sc.pp.neighbors` first) -- confirm.\n",
  "    n_cluster : target number of clusters.\n",
  "    range_min, range_max : bounds of the resolution search interval.\n",
  "    max_steps : maximum number of Louvain runs; must be at least 1.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    (resolution, adata) for the last resolution tried. When no resolution\n",
  "    gives exactly `n_cluster` clusters within `max_steps`, a message is\n",
  "    printed and the last attempt is returned (previously this path returned\n",
  "    None, which broke callers that unpack the result).\n",
  "\n",
  "    Raises\n",
  "    ------\n",
  "    ValueError if `max_steps` is smaller than 1.\n",
  "    \"\"\"\n",
  "    if max_steps < 1:\n",
  "        # Without at least one run there is no clustering to report or return.\n",
  "        raise ValueError('max_steps must be at least 1')\n",
  "    this_min = float(range_min)\n",
  "    this_max = float(range_max)\n",
  "    for this_step in range(max_steps):\n",
  "        print('step ' + str(this_step))\n",
  "        this_resolution = this_min + ((this_max-this_min)/2)\n",
  "        sc.tl.louvain(adata,resolution=this_resolution)\n",
  "        this_clusters = adata.obs['louvain'].nunique()\n",
  "\n",
  "        print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n",
  "\n",
  "        if this_clusters > n_cluster:\n",
  "            this_max = this_resolution\n",
  "        elif this_clusters < n_cluster:\n",
  "            this_min = this_resolution\n",
  "        else:\n",
  "            return(this_resolution, adata)\n",
  "\n",
  "    print('Cannot find the number of clusters')\n",
  "    print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n",
  "    return(this_resolution, adata)" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Control\n" ] }, { "name": "stderr", "output_type": "stream", "text": [
"/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 9 at resolution 1.5\n", "step 1\n", "got 14 at resolution 2.25\n", "step 2\n", "got 10 at resolution 1.875\n", "step 3\n", "got 13 at resolution 2.0625\n", "step 4\n", "got 13 at resolution 1.96875\n", "step 5\n", "got 11 at resolution 1.921875\n", "step 6\n", "got 12 at resolution 1.9453125\n", "BROCKMAN\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 11 at resolution 1.5\n", "step 1\n", "got 13 at resolution 2.25\n", "step 2\n", "got 11 at resolution 1.875\n", "step 3\n", "got 12 at resolution 2.0625\n", "Cusanovich2018\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. 
DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 13 at resolution 2.25\n", "step 2\n", "got 11 at resolution 1.875\n", "step 3\n", "got 11 at resolution 2.0625\n", "step 4\n", "got 12 at resolution 2.15625\n", "cisTopic\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 10 at resolution 2.25\n", "step 2\n", "got 10 at resolution 2.625\n", "step 3\n", "got 12 at resolution 2.8125\n", "chromVAR_kmers\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "step 1\n", "got 10 at resolution 2.25\n", "step 2\n", "got 15 at resolution 2.625\n", "step 3\n", "got 12 at resolution 2.4375\n", "chromVAR_motifs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. 
Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 14 at resolution 2.25\n", "step 2\n", "got 10 at resolution 1.875\n", "step 3\n", "got 11 at resolution 2.0625\n", "step 4\n", "got 12 at resolution 2.15625\n", "chromVAR_kmers_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 10 at resolution 2.25\n", "step 2\n", "got 13 at resolution 2.625\n", "step 3\n", "got 11 at resolution 2.4375\n", "step 4\n", "got 11 at resolution 2.53125\n", "step 5\n", "got 11 at resolution 2.578125\n", "step 6\n", "got 12 at resolution 2.6015625\n", "chromVAR_motifs_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. 
DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 7 at resolution 1.5\n", "step 1\n", "got 14 at resolution 2.25\n", "step 2\n", "got 9 at resolution 1.875\n", "step 3\n", "got 13 at resolution 2.0625\n", "step 4\n", "got 11 at resolution 1.96875\n", "step 5\n", "got 11 at resolution 2.015625\n", "step 6\n", "got 12 at resolution 2.0390625\n", "GeneScoring\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 32 at resolution 1.5\n", "step 1\n", "got 3 at resolution 0.75\n", "step 2\n", "got 12 at resolution 1.125\n", "GeneScoring_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. 
DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 13 at resolution 1.5\n", "step 1\n", "got 6 at resolution 0.75\n", "step 2\n", "got 8 at resolution 1.125\n", "step 3\n", "got 10 at resolution 1.3125\n", "step 4\n", "got 10 at resolution 1.40625\n", "step 5\n", "got 13 at resolution 1.453125\n", "step 6\n", "got 12 at resolution 1.4296875\n", "Cicero\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 36 at resolution 1.5\n", "step 1\n", "got 1 at resolution 0.75\n", "step 2\n", "got 17 at resolution 1.125\n", "step 3\n", "got 7 at resolution 0.9375\n", "step 4\n", "got 12 at resolution 1.03125\n", "Cicero_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. 
DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 17 at resolution 2.25\n", "step 2\n", "got 16 at resolution 1.875\n", "step 3\n", "got 12 at resolution 1.6875\n", "SnapATAC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 11 at resolution 1.5\n", "step 1\n", "got 12 at resolution 2.25\n", "Scasat\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "step 1\n", "got 13 at resolution 2.25\n", "step 2\n", "got 9 at resolution 1.875\n", "step 3\n", "got 12 at resolution 2.0625\n", "scABC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. 
DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 5 at resolution 1.5\n", "step 1\n", "got 14 at resolution 2.25\n", "step 2\n", "got 8 at resolution 1.875\n", "step 3\n", "got 9 at resolution 2.0625\n", "step 4\n", "got 13 at resolution 2.15625\n", "step 5\n", "got 10 at resolution 2.109375\n", "step 6\n", "got 11 at resolution 2.1328125\n", "step 7\n", "got 13 at resolution 2.14453125\n", "step 8\n", "got 13 at resolution 2.138671875\n", "step 9\n", "got 13 at resolution 2.1357421875\n", "step 10\n", "got 11 at resolution 2.13427734375\n", "step 11\n", "got 11 at resolution 2.135009765625\n", "step 12\n", "got 13 at resolution 2.1353759765625\n", "step 13\n", "got 13 at resolution 2.13519287109375\n", "step 14\n", "got 11 at resolution 2.135101318359375\n", "step 15\n", "got 11 at resolution 2.1351470947265625\n", "step 16\n", "got 13 at resolution 2.1351699829101562\n", "step 17\n", "got 11 at resolution 2.1351585388183594\n", "step 18\n", "got 13 at resolution 2.135164260864258\n", "step 19\n", "got 13 at resolution 2.1351613998413086\n", "Cannot find the number of clusters\n", "Clustering solution from last iteration is used:13 at resolution 2.1351613998413086\n", "SCRAT\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. 
DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 9 at resolution 1.5\n", "step 1\n", "got 11 at resolution 2.25\n", "step 2\n", "got 11 at resolution 2.625\n", "step 3\n", "got 12 at resolution 2.8125\n", "SCRAT_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 12 at resolution 2.25\n" ] } ], "source": [
  "# R <-> pandas bridge: set up once, it does not change between files.\n",
  "pandas2ri.activate()\n",
  "readRDS = robjects.r['readRDS']\n",
  "\n",
  "for file in files:\n",
  "    # File names look like FM_<method>_<dataset>[_<variant>...].rds;\n",
  "    # any variant parts are appended to the method name.\n",
  "    file_split = file.split('_')\n",
  "    method = file_split[1]\n",
  "    dataset = file_split[2].split('.')[0]\n",
  "    if(len(file_split)>3):\n",
  "        method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n",
  "    print(method)\n",
  "\n",
  "    df_rds = readRDS(os.path.join(path_fm,file))\n",
  "    fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n",
  "    # NOTE(review): columns are relabelled by position, which assumes the matrix\n",
  "    # columns follow the row order of metadata -- confirm upstream.\n",
  "    fm_mat.columns = metadata.index\n",
  "\n",
  "    adata = sc.AnnData(fm_mat.T)\n",
  "    adata.var_names_make_unique()\n",
  "    adata.obs = metadata.loc[adata.obs.index,]\n",
  "    # Presumably creates the row so the column-subset assignments below have a target.\n",
  "    df_metrics.loc[method,] = \"\"\n",
  "    #Louvain\n",
  "    sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n",
  "    getNClusters(adata,n_cluster=num_clusters)\n",
  "    #kmeans\n",
  "    kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n",
  "    adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n",
  "    #hierarchical clustering\n",
  "    hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n",
  "    adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n",
  "    #clustering metrics\n",
  "\n",
  "    #adjusted Rand index\n",
  "    ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n",
  "    ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n",
  "    ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n",
  "    #adjusted mutual information\n",
  "    ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n",
  "    ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic')\n",
  "    ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n",
  "    #homogeneity\n",
  "    homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n",
  "    homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n",
  "    homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n",
  "\n",
  "    df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n",
  "    df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n",
  "    df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc]\n",
  "    # Per-cell cluster assignments of all three algorithms, one file per method.\n",
  "    adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
  "df_metrics.to_csv(os.path.join(path_metrics,'clustering_scores.csv'))" ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ARI_LouvainARI_kmeansARI_HCAMI_LouvainAMI_kmeansAMI_HCHomogeneity_LouvainHomogeneity_kmeansHomogeneity_HC
Control0.693010.6428990.6745610.8274310.7966410.8148050.8232040.800170.807588
BROCKMAN0.7510530.6666870.7303740.8524330.8182220.8415230.8543860.8115930.838416
Cusanovich20180.796980.6585920.6318060.8832790.8084030.8357810.87610.7955510.791579
cisTopic0.821620.8369870.8165150.8976950.9011820.8927430.8976750.9030260.894268
chromVAR_kmers0.5708390.5420630.5223480.7188450.7117790.6825720.7072860.7126580.677166
chromVAR_motifs0.3010880.2914130.2671980.4933160.5143340.4658850.4942410.5213940.470202
chromVAR_kmers_pca0.5903160.5894650.5425530.7308080.76270.6999340.7257540.7606910.696847
chromVAR_motifs_pca0.2986860.3137890.2546160.4891150.5098860.4673670.4925380.5189930.468481
GeneScoring0.003240630.2689660.2426340.008409180.4680740.4037750.02906290.3897740.386797
GeneScoring_pca0.2085260.2290010.2208070.416260.4214160.4142280.4228110.416510.409035
Cicero0.002257310.2720.364590.005913860.5081340.5732790.02667930.4126840.527064
Cicero_pca0.3986980.3894850.3805730.5749550.5675910.5647430.5653140.5611420.537896
SnapATAC0.8897320.896420.8715280.9406820.938410.923930.9377490.9396420.924514
Scasat0.6729680.6789680.6518720.8127180.813160.8001340.8068570.8094120.793816
scABC0.4674220.313240.5280990.6492620.5479040.7011770.6038840.4809190.687006
SCRAT0.6529630.5942750.6121530.790890.7715650.7819080.790990.769920.775793
SCRAT_pca0.6620290.5922040.6023120.7950310.7699020.7794810.7896060.7680440.776074
\n", "
" ], "text/plain": [ " ARI_Louvain ARI_kmeans ARI_HC AMI_Louvain AMI_kmeans \\\n", "Control 0.69301 0.642899 0.674561 0.827431 0.796641 \n", "BROCKMAN 0.751053 0.666687 0.730374 0.852433 0.818222 \n", "Cusanovich2018 0.79698 0.658592 0.631806 0.883279 0.808403 \n", "cisTopic 0.82162 0.836987 0.816515 0.897695 0.901182 \n", "chromVAR_kmers 0.570839 0.542063 0.522348 0.718845 0.711779 \n", "chromVAR_motifs 0.301088 0.291413 0.267198 0.493316 0.514334 \n", "chromVAR_kmers_pca 0.590316 0.589465 0.542553 0.730808 0.7627 \n", "chromVAR_motifs_pca 0.298686 0.313789 0.254616 0.489115 0.509886 \n", "GeneScoring 0.00324063 0.268966 0.242634 0.00840918 0.468074 \n", "GeneScoring_pca 0.208526 0.229001 0.220807 0.41626 0.421416 \n", "Cicero 0.00225731 0.272 0.36459 0.00591386 0.508134 \n", "Cicero_pca 0.398698 0.389485 0.380573 0.574955 0.567591 \n", "SnapATAC 0.889732 0.89642 0.871528 0.940682 0.93841 \n", "Scasat 0.672968 0.678968 0.651872 0.812718 0.81316 \n", "scABC 0.467422 0.31324 0.528099 0.649262 0.547904 \n", "SCRAT 0.652963 0.594275 0.612153 0.79089 0.771565 \n", "SCRAT_pca 0.662029 0.592204 0.602312 0.795031 0.769902 \n", "\n", " AMI_HC Homogeneity_Louvain Homogeneity_kmeans \\\n", "Control 0.814805 0.823204 0.80017 \n", "BROCKMAN 0.841523 0.854386 0.811593 \n", "Cusanovich2018 0.835781 0.8761 0.795551 \n", "cisTopic 0.892743 0.897675 0.903026 \n", "chromVAR_kmers 0.682572 0.707286 0.712658 \n", "chromVAR_motifs 0.465885 0.494241 0.521394 \n", "chromVAR_kmers_pca 0.699934 0.725754 0.760691 \n", "chromVAR_motifs_pca 0.467367 0.492538 0.518993 \n", "GeneScoring 0.403775 0.0290629 0.389774 \n", "GeneScoring_pca 0.414228 0.422811 0.41651 \n", "Cicero 0.573279 0.0266793 0.412684 \n", "Cicero_pca 0.564743 0.565314 0.561142 \n", "SnapATAC 0.92393 0.937749 0.939642 \n", "Scasat 0.800134 0.806857 0.809412 \n", "scABC 0.701177 0.603884 0.480919 \n", "SCRAT 0.781908 0.79099 0.76992 \n", "SCRAT_pca 0.779481 0.789606 0.768044 \n", "\n", " Homogeneity_HC \n", "Control 0.807588 
\n", "BROCKMAN 0.838416 \n", "Cusanovich2018 0.791579 \n", "cisTopic 0.894268 \n", "chromVAR_kmers 0.677166 \n", "chromVAR_motifs 0.470202 \n", "chromVAR_kmers_pca 0.696847 \n", "chromVAR_motifs_pca 0.468481 \n", "GeneScoring 0.386797 \n", "GeneScoring_pca 0.409035 \n", "Cicero 0.527064 \n", "Cicero_pca 0.537896 \n", "SnapATAC 0.924514 \n", "Scasat 0.793816 \n", "scABC 0.687006 \n", "SCRAT 0.775793 \n", "SCRAT_pca 0.776074 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:ATACseq_clustering]", "language": "python", "name": "conda-env-ATACseq_clustering-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }