{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scanpy as sc\n", "import os\n", "from sklearn.cluster import KMeans\n", "from sklearn.cluster import AgglomerativeClustering\n", "from sklearn.metrics.cluster import adjusted_rand_score\n", "from sklearn.metrics.cluster import adjusted_mutual_info_score\n", "from sklearn.metrics.cluster import homogeneity_score\n", "import rpy2.robjects as robjects\n", "from rpy2.robjects import pandas2ri" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n", " 'AMI_Louvain','AMI_kmeans','AMI_HC',\n", " 'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "workdir = './output/'\n", "path_fm = os.path.join(workdir,'feature_matrices/')\n", "path_clusters = os.path.join(workdir,'clusters/')\n", "path_metrics = os.path.join(workdir,'metrics/')\n", "os.system('mkdir -p '+path_clusters)\n", "os.system('mkdir -p '+path_metrics)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10\n" ] } ], "source": [ "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n", "num_clusters = len(np.unique(metadata['label']))\n", "print(num_clusters)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n", "len(files)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['FM_ChromVAR_buenrostro2018_motifs.rds',\n", " 'FM_ChromVAR_buenrostro2018_kmers.rds',\n", " 'FM_cisTopic_buenrostro2018.rds',\n", " 'FM_SnapATAC_buenrostro2018.rds',\n", " 'FM_SCRAT_buenrostro2018_motifs.rds',\n", " 'FM_BROCKMAN_buenrostro2018.rds',\n", " 'FM_Cusanovich2018_buenrostro2018.rds',\n", " 'FM_Control_buenrostro2018.rds',\n", " 'FM_GeneScoring_buenrostro2018.rds',\n", " 'FM_Scasat_buenrostro2018.rds',\n", " 'FM_scABC_buenrostro2018.rds',\n", " 'FM_Cicero_buenrostro2018.rds',\n", " 'FM_ChromVAR_buenrostro2018_kmers_pca.rds',\n", " 'FM_ChromVAR_buenrostro2018_motifs_pca.rds',\n", " 'FM_GeneScoring_buenrostro2018_pca.rds',\n", " 'FM_Cicero_buenrostro2018_pca.rds',\n", " 'FM_SCRAT_buenrostro2018_pca.rds']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n", " this_step = 0\n", " this_min = float(range_min)\n", " this_max = float(range_max)\n", " while this_step < max_steps:\n", " print('step ' + str(this_step))\n", " this_resolution = this_min + ((this_max-this_min)/2)\n", " sc.tl.louvain(adata,resolution=this_resolution)\n", " this_clusters = adata.obs['louvain'].nunique()\n", " \n", " print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n", " \n", " if this_clusters > n_cluster:\n", " this_max = this_resolution\n", " elif this_clusters < n_cluster:\n", " this_min = this_resolution\n", " else:\n", " return(this_resolution, adata)\n", " this_step += 1\n", " \n", " print('Cannot find the number of clusters')\n", " print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ChromVAR_motifs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 9 at resolution 1.5\n", "step 1\n", "got 12 at resolution 2.25\n", "step 2\n", "got 10 at resolution 1.875\n", "ChromVAR_kmers\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 9 at resolution 1.5\n", "step 1\n", "got 14 at resolution 2.25\n", "step 2\n", "got 13 at resolution 1.875\n", "step 3\n", "got 10 at resolution 1.6875\n", "cisTopic\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 18 at resolution 1.5\n", "step 1\n", "got 13 at resolution 0.75\n", "step 2\n", "got 10 at resolution 0.375\n", "SnapATAC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 22 at resolution 1.5\n", "step 1\n", "got 16 at resolution 0.75\n", "step 2\n", "got 15 at resolution 0.375\n", "step 3\n", "got 10 at resolution 0.1875\n", "SCRAT_motifs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 17 at resolution 1.5\n", "step 1\n", "got 9 at resolution 0.75\n", "step 2\n", "got 14 at resolution 1.125\n", "step 3\n", "got 11 at resolution 0.9375\n", "step 4\n", "got 10 at resolution 0.84375\n", "BROCKMAN\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 18 at resolution 1.5\n", "step 1\n", "got 12 at resolution 0.75\n", "step 2\n", "got 7 at resolution 0.375\n", "step 3\n", "got 10 at resolution 0.5625\n", "Cusanovich2018\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 15 at resolution 1.5\n", "step 1\n", "got 11 at resolution 0.75\n", "step 2\n", "got 8 at resolution 0.375\n", "step 3\n", "got 9 at resolution 0.5625\n", "step 4\n", "got 10 at resolution 0.65625\n", "Control\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 19 at resolution 1.5\n", "step 1\n", "got 12 at resolution 0.75\n", "step 2\n", "got 5 at resolution 0.375\n", "step 3\n", "got 10 at resolution 0.5625\n", "GeneScoring\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "Scasat\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 20 at resolution 1.5\n", "step 1\n", "got 13 at resolution 0.75\n", "step 2\n", "got 8 at resolution 0.375\n", "step 3\n", "got 12 at resolution 0.5625\n", "step 4\n", "got 12 at resolution 0.46875\n", "step 5\n", "got 12 at resolution 0.421875\n", "step 6\n", "got 8 at resolution 0.3984375\n", "step 7\n", "got 10 at resolution 0.41015625\n", "scABC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 19 at resolution 1.5\n", "step 1\n", "got 4 at resolution 0.75\n", "step 2\n", "got 7 at resolution 1.125\n", "step 3\n", "got 14 at resolution 1.3125\n", "step 4\n", "got 11 at resolution 1.21875\n", "step 5\n", "got 9 at resolution 1.171875\n", "step 6\n", "got 10 at resolution 1.1953125\n", "Cicero\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 76 at resolution 1.5\n", "step 1\n", "got 1 at resolution 0.75\n", "step 2\n", "got 11 at resolution 1.125\n", "step 3\n", "got 5 at resolution 0.9375\n", "step 4\n", "got 9 at resolution 1.03125\n", "step 5\n", "got 13 at resolution 1.078125\n", "step 6\n", "got 8 at resolution 1.0546875\n", "step 7\n", "got 9 at resolution 1.06640625\n", "step 8\n", "got 9 at resolution 1.072265625\n", "step 9\n", "got 10 at resolution 1.0751953125\n", "ChromVAR_kmers_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 11 at resolution 1.5\n", "step 1\n", "got 8 at resolution 0.75\n", "step 2\n", "got 9 at resolution 1.125\n", "step 3\n", "got 11 at resolution 1.3125\n", "step 4\n", "got 10 at resolution 1.21875\n", "ChromVAR_motifs_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 9 at resolution 1.5\n", "step 1\n", "got 13 at resolution 2.25\n", "step 2\n", "got 11 at resolution 1.875\n", "step 3\n", "got 11 at resolution 1.6875\n", "step 4\n", "got 11 at resolution 1.59375\n", "step 5\n", "got 11 at resolution 1.546875\n", "step 6\n", "got 11 at resolution 1.5234375\n", "step 7\n", "got 10 at resolution 1.51171875\n", "GeneScoring_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 16 at resolution 1.5\n", "step 1\n", "got 7 at resolution 0.75\n", "step 2\n", "got 12 at resolution 1.125\n", "step 3\n", "got 10 at resolution 0.9375\n", "Cicero_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "SCRAT_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 18 at resolution 1.5\n", "step 1\n", "got 9 at resolution 0.75\n", "step 2\n", "got 15 at resolution 1.125\n", "step 3\n", "got 13 at resolution 0.9375\n", "step 4\n", "got 13 at resolution 0.84375\n", "step 5\n", "got 9 at resolution 0.796875\n", "step 6\n", "got 12 at resolution 0.8203125\n", "step 7\n", "got 10 at resolution 0.80859375\n" ] } ], "source": [ "for file in files:\n", " file_split = file.split('_')\n", " method = file_split[1]\n", " dataset = file_split[2].split('.')[0]\n", " if(len(file_split)>3):\n", " method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n", " print(method)\n", "\n", " pandas2ri.activate()\n", " readRDS = robjects.r['readRDS']\n", " df_rds = readRDS(os.path.join(path_fm,file))\n", " fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n", " fm_mat.fillna(0,inplace=True)\n", " fm_mat.columns = metadata.index\n", " \n", " adata = sc.AnnData(fm_mat.T)\n", " adata.var_names_make_unique()\n", " adata.obs = metadata.loc[adata.obs.index,]\n", " df_metrics.loc[method,] = \"\"\n", " #Louvain\n", " sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n", "# sc.tl.louvain(adata)\n", " getNClusters(adata,n_cluster=num_clusters)\n", " #kmeans\n", " kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n", " adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n", " #hierachical clustering\n", " hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n", " adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n", " #clustering metrics\n", " \n", " #adjusted rank index\n", " ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n", " ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n", " ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n", " #adjusted mutual information\n", " ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n", " ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic') \n", " ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n", " #homogeneity\n", " homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n", " homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n", " homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n", "\n", " df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n", " df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n", " df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc] \n", " adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df_metrics.to_csv(path_metrics+'clustering_scores.csv')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ARI_LouvainARI_kmeansARI_HCAMI_LouvainAMI_kmeansAMI_HCHomogeneity_LouvainHomogeneity_kmeansHomogeneity_HC
ChromVAR_motifs0.3448840.2434580.3077770.5160440.4159840.4320230.533290.4148720.407445
ChromVAR_kmers0.3895980.2791330.2215870.5441250.4280860.3625370.5558260.4007410.307758
cisTopic0.6412390.4984770.517010.7194160.6575230.6612360.72520.6812880.682697
SnapATAC0.4203410.3532990.4430560.6844990.6050170.6890810.6528150.5896450.645974
SCRAT_motifs0.05158740.0405990.03804920.1400080.1018520.1045610.1540750.1150260.116999
BROCKMAN0.1152130.03298840.03535070.2615930.1039870.1190180.2744680.1157360.131517
Cusanovich20180.533643-0.00194164-0.001941640.663841-0.00278841-0.002788410.6804030.003021880.00302188
Control0.1824220.01375860.03441670.3724630.0240630.04499970.3841450.02422910.044414
GeneScoring0.04172570.02082230.02443480.1120210.03159730.04018680.1218470.03494620.0413666
Scasat0.3202180.15970.188610.5195180.3009510.3838860.5380660.320150.397076
scABC0.04503370.01270840.02480510.127540.02392570.04332460.1323660.0248740.043172
Cicero0.09669-0.00325062-0.002153270.1727260.00262284-0.002535540.1801950.006419330.00335946
ChromVAR_kmers_pca0.4432340.2629850.2703380.569540.4203580.4346440.5908460.3778820.386469
ChromVAR_motifs_pca0.3084430.2561850.2948660.5164590.4102290.439110.5343530.4075460.412825
GeneScoring_pca0.03066010.0248730.01351620.09516040.03620620.02988170.1072380.03957310.0322586
Cicero_pca0.211067-0.00194164-0.002153270.37366-0.00278841-0.002535540.3974310.003021880.00335946
SCRAT_pca0.04414180.03923680.04331180.1149290.09895910.1157280.1269790.1125350.129509
\n", "
" ], "text/plain": [ " ARI_Louvain ARI_kmeans ARI_HC AMI_Louvain \\\n", "ChromVAR_motifs 0.344884 0.243458 0.307777 0.516044 \n", "ChromVAR_kmers 0.389598 0.279133 0.221587 0.544125 \n", "cisTopic 0.641239 0.498477 0.51701 0.719416 \n", "SnapATAC 0.420341 0.353299 0.443056 0.684499 \n", "SCRAT_motifs 0.0515874 0.040599 0.0380492 0.140008 \n", "BROCKMAN 0.115213 0.0329884 0.0353507 0.261593 \n", "Cusanovich2018 0.533643 -0.00194164 -0.00194164 0.663841 \n", "Control 0.182422 0.0137586 0.0344167 0.372463 \n", "GeneScoring 0.0417257 0.0208223 0.0244348 0.112021 \n", "Scasat 0.320218 0.1597 0.18861 0.519518 \n", "scABC 0.0450337 0.0127084 0.0248051 0.12754 \n", "Cicero 0.09669 -0.00325062 -0.00215327 0.172726 \n", "ChromVAR_kmers_pca 0.443234 0.262985 0.270338 0.56954 \n", "ChromVAR_motifs_pca 0.308443 0.256185 0.294866 0.516459 \n", "GeneScoring_pca 0.0306601 0.024873 0.0135162 0.0951604 \n", "Cicero_pca 0.211067 -0.00194164 -0.00215327 0.37366 \n", "SCRAT_pca 0.0441418 0.0392368 0.0433118 0.114929 \n", "\n", " AMI_kmeans AMI_HC Homogeneity_Louvain \\\n", "ChromVAR_motifs 0.415984 0.432023 0.53329 \n", "ChromVAR_kmers 0.428086 0.362537 0.555826 \n", "cisTopic 0.657523 0.661236 0.7252 \n", "SnapATAC 0.605017 0.689081 0.652815 \n", "SCRAT_motifs 0.101852 0.104561 0.154075 \n", "BROCKMAN 0.103987 0.119018 0.274468 \n", "Cusanovich2018 -0.00278841 -0.00278841 0.680403 \n", "Control 0.024063 0.0449997 0.384145 \n", "GeneScoring 0.0315973 0.0401868 0.121847 \n", "Scasat 0.300951 0.383886 0.538066 \n", "scABC 0.0239257 0.0433246 0.132366 \n", "Cicero 0.00262284 -0.00253554 0.180195 \n", "ChromVAR_kmers_pca 0.420358 0.434644 0.590846 \n", "ChromVAR_motifs_pca 0.410229 0.43911 0.534353 \n", "GeneScoring_pca 0.0362062 0.0298817 0.107238 \n", "Cicero_pca -0.00278841 -0.00253554 0.397431 \n", "SCRAT_pca 0.0989591 0.115728 0.126979 \n", "\n", " Homogeneity_kmeans Homogeneity_HC \n", "ChromVAR_motifs 0.414872 0.407445 \n", "ChromVAR_kmers 0.400741 0.307758 \n", "cisTopic 0.681288 0.682697 \n", "SnapATAC 0.589645 0.645974 \n", "SCRAT_motifs 0.115026 0.116999 \n", "BROCKMAN 0.115736 0.131517 \n", "Cusanovich2018 0.00302188 0.00302188 \n", "Control 0.0242291 0.044414 \n", "GeneScoring 0.0349462 0.0413666 \n", "Scasat 0.32015 0.397076 \n", "scABC 0.024874 0.043172 \n", "Cicero 0.00641933 0.00335946 \n", "ChromVAR_kmers_pca 0.377882 0.386469 \n", "ChromVAR_motifs_pca 0.407546 0.412825 \n", "GeneScoring_pca 0.0395731 0.0322586 \n", "Cicero_pca 0.00302188 0.00335946 \n", "SCRAT_pca 0.112535 0.129509 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:ATACseq_clustering]", "language": "python", "name": "conda-env-ATACseq_clustering-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }