{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scanpy as sc\n", "from scipy import stats\n", "import os\n", "from sklearn.cluster import KMeans\n", "from sklearn.cluster import AgglomerativeClustering\n", "from sklearn.metrics.cluster import adjusted_rand_score\n", "from sklearn.metrics.cluster import adjusted_mutual_info_score\n", "from sklearn.metrics.cluster import homogeneity_score\n", "import rpy2.robjects as robjects\n", "from rpy2.robjects import pandas2ri" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# df_metrics = pd.DataFrame(columns=['ARI_Louvain','ARI_kmeans','ARI_HC',\n", "# 'AMI_Louvain','AMI_kmeans','AMI_HC',\n", "# 'Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "workdir = './output/'\n", "path_fm = os.path.join(workdir,'feature_matrices/')\n", "path_clusters = os.path.join(workdir,'clusters/')\n", "path_metrics = os.path.join(workdir,'metrics/')\n", "os.system('mkdir -p '+path_clusters)\n", "os.system('mkdir -p '+path_metrics)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8\n" ] } ], "source": [ "metadata = pd.read_csv('./input/metadata.tsv',sep='\\t',index_col=0)\n", "num_clusters = len(np.unique(metadata['label']))\n", "print(num_clusters)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "17" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files = [x for x in os.listdir(path_fm) if x.startswith('FM')]\n", "len(files)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "['FM_ChromVAR_10xpbmc5k_motifs.rds',\n", " 'FM_ChromVAR_10xpbmc5k_kmers.rds',\n", " 'FM_cisTopic_10xpbmc5k.rds',\n", " 'FM_SnapATAC_10xpbmc5k.rds',\n", " 'FM_BROCKMAN_10xpbmc5k.rds',\n", " 'FM_SCRAT_10xpbmc5k_motifs.rds',\n", " 'FM_Cusanovich2018_10xpbmc5k.rds',\n", " 'FM_Control_10xpbmc5k.rds',\n", " 'FM_GeneScoring_10xpbmc5k.rds',\n", " 'FM_Scasat_10xpbmc5k.rds',\n", " 'FM_scABC_10xpbmc5k.rds',\n", " 'FM_Cicero_10xpbmc5k.rds',\n", " 'FM_ChromVAR_10xpbmc_kmers_pca.rds',\n", " 'FM_ChromVAR_10xpbmc_motifs_pca.rds',\n", " 'FM_GeneScoring_10xpbmc_pca.rds',\n", " 'FM_Cicero_10xpbmc_pca.rds',\n", " 'FM_SCRAT_10xpbmc_pca.rds']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def getNClusters(adata,n_cluster,range_min=0,range_max=3,max_steps=20):\n", " this_step = 0\n", " this_min = float(range_min)\n", " this_max = float(range_max)\n", " while this_step < max_steps:\n", " print('step ' + str(this_step))\n", " this_resolution = this_min + ((this_max-this_min)/2)\n", " sc.tl.louvain(adata,resolution=this_resolution)\n", " this_clusters = adata.obs['louvain'].nunique()\n", " \n", " print('got ' + str(this_clusters) + ' at resolution ' + str(this_resolution))\n", " \n", " if this_clusters > n_cluster:\n", " this_max = this_resolution\n", " elif this_clusters < n_cluster:\n", " this_min = this_resolution\n", " else:\n", " return(this_resolution, adata)\n", " this_step += 1\n", " \n", " print('Cannot find the number of clusters')\n", " print('Clustering solution from last iteration is used:' + str(this_clusters) + ' at resolution ' + str(this_resolution))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ChromVAR_motifs\n", "step 0\n", "got 8 at resolution 1.5\n", "ChromVAR_kmers\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 6 at resolution 1.5\n", "step 1\n", "got 17 at resolution 2.25\n", "step 2\n", "got 9 at resolution 1.875\n", "step 3\n", "got 6 at resolution 1.6875\n", "step 4\n", "got 7 at resolution 1.78125\n", "step 5\n", "got 8 at resolution 1.828125\n", "cisTopic\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 17 at resolution 1.5\n", "step 1\n", "got 14 at resolution 0.75\n", "step 2\n", "got 9 at resolution 0.375\n", "step 3\n", "got 6 at resolution 0.1875\n", "step 4\n", "got 8 at resolution 0.28125\n", "SnapATAC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 19 at resolution 1.5\n", "step 1\n", "got 14 at resolution 0.75\n", "step 2\n", "got 11 at resolution 0.375\n", "step 3\n", "got 6 at resolution 0.1875\n", "step 4\n", "got 8 at resolution 0.28125\n", "BROCKMAN\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 20 at resolution 1.5\n", "step 1\n", "got 13 at resolution 0.75\n", "step 2\n", "got 8 at resolution 0.375\n", "SCRAT_motifs\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 19 at resolution 1.5\n", "step 1\n", "got 14 at resolution 0.75\n", "step 2\n", "got 8 at resolution 0.375\n", "Cusanovich2018\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 17 at resolution 1.5\n", "step 1\n", "got 11 at resolution 0.75\n", "step 2\n", "got 8 at resolution 0.375\n", "Control\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 22 at resolution 1.5\n", "step 1\n", "got 13 at resolution 0.75\n", "step 2\n", "got 7 at resolution 0.375\n", "step 3\n", "got 10 at resolution 0.5625\n", "step 4\n", "got 9 at resolution 0.46875\n", "step 5\n", "got 8 at resolution 0.421875\n", "GeneScoring\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "Scasat\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 25 at resolution 1.5\n", "step 1\n", "got 17 at resolution 0.75\n", "step 2\n", "got 11 at resolution 0.375\n", "step 3\n", "got 6 at resolution 0.1875\n", "step 4\n", "got 9 at resolution 0.28125\n", "step 5\n", "got 6 at resolution 0.234375\n", "step 6\n", "got 6 at resolution 0.2578125\n", "step 7\n", "got 8 at resolution 0.26953125\n", "scABC\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 7 at resolution 1.125\n", "step 3\n", "got 10 at resolution 1.3125\n", "step 4\n", "got 8 at resolution 1.21875\n", "Cicero\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 8 at resolution 1.5\n", "ChromVAR_kmers_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 10 at resolution 1.5\n", "step 1\n", "got 6 at resolution 0.75\n", "step 2\n", "got 8 at resolution 1.125\n", "ChromVAR_motifs_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 9 at resolution 1.5\n", "step 1\n", "got 6 at resolution 0.75\n", "step 2\n", "got 7 at resolution 1.125\n", "step 3\n", "got 7 at resolution 1.3125\n", "step 4\n", "got 8 at resolution 1.40625\n", "GeneScoring_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 15 at resolution 1.5\n", "step 1\n", "got 9 at resolution 0.75\n", "step 2\n", "got 7 at resolution 0.375\n", "step 3\n", "got 7 at resolution 0.5625\n", "step 4\n", "got 7 at resolution 0.65625\n", "step 5\n", "got 7 at resolution 0.703125\n", "step 6\n", "got 9 at resolution 0.7265625\n", "step 7\n", "got 7 at resolution 0.71484375\n", "step 8\n", "got 8 at resolution 0.720703125\n", "Cicero_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 12 at resolution 1.5\n", "step 1\n", "got 5 at resolution 0.75\n", "step 2\n", "got 11 at resolution 1.125\n", "step 3\n", "got 7 at resolution 0.9375\n", "step 4\n", "got 10 at resolution 1.03125\n", "step 5\n", "got 7 at resolution 0.984375\n", "step 6\n", "got 9 at resolution 1.0078125\n", "step 7\n", "got 8 at resolution 0.99609375\n", "SCRAT_pca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_clustering/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n", " res = PandasDataFrame.from_items(items)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "step 0\n", "got 23 at resolution 1.5\n", "step 1\n", "got 16 at resolution 0.75\n", "step 2\n", "got 11 at resolution 0.375\n", "step 3\n", "got 6 at resolution 0.1875\n", "step 4\n", "got 9 at resolution 0.28125\n", "step 5\n", "got 7 at resolution 0.234375\n", "step 6\n", "got 8 at resolution 0.2578125\n" ] } ], "source": [ "for file in files:\n", " file_split = file.split('_')\n", " method = file_split[1]\n", " dataset = file_split[2].split('.')[0]\n", " if(len(file_split)>3):\n", " method = method + '_' + '_'.join(file_split[3:]).split('.')[0]\n", " print(method)\n", "\n", " pandas2ri.activate()\n", " readRDS = robjects.r['readRDS']\n", " df_rds = readRDS(os.path.join(path_fm,file))\n", " fm_mat = pandas2ri.ri2py(robjects.r['data.frame'](robjects.r['as.matrix'](df_rds)))\n", " fm_mat.fillna(0,inplace=True)\n", " fm_mat.columns = metadata.index\n", " \n", " adata = sc.AnnData(fm_mat.T)\n", " adata.var_names_make_unique()\n", " adata.obs = metadata.loc[adata.obs.index,]\n", "# df_metrics.loc[method,] = \"\"\n", " #Louvain\n", " sc.pp.neighbors(adata, n_neighbors=15,use_rep='X')\n", "# sc.tl.louvain(adata)\n", " getNClusters(adata,n_cluster=num_clusters)\n", " #kmeans\n", " kmeans = KMeans(n_clusters=num_clusters, random_state=2019).fit(adata.X)\n", " adata.obs['kmeans'] = pd.Series(kmeans.labels_,index=adata.obs.index).astype('category')\n", " #hierachical clustering\n", " hc = AgglomerativeClustering(n_clusters=num_clusters).fit(adata.X)\n", " adata.obs['hc'] = pd.Series(hc.labels_,index=adata.obs.index).astype('category')\n", "# #clustering metrics\n", " \n", "# #adjusted rank index\n", "# ari_louvain = adjusted_rand_score(adata.obs['label'], adata.obs['louvain'])\n", "# ari_kmeans = adjusted_rand_score(adata.obs['label'], adata.obs['kmeans'])\n", "# ari_hc = adjusted_rand_score(adata.obs['label'], adata.obs['hc'])\n", "# #adjusted mutual information\n", "# ami_louvain = adjusted_mutual_info_score(adata.obs['label'], adata.obs['louvain'],average_method='arithmetic')\n", "# ami_kmeans = adjusted_mutual_info_score(adata.obs['label'], adata.obs['kmeans'],average_method='arithmetic') \n", "# ami_hc = adjusted_mutual_info_score(adata.obs['label'], adata.obs['hc'],average_method='arithmetic')\n", "# #homogeneity\n", "# homo_louvain = homogeneity_score(adata.obs['label'], adata.obs['louvain'])\n", "# homo_kmeans = homogeneity_score(adata.obs['label'], adata.obs['kmeans'])\n", "# homo_hc = homogeneity_score(adata.obs['label'], adata.obs['hc'])\n", "\n", "# df_metrics.loc[method,['ARI_Louvain','ARI_kmeans','ARI_HC']] = [ari_louvain,ari_kmeans,ari_hc]\n", "# df_metrics.loc[method,['AMI_Louvain','AMI_kmeans','AMI_HC']] = [ami_louvain,ami_kmeans,ami_hc]\n", "# df_metrics.loc[method,['Homogeneity_Louvain','Homogeneity_kmeans','Homogeneity_HC']] = [homo_louvain,homo_kmeans,homo_hc] \n", " adata.obs[['louvain','kmeans','hc']].to_csv(os.path.join(path_clusters ,method + '_clusters.tsv'),sep='\\t')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "def residual_average_gini_index(gene_scores,folder_clusters,\n", " housekeeping_genes,marker_genes,\n", " min_cells_per_cluster=10):\n", "\n", " #Subset from the main matrix the housekeeping genes and marker genes\n", " df_matrix_housekeeping=gene_scores.loc[gene_scores.index.intersection(housekeeping_genes),]\n", " df_matrix_marker=gene_scores.loc[gene_scores.index.intersection(marker_genes),]\n", " \n", " #Define a function to compute the Gini score\n", " def gini(list_of_values):\n", " sorted_list = sorted(list_of_values)\n", " height, area = 0, 0\n", " for value in sorted_list:\n", " height += value\n", " area += height - value / 2.\n", " fair_area = height * len(list_of_values) / 2.\n", " return (fair_area - area) / fair_area\n", " \n", " #Function to calculate Gini value for all the genes\n", " def calculate_gini(df_matrix, gene_name,clustering_info):\n", " return gini(get_avg_per_cluster(df_matrix,gene_name,clustering_info,use_log2=False))\n", "\n", " #Function to calculate Gini value for all the genes\n", " def calculate_gini_values(df_matrix,clustering_info):\n", " gini_values=[]\n", " for gene_name in df_matrix.index:\n", " gini_values.append(calculate_gini(df_matrix, gene_name,clustering_info))\n", " return gini_values\n", " \n", " #Write a function to compute delta difference of the average accessibility in Marker vs Housekeeping and Kolmogorov Smirnov test\n", " def score_clustering_solution(df_matrix_marker,df_matrix_housekeeping,clustering_info):\n", " gini_values_housekeeping=calculate_gini_values(df_matrix_housekeeping,clustering_info)\n", " gini_values_marker=calculate_gini_values(df_matrix_marker,clustering_info)\n", " statistic,p_value=stats.ks_2samp(gini_values_marker,gini_values_housekeeping)\n", " \n", " return np.mean(gini_values_marker), np.mean(gini_values_housekeeping),np.mean(gini_values_marker)-np.mean(gini_values_housekeeping), statistic,p_value\n", "\n", " #Function to compute the average accessibility value per cluster\n", " def get_avg_per_cluster(df_matrix, gene_name, clustering_info,use_log2=False):\n", " N_clusters=len(clustering_info.index.unique())\n", " avg_per_cluster=np.zeros(N_clusters)\n", " for idx,idx_cluster in enumerate(sorted(np.unique(clustering_info.index.unique()))):\n", " if use_log2:\n", " values_cluster=df_matrix.loc[gene_name,clustering_info.loc[idx_cluster,:].values.flatten()].apply(lambda x:np.log2(x+1))\n", " else:\n", " values_cluster=df_matrix.loc[gene_name,clustering_info.loc[idx_cluster,:].values.flatten()]\n", " \n", " avg_per_cluster[idx]=values_cluster.mean()\n", " if avg_per_cluster[idx]>0:\n", " avg_per_cluster[idx]=avg_per_cluster[idx]#/values_cluster.std()\n", "\n", " return avg_per_cluster\n", " \n", "\n", " #Run the method for all the clustering solutions\n", " \n", " df_metrics = pd.DataFrame(columns=['Method','Clustering','Gini_Marker_Genes','Gini_Housekeeping_Genes','Difference','KS_statistics','p-value']) \n", " \n", " for clusters_filename in os.listdir(folder_clusters): \n", " method = '_'.join(clusters_filename.split('_')[:-1])\n", " print(method)\n", " df_clusters = pd.read_csv(os.path.join(folder_clusters,clusters_filename),sep='\\t',index_col=0)\n", " for clustering_method in df_clusters.columns:\n", " clustering_info = pd.DataFrame(df_clusters[clustering_method])\n", " clustering_info['Barcode'] = clustering_info.index\n", " clustering_info=clustering_info.set_index(clustering_method)\n", " \n", " #REMOVE CLUSTERS WITH FEW CELLS\n", " cluster_sizes=pd.value_counts(clustering_info.index)\n", " clustering_info=clustering_info.loc[cluster_sizes[cluster_sizes>min_cells_per_cluster].index.values,:]\n", "\n", "\n", " mean_gini_marker,mean_gini_housekeeping,mean_gini_difference,statistics,p_value=score_clustering_solution(df_matrix_marker,df_matrix_housekeeping,clustering_info)\n", "\n", " df_metrics = df_metrics.append({'Method': method,'Clustering':clustering_method,\n", " 'Gini_Marker_Genes':mean_gini_marker,'Gini_Housekeeping_Genes':mean_gini_housekeeping,\n", " 'Difference':mean_gini_difference,'KS_statistics':statistics,'p-value':p_value},\n", " ignore_index=True) \n", " \n", " return df_metrics" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "gene_scores = pd.read_csv('./run_methods/GeneScoring/FM_GeneScoring_10xpbmc5k.tsv',\n", " sep = '\\t',index_col=0)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AAACGAAAGCGCAATG-1AAACGAAAGGGTATCG-1AAACGAAAGTAACATG-1AAACGAAAGTTACACC-1AAACGAACAGAGATGC-1AAACGAACATGCTATG-1AAACGAAGTGCATCAT-1AAACGAAGTGGACGAT-1AAACGAAGTGGCCTCA-1AAACGAATCAGTGTAC-1...TTTGGTTGTCAGAAAT-1TTTGGTTGTTGTATCG-1TTTGGTTTCAGTGGTT-1TTTGTGTAGGAAACTT-1TTTGTGTCAAGCCTTA-1TTTGTGTCACTCAAGT-1TTTGTGTCACTGGGCT-1TTTGTGTGTACGCAAG-1TTTGTGTGTCTGCGCA-1TTTGTGTTCAACTTGG-1
A1BG0.0862011.7756943.5916240.2125870.0408470.0043130.0021570.1765120.0005180.007131...0.0413913.5884901.8224000.0456785.3713660.0026752.4543771.9849451.7325471.730391
A1BG-AS10.4582010.4783511.3654540.9308580.0187630.0230400.0117130.9433150.0000970.038674...0.0111581.3459470.9221460.0422872.2558180.0120033.7027151.3938660.4444380.432918
A2LD10.0003630.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
A2M0.0000000.0549600.0006650.0000002.2058090.0000000.0000000.0000000.0000000.000000...0.0000000.0072162.2589260.0000000.0008190.0000000.0012280.0006650.0000000.000000
A2M-AS10.0000000.0512650.0006210.0000002.0575060.0000000.0000000.0000000.0000000.000000...0.0000000.0067312.1070520.0000000.0007640.0000000.0011460.0006210.0000000.000000
\n", "

5 rows × 5335 columns

\n", "
" ], "text/plain": [ " AAACGAAAGCGCAATG-1 AAACGAAAGGGTATCG-1 AAACGAAAGTAACATG-1 \\\n", "A1BG 0.086201 1.775694 3.591624 \n", "A1BG-AS1 0.458201 0.478351 1.365454 \n", "A2LD1 0.000363 0.000000 0.000000 \n", "A2M 0.000000 0.054960 0.000665 \n", "A2M-AS1 0.000000 0.051265 0.000621 \n", "\n", " AAACGAAAGTTACACC-1 AAACGAACAGAGATGC-1 AAACGAACATGCTATG-1 \\\n", "A1BG 0.212587 0.040847 0.004313 \n", "A1BG-AS1 0.930858 0.018763 0.023040 \n", "A2LD1 0.000000 0.000000 0.000000 \n", "A2M 0.000000 2.205809 0.000000 \n", "A2M-AS1 0.000000 2.057506 0.000000 \n", "\n", " AAACGAAGTGCATCAT-1 AAACGAAGTGGACGAT-1 AAACGAAGTGGCCTCA-1 \\\n", "A1BG 0.002157 0.176512 0.000518 \n", "A1BG-AS1 0.011713 0.943315 0.000097 \n", "A2LD1 0.000000 0.000000 0.000000 \n", "A2M 0.000000 0.000000 0.000000 \n", "A2M-AS1 0.000000 0.000000 0.000000 \n", "\n", " AAACGAATCAGTGTAC-1 ... TTTGGTTGTCAGAAAT-1 TTTGGTTGTTGTATCG-1 \\\n", "A1BG 0.007131 ... 0.041391 3.588490 \n", "A1BG-AS1 0.038674 ... 0.011158 1.345947 \n", "A2LD1 0.000000 ... 0.000000 0.000000 \n", "A2M 0.000000 ... 0.000000 0.007216 \n", "A2M-AS1 0.000000 ... 0.000000 0.006731 \n", "\n", " TTTGGTTTCAGTGGTT-1 TTTGTGTAGGAAACTT-1 TTTGTGTCAAGCCTTA-1 \\\n", "A1BG 1.822400 0.045678 5.371366 \n", "A1BG-AS1 0.922146 0.042287 2.255818 \n", "A2LD1 0.000000 0.000000 0.000000 \n", "A2M 2.258926 0.000000 0.000819 \n", "A2M-AS1 2.107052 0.000000 0.000764 \n", "\n", " TTTGTGTCACTCAAGT-1 TTTGTGTCACTGGGCT-1 TTTGTGTGTACGCAAG-1 \\\n", "A1BG 0.002675 2.454377 1.984945 \n", "A1BG-AS1 0.012003 3.702715 1.393866 \n", "A2LD1 0.000000 0.000000 0.000000 \n", "A2M 0.000000 0.001228 0.000665 \n", "A2M-AS1 0.000000 0.001146 0.000621 \n", "\n", " TTTGTGTGTCTGCGCA-1 TTTGTGTTCAACTTGG-1 \n", "A1BG 1.732547 1.730391 \n", "A1BG-AS1 0.444438 0.432918 \n", "A2LD1 0.000000 0.000000 \n", "A2M 0.000000 0.000000 \n", "A2M-AS1 0.000000 0.000000 \n", "\n", "[5 rows x 5335 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gene_scores.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "#https://www.tau.ac.il/~elieis/Housekeeping_genes.html List of Housekeeping genes\n", "housekeeping_genes=['ACTB','ALDOA','GAPDH','PGK1','LDHA','RPS27A','RPL19','RPL11','NONO','ARHGDIA','RPL32','RPS18','HSPCB',\n", " 'C1orf43','CHMP2A','EMC7','GPI','PSMB2,''PSMB4','RAB7A','REEP5','SNRPD3','VCP','VPS29']\n", "\n", "#List of Marker Genes\n", "marker_genes=['CD209', 'ENG', 'FOXP3', 'CD34', 'BATF3', 'S100A12', 'THBD','CD3D', 'THY1', 'CD8A', 'CD8B', 'CD14', 'PROM1', 'IL2RA', 'FCGR3A',\n", "'IL3RA', 'FCGR1A', 'CD19', 'IL7R', 'CD79A', 'MS4A1', 'NCAM1','CD3E', 'CD3G', 'KIT', 'CD1C', 'CD68', 'CD4']" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "folder_clusters = './output/clusters/'" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ChromVAR_motifs\n", "ChromVAR_kmers\n", "cisTopic\n", "SnapATAC\n", "BROCKMAN\n", "SCRAT_motifs\n", "Cusanovich2018\n", "Control\n", "GeneScoring\n", "Scasat\n", "scABC\n", "Cicero\n", "ChromVAR_kmers_pca\n", "ChromVAR_motifs_pca\n", "GeneScoring_pca\n", "Cicero_pca\n", "SCRAT_pca\n" ] } ], "source": [ "df_metrics = residual_average_gini_index(gene_scores,folder_clusters,\n", " housekeeping_genes,marker_genes,\n", " min_cells_per_cluster=10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "df_metrics.to_csv(path_metrics+'clustering_RAGI_scores.csv')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MethodClusteringGini_Marker_GenesGini_Housekeeping_GenesDifferenceKS_statisticsp-value
0ChromVAR_motifslouvain0.3297940.2029170.1268780.5357140.001101
1ChromVAR_motifskmeans0.3437450.2373990.1063450.4642860.007129
2ChromVAR_motifshc0.3811680.2900410.0911270.5000000.002897
3ChromVAR_kmerslouvain0.3091640.1788500.1303150.5357140.001101
4ChromVAR_kmerskmeans0.3400880.2631700.0769180.4880950.003940
5ChromVAR_kmershc0.3192020.2410890.0781130.5238100.001532
6cisTopiclouvain0.3691190.2546880.1144310.5714290.000392
7cisTopickmeans0.4173190.3160600.1012590.5000000.002897
8cisTopichc0.4685340.3624930.1060410.5000000.002897
9SnapATAClouvain0.3324780.1957980.1366800.4523810.009484
10SnapATACkmeans0.3615060.2178680.1436370.5714290.000392
11SnapATAChc0.3714380.2314290.1400090.5238100.001532
12BROCKMANlouvain0.3469990.2341250.1128740.4761900.005320
13BROCKMANkmeans0.3277890.2628200.0649690.3928570.035346
14BROCKMANhc0.3240610.2558780.0681830.4642860.007129
15SCRAT_motifslouvain0.3478780.2936450.0542330.4761900.005320
16SCRAT_motifskmeans0.2873550.2345070.0528480.3928570.035346
17SCRAT_motifshc0.3152290.2639670.0512620.4166670.021353
18Cusanovich2018louvain0.3888970.2809570.1079400.4642860.007129
19Cusanovich2018kmeans0.2540270.1773050.0767220.4642860.007129
20Cusanovich2018hc0.2254680.1411250.0843430.4642860.007129
21Controllouvain0.4439710.3588640.0851070.4642860.007129
22Controlkmeans0.4564800.4538630.0026170.2738100.280812
23Controlhc0.4717650.4674230.0043420.2380950.448886
24GeneScoringlouvain0.4601540.3894020.0707520.4285710.016413
25GeneScoringkmeans0.4501600.471894-0.0217340.2976190.197109
26GeneScoringhc0.5033480.5014730.0018750.2857140.236193
27Scasatlouvain0.4724450.3745990.0978460.5000000.002897
28Scasatkmeans0.4971290.4215120.0756170.4285710.016413
29Scasathc0.4776550.3990980.0785570.4642860.007129
30scABClouvain0.4616980.4414620.0202370.2976190.197109
31scABCkmeans0.4586730.4547370.0039370.2380950.448886
32scABChc0.4257210.3963300.0293910.3214290.134155
33Cicerolouvain0.3549930.2495620.1054310.5476190.000786
34Cicerokmeans0.0000000.0000000.0000000.0000001.000000
35Cicerohc0.2641510.2037670.0603830.3571430.071204
36ChromVAR_kmers_pcalouvain0.3522240.2321390.1200860.5119050.002114
37ChromVAR_kmers_pcakmeans0.2874730.2276810.0597920.4880950.003940
38ChromVAR_kmers_pcahc0.3220120.2468210.0751910.4642860.007129
39ChromVAR_motifs_pcalouvain0.3336970.2051990.1284980.5357140.001101
40ChromVAR_motifs_pcakmeans0.3964330.3011020.0953310.4285710.016413
41ChromVAR_motifs_pcahc0.3974840.3017300.0957550.4404760.012522
42GeneScoring_pcalouvain0.5176970.5032020.0144950.2619050.331171
43GeneScoring_pcakmeans0.4882940.499929-0.0116350.3333330.109433
44GeneScoring_pcahc0.4693330.473566-0.0042330.2857140.236193
45Cicero_pcalouvain0.4702270.3560270.1142000.4880950.003940
46Cicero_pcakmeans0.2622210.1976410.0645800.3571430.071204
47Cicero_pcahc0.2504960.1720510.0784460.4642860.007129
48SCRAT_pcalouvain0.3920710.3202360.0718350.4642860.007129
49SCRAT_pcakmeans0.2875310.2341640.0533670.3928570.035346
50SCRAT_pcahc0.2781230.2328570.0452660.4047620.027574
\n", "
" ], "text/plain": [ " Method Clustering Gini_Marker_Genes \\\n", "0 ChromVAR_motifs louvain 0.329794 \n", "1 ChromVAR_motifs kmeans 0.343745 \n", "2 ChromVAR_motifs hc 0.381168 \n", "3 ChromVAR_kmers louvain 0.309164 \n", "4 ChromVAR_kmers kmeans 0.340088 \n", "5 ChromVAR_kmers hc 0.319202 \n", "6 cisTopic louvain 0.369119 \n", "7 cisTopic kmeans 0.417319 \n", "8 cisTopic hc 0.468534 \n", "9 SnapATAC louvain 0.332478 \n", "10 SnapATAC kmeans 0.361506 \n", "11 SnapATAC hc 0.371438 \n", "12 BROCKMAN louvain 0.346999 \n", "13 BROCKMAN kmeans 0.327789 \n", "14 BROCKMAN hc 0.324061 \n", "15 SCRAT_motifs louvain 0.347878 \n", "16 SCRAT_motifs kmeans 0.287355 \n", "17 SCRAT_motifs hc 0.315229 \n", "18 Cusanovich2018 louvain 0.388897 \n", "19 Cusanovich2018 kmeans 0.254027 \n", "20 Cusanovich2018 hc 0.225468 \n", "21 Control louvain 0.443971 \n", "22 Control kmeans 0.456480 \n", "23 Control hc 0.471765 \n", "24 GeneScoring louvain 0.460154 \n", "25 GeneScoring kmeans 0.450160 \n", "26 GeneScoring hc 0.503348 \n", "27 Scasat louvain 0.472445 \n", "28 Scasat kmeans 0.497129 \n", "29 Scasat hc 0.477655 \n", "30 scABC louvain 0.461698 \n", "31 scABC kmeans 0.458673 \n", "32 scABC hc 0.425721 \n", "33 Cicero louvain 0.354993 \n", "34 Cicero kmeans 0.000000 \n", "35 Cicero hc 0.264151 \n", "36 ChromVAR_kmers_pca louvain 0.352224 \n", "37 ChromVAR_kmers_pca kmeans 0.287473 \n", "38 ChromVAR_kmers_pca hc 0.322012 \n", "39 ChromVAR_motifs_pca louvain 0.333697 \n", "40 ChromVAR_motifs_pca kmeans 0.396433 \n", "41 ChromVAR_motifs_pca hc 0.397484 \n", "42 GeneScoring_pca louvain 0.517697 \n", "43 GeneScoring_pca kmeans 0.488294 \n", "44 GeneScoring_pca hc 0.469333 \n", "45 Cicero_pca louvain 0.470227 \n", "46 Cicero_pca kmeans 0.262221 \n", "47 Cicero_pca hc 0.250496 \n", "48 SCRAT_pca louvain 0.392071 \n", "49 SCRAT_pca kmeans 0.287531 \n", "50 SCRAT_pca hc 0.278123 \n", "\n", " Gini_Housekeeping_Genes Difference KS_statistics p-value \n", "0 0.202917 0.126878 0.535714 0.001101 \n", "1 0.237399 0.106345 0.464286 0.007129 \n", "2 0.290041 0.091127 0.500000 0.002897 \n", "3 0.178850 0.130315 0.535714 0.001101 \n", "4 0.263170 0.076918 0.488095 0.003940 \n", "5 0.241089 0.078113 0.523810 0.001532 \n", "6 0.254688 0.114431 0.571429 0.000392 \n", "7 0.316060 0.101259 0.500000 0.002897 \n", "8 0.362493 0.106041 0.500000 0.002897 \n", "9 0.195798 0.136680 0.452381 0.009484 \n", "10 0.217868 0.143637 0.571429 0.000392 \n", "11 0.231429 0.140009 0.523810 0.001532 \n", "12 0.234125 0.112874 0.476190 0.005320 \n", "13 0.262820 0.064969 0.392857 0.035346 \n", "14 0.255878 0.068183 0.464286 0.007129 \n", "15 0.293645 0.054233 0.476190 0.005320 \n", "16 0.234507 0.052848 0.392857 0.035346 \n", "17 0.263967 0.051262 0.416667 0.021353 \n", "18 0.280957 0.107940 0.464286 0.007129 \n", "19 0.177305 0.076722 0.464286 0.007129 \n", "20 0.141125 0.084343 0.464286 0.007129 \n", "21 0.358864 0.085107 0.464286 0.007129 \n", "22 0.453863 0.002617 0.273810 0.280812 \n", "23 0.467423 0.004342 0.238095 0.448886 \n", "24 0.389402 0.070752 0.428571 0.016413 \n", "25 0.471894 -0.021734 0.297619 0.197109 \n", "26 0.501473 0.001875 0.285714 0.236193 \n", "27 0.374599 0.097846 0.500000 0.002897 \n", "28 0.421512 0.075617 0.428571 0.016413 \n", "29 0.399098 0.078557 0.464286 0.007129 \n", "30 0.441462 0.020237 0.297619 0.197109 \n", "31 0.454737 0.003937 0.238095 0.448886 \n", "32 0.396330 0.029391 0.321429 0.134155 \n", "33 0.249562 0.105431 0.547619 0.000786 \n", "34 0.000000 0.000000 0.000000 1.000000 \n", "35 0.203767 0.060383 0.357143 0.071204 \n", "36 0.232139 0.120086 0.511905 0.002114 \n", "37 0.227681 0.059792 0.488095 0.003940 \n", "38 0.246821 0.075191 0.464286 0.007129 \n", "39 0.205199 0.128498 0.535714 0.001101 \n", "40 0.301102 0.095331 0.428571 0.016413 \n", "41 0.301730 0.095755 0.440476 0.012522 \n", "42 0.503202 0.014495 0.261905 0.331171 \n", "43 0.499929 -0.011635 0.333333 0.109433 \n", "44 0.473566 -0.004233 0.285714 0.236193 \n", "45 0.356027 0.114200 0.488095 0.003940 \n", "46 0.197641 0.064580 0.357143 0.071204 \n", "47 0.172051 0.078446 0.464286 0.007129 \n", "48 0.320236 0.071835 0.464286 0.007129 \n", "49 0.234164 0.053367 0.392857 0.035346 \n", "50 0.232857 0.045266 0.404762 0.027574 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_metrics" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:ATACseq_clustering]", "language": "python", "name": "conda-env-ATACseq_clustering-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }