In [1]:
import pandas as pd
import numpy as np
import scanpy as sc
import os
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import homogeneity_score
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri

In [2]:
# Load the tab-separated annotation table; its first column becomes the index.
metadata = pd.read_csv('../input/metadata.tsv', sep='\t', index_col=0)

# Count the distinct values found in the 'label' column.
num_clusters = np.unique(metadata['label']).size
print(num_clusters)

10


In [3]:
# Empty score table with one column per clustering metric; the rows
# (one per method) are filled in by the scoring loop further down.
metric_columns = ['ARI', 'AMI', 'Homogeneity']
df_metrics = pd.DataFrame(columns=metric_columns)

In [4]:
# Start from a column-less frame that carries only the metadata index, so
# every solution merged in below is matched against those row labels.
df_clusters = pd.DataFrame(index=metadata.index)

# Collect every "clustering*.tsv" file found under the current directory.
solution_paths = [
    os.path.join(folder, fname)
    for folder, _subdirs, fnames in os.walk("./")
    for fname in fnames
    if fname.startswith("clustering") and fname.endswith(".tsv")
]

for solution_path in solution_paths:
    print(solution_path)
    df = pd.read_csv(solution_path, sep='\t', index_col=0)
    # Index-on-index merge using pandas' default join type (inner): rows
    # missing from either side are dropped from the result.
    df_clusters = pd.merge(df_clusters, df, left_index=True, right_index=True)

./SnapATAC/clusteringSolution.tsv
./Cusanovich2018/clusteringSolution.tsv
./scABC/clusteringSolution.tsv
./cisTopic/clusteringSolution.tsv
./Scasat/clusteringSolution.tsv
./Cicero/clusteringSolution.tsv


In [5]:
# Preview the first five rows of the merged cluster assignments.
df_clusters.head(n=5)

Unnamed: 0,SnapATAC,cusanovich2018,scABC,cisTopic,Scasat,Cicero
BM1077-CLP-Frozen-160106-13,4,6,6,1,1,4
BM1077-CLP-Frozen-160106-14,4,6,6,1,1,9
BM1077-CLP-Frozen-160106-2,4,6,6,1,1,3
BM1077-CLP-Frozen-160106-21,4,6,6,1,1,4
BM1077-CLP-Frozen-160106-27,1,6,9,2,1,4


In [6]:
# Ground-truth labels, matched row-for-row to the clustering solutions.
# df_clusters is built with inner merges, so it may hold fewer rows than
# metadata; in that case re-select the labels by df_clusters' index so both
# inputs to the scorers always have the same length. When the two indexes
# already match, the labels are used unchanged.
true_labels = metadata['label']
if not true_labels.index.equals(df_clusters.index):
    true_labels = true_labels.loc[df_clusters.index]

# Score each method's cluster assignment against the ground truth and store
# the three scores in the row of df_metrics named after the method.
for method in df_clusters.columns:
    print(method)
    predicted = df_clusters[method]

    # adjusted Rand index
    ari = adjusted_rand_score(true_labels, predicted)

    # adjusted mutual information (normalized with the arithmetic mean)
    ami = adjusted_mutual_info_score(true_labels, predicted, average_method='arithmetic')

    # homogeneity
    homo = homogeneity_score(true_labels, predicted)

    # .loc with a new row label appends the row; re-running the cell simply
    # overwrites the existing values.
    df_metrics.loc[method, 'ARI'] = ari
    df_metrics.loc[method, 'AMI'] = ami
    df_metrics.loc[method, 'Homogeneity'] = homo

SnapATAC
cusanovich2018
scABC
cisTopic
Scasat
Cicero


In [7]:
# Display the score table: one row per method, columns ARI, AMI and Homogeneity.
df_metrics

Unnamed: 0,ARI,AMI,Homogeneity
SnapATAC,0.323942,0.587034,0.559376
cusanovich2018,0.48362,0.662329,0.68703
scABC,0.270214,0.464873,0.446248
cisTopic,0.51701,0.661236,0.682697
Scasat,0.111576,0.324815,0.328444
Cicero,0.22272,0.349726,0.352056


In [8]:
# Write the score table to CSV; the index (method names) is written as the
# first column.
scores_path = './clustering_scores.csv'
df_metrics.to_csv(scores_path)