import numpy as np import pandas as pd from sklearn.decomposition import PCA from scipy.spatial.distance import pdist from scipy.spatial.distance import squareform import umap import time def run_pca(data,n_components=30,random_state =1): today= time.ctime() pca = PCA(n_components=n_components, random_state=random_state) pca_data= pca.fit_transform(data) print('process start',today) return pca_data def run_umap(pca_data, n_neighbors=15, n_components=2, metric='euclidean', metric_kwds=None, output_metric='euclidean', output_metric_kwds=None, n_epochs=None, learning_rate=1.0, init='spectral', min_dist=0.1, spread=1.0, low_memory=True, n_jobs=-1, set_op_mix_ratio=1.0, local_connectivity=1.0, repulsion_strength=1.0, negative_sample_rate=5, transform_queue_size=4.0, a=None, b=None, random_state=None, angular_rp_forest=False, target_n_neighbors=-1, target_metric='categorical', target_metric_kwds=None, target_weight=0.5, transform_seed=42, transform_mode='embedding', force_approximation_algorithm=False, verbose=False, unique=False, densmap=False, dens_lambda=2.0, dens_frac=0.3, dens_var_shift=0.1, output_dens=False, disconnection_distance=None): fit = umap.UMAP(n_neighbors, n_components, metric, metric_kwds, output_metric, output_metric_kwds, n_epochs, learning_rate, init, min_dist, spread, low_memory, n_jobs, set_op_mix_ratio, local_connectivity, repulsion_strength, negative_sample_rate, transform_queue_size, a, b, random_state, angular_rp_forest, target_n_neighbors, target_metric, target_metric_kwds, target_weight, transform_seed, transform_mode, force_approximation_algorithm, verbose, unique, densmap, dens_lambda, dens_frac, dens_var_shift, output_dens, disconnection_distance) dim_reduction = fit.fit_transform(pca_data,y=None) return dim_reduction def mbPHENIX(data, umap_data,t=10,decay=15,metric='euclidean',knn=10): print('calculando distancias') distance_matrix =pdist(umap_data, metric) distance_matrix = (squareform(distance_matrix)) D = distance_matrix print(distance_matrix.shape) print('done distance matrix') print('knn') print(' knn done') print(' afinity matrix') n,m =D.shape E = np.zeros((m,m)) knn_dst = np.sort(distance_matrix, axis=1) epsilon = knn_dst[:,knn] pdx_scale = (distance_matrix / epsilon).T E = np.exp(-1 * ( pdx_scale ** decay)) print(' afinity matrix done') A = (E + E.T) diff_deg = np.diag(np.sum(A,0)) diff_op = np.dot(np.diag(np.diag(diff_deg)**(-1)),A) new_matrix = np.linalg.matrix_power(diff_op, t) data_new = np.array(np.dot(new_matrix,data)) print('imputation') Matix_col_genes_row_cell2 = (data +1) - data Matix_col_genes_row_cell2 = Matix_col_genes_row_cell2 - Matix_col_genes_row_cell2 Matix_impu = Matix_col_genes_row_cell2 + data_new sc_PHENIX = Matix_impu print('done') end= time.ctime() print('done',end) return (sc_PHENIX)