######################################################################################## # name: t-SNE plot # author: Marc Bickle # category: dimensionality reduction # preview: t-SNE (TSNE) converts affinities of data points to probabilities. The affinities in the original space are represented by Gaussian joint probabilities and the affinities in the embedded space are represented by Student’s t-distributions. This allows t-SNE to be particularly sensitive to local structure and has a few other advantages over existing techniques: Revealing the structure at many scales on a single map Revealing data that lie in multiple, different, manifolds or clusters Reducing the tendency to crowd points together at the center online help: http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html http://scikit-learn.org/stable/modules/manifold.html#t-sne https://distill.pub/2016/misread-tsne/ Inputs: 1) Features of interest: choose parameters to test 2) Parametrise t-SNE algorithm (for tips see:https://distill.pub/2016/misread-tsne/) ######

t-SNE (TSNE) converts affinities of data points to probabilities. Parametrization: http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html # 1) Define the group column groupingColumns = # 2) Define what numerical values to use features = [] # 3) Parametrise the t-SNE algorithm comp = int() per = int() earl = int() ler = int() iter = int() iter_n = int() Distance = ; meth = ######################################################################################## # name: Venn Diagram # author: Marc Bickle # category: relations # preview: venn.png Creates a Venn Diagram of 2 or 3 sets. Accepts at most 3 columns with the option to draw the circles approximately proportional to the size of the data. python script from Chris DeBoever, http://nbviewer.ipython.org/github/ucsd-scientific-python/user-group/blob/master/presentations/20131016/hierarchical_clustering_heatmaps_gridspec.ipynb ######

$$$TEMPLATE_DESC$$$ # b) Define what values to use features = [] Title = prop ="" ######################################################################################## # name: Stream plot # author: Felix Meyenhofer (python script from Anand Patil, Stream plot method by Lee Byron) # category: experimental # preview: stream-plot.png Creates a stream plot (stacked histogram) from several input parameters. Inputs: 1) Features of interest: choose parameters to form the stream plot (stacked histogram) 2) Number of bins: binning of the data 3) Color chooser: method to pick the colors for the different parameters ######

$$$TEMPLATE_DESC$$$ # 1. Parameter selection # 1) select the readouts that span the phenotypic space keys = Nbins = int() colorSequence = # Script 0)[0][0] for stream in streams] order = np.argsort(onset_times) streams = np.asarray(streams) sorted_streams = streams[order] t = np.arange(streams.shape[1]) # Establish bounds stream_bounds = [np.vstack((np.zeros(streams.shape[1]), sorted_streams[0])), np.vstack((-sorted_streams[1], (np.zeros(streams.shape[1]))))] side = -1 for stream in sorted_streams[2:]: side *= -1 if side==1: stream_bounds.append(np.vstack((stream_bounds[-2][1], stream_bounds[-2][1]+stream))) else: stream_bounds.append(np.vstack((stream_bounds[-2][0]-stream, stream_bounds[-2][0]))) stream_bounds = np.array(stream_bounds) # Compute baseline baseline = baseline_fn(sorted_streams, stream_bounds) # Choose colors t_poly = np.hstack((t,t[::-1])) if color_seq=='linear': colors = np.linspace(0,1,streams.shape[1]) elif color_seq=='random': colors = np.random.random(size=streams.shape[1]) else: raise ValueError, 'Color sequence %s unrecognized'%color_seq # Plot pl.axis('off') for i in xrange(len(stream_bounds)): bound = stream_bounds[i] color = cmap(colors[i]) pl.fill(t_poly, np.hstack((bound[0]-baseline,(bound[1]-baseline)[::-1])), facecolor=color, linewidth=0.,edgecolor='none') # Demo if __name__ == '__main__': v = [] for key in keys: v.extend(kIn[key]) mi = min(v) ma = max(v) dsets = [] for key in keys: c,s,trash = pl.hist(kIn[key], Nbins, range=(mi, ma)) dsets.append(c) pl.clf() stacked_graph(dsets, baseline_fn=symmetric, color_seq=colorSequence) pl.show() ]]> ######################################################################################## # name: estimate gaussian intersection # author: Felix Meyenhofer # category: univariate # preview: gaussian-intersection.png compute the histograms of random variables, fit a gaussian and compute the intersection points. This version can only deal with TWO groups. 
Make sure that exactly two groups, no more and no fewer, are present in the "Grouping column". Inputs: 1) Features of interest: choose parameters to form the stream plot (stacked histogram) 2) Number of bins: binning of the data 3) Color chooser: method to pick the colors for the different parameters ######

$$$TEMPLATE_DESC$$$ # a) Define your treatments of interest strColName = # b) Define what numerical values to use numColName = # c) number of bins. nBins = int() intersection[0]: o1 = integrate.quad(g1, mue1-sig15, intersection[0]) else: o1 = integrate.quad(g1, intersection[0], mue1+sig15) sig25 = sig2*5 i2 = integrate.quad(g2, mue1-sig25, mue1+sig25) if mue2 > intersection[0]: o2 = integrate.quad(g2, mue2-sig25, intersection[0]) else: o2 = integrate.quad(g2, intersection[0], mue2+sig15) overlap = (o1[0] + o2[0]) / (i1[0] + i2[0]) # plot the fitted curves and the intersection. sigm = np.max([sig1, sig2])*3 muemi = np.min([mue1, mue2]) muema = np.max([mue1, mue2]) x = np.linspace(muemi-sigm, muema+sigm, 100) pyplot.plot(x, g1(x), 'b', x, g2(x), 'g', intersection[0], intersection[1], 'r.', markersize=10) pyplot.title("Relative overlap = %s" % overlap) # Main vec = kIn[numColName] cat = kIn[strColName] sets = {} for index in range(0,len(vec)-1): if sets.has_key(cat[index]): sets[cat[index]].append(vec[index]) else: sets[cat[index]] = [vec[index]] keys = sets.keys() numSets = len(keys) if numSets == 2: data = findIntersection(sets[keys[0]], sets[keys[1]]) else: raise Exception ]]> ######################################################################################## # name: 2D scatter plot # author: Felix Meyenhofer # category: scatter-plots # preview: scatterplot_2d.png Make a 2D scatter plot from 2 variables. Parameters: grouping column: column associating each measurement to a particular group. By default this attribute is empty -> all measurements belong to the same group. x values: values displayed on the x-axis y values: values displayed on the y-axis ######

$$$TEMPLATE_DESC$$$ # a) Grouping column gColName = # b) Define what numerical values to use xColName = yColName = # c) Title. title = # d) Legend control show_legend = "" ######################################################################################## # name: 3D scatter plot # author: Felix Meyenhofer # category: scatter-plots # preview: scatterplot_3d.png Make a 3D scatter plot from 3 variables. Parameters: grouping column: column associating each measurement to a particular group. By default this attribute is empty -> all measurements belong to the same group. x values: values displayed on the x-axis y values: values displayed on the y-axis z values: values displayed on the z-axis ######

$$$TEMPLATE_DESC$$$ # a) Grouping column gColName = # b) Define what numerical values to use xColName = yColName = zColName = # c) Title. title = # d) Legend control show_legend = "" ######################################################################################## # name: 3D PCA scatter plot # author: Felix Meyenhofer # category: scatter-plots # preview: pca_scatterplot_3d.png Take any number of numerical features, project them into an orthogonal space using principal component analysis (PCA) and use the first three components to create a 3D scatter plot. Parameters: features of interest: The features included in the PCA grouping column: column associating each measurement to a particular group. By default this attribute is empty -> all measurements belong to the same group. ######

$$$TEMPLATE_DESC$$$ # a) Grouping column gColName = # b) Define what numerical values to use features = # c) Title. title = # d) Legend control show_legend = "" ############################################################ # name: Violin Plot # author: Felix Meyenhofer # category: distributions # preview: pandas-violinplot.png Creates a violin plot. Note: the Seaborn module has to be installed. ######

$$$TEMPLATE_DESC$$$ # a) Define grouping grp = sec = # b) Define what numerical values to use amp = # c) Graph title tit = # d) Orientation ori = ; # e) Inner glyphs gly =