########################################################################################
# name: Calculate vector combinations
# author: Martin Stoeter
# category: calculators
Calculates all combinations of unique values of selected column. Useful for generating a table for iteration of XY-plot (e.g. A,B,C will be A vs B, A vs C, B vs C)
- unique_combinations: see above
- all_combinations: also A vs A, B vs B, C vs C
- combiSize: A, B, C, D; combiSize = 3 will be ABC, ABD, ACD, BCD (maximum of combiSize = 4 for all_combinations)
- pairwise_with_second_column: all combinations of unique values of two columns (e.g. A, B, C and D, E, F will be A vs D, A vs E, ..., C vs F)
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select the column for combination
combiColumn =
combiSize =
# 2) select type of combination
combiType =
combiColumn2 =
length(uniqueCombiColumn))) combiSize <- length(uniqueCombiColumn) #prevent too many combinations
#2.3. calculate
if ((combiType == "unique_combinations")) {
rOut <- as.data.frame(t(combn(uniqueCombiColumn, combiSize)))
names(rOut) <- paste(combiColumn, names(rOut), sep="_")
}
if ((combiType == "all_combinations")) {
if (combiSize == 1) rOut <- expand.grid(uniqueCombiColumn)
if (combiSize == 2) rOut <- expand.grid(uniqueCombiColumn, uniqueCombiColumn)
if (combiSize == 3) rOut <- expand.grid(uniqueCombiColumn, uniqueCombiColumn, uniqueCombiColumn)
if (combiSize >= 4) rOut <- expand.grid(uniqueCombiColumn, uniqueCombiColumn, uniqueCombiColumn, uniqueCombiColumn)
names(rOut) <- paste(combiColumn, names(rOut), sep="_")
}
if ((combiType == "pairwise_with_second_column")) {
rOut <- expand.grid(uniqueCombiColumn, unique(kIn[, combiColumn2]))
names(rOut) <- c(combiColumn, combiColumn2)
}
]]>
########################################################################################
# name: Robust linear scaling (Percentile normalization)
# author: Marc Bickle
# category: normalization
Normalizes using a projection of the cell based data between 0 and 1 based on the 1st percentile and the 99th percentile [1]
[1] http://www.ncbi.nlm.nih.gov/books/NBK126174/#hcsimage.4_Normalization_of_HCS_data
######
$$$TEMPLATE_DESC$$$
# Parameter selection
selReadouts = c()
Group = ;
Treatment = ;
########################################################################################
# name: Levene's test of equal variance
# author: Marc Bickle
# category: statistical hypothesis tests
Performs the Levene's test of homoscedasticity for two groups of a population.
1) define the parameters to be tested
2) define the population column with the groups
3) choose how to compute the center of each population. Median is more robust.
######
Performs Levene's test of equal variance.
# Parameter selection
selReadouts = c()
Group = ;
Centering = ;
########################################################################################
# name: KS-test OR unpaired Wilcoxon rank test (nonparametric tests)
# author: Marc Bickle, Antje Janosch
# category: statistical hypothesis tests
Calculates the likelihood that two non-normal populations are drawn from the same distribution.
Choose your readouts of interest and the column providing group information. The test will be performed for each readout and each combination of possible values of the group column.
Choose whether you want to do a
Kolmogorov-Smirnov test or a unpaired Wilcoxon rank test (Mann-Whitney U test).
######
Calculates the likelihood that two non-normal populations are drawn from the same distribution.
Choose your readouts of interest and the column providing group information. The test will be performed for each readout and each combination of possible values of the group column.
Choose whether you want to do a
Kolmogorov-Smirnov test or a unpaired Wilcoxon rank test (Mann-Whitney U test).
# Parameter selection
test = ;
selReadouts = c()
group = ;
########################################################################################
# name: Get gene info from NCBI
# author: Martin Stoeter
# category: bioinformatics
# preview:
Gets the gene name, symbol, NewlocusID, CurrentRecord, LastUpdate, locusID, and species from NCBI data base
- define column with gene IDs (all unique gene IDs will be queried)
- define batch size for query (larger queries often give time-out error)
Info from 'NCBI2R' package:
Warning:
These functions use NCBI's eutils, and come with the same user requirements - if performing many queries,
you must run the scripts during certain hours when the NCBI servers are not in high demand.
Please see the package website for more details: http://NCBI2R.wordpress.com
Violation of the terms described there, and the terms on the eutils website may result in losing access
to NCBI for your group.
Output: table from NCBI
NOTE: requires R-library NCBI2R
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select the column for gene IDs
geneIDcolumn =
# 2) define batch size
batchSize =
#######################################################################################
# name: Calculate percent responders (2:1, for ArrayScan cell data)
# author: Martin Stoeter
# category: calculators
# preview:
Calculates percent "HIGH" and "LOW" similarly as ArrayScan BioApplications. Using an upper and lower threshold value objects/cells will be classified as above or below these thresholds and percentages of all objects within a well will be calculated. This will be done for all wells and all parameters (use loop for multiple plates).
THIS SNIPPET EXPECTS TWO TABLES: OBJECT/CELL DATA and A TABLE WITH THRESHOLDS PER PARAMETER:
Top input (object/cell data):
- select columns for row and column information
Bottom input (parameter-threshold table):
- select columns that contain parameter names
- select columns that contain upper and lower thresholds
Hint: generate bottom table by aggregation of reference data (e.g. negative control) as mean/median and sd of each parameter, use math node to calculate upper and lower threshold from these values (e.g. mean +/- 2xsd).
ATTENTION: NEEDS 2:1 SNIPPET!!!
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1.1 select row and column data
rowName = ;
colName = ;
# 1.2 select parameter column
parameterColumn = ;
# 1.3 select threshold columns
thrHigh = ;
thrLow = ;
kIn2[which(kIn2[,parameterColumn] == currentParameter),thrHigh])) / length(na.omit(subdataCol[,currentParameter])) * 100
wellList[[wellID]] <- data.frame("parameter" = currentParameter, "plateRow" = row, "plateColumn" = col, "percentHigh" = percentHigh, "percentLow" = percentLow)
wellID <- wellID +1
}
}
}
rOut <- data.frame(do.call("rbind", wellList))
]]>
########################################################################################
# name: Median and MAD
# author: Marc Bickle
# category: calculators
Calculates the median and the median absolute deviation (MAD) of the specified parameters grouped by the specified metadata column.
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
selReadouts = c()
Group = c()
########################################################################################
# name: R math / absolute values
# author: Martin Stoeter
# category: calculators
Calculates absolute values of selected columns. Useful for turning a correlation matrix (e.g. Linear Correlation) containing correlation and anti-correlation into a matrix containing absolute correlation values (anti-correlation = correlation)
- select columns to be returned as absolute values. Not selected columns will be returned as they are.
- select if values should be returned as 1-absolute(value), useful for hierarchical clustering, where highly correlating parameters should show low distance (therefore cluster together).
Available formulars: a) abs(x), b) 1-abs(x)
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select the columns
parameters = match(c(), names(kIn));
# 2) select math
mathForm = c()
########################################################################################
# name: Unique values
# author: Antje Niederlein
# category: calculators
Returns unique values for all selected columns independently. If they differ in length, missing values will be added.
######
$$$TEMPLATE_DESC$$$
# 1. Parameter selection
# 1) select the columns
colSelection = match(c(), names(kIn));
########################################################################################
# name: Mahalanobis Distance Calculator
# author: Antje Janosch
# category: distance measures
Calculates the squared mahalonobis distances (MD) to the mean of a reference population (typically your negative controls).
Option 'Use inverted': If 'yes', the inverted covariance matrix will be used. If 'auto', the script will make a choice based on the determinant of the covariance matrix.
Option 'Outlier p-value': The given p-value is used to calculate a MD threshold. Data rows with a MD above this threshold can be considered as outliers.
ATTENTION:
This snippet expects a column named 'treatment' with nominal domain values! If it's available, a reference population can be chosen. It will be used if 'Reference population column' is set to -USE SELECTED TREATMENTS- (default).
Alternatively, the message 'Error: Could not find atrribute: treatment' can be ignored. A reference population column can be selected and the name of the reference population can be given as text.
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select the readouts that span the phenotypic space
parameters = c()
# 2) calculate mean and covariance of the negative controls
treatment =
p_thresh = as.numeric()
useInverted =
######################################################################################
# name: Power transformation and Z rank transformation
# author: Martin Stoeter, Caroline Reiche
# category: distributions
# preview:
This rank-preserving data transformation is a pre-processing technique to stabilize the variance and to make the data more normal distribution-like. The Z rank transformation (project_to_Gaussian) projects the data to a random normal distribution. For each group and each parameter the values are transformed independently.
- select type of normalization
- select parameters that should be rank normalized
- select a group column that should be used for ranking (e.g. barcode, date, project name,...)
If subset data is chosen as reference, only this subset data is used to calculate the lambda for the transformation (either a) or b); not possible in Z rank!).
a) select a column and give name of subset data used as reference
b) select subset of column 'treatment' to be used as reference (needs a column with name 'treatment')
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1.1 select type of transformation
transformation = ;
# 1.2 select the columns
selectedParams = match(c(), names(kIn));
# 1.3 select group
group =
# 1.4 select subset
subsetOption =
subsetColumn =
subsetTreatment =
2) {
# apply the trafo
switch(which(transformation == c('Box-Cox','Yeo-Johnson','basic')), {
powers <- c()
for (k in 1:ncol(dataMatrix)) {
powers[k] <- powerTransform(dataMatrix[,k])$lambda
}
data[which(data[,group] == allGroups[i]), selectedParams] <- bcPower(data[which(data[,group] == allGroups[i]), selectedParams], powers) #Box-Cox
transformedNames <- paste(names(data)[selectedParams], '.bcPower', sep = "")}, {
powers <- c()
for (k in 1:ncol(dataMatrix)) {
powers[k] <- powerTransform(dataMatrix[,k],family="yjPower")$lambda
}
data[which(data[,group] == allGroups[i]), selectedParams] <- yjPower(data[which(data[,group] == allGroups[i]), selectedParams], powers) #Yeo-Johnson
transformedNames <- paste(names(data)[selectedParams], '.yjPower', sep = "")}, {
powers <- c()
for (k in 1:ncol(dataMatrix)) {
powers[k] <- powerTransform(dataMatrix[,k])$lambda
}
data[which(data[,group] == allGroups[i]), selectedParams] <- basicPower(data[which(data[,group] == allGroups[i]), selectedParams], powers) #basic power transformation
transformedNames <- paste(names(data)[selectedParams], '.power', sep = "")})
} else {
data[which(data[,group] == allGroups[i]), selectedParams] <- NA
}
}
}
#rename transformed data columns
#switch(which(transformation == c('Box-Cox','Yeo-Johnson','basic','project_to_Gaussian')), {
# transformedNames <- paste(names(data)[selectedParams], '.bcPower', sep = "")}, { #Box-Cox#
# transformedNames <- paste(names(data)[selectedParams], '.yjPower', sep = "")}, { #Yeo-Johnson
# transformedNames <- paste(names(data)[selectedParams], '.power', sep = "")}, { #basic power transformation
# transformedNames <- paste(names(data)[selectedParams], '.zrank', sep = "")} #project_to_Gaussian
# )
# bind both dataframes together
newColNames <- c(names(kIn), transformedNames)
rOut <- cbind(kIn, data[,selectedParams])
names(rOut) <- newColNames
]]>
########################################################################################
# name: Ranking Multiparametric Profiles (for Normalization Carpenter)
# author: Marc Bickle
# category: distributions
Ranks the values of a parameter in a multi parametric profile within a dataset. Best practice is to use normalised data and rank over the entire data set.
######
$$$TEMPLATE_DESC$$$
#1. Parameter selection
selReadouts = c()
Dataset = c();
########################################################################################
# name: Shapiro-Wilk Normality Test (QQ-Plot)
# author: Holger Brandl, Martin Stoeter
# category: distributions
# preview: qq-plot.png
Shapiro-Wilk Normality Test calculates statistics to judge the distribution of data (with respect to normal distribution).
The QQ-Plot visualizes the data in respect to a normal distribution.
- to use a flow variable check box and type in variable name using the format: FLOWVAR(variable name)
- use the R-plot template "QQ-Plot grid" to do the QQ-plots
######
$$$TEMPLATE_DESC$$$
#1. Parameter selection
#Select the parameter of interest
selReadouts = c()
5000) {
data <- sample(data, 5000)
}
# do the test and add the p-value to the plot
result <- shapiro.test(data)
allResults[[i]] <- data.frame(parameter = selReadouts[i], "p-value" = result$p.value, "log10(p-value)" = log10(result$p.value), "p-value.text" = format(result$p.value, scientific = TRUE, digits = 4), W = result$statistic, n = length(data))
}
rOut <- data.frame(do.call("rbind", allResults))
]]>
########################################################################################
# name: Normalization Carpenter
# author: Marc Bickle
# category: normalization
Normalizes using a projection of the cell based data between 0 and 1 based on the 1st percentile and the 99th percentile
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
selReadouts = c()
Group = ;
Treatment = ;
########################################################################################
# name: Correlation of parameters
# author: Martin Stoeter
# category: relations
Calculates the correlation between all chosen parameters.
For all selected parameters a pairwise correlation will be calculated (P1 vs P2, P1 vs P3, P2 vs P3) for the entire table (missing values will be removed/ignored).
n is the number of data pointe in correlation. The type of correlation can be selected.
######
$$$TEMPLATE_DESC$$$
# 1. Parameter selection
# 1) select parameters for replicate correlation
paramName = c();
# 2) select correlation method
corrFunctionMethod = ;
AB, AC, BC)
for (i in 1:(length(repVector)-1)) {
for (j in (i+1):length(repVector)) {
corrTable <- as.matrix(kIn[,c(repVector[i], repVector[j])])
#remove NA
corrTable <- corrTable[which(!is.na(corrTable[,repVector[i]]) & !is.na(corrTable[,repVector[j]])),]
#check if XYtable match is bigger than 1
sampleIDmatch <- length(corrTable)
if (sampleIDmatch > 1) {
#calculate correlation
correlation <- cor.test(corrTable[,repVector[i]], corrTable[,repVector[j]], method = corrFunctionMethod)
corrEstimate <- correlation$"estimate"
corrMethod <- correlation$"method"
} else {
corrEstimate <- as.numeric(NA)
corrMethod <- "not enough samples for correlation"
sampleIDmatch <- 0
}
#assemble result
pearsonResult <- data.frame(parameter1 = repVector[i], parameter2 = repVector[j], n = sampleIDmatch, coefficient = corrEstimate, method = corrMethod)
if (exists("pearsonResultTable")) {
pearsonResultTable <- data.frame(rbind(pearsonResultTable, pearsonResult))
} else {
pearsonResultTable <- pearsonResult
}
}
}
rOut <- pearsonResultTable
]]>
########################################################################################
# name: Correlation of multi-parametric profiles
# author: Martin Stoeter
# category: relations
Calculates the correlation between multi-parameteric profiles.
Select columns for "Group identifier" (e.g., gene symbol) and "Sample identifier" (e.g. siRNA-ID or oligo letter). Use the check boxes to define which combinations should be calculated.
a) all combinations of groups: e.g. oligos of gene A will be correlated with oligos of gene B
b) all combinations of samples: e.g. oligo A will be correlated with oligo B
Be aware that if you do all combinations the snippet could take a while!
For all pairwise combinations of the correlation of selected profile parameters will be calculated (missing values will be removed/ignored).
n is the number of parameters in correlation. The type of correlation can be selected.
######
$$$TEMPLATE_DESC$$$
# 1. Parameter selection
# 1) select parameters for replicate correlation
paramName = c();
# 2) select column that contains sample ID
IDgene =
# 3) select column that contains group ID
IDoligo =
# 4) select correlation method
corrFunctionMethod = ;
1) {
correlation <- cor.test(corrTable[,1], corrTable[,2], method = corrFunctionMethod)
corrEstimate <- correlation$"estimate"
corrMethod <- correlation$"method"
} else {
corrEstimate <- as.numeric(NA)
corrMethod <- "not enough values for correlation"
sampleIDmatch <- as.numeric(NA)
}
} else {
corrEstimate <- as.numeric(NA)
corrMethod <- "not enough profiles for correlation"
sampleIDmatch <- as.numeric(NA)
}
pearsonResult <- rbind(pearsonResult, data.frame(groupID1 = combVec[comb,1], sampleID1 = kInXtable[xOligo,IDoligo], groupID2 = combVec[comb,2], sampleID1 = kInYtable[yOligo,IDoligo], n = sampleIDmatch, coefficient = corrEstimate, method = corrMethod))
}
}
}
}
#now do correlation between groups (geneA vs geneB)
if (allCombiGene){
combVec <- t(combn(geneVector,2))
for (comb in 1:length(combVec[,1])){
# extract data rows to correlate
kInXtable <- kIn[which(as.character(kIn[,IDgene]) == combVec[comb,1]),c(IDgene, IDoligo, paramName)]
kInYtable <- kIn[which(as.character(kIn[,IDgene]) == combVec[comb,2]),c(IDgene, IDoligo, paramName)]
for (xOligo in 1:length(kInXtable)){
for (yOligo in 1:length(kInYtable)){
#correlate all olives or only the same olives (oligoA vs oligoA, B, ...)
if (allCombiOligo || (kInXtable[xOligo,IDoligo] == kInYtable[yOligo,IDoligo])){
corrTable <- t(as.matrix(rbind(kInXtable[xOligo,paramName], kInYtable[yOligo,paramName])))
#remove NA
corrTable <- corrTable[which(!is.na(corrTable[,1]) & !is.na(corrTable[,2])),]
sampleIDmatch <- length(corrTable[,1])
if (sampleIDmatch > 1) {
correlation <- cor.test(corrTable[,1], corrTable[,2], method = corrFunctionMethod)
corrEstimate <- correlation$"estimate"
corrMethod <- correlation$"method"
} else {
corrEstimate <- as.numeric(NA)
corrMethod <- "not enough values for correlation"
sampleIDmatch <- as.numeric(NA)
}
#assemble result
pearsonResult <- rbind(pearsonResult, data.frame(groupID1 = combVec[comb,1], sampleID1 = kInXtable[xOligo,IDoligo], groupID2 = combVec[comb,2], sampleID1 = kInYtable[yOligo,IDoligo], n = sampleIDmatch, coefficient = corrEstimate, method = corrMethod))
}
}
}
}
}
rOut <- pearsonResult
]]>
########################################################################################
# name: Correlation of replicates
# author: Martin Stoeter
# category: relations
Calculates the correlation between replicates for all chosen parameters.
The table must contain a column of replicate information. Then another column must contain the sample identifier, which must be unique within a replicate (e.g. platenumber::well, but not barcode!), AND the same between the replicates.
For the corresponding sample identifiers the correlation will be calculated pairwise between the replicates (A vs B, A vs C, B vs C) for the selected parameters (missing values will be removed/ignored).
n is the number of sample identifiers per replicate in correlation. The type of correlation can be selected.
Option: R square can be also calculated from linear regression
Trouble shooting:
a) "sampleID not unique" -> more than 1 sampleID (eg wells) per replicate
b) "not enough samples for correlation" -> sampleID for a replicate = 0 (no data to correlate)
HINT: use GroupBy[Unique concatenate with count(replicate)] on selected sampleID column, must be replicate count = 1 for each sampleID
######
$$$TEMPLATE_DESC$$$
# 1. Parameter selection
# 1) select parameters for replicate correlation
paramName = c();
# 2) select column that contains replicate annotation
repColName = ;
# 3) select column that contains sample ID
IDname = ;
# 4) select correlation method
corrFunctionMethod = ;
# 4) select correlation method
useRegression = ;
AB, AC, BC)
for (i in 1:(length(repVector)-1)) {
#define first table (A): e.g. kInA <- kIn[(which(kIn$"replicate"=="A")),c("sampleID","replicate","AvgIntenCh1","MinDistCh1","AreaCh1","CellNumber")]
kInXtable <- kIn[which(as.character(kIn[,repColName]) == repVector[i]),c(IDname, repColName, paramName)]
for (j in (i+1):length(repVector)) {
#define second table (B)
kInYtable <- kIn[which(as.character(kIn[,repColName]) == repVector[j]),c(IDname, repColName, paramName)]
for (paramIteration in paramName) {
#check if sampleIDs are unique, NA are ignored
if (length(unique(as.character(na.omit(kInXtable[,IDname])))) == length(as.character(na.omit(kInXtable[,IDname]))) & length(unique(as.character(na.omit(kInYtable[,IDname])))) == length(as.character(na.omit(kInYtable[,IDname])))) {
corrTable <- merge(kInXtable[,c(IDname, repColName, paramIteration)], kInYtable[,c(IDname, repColName, paramIteration)], by.x = IDname, by.y =IDname, all = FALSE)
#remove NA => now not needed anymore because merge(... all=FALSE) means inner join and row with NA in joining/merging column are removed
if(corrFunctionMethod != "pearson") corrTable <- corrTable[which(!is.na(corrTable[,paste(paramIteration, "x", sep="."),]) & !is.na(corrTable[,paste(paramIteration, "y", sep="."),])),]
#check if XYtable match is bigger than 1
sampleIDmatch <- length(corrTable[,1])
if (sampleIDmatch > 1) {
#calculate correlation
correlation <- cor.test(corrTable[,paste(paramIteration, "x", sep=".")], corrTable[,paste(paramIteration, "y", sep=".")], method = corrFunctionMethod)
corrEstimate <- correlation$"estimate"
corrMethod <- correlation$"method"
if(corrFunctionMethod == "pearson") {
corrDF <- correlation$"parameter" + 2 #DF = degree of freedom = number of XY-pairs without NAs
} else {
corrDF <- sampleIDmatch
}
#corrPvalue <- correlation$"p.value" #is allways 0 in pearson correlation!?
if(useRegression) {
regression <- lm(corrTable[,paste(paramIteration, "x", sep=".")] ~ corrTable[,paste(paramIteration, "y", sep=".")])
rSquared <- summary(regression)$r.squared
}
} else {
corrEstimate <- as.numeric(NA)
corrMethod <- "not enough samples for correlation"
#corrPvalue <- as.numeric(NA)
if(useRegression) rSquared <- as.numeric(NA)
corrDF <- 0
}
} else {
corrEstimate <- as.numeric(NA)
corrMethod <- "sampleID not unique"
#corrPvalue <- as.numeric(NA)
if(useRegression) rSquared <- as.numeric(NA)
corrDF <- as.numeric(NA)
}
#assemble result
if(useRegression) {
pearsonResult <- data.frame(parameter = paramIteration, replicate1 = repVector[i], replicate2 = repVector[j], n = corrDF, coefficient = corrEstimate, method = corrMethod, linear.regression.R.squared = rSquared) #p.value = corrPvalue,
} else {
pearsonResult <- data.frame(parameter = paramIteration, replicate1 = repVector[i], replicate2 = repVector[j], n = corrDF, coefficient = corrEstimate, method = corrMethod)
}
if (exists("pearsonResultTable")) {
pearsonResultTable <- data.frame(rbind(pearsonResultTable, pearsonResult))
} else {
pearsonResultTable <- pearsonResult
}
}
}
}
rOut <- pearsonResultTable
]]>
########################################################################################
# name: Create column name table from selection
# author: Holger Brandl, Martin Stoeter
# category: utilities
# preview: CreateColumnNameTableFromSelection.png
Creates a table with a single column that contains the selected column headers.
- select or type name of column for column headers
- preview shows what the snippet is doing instead of using KNIME nodes
- use check box to create two more columns with additional tags (for looping barplot)
- if the selecsted column headers haver already the tag ' (Mean)' from GroupBy node, then this tag can be removed before processing (generate column table from aggregated data)
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select the columns
columnHeaders = c()
# 2) select column name
# 3) make columns for barplot looping
########################################################################################
# name: Handle and filter NaN and infinite values
# author: Martin Stoeter
# category: utilities
NaN are missing values that are created by some nodes (e.g. HCS tools - Normalization) and that are different from KNIMEs' missing values ("?", in R = "NA").
Infinite are positive or negative invinite values that are created by some nodes (e.g. HCS tools - Normalization) by devision by zero.
NaN can be filtered by Row Filter with patters "NaN" on number column, however this is tedious for many columns.
Infinite can be filtered by Row Filter using very high or low values, however this is tedious for many columns.
This R snippet allows handling of NaN and infinite on input table.
- select type of output:
1. All column names and number of found NaN and infinite (count all NaN and infinite per column)
2a. Only column names that contain NaN (list of NaN columns)
2b. Only column names that contain NaN or infinite (list of NaN and infinite columns)
3a. Input table without NaN columns (filter NaN columns out)
3b. Input table without NaN and infinite columns (filter NaN and infinite columns out)
4a. Input table with NaN replaced by NA (NA = KNIME understandable missing values)
4b. Input table with NaN and infinite replaced by NA (NA = KNIME understandable missing values)
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 2) select column name
typeOfOutput = ;
0)]
}
#return the input table without column containing NaN or infinite
if (typeOfOutput == "Input table without NaN or infinite columns") {
NaNresult <- kIn[,which((!NaNresult[,2]>0) | (!NaNresult[,3]>0))]
}
#returns only the column names that contain NaN values
if (typeOfOutput == "Only column names that contain NaN") {
NaNresult <- NaNresult[which(NaNresult[,2]>0),]
}
#returns only the column names that contain NaN or infinite values
if (typeOfOutput == "Only column names that contain NaN or infinite") {
NaNresult <- NaNresult[which((NaNresult[,2]>0) | (NaNresult[,3]>0)),]
}
#replace NaN with NA and turn complete table
if (typeOfOutput == "Input table with NaN replaced by NA") {
for (i in 1:(length(kIn[1,]))) {
if (NaNresult[i,"numberOfNaN"] > 0) {
kIn[which(is.nan(kIn[,i])),i] <- NA
}
}
NaNresult <- kIn
}
#replace NaN and infinite with NA and turn complete table
if (typeOfOutput == "Input table with NaN and infinite replaced by NA") {
for (i in 1:(length(kIn[1,]))) {
if ((NaNresult[i,"numberOfNaN"] > 0) || (NaNresult[i,"numberOfInfinite"] > 0)) {
kIn[which((is.nan(kIn[,i])) | (is.infinite(kIn[,i]))),i] <- NA
}
}
NaNresult <- kIn
}
rOut<- NaNresult
]]>
########################################################################################
# name: Reference Column Filter (2:1)
# author: Martin Stoeter
# category: utilities
# preview: ReferenceColumnFilter.png
Filters the unselected columns of input port 1 (columns to be filtered) using a reference column of input port 2.
- select columns to be kept in input 1 (filter is not applied)
- select the column in input 2 that contains column headers for reference filter
- select way of filtering (filter in = keep what is in reference, filter out = drop what is in reference)
Preview shows what the snippet is doing instead of using KNIME nodes
NEEDS R snippet (2:1) !!!
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select columns that should be kept
keepColumns = c();
# 2) select column that contains replicate annotation
refColumnName = ;
# 3) type of filter
filter = ;
#######################################################################################
# name: Get info from R-server (installed packages, R version, system info)
# author: Martin Stoeter
# category: utilities
# preview:
This snippet lists the version and packages installed on your R-server.
a) installed packages and their versions (installed.packages())
b) libraries/packages that can be loaded (library())
c) base packages (sessionInfo()["basePkgs"])
d) R version and platform (sessionInfo()[1])
e) other packages installed (not always present)
f) system information (Sys.info())
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1.1 select type of transformation
getInfo =
########################################################################################
# name: Reshape to long format
# category: utilities
Creates a table with a single measurement column and several id columns.
######
library(reshape)
rOut <- melt(kIn)
########################################################################################
# name: Convert numbers to defined text/string
# author: Antje Niederlein, Martin Stoeter
# category: utilities
# preview:
Converts selected column to a specified number format
- select column to convert or define flow variable using the format: FLOWVAR(variable name)
- define digits before and after digit point
- if minus is checked and negative values are present, an additional digit will be added to make positive and negative values the same length (for sorting good!)
- if automatic is checked, then number of digits before and after will be calculated from the data
- example: [3,2,minus=true]: 5.5 -> 0005.50, -012.125 -> -012.13; [2,3,minus=false] -5.0 -> -05.000, 1234.5678 -> 1234.5678
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1) select the columns
columnHeader =
# 2) select the format
0) decimal <- TRUE
width <- leadingWidth + decimalWidth
if(decimal) width <- width + 1
if(minus) width <- width + 1
outChar <- ""
if(decimal) {
if(minus) outChar <- formatC(x, format = "f", digits = decimalWidth, width = width, flag = "00")
else outChar <- formatC(x, format = "f", digits = decimalWidth, width = width, flag = "0")
}
else {
if(minus) outChar <- formatC(x, format = "d", width = width, flag = "00")
else outChar <- formatC(x, format = "d", width = width, flag = "0")
}
outChar
}
lWidth = 0
dWidth = 0
minus <- FALSE
if(minusDigits && min(numVec) < 0) minus <- TRUE
if(autoNumberDigits) {
lWidth <- nchar(as.character(floor(max(abs(numVec)))))
splitDec <- strsplit(as.character(numVec),".", fixed = TRUE)
decPart <- sapply(splitDec[sapply(splitDec, length) > 1], "[[", 2)
dWidth <- max(nchar(decPart))
} else {
lWidth = digitVector[1]
dWidth = digitVector[2]
}
charVec <- sapply(numVec, sortableNumberString, minus, lWidth, dWidth)
if(appendColumn) {
kIn <- cbind(kIn,charVec)
names(kIn)[ncol(kIn)] <- paste(columnHeader, newColumnSuffix, sep = "")
} else {
kIn[,columnHeader] <- charVec
}
#sort(charVec)
rOut <- kIn
]]>
######################################################################################
# name: Z rank transformation (beta)
# author: Martin Stoeter
# category: distributions
# preview:
This rank-preserving data transformation is a pre-processing technique to stabilize the variance and to make the data more normal distribution-like. The Z rank transformation (project_to_Gaussian) projects the data to a random normal distribution. For each group and each parameter the values are transformed independently.
- select type of normalization
- select parameters that should be rank normalized
- select a group column that should be used for ranking (e.g. barcode, date, project name,...)
If subset data is chosen as reference, only this subset data is used to calculate the lambda for the transformation (either a) or b); not possible in Z rank!).
a) select a column and give name of subset data used as reference
b) select subset of column 'treatment' to be used as reference (needs a column with name 'treatment')
######
$$$TEMPLATE_DESC$$$
#1.0 Parameter selection
# 1.1 select type of transformation
transformation = ;
# 1.2 select the columns
selectedParams = match(c(), names(kIn));
# 1.3 select group
groupColumnName =
# 1.4 select subset
subsetOption =
subsetColumn =
subsetTreatment =
% mutate(rowID = row.names(kIn))
#table <- table %>% filter(Metadata_barcode == "060AZ190218A-man-fixed")
rOut <- table
aggregatedGroupData <- head(table,0)
randomDataTable <- NULL # data frame to store random normal distributions
calculateTransformationModel <- TRUE
debugAndPlot <- FALSE # set this TRUE for visualization and debugging
storeRandomDistributionTable <- FALSE
if (transformation == "project_to_Gaussian") {
#calc z rank normalization per group
for (group in 1:(length(allGroups))) {
# ================================= for each group, (get subset data,) and calculate & store randome distribution ================
# get name of group
curGroup <- allGroups[group]
groupData <- table %>% filter(get(groupColumnName) == curGroup)
# z rank normalization can be calculated on complete data set (still for each group independently, here = ELSE), or based on a subset of the group (e.g. untreated) and then applied on the complete data set of the group (here = IF)
groupDataModel <- groupData %>% filter(0 > 1) # initialize table with 0 rows
if(subsetOption == "a) subset column") {
groupDataModel <- groupData %>% filter(get(subsetColumn) == subsetName) # get the subset table to calculate the normalization / transformation model data
doApplyOnNormalDistribution <- TRUE
}
if(subsetOption == "b) subset of column 'treatment'" && subsetTreatment != "-NOT USED-") {
subsetColumn <- "treatment"
groupDataModel <- groupData %>% filter(get(subsetColumn) == subsetTreatment) # get the subset table to calculate the normalization / transformation model data
doApplyOnNormalDistribution <- TRUE
}
if (subsetOption == "-NO SUBSET - USE ALL DATA IN GROUP!-" | nrow(groupDataModel) == 0) {
groupDataModel <- groupData
doApplyOnNormalDistribution <- FALSE
}
# generate data set with random normal distribution that is 100 bigger than the group data set (100 => less random sampling errors)
nValues <- nrow(groupDataModel) # length of model data vector
normData <- sort(rnorm(nValues*100, mean = 0, sd = 1)) # random normal distribution is sampled 100-fold more to avoid effects due to random data
# store random distribution in table to export to output and use as model later...
if (storeRandomDistributionTable) {
if (is.null(randomDataTable)) randomDataTable <- data.frame(cbind(normData))
else {
if (nrow(randomDataTable) > length(normData)) randomDataTable <- data.frame(cbind(randomDataTable, c(normData, rep(NA,nrow(randomDataTable)-length(normData)))))
else {
for (fillNA in (nrow(randomDataTable)+1):length(normData)) randomDataTable <- data.frame(rbind(randomDataTable, rep(NA, ncol(randomDataTable))))
randomDataTable <- data.frame(cbind(randomDataTable, normData))
}
}
names(randomDataTable)[group] <- levels(curGroup)
} # end of if (storeRandomDistributionTable)
# ================================= for each group, (get subset data,) and calculate & store randome distribution ================
# iterate through all selected parameter
for (param in 1:(length(selectedParams))) {
paramName <- colnames(groupDataModel)[selectedParams[param]] # get current parameter name
groupData$normData <- NA # make column for transformed data and set all values to NA for now
if(calculateTransformationModel) { # xxx is this if variable neccessary?
rawValues <- pull(groupDataModel, paramName)
rankVals <- as.integer(rank(rawValues))
transformedData <- normData[rankVals*100-50]
temp <- data.frame(rowID = groupDataModel, values = rawValues, rank = rankVals, normData = transformedData) # xxx is this needed?
} # if
if (doApplyOnNormalDistribution) {
groupData <- groupData %>% arrange_(paramName)
rawValuesGroupDataSorted <- pull(groupData, paramName)
i = 1 # varible for iteration through transformation model data
j = 1 # varible for iteration through group data
rawValuesSorted <- sort(rawValues) # xxx again sorting? yes, but same variable could be used...
transformedData <- sort(transformedData) # xxx again sorting?
#rawValuesSorted <- rawValuesSorted[1:5]
intvervalLow <- rawValuesSorted[i]
intvervalHigh <- rawValuesSorted[i + 1]
intervalHighLow <- intvervalHigh - intvervalLow
intervalNormData <- normData[(i+1)*100-50]-normData[i*100-50]
intervalTransformedData <- transformedData[i + 1] - transformedData[i]
if (debugAndPlot) { # initialize plot for debugging with first data point
plot(x = rawValuesSorted[1], y = transformedData[1], ylim = c(-5,5), xlim = c(rawValuesGroupDataSorted[2],rawValuesGroupDataSorted[length(rawValuesGroupDataSorted)])) # first data: c(rawValuesGroupDataSorted[2],rawValuesGroupDataSorted[20]) ; end of the data: c(rawValuesGroupDataSorted[length(rawValuesGroupDataSorted)-20],rawValuesGroupDataSorted[length(rawValuesGroupDataSorted)])
abline(v = intvervalLow, lty = 2, col = "grey")
} # debug
# find data points that are lower smaller than model data of subset
while (!intvervalLow <= rawValuesGroupDataSorted[j]) j = j + 1
if (j > 1) { # apply linear model on this data
xData <- rawValuesSorted[1:(length(rawValuesSorted)*0.05)] # get the lowest 5% of the data for fitting a linear model
yData <- transformedData[1:(length(rawValuesSorted)*0.05)]
linearModelLow <- lm(yData ~ xData)
differenceLastModelDataPoint <- predict.lm(linearModelLow, data.frame(xData = rawValuesGroupDataSorted[j-1])) - transformedData[1] # to ensure continuity / steadiness of the data a shift correction of the fitted data and the projected data is neccessary
groupData$normData[1:(j-1)] <- predict.lm(linearModelLow, data.frame(xData = rawValuesGroupDataSorted[1:(j-1)])) - differenceLastModelDataPoint
if (debugAndPlot) {
points(x = xData, y = yData, col = "lightgrey") # plot the modelled data
points(y = groupData$normData[1:(j-1)], x = rawValuesGroupDataSorted[1:(j-1)], col = "darkgrey") # plot fitted points; j-1 because value of j ist now already bigger/equal intervalLow
#print(as.numeric(c(i,j,groupData$normData[1:(j-1)],rawValuesGroupDataSorted[1:(j-1)])))
#print(rawValuesSorted[1:10])
#print(transformedData[1:10])
abline(linearModelLow, col="darkgrey") # plot line to show linear model
} # debug
} # apply linear model
#print(as.numeric(c( intvervalLow, intvervalHigh,intervalHighLow, intervalNormData, intervalTransformedData)))
#print(as.numeric(c(i, intvervalLow, intvervalHigh, intervalHighLow, intervalNormData)))
while (i < length(rawValuesSorted) && j <= length(rawValuesGroupDataSorted)) {
if(intvervalLow <= rawValuesGroupDataSorted[j] && rawValuesGroupDataSorted[j] <= intvervalHigh) {
groupData$normData[j] <- as.numeric(normData[i*100-50] + intervalNormData * (rawValuesGroupDataSorted[j] - intvervalLow) / intervalHighLow)
if (debugAndPlot) {
points(y = groupData$normData[j],x = rawValuesGroupDataSorted[j], col = (i %% 5) + 1)
#if (i < 20) print(as.numeric(c(i,j,groupData$normData[j],rawValuesGroupDataSorted[j]))) # print first 20
#if (i > length(rawValuesSorted)-20) print(as.numeric(c(i,j,groupData$normData[j],rawValuesGroupDataSorted[j]))) # print last 20
}
j = j + 1
} else {
i = i + 1
if (i < length(rawValuesSorted)) {
intvervalLow <- rawValuesSorted[i]
intvervalHigh <- rawValuesSorted[i + 1]
intervalHighLow <- intvervalHigh - intvervalLow
intervalNormData <- normData[(i+1)*100-50] - normData[i*100-50]
if (debugAndPlot) {
abline(v = c(intvervalLow), lty = 2, col = (i %% 5) + 1)
#if (i < 20) print(as.numeric(c( intvervalLow, intvervalHigh,intervalHighLow))) # print first 20
#if (i > length(rawValuesSorted)-20) print(as.numeric(c( intvervalLow, intvervalHigh,intervalHighLow))) # print last 20
#print(as.numeric(c(i, intvervalLow, intvervalHigh, intervalHighLow, intervalNormData)))
} # debug
} # if
} # if else
} # while
if (debugAndPlot) abline(v = c(intvervalHigh), lty = 2, col = (i %% 5) + 1) # plot last data point / line of modelled subset data
#print(as.numeric(c(i,j, length(rawValuesSorted), length(rawValuesGroupDataSorted))))
# apply linear model on data that is larger than model data of subset
if (i >= length(rawValuesSorted) && j <= length(rawValuesGroupDataSorted)) {
xData <- rawValuesSorted[(length(rawValuesSorted)*0.95):length(rawValuesSorted)] # get the highest 5% of the data for fitting a linear model
yData <- transformedData[(length(rawValuesSorted)*0.95):length(rawValuesSorted)]
#print("done")
linearModelHigh <- lm(yData ~ xData)
differenceLastModelDataPoint <- predict.lm(linearModelHigh, data.frame(xData = rawValuesGroupDataSorted[j])) - transformedData[i] # to ensure continuity / steadiness of the data a shift correction of the fitted data and the projected data is neccessary
groupData$normData[(j):length(rawValuesGroupDataSorted)] <- predict.lm(linearModelHigh, data.frame(xData = rawValuesGroupDataSorted[(j):length(rawValuesGroupDataSorted)])) - differenceLastModelDataPoint
if (debugAndPlot) {
#print(tail(rawValuesSorted))
#print(tail(transformedData))
points(x = xData, y = yData, col = "lightgrey") # plot the modelled data
points(y = groupData$normData[(j):length(rawValuesGroupDataSorted)], x = rawValuesGroupDataSorted[(j):length(rawValuesGroupDataSorted)], col = "darkgrey") # plot fitted points; j-1 because value of j ist now already bigger/equal intervalLow
#print(as.numeric(c(i,j,groupData$normData[(j):length(rawValuesGroupDataSorted)],rawValuesGroupDataSorted[(j):length(rawValuesGroupDataSorted)])))
abline(linearModelHigh, col="darkgrey") # plot line to show linear model
} # debug
} # if
} else { # if (doApplyOnNormalDistribution)
groupData$normData <- transformedData # if groupDataModel = groupData, then just put transformed data into table
}
names(groupData)[ncol(groupData)] <- paste(paramName, '.zrank', sep = "") #project_to_Gaussian, rename current column name to .zrank
} # for (param in 1:(length(selectedParams)))
aggregatedGroupData <- bind_rows(aggregatedGroupData, groupData)
} # for (group in 1:(length(allGroups)))
rOut <- rOut %>% left_join(select(aggregatedGroupData, c("rowID",tail(names(aggregatedGroupData),length(selectedParams)))), by="rowID") %>% select(-one_of(c("rowID")))
rOut <- as.data.frame(rOut)
row.names(rOut) <- row.names(kIn)
} # if (transformation == "project_to_Gaussian")
#print("done all loops")
if (storeRandomDistributionTable) {
randomDataTable <- cbind(randomDataTable, rank = row.names(randomDataTable))
rSave <- rOut
if(nrow(rOut) < nrow(randomDataTable)) {
rOut <- cbind(rOut, rowNames = row.names(rOut))
for (fillNA in (nrow(rOut)+1):nrow(randomDataTable)) rOut <- data.frame(rbind(rOut, rep(NA, ncol(rOut))))
rOut <- data.frame(cbind(rOut, randomDataTable))
}
}
]]>
######################################################################################
# name: Uniform Manifold Approximation and Projection (UMAP)
# author: Martin Stoeter
# category: dimension_reduction
# preview:
Uniform Manifold Approximation and Projection (UMAP) is an algorithm for dimensional reduction. Originally describer here: https://arxiv.org/abs/1802.03426
This template uses R package 'umap'. More details here: https://cran.r-project.org/web/packages/umap/vignettes/umap.html
######
$$$TEMPLATE_DESC$$$
# 1. Parameter selection
# 1) select parameters for UMAP calculation
paramName = c();
# 2) UMAP configuration
########################################################################################
# name: Find Peaks Plot-Table
# author: Martin Stoeter
# category: analysis
# preview: Find_peaks_plot.png
Analyses a sequence of data point and detects peaks therein.
The plot version just a visualizes, the snippet version just outputs the peak table.
######
Custom template to create a line plot with peak detection
https://www.rdocumentation.org/packages/pracma/versions/1.9.9/topics/findpeaks
https://www.mathworks.com/help/signal/ref/findpeaks.html#bufbbs1-2
Arguments
1) x : numerical vector taken as a time series
2) nups : minimum number of increasing steps before a peak is reached
3) ndowns : minimum number of decreasing steps after the peak
4) zeros : can be "+", "-", or "0"; how to interprete succeeding steps of the same value: increasing, decreasing, or special
5) peakpat : define a peak as a regular pattern, such as the default pattern "[+]1,[-]1,"; if a pattern is provided, the parameters nups and ndowns are not taken into account
6) minpeakheight : the minimum (absolute) height a peak has to have to be recognized as such
7) minpeakdistance : the minimum distance (in indices) peaks have to have to be counted
8) threshold : the minimum height difference between a peak and its neighbors. Use this argument to have findpeaks return only those peaks that exceed their immediate neighboring values by at least the value of "threshold"
9) npeaks : the number of peaks to return
10) sortstr : logical; should the peaks be returned sorted in decreasing oreder of their maximum value
Returns a matrix where each row represents one peak found. The first column gives the height, the second the position/index where the maximum is reached, the third and forth the indices of where the peak begins and ends --- in the sense of where the pattern starts and ends.
Parameter:
input data/signal - date series to be used for peak detection
time series - column with time values for series; -USE INDEX- will return the index values (1 to end of series)
signal peak threshold - minimum peak height threshold
delta slope threshold - minimum difference threshold in slope detection (within chosen distance, onset and offset)
delta slope distance - distance value (index) in slope between the difference is calculated
To use a flow variable check box and type in variable names using the format: FLOWVAR(variable name)
Layout Options:
- plot points or lines or both
- set size of points (default = [0.7,2.0,1.5,1.2])
- give plot title and title factor (default = 1.2)
- adjust y-axis scale (enter min + max for y-axis)
#1.0 Parameter selection
#R debugging: delete all variables except kIn
#rm(list=ls(all=TRUE)[which(ls(all=TRUE) != "kIn")])
# Define your numerical parameters for input signal
signalValueColumn =
# Define your numerical parameters for time series
timeColumn =
output = "table"
# delete this line for snippet template, to diaable GUI choise
# Define peak detection parameters
# adjustment parameter, minimum peak height threshold and lower threshold
minPeakHeight = as.numeric()
threshold = as.numeric()
# adjustment parameter, minimum difference in slope (within chosen distance)
minDeltaSlope = as.numeric()
# More/advanced peak definition parameters
# adjustment parameter, distance value (index) in slope between the difference is calculated
distanceDeltaSlope = as.numeric()
# adjustment parameter, minimum number of increasing/decreasing steps before/after a peak is reached
nUpsDowns = as.numeric()
# adjustment parameter, the minimum (absolute) height / distance (indices) peaks has to have to be recognized
minPeakDistance = as.numeric()
#diagram options
plotPointsLines =
pointPointSizes = as.numeric()
plotTitleSize = as.numeric(c())
= minDeltaSlope))
# now start moving left
while (currentSlopeDelta >= minDeltaSlope & currentSlopePosition > 1) {
currentSlopePosition = currentSlopePosition - 1
currentSlopeDelta <- pSignal[currentSlopePosition + distanceDeltaSlope] - pSignal[currentSlopePosition]
#print(c(i, currentPeakPosition, currentSlopePosition, pSignal[currentSlopePosition + distanceDeltaSlope], pSignal[currentSlopePosition], currentSlopeDelta, currentSlopeDelta > minDeltaSlope))
}
peakOnset[i] <- currentSlopePosition + 1 # this vector will contain all peak onset indices
#print(peakOnset)
# find peak offset and move from peak maximum rightwards (higher index) until currentSlopeDelta is smaller than distanceDeltaSlope
currentSlopePosition <- currentPeakPosition + distanceDeltaSlope # currentSlopePosition is positon that is increased and checked iteratively
if(currentSlopePosition >= length(pSignal)) currentSlopePosition = length(pSignal) # if peak is at right edge of signal
currentSlopeDelta <- pSignal[currentPeakPosition] - pSignal[currentSlopePosition]
#print(c(i, currentPeakPosition, currentSlopePosition, pSignal[currentPeakPosition], pSignal[currentSlopePosition], currentSlopeDelta, currentSlopeDelta >= minDeltaSlope))
# now start moving right
while (currentSlopeDelta >= minDeltaSlope & currentSlopePosition < length(pSignal)) {
currentSlopePosition = currentSlopePosition + 1
currentSlopeDelta <- pSignal[currentSlopePosition - distanceDeltaSlope] - pSignal[currentSlopePosition]
#print(c(i, currentPeakPosition, currentSlopePosition, pSignal[currentSlopePosition - distanceDeltaSlope], pSignal[currentSlopePosition], currentSlopeDelta, currentSlopeDelta > minDeltaSlope))
}
peakOffset[i] <- currentSlopePosition - 1 # this vector will contain all peak offset indices
#print(peakOffset)
}
}
# this is the final index-based result table
peakTable <- cbind(peakTable, onset = peakOnset, offset = peakOffset)
# make rOut data frame for returing data to KNIME
rOut <- data.frame(peakTable)
# replace index values (position start end onset offset) with real (time series) values from input table
if (timeColumn != "-USE INDEX-") {
if (rOut[1,"position"] != 0) {
for (col in 2:6) rOut[, col] <- kIn[rOut[, col], timeColumn]
}
}
#2.4.1 layout plot
# plot signal values
if (output == "plot") { # if this is plot template then true, if this is snippet template then false
if (plotPointsLines == "points_and_lines") plotPointsLines <- "o"
if (plotPointsLines == "points_only") plotPointsLines <- "p"
if (plotPointsLines == "lines_only") plotPointsLines <- "l"
plot(x = xValues, y = pSignal, type=plotPointsLines, col="grey40", cex = pointPointSizes[1], ylim = yAxisScale,
main = plotTitle, xlab = xAxisTitle, ylab = signalValueColumn, cex.main = plotTitleSize)
grid()
#2.4.2 plot data
# plot peaks, start, end and values between onset and offset
if (peakTable[1,"position"] != 0) {
for(i in 1:nrow(peakTable)) {
points(kIn[peakTable[i,"onset"]:peakTable[i,"offset"], "xValues"], kIn[peakTable[i,"onset"]:peakTable[i,"offset"], signalValueColumn], pch=20, col="black", cex = pointPointSizes[4])
}
points(kIn[peakTable[,"position"], "xValues"], kIn[peakTable[,"position"], signalValueColumn], pch=24, col="red", cex = pointPointSizes[2])
points(kIn[peakTable[, "start"], "xValues"], kIn[peakTable[, "start"], signalValueColumn], pch=22, col="darkorange", cex = pointPointSizes[3])
points(kIn[peakTable[, "end"], "xValues"], kIn[peakTable[, "end"], signalValueColumn], pch=23, col="darkred", cex = pointPointSizes[3])
}
#2.4.3 plot options
# adds horizontal lines
if (plotThreshold) abline(h = threshold, lty = 2)
if (plotMinPeakHeight) abline(h = minPeakHeight, lty = 3)
#abline(h = hlines[2], lwd = 1.5)
} # end plot data
# debug
#rOut
#peakTable
#findpeaks(pSignal, threshold=threshold, sortstr=TRUE)
]]>