#' TasKar – Binary-Classification Performance Calculator/Dashboard (Missing Metrics Calculator)
#' Copyright (C) 2016-2018 Gürol Canbek
#' This file is licensed under
#'
#' GNU Affero General Public License v3.0, GNU AGPLv3
#'
#' This program is free software: you can redistribute it and/or modify
#' it under the terms of the GNU Affero General Public License as published
#' by the Free Software Foundation, either version 3 of the License, or
#' (at your option) any later version.
#'
#' This program is distributed in the hope that it will be useful,
#' but WITHOUT ANY WARRANTY; without even the implied warranty of
#' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#' GNU Affero General Public License for more details.
#'
#' You should have received a copy of the GNU Affero General Public License
#' along with this program. If not, see .
#'
#' See the license file in
#'
#' @author Gürol Canbek,
#' @references
#' @keywords utilities, common functions
#' @title Missing Metrics Revealer
#' @version 1.2
#' @description R functions for calculating confusion matrix (base measures)
#' from given performance instruments (e.g. P, N, TPR, and ACC) as well as
#' initializing global confusion matrix by given instruments.
#' @note version history
#' 1.2, 13 November 2018, Converting calculated base measures into integer
#' 1.1, 26 March 2018, Batch processing
#' 1.0, 19 February 2017, The first version
#' @date 13 November 2018
#' libraries
# None
#' ### integerClassBaseMeasures
#' Convert the base measures of a single class into integer.
#' **Parameters:**
#' *fTrueClass*: Calculated float True-Class value (TP or TN)
#' *fFalseOtherClass*: Calculated float False-Other-Class value (FN or FP)
#' *dClass*: Given (actual) class size (P or N)
#' **Return:**
#' A vector first element integer True-Class and integer False-Other-Class value
#' **Example Usage:**
# int_values <- integerClassBaseMeasures(12.25, 12.75, 25)
integerClassBaseMeasures <- function(fTrueClass, fFalseOtherClass, dClass)
{
tTrueClass <- trunc(fTrueClass)
tFalseOtherClass <- trunc(fFalseOtherClass)
tClass <- tTrueClass + tFalseOtherClass
delta <- dClass - tClass
if (delta == 0) {
return (c(tTrueClass, tFalseOtherClass))
}
if (delta == 2) {
# Increase truncated values
return (c(tTrueClass+1, tFalseOtherClass+1))
}
if (delta == -2) {
# Decreas truncated values
return (c(tTrueClass-1, tFalseOtherClass-1))
}
frTrueClass <- fTrueClass - tTrueClass
frFalseOtherClass <- fFalseOtherClass - tFalseOtherClass
if (delta == 1) {
# Increase truncated values
if (frTrueClass >= 0.5 && frFalseOtherClass >= 0.5) {
if (frTrueClass > frFalseOtherClass) {
return (c(tTrueClass+1, tFalseOtherClass))
}
if (frTrueClass < frFalseOtherClass) {
return (c(tTrueClass, tFalseOtherClass+1))
}
if (tTrueClass > tFalseOtherClass) {
return (c(tTrueClass+1, tFalseOtherClass))
}
return (c(tTrueClass, tFalseOtherClass+1))
}
if (frTrueClass >= 0.5) {
return (c(tTrueClass+1, tFalseOtherClass))
}
if (frFalseOtherClass >= 0.5) {
return (c(tTrueClass, tFalseOtherClass + 1))
}
if (tTrueClass > tFalseOtherClass) {
return (c(tTrueClass+1, tFalseOtherClass))
}
return (c(tTrueClass, tFalseOtherClass+1))
}
# Delta = -1, Decrease truncated values
if (frTrueClass >= 0.5 && frFalseOtherClass >= 0.5) {
if (frTrueClass > frFalseOtherClass) {
return (c(tTrueClass-1, tFalseOtherClass))
}
if (frTrueClass < frFalseOtherClass) {
return (c(tTrueClass, tFalseOtherClass-1))
}
if (tTrueClass > tFalseOtherClass) {
return (c(tTrueClass-1, tFalseOtherClass))
}
return (c(tTrueClass, tFalseOtherClass-1))
}
if (frTrueClass >= 0.5) {
return (c(tTrueClass-1, tFalseOtherClass))
}
if (frFalseOtherClass >= 0.5) {
return (c(tTrueClass, tFalseOtherClass-1))
}
if (tTrueClass > tFalseOtherClass) {
return (c(tTrueClass-1, tFalseOtherClass))
}
return (c(tTrueClass, tFalseOtherClass-1))
}
#' ### getMeasuresViaP_TPR_FPR_ACC
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, TPR, FPR, ACC
#' **Parameters:**
#' *p*: Positive
#' *tpr*: True Positive Rate
#' *fpr*: False Positive Rate
#' *acc*: Accuracy
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_TPR_FPR_ACC <- function(p, tpr, fpr, acc)
{
# Check inputs
p <- as.numeric(p)
stopifnot(all(p >= 0))
tpr <- as.numeric(tpr)
stopifnot(all(tpr >= 0) && all(tpr <= 1))
fpr <- as.numeric(fpr)
stopifnot(all(fpr >= 0) && all(fpr <= 1))
tnr <- 1 - fpr
acc <- as.numeric(acc)
stopifnot(all(acc >= 0) && all(acc <= 1))
# Calculate base and 1st level measures
tp <- p*tpr
fn <- p - tp
tn <- tnr*(tp - acc*p)/(acc*(fpr+tnr) - tnr)
fp <- fpr*tn/tnr
sn <- tp+fp+fn+tn
# n <- p*(tpr-acc)/(acc+fpr-1)
# sn <- p+n
# tn <- acc*sn-tp
# fp <- n-tn
# fn <- p-tp
return (list(TP=tp, FP=fp, FN=fn, TN=tn, Sn=sn))
}
#' ### getMeasuresViaP_N_TPR_ACC
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, TPR, ACC
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *tpr*: True Positive Rate
#' *acc*: Accuracy
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_TPR_ACC<-function(p, n, tpr, acc)
{
# Check inputs
p <- as.numeric(p)
stopifnot(all(p >= 0))
n <- as.numeric(n)
stopifnot(all(n >= 0))
tpr <- as.numeric(tpr)
stopifnot(all(tpr >= 0) && all(tpr <= 1))
acc <- as.numeric(acc)
stopifnot(all(acc >= 0) && all(acc <= 1))
# Calculate base and 1st level measures
tp <- p*tpr
sn <- p+n
tn <- acc*sn-tp
fp <- n-tn
fn <- p-tp
return (list(TP=tp, FP=fp, FN=fn, TN=tn, Sn=sn))
}
#' ### getMeasuresViaP_N_TNR_ACC
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, TNR, ACC
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *tnr*: True Negative Rate
#' *acc*: Accuracy
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_TNR_ACC<-function(p, n, tnr, acc)
{
# Check inputs
p <- as.numeric(p)
stopifnot(all(p >= 0))
n <- as.numeric(n)
stopifnot(all(n >= 0))
tnr <- as.numeric(tnr)
stopifnot(all(tnr >= 0) && all(tnr <= 1))
acc <- as.numeric(acc)
stopifnot(all(acc >= 0) && all(acc <= 1))
# Calculate base and 1st level measures
tn <- n*tnr
sn <- p+n
tp <- acc*sn-tn
fp <- n-tn
fn <- p-tp
return (list(TP=tp, FP=fp, FN=fn, TN=tn, Sn=sn))
}
#' ### getMeasuresViaP_N_TPR_PPV
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, TPR, PPV
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *tpr*: True Positive Rate
#' *ppv*: Positive Predictive Value
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_TPR_PPV<-function(p, n, tpr, ppv)
{
# Check inputs
p <- as.numeric(p)
stopifnot(all(p >= 0))
n <- as.numeric(n)
stopifnot(all(n >= 0))
tpr <- as.numeric(tpr)
stopifnot(all(tpr >= 0) && all(tpr <= 1))
ppv <- as.numeric(ppv)
stopifnot(all(ppv >= 0) && all(ppv <= 1))
# Calculate base and 1st level measures
tp <- p*tpr
sn <- p+n
fp <- (tp/ppv)-tp
fn <- p-tp
tn <- n-fp
return (list(TP=tp, FP=fp, FN=fn, TN=tn, Sn=sn))
}
#' ### getMeasuresViaP_N_TPR_FPR
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, TPR, FPR
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *tpr*: True Positive Rate
#' *fpr*: False Positive Rate
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_TPR_FPR <- function(p, n, tpr, fpr)
{
# Check inputs
p <- as.numeric(p)
stopifnot(all(p >= 0))
n <- as.numeric(n)
stopifnot(all(n >= 0))
tpr <- as.numeric(tpr)
stopifnot(all(tpr >= 0) && all(tpr <= 1))
fpr <- as.numeric(fpr)
stopifnot(all(fpr >= 0) && all(fpr <= 1))
# Calculate base and 1st level measures
tp <- p*tpr
sn <- p+n
fp <- n*fpr
fn <- p-tp
tn <- n-fp
return (list(TP=tp, FP=fp, FN=fn, TN=tn, Sn=sn))
}
#' ### getMeasuresViaP_N_TPR_TNR
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, TPR, TNR via
#' getMeasuresViaP_N_TPR_FPR
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *tpr*: True Positive Rate
#' *tnr*: True Negative Rate
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_TPR_TNR <- function(p, n, tpr, tnr)
{
fpr <- 1-as.numeric(tnr)
return (getMeasuresViaP_N_TPR_FPR(p, n, tpr, fpr))
}
#' ### getMeasuresViaP_N_ACC_F1
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, ACC, F1
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *acc*: Accuracy
#' *f1*: F1
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_ACC_F1 <- function(p, n, acc, f1)
{
# Check inputs
p <- as.numeric(p)
stopifnot(all(p >= 0))
n <- as.numeric(n)
stopifnot(all(n >= 0))
acc <- as.numeric(acc)
stopifnot(all(acc >= 0) && all(acc <= 1))
# F1 = 2TP/(2TP+FC)
f1 <- as.numeric(f1)
stopifnot(all(f1 >= 0) && all(f1 <= 1))
# Calculate base and 1st level measures
# ACC = TC/(P+N) => TC = ACC.(P+N)
tc <- acc*(p+n)
# FC = (P+N) - TC (1)
fc <- (p+n)-tc
# => 2TP.F1 + FC.F1 = 2TP
# => 2TP.F1 - 2TP = -FC.F1
# => 2TP(F1-1) = -FC.F1
# => 2TP(1-F1) = FC.F1
# TP = FC.F1/(2.(1-F1))
tp <- fc*f1/(2*(1-f1))
tpr <- tp/p
stopifnot(all(tpr >= 0) && all(tpr <= 1))
return (getMeasuresViaP_N_TPR_ACC(p, n, tpr, acc))
}
#' ### getMeasuresViaP_N_FPR_FNR
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, FPR, FNR via
#' getMeasuresViaP_N_TPR_TNR
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *fpr*: False Positive Rate
#' *fnr*: False Negative Rate
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_FPR_FNR <- function(p, n, fpr, fnr)
{
tpr <- 1-as.numeric(fnr)
tnr <- 1-as.numeric(fpr)
return (getMeasuresViaP_N_TPR_TNR(p, n, tpr, tnr))
}
#' ### getMeasuresViaP_N_FNR_ACC
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, FNR, ACC via
#' getMeasuresViaP_N_TPR_ACC
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *fnr*: False Negative Rate
#' *acc*: Accuracy
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_FNR_ACC <- function(p, n, fnr, acc)
{
tpr <- 1-as.numeric(fnr)
return (getMeasuresViaP_N_TPR_ACC(p, n, tpr, acc))
}
#' ### getMeasuresViaP_N_FPR_ACC
#' Reveal base measures (TP, FP, FN, TN) and Sn from P, N, FPR, ACC via
#' getMeasuresViaP_N_TNR_ACC
#' **Parameters:**
#' *p*: Positive
#' *n*: Negative
#' *fpr*: False Positive Rate
#' *acc*: Accuracy
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaP_N_FPR_ACC <- function(p, n, fpr, acc)
{
tnr <- 1-as.numeric(fpr)
return (getMeasuresViaP_N_TNR_ACC(p, n, tnr, acc))
}
#' ### getMeasuresViaBaseMeasures
#' Return base measures (TP, FP, FN, TN) and Sn from base measures
#' **Parameters:**
#' *tp*: True Positive
#' *fp*: False Positive
#' *fn*: False Negative
#' *tn*: True Negative
#' **Return:**
#' A list with base measures and Sn
getMeasuresViaBaseMeasures <- function(tp, fp, fn, tn)
{
tp <- as.numeric(tp)
stopifnot(all(tp >= 0))
fp <- as.numeric(fp)
stopifnot(all(fp >= 0))
fn <- as.numeric(fn)
stopifnot(all(fn >= 0))
tn <- as.numeric(tn)
stopifnot(all(tn >= 0))
sn <- tp + fp + fn + tn
return (list(TP=tp, FP=fp, FN=fn, TN=tn, Sn=sn))
}
#' ### initConfusionMatrixViaBaseMeasures
#' Initialize global base measures (TP, FP, FN, TN) and Sn from base measures
#' **Parameters:**
#' *bm*: Base Measures data frame (TP, FP, FN, TN)
#' **Return:**
#' None
#' **Examples:**
# base_measures <- rclip()
# initConfusionMatrixViaBaseMeasures(base_measures)
## then
# source('TasKarMetrics.R')
## to calculate all the metrics/measures/indicators
# wclip(TasKar)
## Also
# source('TasKarSummaries.R')
## to calculate and populate all the stuff related to the instruments
## to copy values to paste into other platforms such as Excel use
# getAll()
initConfusionMatrixViaBaseMeasures<-function(base_measures)
{
m <- getMeasuresViaBaseMeasures(base_measures$TP, base_measures$FP,
base_measures$FN, base_measures$TN)
TP <<- m$TP
FP <<- m$FP
FN <<- m$FN
TN <<- m$TN
Sn <<- m$Sn
}
#' ### initConfusionMatrixViaP_TPR_FPR_ACC
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, TPR, FPR, ACC columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, TPR, FPR, ACC
#' **Return:**
#' None
#' **Example Usage for all initConfusionMatrix...:**
## clear all objects from the workspace
# source('TasKar.R')
## copy reported metrics from the spreadsheet
# metrics <- rclip()
# initConfusionMatrixViaP_N_TPR_ACC(metrics)
# source('TasKarMetrics.R')
# wclip(TasKar)
# source('TasKarSummaries.R')
# getAll()
initConfusionMatrixViaP_TPR_FPR_ACC<-function(metrics)
{
m <- getMeasuresViaP_TPR_FPR_ACC(metrics$P, metrics$TPR,
metrics$FPR, metrics$ACC)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_TPR_ACC
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, TPR, ACC columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, TPR, ACC
#' **Return:**
#' None
initConfusionMatrixViaP_N_TPR_ACC<-function(metrics)
{
m <- getMeasuresViaP_N_TPR_ACC(metrics$P, metrics$N,
metrics$TPR, metrics$ACC)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_TNR_ACC
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, TNR, ACC columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, TNR, ACC
#' **Return:**
#' None
initConfusionMatrixViaP_N_TNR_ACC<-function(metrics)
{
m <- getMeasuresViaP_N_TNR_ACC(metrics$P, metrics$N,
metrics$TNR, metrics$ACC)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_TPR_PPV
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, TPR, PPV columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, TPR, PPV
#' **Return:**
#' None
initConfusionMatrixViaP_N_TPR_PPV<-function(metrics)
{
m <- getMeasuresViaP_N_TPR_PPV(metrics$P, metrics$N,
metrics$TPR, metrics$PPV)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_TPR_FPR
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, TNR, FPR columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, TNR, FPR
#' **Return:**
#' None
initConfusionMatrixViaP_N_TPR_FPR<-function(metrics)
{
m <- getMeasuresViaP_N_TPR_FPR(metrics$P, metrics$N,
metrics$TPR, metrics$FPR)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_TPR_TNR
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, TPR, TNR columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, TPR, TNR
#' **Return:**
#' None
initConfusionMatrixViaP_N_TPR_TNR<-function(metrics)
{
m <- getMeasuresViaP_N_TPR_TNR(metrics$P, metrics$N,
metrics$TPR, metrics$TNR)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_ACC_F1
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, ACC, F1 columns
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, ACC, F1
#' **Return:**
#' None
initConfusionMatrixViaP_N_ACC_F1<-function(metrics)
{
m <- getMeasuresViaP_N_ACC_F1(metrics$P, metrics$N,
metrics$ACC, metrics$F1)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_FPR_FNR
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, FPR, FNR columns via
#' getMeasuresViaP_N_FPR_FNR
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, FPR, FNR
#' **Return:**
#' None
initConfusionMatrixViaP_N_FPR_FNR<-function(metrics)
{
m <- getMeasuresViaP_N_FPR_FNR(metrics$P, metrics$N,
metrics$FPR, metrics$FNR)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_FNR_ACC
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, FNR, ACC columns via
#' getMeasuresViaP_N_FPR_FNR
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, FNR, ACC
#' **Return:**
#' None
initConfusionMatrixViaP_N_FNR_ACC<-function(metrics)
{
m <- getMeasuresViaP_N_FNR_ACC(metrics$P, metrics$N,
metrics$FNR, metrics$ACC)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### initConfusionMatrixViaP_N_FPR_ACC
#' Initialize global base measures (TP, FP, FN, TN) and Sn from
#' give data frame with P, N, FPR, ACC columns via
#' getMeasuresViaP_N_FPR_FNR
#' **Parameters:**
#' *metrics*: Instrument data frame with P, N, FPR, ACC
#' **Return:**
#' None
initConfusionMatrixViaP_N_FPR_ACC<-function(metrics)
{
m <- getMeasuresViaP_N_FPR_ACC(metrics$P, metrics$N,
metrics$FPR, metrics$ACC)
initConfusionMatrixViaBaseMeasures(m)
}
#' ### addStudyBaseMeasures
#' Adds an instance of study of which the base measures are revealed.
#' **Parameters:**
#' *bm*: Revealed base measure data frane (TP, FP, FN, TN, Sn)
#' *study*: Name of the study (e.g. s01)
#' *config*: Classification configuration in the study (e.g. 1)
#' *inputs*: General input data frame (TP, FP, FN, TN, TPR, FPR, FNR, ...) that
#' is used to reveal the base measures
#' *possibility*: Internal possibility numbers indicating the combination of
#' given inputs (i.e. Possibility: 1: NaN P, N, TPR, FPR, 2: NaN P, N, TPR,
#' ACC) (default: 1)
#' #' *add_inconsistent_BMs*: Should inconsistent revealing results be added
#' into the global results (e.g. revealed base measures yielding more than
#' original Sn value (i.e. input$P + input$N or input$TP + input$FP + input$FN
#' + input$TN). If it is TRUE, the inconsistencies are stored in global log
#' (LogInconsistencies) (default: TRUE).
#' **Return:**
#' None
addStudyBaseMeasures <- function(bm, study, config, inputs,
instrument_combination, possibility=1,
add_inconsistent_BMs=TRUE)
{
new_study <- length(Study) + 1
hasP_and_N = !is.na(inputs$N) && !is.na(inputs$P)
consistency <- 'Mismatching Sn!'
delta_Sn <- NA
is_consistent <- FALSE
# Check original Sn with given P and N or given BM
# with calculated Base Measures (TP, FP, FN, TN)
if (hasP_and_N) {
input_Sn <- inputs$P + inputs$N
delta_Sn <- abs(bm$Sn - input_Sn)
if (delta_Sn < 1) {
consistency <- 'Matching Sn'
is_consistent <- TRUE
}
else if (input_Sn == bm$Sn) {
consistency <- 'Exact matching Sn'
is_consistent <- TRUE
}
}
else {
hasBM = !is.na(inputs$TP) && !is.na(inputs$FP) &&
!is.na(inputs$FN) && !is.na(inputs$TNP)
if (hasBM) {
input_Sn <- inputs$TP + inputs$FP + inputs$FN + inputs$TN
delta_Sn <- abs(bm$Sn - input_Sn)
if (delta_Sn < 1) {
consistency <- 'Matching Sn via BM'
is_consistent <- TRUE
}
else if (inputs$P + inputs$N == bm$Sn) {
consistency <- 'Exact matching Sn via BM'
is_consistent <- TRUE
}
else {
consistency <- 'Mismatching Sn via BM'
}
}
}
# Check calculated BM >= 0
if (is_consistent && (bm$TP < 0 || bm$FP < 0 || bm$FN < 0 || bm$TN < 0)) {
is_consistent <- FALSE
df <- data.frame(TP=bm$TP, FP=bm$FP, FN=bm$FN, TN=bm$TN)
df <- df[colSums(df) < 0]
consistency <- paste('Negative Base Measure(s):',
paste(colnames(df), round(as.numeric(df[1, ]), 1),
sep=':', collapse=', '))
}
if (is_consistent || add_inconsistent_BMs) {
reported_metrics <- inputs[!colnames(inputs) %in%
c('Study', 'Config', 'N', 'P',
'TP', 'FP', 'FN', 'TN')]
reported_metrics <- removeNaColumns(reported_metrics)
ReportedMetrics[new_study] <<-
paste(colnames(reported_metrics), collapse=', ')
reported_metric_values <- as.numeric(reported_metrics[1, ])
ReportedMetricValues[new_study] <<-
paste(colnames(reported_metrics),
round(reported_metric_values, 4),
sep=':', collapse=', ')
has_negative_performance <- any(c('FPR', 'FNR', 'FDR', 'FOR', 'MCR')
%in% names(reported_metrics))
if (has_negative_performance) {
reported_metric_values.positive <- as.numeric(
reported_metrics[1,
-which(names(reported_metrics) %in%
c('FPR', 'FNR', 'FDR', 'FOR', 'MCR'))])
reported_metric_values.negative <- 1 - as.numeric(
reported_metrics[1,
which(names(reported_metrics) %in%
c('FPR', 'FNR', 'FDR', 'FOR', 'MCR'))])
}
else {
reported_metric_values.positive <- as.numeric(reported_metrics[1, ])
reported_metric_values.negative <- NA
}
AverageReportedMetricValues[new_study] <<- mean(
c(reported_metric_values.positive, reported_metric_values.negative),
na.rm=TRUE)
MaxReportedMetricValues[new_study] <<- max(
c(reported_metric_values.positive, reported_metric_values.negative),
na.rm=TRUE)
MinReportedMetricValues[new_study] <<- min(
c(reported_metric_values.positive, reported_metric_values.negative),
na.rm=TRUE)
inputs <- appendDataFrameColumns(inputs, prefix='input')
Inputs[new_study, ] <<-
inputs[1, -which(names(inputs) %in% c('inputStudy', 'inputConfig'))]
# TP[new_study] <<- round(bm$TP, 1)
# FP[new_study] <<- round(bm$FP, 1)
# FN[new_study] <<- round(bm$FN, 1)
# TN[new_study] <<- round(bm$TN, 1)
# Sn[new_study] <<- round(bm$Sn, 1)
fTP[new_study] <<- bm$TP
fFP[new_study] <<- bm$FP
fFN[new_study] <<- bm$FN
fTN[new_study] <<- bm$TN
fSn[new_study] <<- bm$Sn
resultsP <- integerClassBaseMeasures(bm$TP, bm$FN, inputs$inputP)
TP[new_study] <<- resultsP[1]
FN[new_study] <<- resultsP[2]
resultsN <- integerClassBaseMeasures(bm$TN, bm$FP, inputs$inputN)
TN[new_study] <<- resultsN[1]
FP[new_study] <<- resultsN[2]
Sn[new_study] <<- TP[new_study]+FP[new_study]+FN[new_study]+TN[new_study]
# FIX HERE:
# round(TP, 0) + round(FN, 0) + 1 == P => TP <<- ceil(TP), FN <<- ceil(FN)
# round(TN, 0) + round(FP, 0) + 1 == N => TN <<- ceil(TN), FP <<- ceil(FP)
Study[new_study] <<- study
Config[new_study] <<- as.numeric(config)
InstrumentCombination[new_study] <<- instrument_combination
Possibilities[new_study] <<- possibility
Consistency[new_study] <<- consistency
DeltaSn[new_study] <<- round(delta_Sn, 1)
}
else {
# Log any inconsistencies
LogInconsistencies[length(LogInconsistencies)+1] <<-
paste(study, as.numeric(config), possibility, instrument_combination,
consistency, round(delta_Sn, 1), sep='; ')
}
}
#' ### initParsedMetrics
#' Initialize global variables about base measure revealing process
#' **Parameters:**
#' *survey*: Classification study (survey) performance inputs
#' *add_inconsistent_BMs*: Should inconsistent revealing results be added
#' into the global results (e.g. revealed base measures yielding more than
#' original Sn value (i.e. input$P + input$N or input$TP + input$FP + input$FN
#' + input$TN). If it is TRUE, the inconsistencies are stored in global log
#' (LogInconsistencies) (default: TRUE).
#' **Return:**
#' None
initParsedMetrics <- function(survey)
{
fTP <<- fFP <<- fFN <<- fTN <<- fSn <<- TP <<- FP <<- FN <<- TN <<- Sn <<-
Possibilities <<- Config <<- DeltaSn <<-
ReportedMetricValues <<- ReportedMetrics <<- AverageReportedMetricValues <<-
MaxReportedMetricValues <<- MinReportedMetricValues <<- numeric()
Study <<- InstrumentCombination <<- Consistency <<- LogInconsistencies <<-
character()
Inputs <<- appendDataFrameColumns(
subset(emptyDataFrame(colnames(survey)), select=-c(Study, Config)),
prefix='input')
}
#' ### revealConfusionMatrixes
#' Parse the (performance instrument) inputs per configuration per study and
#' reveal the confusion matrix (i.e. base measures) from possible combinations
#' of inputs (four terms) and store the corresponding results in global
#' variables
#' **Parameters:**
#' *survey*: Classification study (survey) performance inputs
#' **Return:**
#' A copy of the results in global variables in a data frame (can be used to
#' copy and paste into another medium for example a spreadsheet)
#' **Example Usage:**
## Copy spreadsheet like:
## Study Config N P TP FP FN TN TPR TNR FPR FNR ACC PPV NPV F1
## s01 1 261 180 0.956 0.621
## s01 2 261 180 0.467 0.13
#
# survey <- rclip()
## Set problematic metrics as NA
## (for example the ones cause exceptions in initParsedMetrics)
# survey$F1[93] <- NA
## if o studies are included:
# survey$F1[120] <- NA
# survey$F1[121] <- NA
#
# parsed_base_metrics <- revealConfusionMatrixes(survey)
## or exclude mismatching Sns
# parsed_base_metrics <- revealConfusionMatrixes(survey, FALSE)
#
# wclip(parsed_base_metrics)
revealConfusionMatrixes <- function(survey, add_inconsistent_BMs=TRUE)
{
count_survey <- nrow(survey)
if (count_survey > 0) {
initParsedMetrics(survey)
}
for (i in 1:count_survey) {
inputs <- survey[i, ]
study <- inputs
hasTP <- !is.na(study$TP)
hasFP <- !is.na(study$FP)
hasFN <- !is.na(study$FN)
hasTN <- !is.na(study$TN)
hasP <- !is.na(study$P)
hasN <- !is.na(study$N)
hasTPR <- !is.na(study$TPR)
hasTNR <- !is.na(study$TNR)
hasFPR <- !is.na(study$FPR)
hasFNR <- !is.na(study$FNR)
hasPPV <- !is.na(study$PPV)
hasNPV <- !is.na(study$NPV)
hasACC <- !is.na(study$ACC)
hasMCR <- !is.na(study$MCR)
hasF1 <- !is.na(study$F1)
hasCK <- !is.na(study$CK)
hasMCC <- !is.na(study$MCC)
study <- removeNaColumns(inputs)
k <- 1
cat(paste0('[', i, '/', count_survey, '] ', study$Study, '#', study$Config,
': '))
if (hasTP && hasFP && hasFN && hasTN) {
m <- getMeasuresViaBaseMeasures(study$TP, study$FP, study$FN, study$TN)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'BM', k, add_inconsistent_BMs)
cat(paste(k, ': via Base Measures. '))
k <- k + 1
}
if (hasP && hasTPR && hasFPR && hasACC) {
m <- getMeasuresViaP_TPR_FPR_ACC(study$P, study$TPR, study$FPR, study$ACC)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,TPR,FPR,ACC', k, add_inconsistent_BMs)
cat(paste(k, ': via P,TPR,FPR,ACC. '))
k <- k + 1
}
if (hasP && hasN && hasTPR && hasACC) {
m <- getMeasuresViaP_N_TPR_ACC(study$P, study$N, study$TPR, study$ACC)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,TPR,ACC', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,TPR,ACC. '))
k <- k + 1
}
if (hasP && hasN && hasTNR && hasACC) {
m <- getMeasuresViaP_N_TNR_ACC(study$P, study$N, study$TNR, study$ACC)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,TNR,ACC', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,TNR,ACC. '))
k <- k + 1
}
if (hasP && hasN && hasTPR && hasPPV) {
m <- getMeasuresViaP_N_TPR_PPV(study$P, study$N, study$TPR, study$PPV)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,TPR,PPV', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,TPR,PPV. '))
k <- k + 1
}
if (hasP && hasN && hasTPR && hasFPR) {
m <- getMeasuresViaP_N_TPR_FPR(study$P, study$N, study$TPR, study$FPR)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,TPR,FPR', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,TPR,FPR '))
k <- k + 1
}
if (hasP && hasN && hasTPR && hasTNR) {
m <- getMeasuresViaP_N_TPR_TNR(study$P, study$N, study$TPR, study$TNR)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,TPR,TNR', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,TPR,TNR. '))
k <- k + 1
}
if (hasP && hasN && hasACC && hasF1) {
m <- getMeasuresViaP_N_ACC_F1(study$P, study$N, study$ACC, study$F1)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,ACC,F1', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,ACC,F1. '))
k <- k + 1
}
if (hasP && hasN && hasFPR && hasFNR) {
m <- getMeasuresViaP_N_FPR_FNR(study$P, study$N, study$FPR, study$FNR)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,FPR,FNR', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,FPR,FNR. '))
k <- k + 1
}
if (hasP && hasN && hasFNR && hasACC) {
m <- getMeasuresViaP_N_FNR_ACC(study$P, study$N, study$FNR, study$ACC)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,FNR,ACC', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,FNR,ACC. '))
k <- k + 1
}
if (hasP && hasN && hasFPR && hasACC) {
m <- getMeasuresViaP_N_FPR_ACC(study$P, study$N, study$FPR, study$ACC)
addStudyBaseMeasures(m, study$Study, study$Config, inputs,
'P,N,FPR,ACC', k, add_inconsistent_BMs)
cat(paste(k, ': via P,N,FPR,ACC. '))
k <- k + 1
}
if (k == 1) {
cat(paste('Unknown combinations: ',
paste(colnames(study), collapse=','), '\n'))
}
else {
cat('[done]\n')
}
}
if (add_inconsistent_BMs == FALSE) {
count_inconsistencies <- length(LogInconsistencies)
if (count_inconsistencies == 0) {
cat('No inconsistencies.\n')
}
else {
cat(paste0('\n', count_inconsistencies, ' inconsistencies:\n'))
cat(paste(paste('study', 'config', 'possibility', 'instrument_combination',
'consistency', 'delta_Sn', sep='; ')), '\n')
cat(paste(LogInconsistencies, collapse='\n'))
}
}
return (data.frame(Study, Config, Inputs,
ReportedMetrics, ReportedMetricValues,
MinReportedMetricValues,
AverageReportedMetricValues, MaxReportedMetricValues,
Possibilities, InstrumentCombination,
fTP, fFP, fFN, fTN, fSn, TP, FP, FN, TN, Sn,
InputSn=Inputs$inputN+Inputs$inputP,
Consistency, DeltaSn))
}