# renv::install(paste0("agdamsbo/",c("REDCapCAST"))) ################################################################################ ######## ######## TALOS ######## ################################################################################ #' Imports specified TALOS data #' #' @param token api token #' @param vars variable names to retrieve (fields in REDCap lingo) #' #' @return tibble of REDCap exported data #' @examples #' data <- import_talos() #' skimr::skim(data) import_talos <- function(key = "TALOS_REDCAP_API", vars = c("record_id", "inkl_rnumb", "cpr", "talos_inkl03x", "talos_basis02a", "talos_basis02b", "basis_sys_site")) { REDCapR::redcap_read( redcap_uri = "https://redcap.au.dk/api/", token = keyring::key_get(key), fields = vars )$data } #' Filter TALOS by site #' #' @param data #' @param site default is "10015", which is Aarhus #' #' @return #' @export #' #' @examples #' aarhus <- import_talos() |> filter_talos_site(site = "10015") filter_talos_site <- function(data, site = "10015") { if (is.null(site)) { data } else { data |> dplyr::filter(basis_sys_site %in% site) } } #' Quick number of subjects in project #' #' @param key API key name for ekyring key retrieval #' #' @return numeric vector #' @export #' #' @examples #' redcap_get_n() redcap_get_n <- function(key = "SVD_REDCAP_API") { REDCapR::redcap_read_oneshot( redcap_uri = "https://redcap.au.dk/api/", token = keyring::key_get(key), fields = "record_id" ) |> purrr::pluck("data") |> nrow() } #' Modifies exported talos data to new database format #' #' @param data tibbled REDCap data #' @param trial source trial name. TALOS or RESIST #' @param key destination project API key name #' #' @return tibble with modified data #' @examples #' ds |> modify_data_talos(index = 0, trial = "TALOS") modify_data_talos <- function(data, index = redcap_get_n(), trial, id.var = inkl_rnumb, cpr.var = cpr, name.var = talos_inkl03x, date.var = talos_basis02a, time.var = talos_basis02b) { ids <- seq_len(nrow(data)) + (index) data |> dplyr::mutate(ids = ids) |> dplyr::transmute( record_id = glue::glue("svd_{ids}"), trial_id = {{ id.var }}, cpr = {{ cpr.var }}, name = {{ name.var }}, index_date = format(as.POSIXct({{ date.var }}), "%Y-%m-%d"), index_time = format(as.POSIXct({{ time.var }}), "%H:%M"), trial_name = trial, basis_complete = 2 ) } ################################################################################ ######## ######## RESIST ######## ################################################################################ #' Imports specified data #' #' @param token api token #' @param vars variable names to retrieve (fields in REDCap lingo) #' #' @return tibble of REDCap exported data #' @examples #' data <- import_resist() #' skimr::skim(data) import_resist <- function(key = "RESIST-MIGRATION", keyring = "REDCAP_APIs", service = "redcapAPI", vars = c("resistid", "cpr", "navn", "scan_dato_tid", "afdelingid", "target_population", "diagnosis")) { REDCapR::redcap_read( redcap_uri = "https://redcap.au.dk/api/", token = keyring::key_get(service = service, username = key, keyring = keyring), fields = vars )$data } #' Filter RESIST by site and target and scan #' #' @param data #' @param site default is "10015", which is Aarhus #' #' @return #' @export #' #' @examples #' resist_aarhus <- import_redcap( #' key = "RESIST-MIGRATION", keyring = "REDCAP_APIs", service = "redcapAPI" #' ) |> #' filter_resist(site = 1, target = 1, datetime.var = "scan_dato_tid", diag = 1) #' data |> filter_resist() filter_resist <- function(data, site = 1, target = 1, datetime.var = "scan_dato_tid", diag = 1) { data |> dplyr::filter(afdelingid %in% site) |> dplyr::filter(diagnosis %in% diag) |> dplyr::filter(target_population %in% target) |> dplyr::filter(!is.na(datetime.var)) } #' Modifies exported RESIST data to new database format #' #' @param data tibbled REDCap data #' @param trial source trial name. TALOS or RESIST #' @param key destination project API key name #' #' @return tibble with modified data #' @examples #' resist_aarhus |> modify_data_resist(trial = "RESIST", id.var = resistid, datetime.var = scan_dato_tid, name.var = navn) modify_data_resist <- function(data, index = redcap_get_n(), trial = "RESIST", id.var = resistid, cpr.var = cpr, datetime.var = scan_dato_tid, name.var = navn) { ids <- seq_len(nrow(data)) + (index) data |> dplyr::mutate(ids = ids) |> dplyr::transmute( record_id = glue::glue("svd_{ids}"), trial_id = {{ id.var }}, cpr = {{ cpr.var }}, name = {{ name.var }}, index_date = strftime({{ datetime.var }}, "%Y-%m-%d"), index_time = strftime({{ datetime.var }}, "%H:%M"), trial_name = trial, basis_complete = 2 ) } ################################################################################ ######## ######## UPLOAD ######## ################################################################################ #' Write data to REDCap db #' #' @param key project key set in `keyring` #' @param data data to write #' #' @return #' @examples #' ds |> #' filter_talos_site() |> #' modify_data() |> #' write2db() #' resist_ds <- import_resist() |> #' filter_resist() |> #' modify_data_resist(index = 509) #' resist_ds |> write2db() write2db <- function(data, key = "SVD_REDCAP_API") { REDCapR::redcap_write( redcap_uri = "https://redcap.au.dk/api/", token = keyring::key_get(key), ds_to_write = data ) } ################################################################################ ######## ######## DATA BACKUP and corrections ######## ################################################################################ # The following is just a mixed bag of commands from backing up # backup <- REDCapR::redcap_read_oneshot( # redcap_uri = "https://redcap.au.dk/api/", # token = keyring::key_get("SVD_REDCAP_API")) # svd <- backup$data |> dplyr::filter(!is.na(redcap_repeat_instance)) # # resist <- backup$data |> dplyr::filter(trial_name=="RESIST") # # resist_svd_backup <- resist |> dplyr::filter(record_id %in% svd$record_id) |> dplyr::select(record_id,trial_id) |> dplyr::left_join(dplyr::select(svd,record_id,tidyselect::starts_with("redcap"),tidyselect::starts_with("svd")),by="record_id")|> # dplyr::filter(svd_perf==1) |> dplyr::select(-record_id) # # dplyr::inner_join(resist_svd_backup,dplyr::select(resist_ds,record_id,trial_id))|> # dplyr::select(-trial_id) |> # dplyr::mutate( # svd_time=strftime(svd_time, "%H:%M"), # svd_time_scan=strftime(svd_time_scan, "%H:%M") # ) |> # write2db() # cpr <- REDCapR::redcap_read( # redcap_uri = "https://redcap.au.dk/api/", # token = keyring::key_get("SVD_REDCAP_API"),fields = c("record_id","cpr")) # # cpr_extract <- function(data,sep=""){ # paste(substr(data,1,6),substr(data,nchar(data)-3,nchar(data)),sep=sep) # } # # cpr_rev <- cpr$data |> dplyr::mutate(cpr = cpr_extract(cpr)) # # cpr_rev |> write2db() ################################################################################ ######## ######## SVD data management and analyses ######## ################################################################################ ## Inter-rater-reliability # https://stackoverflow.com/questions/71587719/inter-rater-reliability-by-groups # http://www.cookbook-r.com/Statistical_analysis/Inter-rater_reliability/ # # https://joon-e.github.io/tidycomm/reference/test_icr.html #' Sample data for inter-rater-reliability testing #' #' @return data.frame #' @export #' @examples #' irr_sample() irr_sample <- function() { set.seed(8) paste0("svd_", sample(1:5, 5, replace = FALSE)) |> lapply(\(x){ rbind( c(x, "A", sample(1:0, 20, replace = TRUE)), c(x, "B", sample(1:0, 20, replace = TRUE)) ) |> as.data.frame() |> setNames(c("record_id", "svd_user", paste0("item", 1:20))) }) |> purrr::list_rbind() } #' Cleans ID numbers and arranges by increasing value #' #' @param data data set #' @param remove string to remove #' #' @return #' @export #' #' @examples #' irr_sample() |> clean_record_id() arrange_record_id <- function(data, remove = "svd_") { data |> dplyr::arrange(as.numeric(stringr::str_remove(record_id, remove))) ## Improve by removing everything not being a number? } #' Read single REDCap instrument #' #' @param key API key #' @param instrument instrument name #' #' @return data.frame #' @export #' #' @examples #' data <- read_instrument(key = "SVD_REDCAP_API", instrument = "svd_score") #' basis_ds <- read_instrument(key = "SVD_REDCAP_API", instrument = "basis") #' basis_ds |> #' arrange_record_id() |> #' head(100) read_instrument <- function(key = "SVD_REDCAP_API", instrument = "svd_score", raw_label = "raw") { REDCapCAST::read_redcap_tables( uri = "https://redcap.au.dk/api/", token = keyring::key_get(key), fields = "record_id", forms = instrument, raw_or_label = raw_label )[[instrument]] } #' Clean SVD data by filtering on repeated and on any marking as data not present #' #' @param data data set #' #' @return tibble #' @export #' svd_score_clean <- function(data) { # Filtering non-performed annotations out filtered <- split(data, data$record_id)[!split(data, data$record_id) |> purrr::map(\(x){ any(x$svd_perf == 2) }) |> purrr::list_c()] |> dplyr::bind_rows() # Keeps the first instance in case of the same user having filled out more than one split(filtered, filtered$record_id) |> purrr::map(\(x){ x[!duplicated(x$svd_user), ] }) |> dplyr::bind_rows() |> arrange_record_id() } #' Filtering out users, that did not complete all annotations #' #' @param data data #' #' @return data.frame or tibble #' @export filter_incomplete_users <- function(data) { data |> (\(x){ nall <- x |> dplyr::group_by(svd_user) |> dplyr::count() |> dplyr::ungroup() |> dplyr::filter(n == max(n)) |> dplyr::select(svd_user) x |> dplyr::filter(svd_user %in% nall[[1]]) })() } #' Cleaning data for IRR calculation #' #' @param key #' @param instrument #' #' @return #' @export #' #' @examples #' read_instrument() |> inter_rater_data() inter_rater_data <- function(data) { data |> arrange_record_id() |> dplyr::select(tidyselect::all_of( c( "record_id", "svd_user", "svd_quality", "svd_microbleed", "svd_microbleed_location___1", "svd_microbleed_location___2", "svd_microbleed_location___3", "svd_siderose", "svd_lacunes", "svd_wmh", "svd_atrophy" ) )) } #' Simplified SVD score 0-4 #' #' @param data #' #' @return #' @export #' #' @examples #' data <- read_instrument() |> inter_rater_data() #' data |> simple_score() simple_score <- function(data) { data |> arrange_record_id() |> dplyr::transmute(record_id, svd_user, microbleed = dplyr::if_else(svd_microbleed < 1, 0, 1), lacunes = dplyr::if_else(svd_lacunes < 1, 0, 1), wmh = dplyr::if_else(svd_wmh < 2, 0, 1), atrophy = dplyr::if_else(svd_atrophy < 2, 0, 1), score = microbleed + lacunes + wmh + atrophy ) } #' Inter rater reliability calculations #' #' @param data data #' #' @return #' @export tibble #' #' @examples #' irr_sample |> inter_rater_calc() #' data <- read_instrument() |> #' inter_rater_data() #' data |> inter_rater_calc() inter_rater_calc <- function(data) { data |> tidycomm::test_icr( unit_var = record_id, coder_var = svd_user, holsti = FALSE, fleiss_kappa = TRUE, brennan_prediger = TRUE, na.omit = TRUE ) } #' ICC calculations #' #' @param data minimal dataset with only relevant variables #' @param unit_var subject var #' @param coder_var rater var #' #' @return #' @export #' #' @examples #' # ds_simple |> icc_multi(unit_var=record_id, coder_var=svd_user) icc_multi <- function(data, unit_var = record_id, coder_var = svd_user) { # The function to calculate ICC icc_calc <- function(data) { irr::icc(data, model = "twoway", type = "agreement", unit = "single") |> purrr::pluck("value") } # Names of provided variables suppressWarnings(nms <- data |> dplyr::select(-{{ unit_var }}, -{{ coder_var }}) |> names()) # ICC calculation for each variable nms |> lapply(function(.x) { tidycomm:::unit_coder_matrix(data, unit_var = {{ unit_var }}, coder_var = {{ coder_var }}, test_var = .x ) }) |> purrr::map(icc_calc) |> purrr::list_c() |> (\(.y){ tibble::tibble( Variable = nms, IntraclCorrCoef = .y ) })() } #' Join IRR and ICC calculations #' #' @param data data #' #' @return tibble #' @export irr_icc_calc <- function(data) { dplyr::left_join(inter_rater_calc(data), icc_multi(data)) |> dplyr::select(-tidyselect::all_of(c("n_Coders", "n_Categories", "Level", "n_Units"))) } ################################################################################ ######## ######## Assessor allocation ######## ################################################################################ get_allocations <- function(key = "SVD_REDCAP_API") { REDCapR::redcap_read( redcap_uri = "https://redcap.au.dk/api/", token = keyring::key_get(key), fields = c("record_id", "allocated_assessor", "allocated_assessor_2") )$data |> dplyr::select(c("record_id", "allocated_assessor", "allocated_assessor_2")) |> arrange_record_id() |> dplyr::filter(allocated_assessor != "all") } #' Upload assessor allocation #' #' @param path allocation table path #' #' @return #' @export #' #' @examples #' allocate_assessors(key=assessor_key2) |> View() allocate_assessors <- function(path = "data/allocation.ods", key) { ds <- readODS::read_ods(here::here(path)) ls <- split.default(ds, grepl(".2$", colnames(ds))) |> lapply(na.omit) |> lapply(setNames, c("assessor", "start", "stop")) |> lapply(function(.x) { seq_len(nrow(.x)) |> lapply(function(.y) { .x[.y, ] |> dplyr::tibble( record_id = paste0("svd_", seq(start, stop)), allocated_assessor = multi_replace(assessor, key = key) ) |> dplyr::select(tidyselect::all_of(c( "record_id", "allocated_assessor" ))) }) |> dplyr::bind_rows() }) out <- dplyr::full_join( ls[[1]], ls[[2]] |> setNames(c("record_id", "allocated_assessor_2")) ) testing <- out |> dplyr::mutate( test = dplyr::if_else(allocated_assessor == allocated_assessor_2, TRUE, FALSE, missing = FALSE ) ) |> dplyr::filter(allocated_assessor != "all") if (testing |> (function(x) { any(x$test) })() ) { print(dplyr::filter(testing, test)) stop("The samme assessor is allocated twice to the same subject") } out } #' Cuts hms data into intervals #' #' @param data data #' @param breaks specified breaks. Character vector #' @param labels desired labels. Breaks are used if NULL. #' #' @return #' @export #' time_cutter <- function(data, breaks = c("00:00:00", "12:00:00", "23:59:00"), labels = c("AM", "PM")) { if (!"hms" %in% class(data)) stop("Data has to be of class 'hms'") if (length(breaks) - 1 != length(labels) | is.null(labels)) { message("Generic labels are used") labels <- seq_len(length(breaks) - 1) |> lapply(\(x){ glue::glue("[{substr(breaks,1,5)[x]}-{substr(breaks,1,5)[x+1]}]") }) |> purrr::list_c() } cut(lubridate::ymd_hms(paste(Sys.Date(), data)), breaks = lubridate::ymd_hms(paste(Sys.Date(), breaks)), labels = labels ) } #' Identify subjects with missing assesment #' #' @param data data #' #' @return #' @export #' who_is_missing <- function(data) { data |> dplyr::group_by(record_id) |> dplyr::group_split() |> (\(x){ unique(dplyr::bind_rows(x)[["record_id"]])[!purrr::list_c(lapply(x, nrow)) == length(unique(dplyr::bind_rows(x)[["svd_user"]]))] })() } #' Replace multiple values in a vector with a named key #' #' @param data vector to replace #' @param key named character vector with old values named with new values #' @param keep.non.keyed keep non-keyed values or give NA #' #' @return character vector #' @export #' #' @examples #' ds <- sample(1:6, 20, replace = TRUE) #' multi_replace(ds, key = c("3" = 1, "4" = 2, "8" = 3, "HEY" = 4, "0" = 5)) #' multi_replace(ds, key = c("3" = 1, "4" = 2, "8" = 3, "HEY" = 4, "0" = 5), keep.non.keyed = FALSE) multi_replace <- function(data, key = assessor_key, keep.non.keyed = TRUE) { trans <- names(key)[match(data, key, nomatch = NA)] if (any(is.na(trans))) { message("Mind that the key is incomplete") } if (keep.non.keyed) { data[data %in% key] <- trans[!is.na(trans)] data } else { trans } } #' Collapse vector to readable sentence #' #' @param data vector #' @param sep.last last sep word. Default is "and" #' #' @return character vector length 1 #' @export #' #' @examples #' rownames(mtcars)[1:4] |> chr_collapse() chr_collapse <- function(data, sep.last = "and") { if (is.numeric(data)) { data <- round(data, 2) } paste(paste(data[-length(data)], collapse = ", "), sep.last, data[length(data)]) } #' Title #' #' @return list #' @export #' #' @examples missing_annotations <- function() { data <- REDCapCAST::read_redcap_tables( uri = "https://redcap.au.dk/api/", token = keyring::key_get("SVD_REDCAP_API"), fields = c( "record_id", "allocated_assessor", "allocated_assessor_2", "svd_user", "svd_score_complete", "svd_perf" ), raw_or_label = "raw" ) # Selecting and splitting allocations allos <- data$basis |> tidyr::pivot_longer(-record_id) |> dplyr::filter(value != "all") |> dplyr::select(-name) allos_split <- allos |> (\(.x){ split(.x, .x$value) })() # Selecting and splitting annotations/assessments annos <- data$svd_score |> dplyr::filter(record_id %in% allos$record_id) annos_split <- annos |> (\(.x){ split(.x, .x$svd_user) })() # Filtering out all with missing annotations missing_all <- names(allos_split) |> purrr::map(function(.x) { # allos_split[[.x]] if (.x %in% names(annos_split)) { dplyr::full_join(allos_split[[.x]], annos_split[[.x]]) } else { tibble::tibble(allos_split[[.x]], svd_user = NA) } }) |> purrr::map(arrange_record_id) |> purrr::map(function(.x) { .x |> dplyr::filter(is.na(svd_user)) }) # Filtering out subject with any assessor marked "not performed" missing_scan <- data$svd_score |> (\(.x){ split(.x, .x$record_id) })() |> purrr::map(function(.x) { any(.x$svd_perf != 1) }) |> (\(.x){ names(.x)[purrr::list_c(.x)] })() # Printing out missings, that have not been assessed as missing scans missing_all |> purrr::map(function(.x) { .x |> dplyr::filter(!record_id %in% missing_scan) }) |> setNames(names(allos_split)) } ## Create consensus instrument #' Create two-column HTML table for data piping in REDCap instruments #' #' @param text descriptive text #' @param variable variable to pipe #' #' @return character vector #' @export #' #' @examples #' create_html_table(text = paste("assessor", 1:2, sep = "_"), variable = c("[cpr]")) #' create_html_table(text = c("CPR nummer"), variable = c("[cpr][1]", "[cpr][2]")) create_html_table <- function(text, variable) { start <- '' end <- "
" # Extension would allow defining number of columns and specify styling items <- purrr::map2(text, variable, function(.x, .y) { glue::glue('
{.x}
{.y}
') }) glue::glue(start, glue::glue_collapse(purrr::list_c(items)), end) } #' Title #' #' @param variable #' @param repeats #' #' @return #' @export #' #' @examples #' wrap_repeatable_variable(c("cpr", "age")) wrap_repeatable_variable <- function(variable, repeats = 2) { purrr::map(variable, function(.x) { purrr::map(seq_len(repeats), function(.y) { glue::glue("[{.x}][{.y}]") }) |> purrr::list_c() }) } #' Title #' #' @param variable variables name(s) #' @param assessments number of assessments #' @param label assessor label #' #' @return #' @export #' #' @examples #' assessors_table(c("cpr", "age")) assessors_table <- function(variable, assessments = 2, label = "assessor") { wrap_repeatable_variable(variable, repeats = assessments) |> purrr::map(function(.x) { create_html_table(paste(label, seq_len(length(.x)), sep = "_"), .x) }) } #' Vector starts with any of matches #' #' @param data character vector to check #' @param match character vector to match against #' #' @return #' @export #' #' @examples #' vec_starts_any(dd$field_name, consensus_vars) vec_starts_any <- function(data, match) { match |> purrr::map(function(.x) { grepl(paste0("^", .x, "*"), data) }) |> dplyr::bind_cols() |> apply(1, any) } #' Vector ends with any of matches #' #' @param data character vector to check #' @param match character vector to match against #' #' @return #' @export #' #' @examples #' vec_ends_any(dd$field_name, scan) vec_ends_any <- function(data, match) { match |> purrr::map(function(.x) { grepl(paste0("*", .x, "$"), data) }) |> dplyr::bind_cols() |> apply(1, any) } #' Vector contains with any of matches #' #' @param data character vector to check #' @param match character vector to match against #' #' @return #' @export #' #' @examples #' vec_contains_any(dd$field_name, scan) vec_contains_any <- function(data, match, inv = FALSE) { out <- match |> purrr::map(function(.x) { grepl(paste0("*", .x, "*"), data) }) |> dplyr::bind_cols() |> apply(1, any) if (inv) !out else out } #' Cleans branching logic to only include relevant cases #' #' @param data #' #' @return #' @export #' clean_branching <- function(data) { # This is a highly specific function not taking any edge case into account strsplit(data, " or ") |> (function(.x) { purrr::map2( .x, purrr::map(.x, vec_contains_any, "consensus_perf", inv = TRUE ), function(.y, .z) { .y[.z] } ) })() |> purrr::map(glue::glue_collapse, sep = " or ") |> purrr::list_c() } #' Creates relevant instrument files and zips it all #' #' Should go to REDCapCAST #' #' @param data #' @param dir #' #' @return #' @export #' write_instrument_meta <- function(data, dir) { temp_dir <- tempdir() write.csv(data, paste0(temp_dir, "/instrument.csv"), row.names = FALSE, na = "") writeLines("redcap.au.dk", paste0(temp_dir, "/origin.txt")) zip::zip(paste0(dir, "/", unique(data$form_name), Sys.Date(), ".zip"), files = c("origin.txt", "instrument.csv"), root = temp_dir ) }