{ "cells": [ { "cell_type": "code", "execution_count": 32, "id": "5adb72ac-565d-4960-931c-3803a71e145c", "metadata": { "tags": [] }, "outputs": [], "source": [ "suppressPackageStartupMessages(library(readxl))\n", "suppressPackageStartupMessages(library(dplyr))\n", "suppressPackageStartupMessages(library(tidyr))\n", "suppressPackageStartupMessages(library(tibble))\n", "suppressPackageStartupMessages(library(ggplot2))\n", "suppressPackageStartupMessages(library(patchwork))\n", "suppressPackageStartupMessages(library(pheatmap))\n", "suppressPackageStartupMessages(library(stringr))\n", "library(hise)\n", "library(plyr)\n", "library(purrr)" ] }, { "cell_type": "code", "execution_count": 33, "id": "eb4d8715-8f5a-48a9-b17f-3c029e03e812", "metadata": { "tags": [] }, "outputs": [], "source": [ "fileDescToDataframe_beta <- function(descriptors,keep_labs = FALSE) {\n", " descriptors<-descriptors[[1]]\n", " assertthat::assert_that(typeof(descriptors) == \"list\")\n", " assertthat::assert_that(typeof(keep_labs) == \"logical\")\n", " \n", " do.call(\n", " rbind.fill,\n", " lapply(\n", " descriptors,\n", " function(desc) {\n", " desc <- unlist(desc)\n", " desc <- desc[!grepl(\"scheme\", names(desc))]\n", " names(desc) <- sub(\"^descriptors.\",\"\",names(desc))\n", " desc <- desc[!grepl(\"^specimens\", names(desc))]\n", " desc <- desc[!grepl(\"^lab\", names(desc))]\n", " desc <- desc[!grepl(\"^emr\", names(desc))]\n", " desc <- desc[!grepl(\"^survey\", names(desc))]\n", " desc <- desc[!grepl(\"^revision\", names(desc))]\n", " desc <- desc[!grepl(\"^file.userTags\", names(desc))]\n", " desc <- as.list(desc)\n", " df <- as.data.frame(desc)\n", " df\n", " }\n", " )\n", " )\n", "}\n", "\n", "fileDescToDataframe <- function(descriptors,\n", " keep_labs = FALSE) {\n", " \n", " assertthat::assert_that(typeof(descriptors) == \"list\")\n", " assertthat::assert_that(typeof(keep_labs) == \"logical\")\n", " \n", " do.call(\n", " rbind,\n", " lapply(\n", " descriptors,\n", " function(desc) {\n", " desc <- unlist(desc)\n", " desc <- desc[!grepl(\"scheme\", names(desc))]\n", " names(desc) <- sub(\"^descriptors.\",\"\",names(desc))\n", " desc <- desc[!grepl(\"^specimens\", names(desc))]\n", " desc <- desc[!grepl(\"^lab\", names(desc))]\n", " desc <- desc[!grepl(\"^emr\", names(desc))]\n", " desc <- desc[!grepl(\"^survey\", names(desc))]\n", " desc <- desc[!grepl(\"^revision\", names(desc))]\n", " desc <- desc[!grepl(\"^file.userTags\", names(desc))]\n", " desc <- as.list(desc)\n", " df <- as.data.frame(desc)\n", " df\n", " }\n", " )\n", " )\n", "}" ] }, { "cell_type": "code", "execution_count": 34, "id": "886829cd-06ef-4300-956b-75fbabca4378", "metadata": { "tags": [] }, "outputs": [], "source": [ "BR1_rna_desc <- getFileDescriptors(\n", " fileType = \"scRNA-seq-labeled\", \n", " filter = list(cohort.cohortGuid = \"BR1\"))\n", "BR2_rna_desc <- getFileDescriptors(\n", " fileType = \"scRNA-seq-labeled\", \n", " filter = list(cohort.cohortGuid = \"BR2\"))\n", "UP1_rna_desc <- getFileDescriptors(\n", " fileType = \"scRNA-seq-labeled\", \n", " filter = list(cohort.cohortGuid = \"UP1\"))" ] }, { "cell_type": "code", "execution_count": 35, "id": "7713d294-470e-4bce-b823-50db7bb28ea7", "metadata": { "tags": [] }, "outputs": [], "source": [ "BR1_rna_desc <- fileDescToDataframe_beta(BR1_rna_desc)\n", "BR2_rna_desc <- fileDescToDataframe_beta(BR2_rna_desc)\n", "UP1_rna_desc <- fileDescToDataframe_beta(UP1_rna_desc)" ] }, { "cell_type": "code", "execution_count": 36, "id": "fea5288b-59be-47ad-8e23-1ec57b1080a4", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data<-rbind.fill(BR1_rna_desc , BR2_rna_desc )" ] }, { "cell_type": "code", "execution_count": 37, "id": "a337a609-68bb-4ea1-8f60-2ba3010a98ce", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data<-meta_data%>% filter(!file.batchID==\"B004\")%>% filter(!subject.subjectGuid%in% c(\"BR2007\",\"BR2049\",'BR1034'))" ] }, { "cell_type": "code", "execution_count": 38, "id": "e599d045-47e3-48d1-8fe2-26db30b86a79", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data$pbmc_sample_id<-gsub(\"_\",\"\",paste0(\"PB0\",substr(sub(\".*PB0\", \"\", meta_data$file.name),1,8)))\n", "meta_data<-meta_data %>% filter(!grepl(\"EXP\",file.batchID))%>% arrange(pbmc_sample_id)\n", "meta_data<-meta_data[!duplicated(meta_data[, c(\"sample.sampleKitGuid\")], fromLast=T),]%>% arrange(sample.sampleKitGuid)\n", "meta_data<-meta_data%>%mutate(sample.visitName=ifelse(sample.visitName==\"Other - Non-Flu\",sample.visitDetails,sample.visitName) )" ] }, { "cell_type": "code", "execution_count": 39, "id": "54a7a172-ca11-42a7-95a4-eb1ab9098d9b", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data<-meta_data %>% filter(sample.visitName=='Flu Year 1 Day 0')" ] }, { "cell_type": "code", "execution_count": 40, "id": "5eb5922c-a134-4661-9338-e0494569eaa0", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "\n", "BR1 BR2 \n", " 47 45 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "table(meta_data$cohort.cohortGuid)" ] }, { "cell_type": "code", "execution_count": 41, "id": "c07e656a-63e6-4815-b6b2-dfbb5eaa8bb5", "metadata": { "tags": [] }, "outputs": [], "source": [ "UP1_rna_desc$Sample_ID<-paste0('PB',gsub('KT','',UP1_rna_desc$sample.sampleKitGuid))" ] }, { "cell_type": "code", "execution_count": 42, "id": "70d7eb1b-dd86-4519-acea-765d06a5ed4d", "metadata": { "tags": [] }, "outputs": [], "source": [ "UP_Sample_ID<-c(\"PB00809\",\"PB00811\",\"PB00193\",\"PB00841\",\n", " \"PB00842\",\"PB00833\",\"PB00910\",\"PB00884\",\n", " \"PB00892\",\"PB00914\",\"PB00913\",\"PB00927\",\n", " \"PB00928\",\"PB02391\",\"PB02392\",\"PB03223\")" ] }, { "cell_type": "code", "execution_count": 43, "id": "d43c00fd-15f1-45c0-ae6e-f122ba146355", "metadata": { "tags": [] }, "outputs": [], "source": [ "UP1_rna_desc<-UP1_rna_desc %>% filter(Sample_ID %in% UP_Sample_ID)" ] }, { "cell_type": "code", "execution_count": 44, "id": "551cb10a-8a18-4f54-a843-a752cbb35fe1", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data<-rbind.fill(meta_data,UP1_rna_desc)" ] }, { "cell_type": "code", "execution_count": 16, "id": "03fe287d-4a04-46d9-8bd3-eca00df2184e", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "submitting request as query ID first...\n", "\n", "retrieving files using fileIDS...\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[1] 1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "submitting request as query ID first...\n", "\n", "retrieving files using fileIDS...\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[1] 2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "submitting request as query ID first...\n", "\n", "retrieving files using fileIDS...\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[1] 3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "submitting request as query ID first...\n", "\n", "retrieving files using fileIDS...\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[1] 4\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "submitting request as query ID first...\n", "\n", "retrieving files using fileIDS...\n", "\n" ] }, { "ename": "ERROR", "evalue": "Error in curl::curl_fetch_memory(url, handle = handle): Operation was aborted by an application callback\n", "output_type": "error", "traceback": [ "Error in curl::curl_fetch_memory(url, handle = handle): Operation was aborted by an application callback\nTraceback:\n", "1. cacheFiles(list(meta_data$file.id[i]))", "2. doQuery(assembleQuery(id = ids_expanded, format = \"all\", endpoint = hydraEnvVar(\"searchExecEndpoint\")))", "3. httr::GET(url, do.call(httr::add_headers, h))", "4. request_perform(req, hu$handle$handle)", "5. request_fetch(req$output, req$url, handle)", "6. request_fetch.write_memory(req$output, req$url, handle)", "7. curl::curl_fetch_memory(url, handle = handle)" ] } ], "source": [ "meta_data_scRNA_list<-list()\n", "for (i in 1:dim(meta_data)[1]){\n", "meta_data_scRNA_single <- cacheFiles(list(meta_data$file.id[i]))\n", "print (i)\n", "}" ] }, { "cell_type": "code", "execution_count": null, "id": "6f36e862-74b3-42bb-80c7-8721dd677888", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 17, "id": "1899832d-825b-41ce-99dc-d13a24966c05", "metadata": { "tags": [] }, "outputs": [], "source": [ "write.csv(meta_data,paste0(\"hise_meta_data_\",Sys.Date(),\".csv\"))" ] }, { "cell_type": "code", "execution_count": null, "id": "e582811f-d8de-4072-a74c-dcfb0f313767", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 45, "id": "1bd27b93-a1a6-4c3f-8740-b9f43324ee1d", "metadata": { "tags": [] }, "outputs": [], "source": [ "files<-data.frame(list.files(path = \"cache/\", pattern = 'h5', all.files = TRUE,\n", " full.names = FALSE, recursive = TRUE,\n", " ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE))" ] }, { "cell_type": "code", "execution_count": 46, "id": "d441e826-c527-4e45-8cf9-071f3682e733", "metadata": { "tags": [] }, "outputs": [], "source": [ "colnames(files)<-'file.path'" ] }, { "cell_type": "code", "execution_count": 47, "id": "90e8d053-76e4-481f-836a-c70372fdf7d9", "metadata": { "tags": [] }, "outputs": [], "source": [ "separated <- strsplit(files$file.path, split = '/')" ] }, { "cell_type": "code", "execution_count": 48, "id": "322385cf-29e9-439c-af61-3cd561554f4c", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "# Use strsplit to separate each string on '/', then unlist and rbind to create a matrix\n", "separated_matrix <- do.call(rbind, lapply(files$file.path, function(x) unlist(strsplit(x, split = '/'))))\n", "\n", "# Convert the matrix to a data frame\n", "df <- as.data.frame(separated_matrix, stringsAsFactors = FALSE)\n", "\n", "# Set column names\n", "colnames(df) <- c(\"file.id\", \"file.name.downloaded\")\n" ] }, { "cell_type": "code", "execution_count": 49, "id": "77aa9ee4-e655-4759-900a-2de17cf40f55", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data<-left_join(meta_data,cbind(df,files),by=('file.id'))" ] }, { "cell_type": "code", "execution_count": 50, "id": "1700b9b1-2547-4987-8e8c-7a2699a7adea", "metadata": { "tags": [] }, "outputs": [], "source": [ "meta_data$file.path<-paste0('/home//jupyter/reference_generating_new/cache/',meta_data$file.path)" ] }, { "cell_type": "code", "execution_count": 53, "id": "38c653da-00a7-4d41-a148-012e8bae876c", "metadata": { "tags": [] }, "outputs": [], "source": [ "write.csv(meta_data,paste0(\"hise_meta_data_\",Sys.Date(),\".csv\"))" ] }, { "cell_type": "code", "execution_count": null, "id": "61def613-3746-4fed-b87f-93c19f157e77", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R 4", "language": "R", "name": "ir4" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.3.1" } }, "nbformat": 4, "nbformat_minor": 5 }