{
 "cells": [
  {
   "cell_type": "raw",
   "id": "eb1a0d79-3c64-40c8-901d-a788b24fb053",
   "metadata": {},
   "source": [
    "!pip install -q https://github.com/aifimmunology/multicelltypist/archive/main.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "660c6563-9bc8-47e0-a39a-5caea64b2b18",
   "metadata": {},
   "outputs": [],
   "source": [
    "import multicelltypist\n",
    "from datetime import date\n",
    "import hisepy\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8c18fd58-c65b-4764-b547-0400f6eb8530",
   "metadata": {},
   "source": [
    "### Retrieve 10x Genomics Flex Probe gene list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4b7e239e-5df8-4196-873d-c5380897437e",
   "metadata": {},
   "outputs": [],
   "source": [
    "cache_res = hisepy.download_from_project_store(\n",
    "    store_name = 'rds', # Reference Data Sets Project Store\n",
    "    file_name = 'AIFI-2024-03-11T02:09:16.856602896Z/Chromium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.probe_metadata.tsv', # File from 10x Genomics\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5b0535e2-39ec-446f-b2dd-a7460e4cf458",
   "metadata": {},
   "outputs": [],
   "source": [
    "probe_file = 'rds/Chromium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.probe_metadata.tsv'\n",
    "flex_probes = pd.read_csv(probe_file, sep = '\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "54905616-a063-4eda-a243-46db7c6a031a",
   "metadata": {},
   "outputs": [],
   "source": [
    "flex_genes = flex_probes['gene_name'].unique().tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6703a5b4-e367-4477-87ef-703f5060f405",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18529"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(flex_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "2611fcb9-1cc9-4b22-a3b2-c4a6c4d7c4ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_adata_uuid(h5ad_uuid):\n",
    "    h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)\n",
    "    if not os.path.isdir(h5ad_path):\n",
    "        hise_res = hisepy.reader.cache_files([h5ad_uuid])\n",
    "    h5ad_filename = os.listdir(h5ad_path)[0]\n",
    "    h5ad_file = '{p}/{f}'.format(p = h5ad_path, f = h5ad_filename)\n",
    "    adata = sc.read_h5ad(h5ad_file)\n",
    "    return adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b26381f4-75ce-4776-9859-9f7775c198d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def resample_anndata_min_max(adata, label_column, max_cells=None, min_cells=None, random_state = 3030):\n",
    "    \"\"\"\n",
    "    Resamples an AnnData object based on the cell labels, with the option to resample with \n",
    "    replacement for classes below a specified threshold.\n",
    "\n",
    "    Parameters:\n",
    "    ad (AnnData): The AnnData object to be resampled.\n",
    "    label_column (str): The column in ad.obs where the labels are stored.\n",
    "    max_cells (int, optional): The maximum number of cells to keep per label. If None, no upper limit is applied.\n",
    "    min_cells (int, optional): The minimum number of cells below which resampling with replacement occurs. If None, no lower limit is applied.\n",
    "    random_state (int, default = 3030): An integer used to set the state of the numpy.random.Generator\n",
    "    \n",
    "    Returns:\n",
    "    AnnData: The resampled AnnData object.\n",
    "    \"\"\"\n",
    "    \n",
    "    labels = adata.obs[label_column].unique()\n",
    "\n",
    "    subsets = []\n",
    "\n",
    "    rng = np.random.default_rng(random_state)\n",
    "        \n",
    "    for label in labels:\n",
    "        # Subset AnnData object for the current label\n",
    "        subset = adata.obs[adata.obs[label_column] == label]\n",
    "    \n",
    "        # Resample with replacement if the number of cells is below min_cells and min_cells is defined\n",
    "        if min_cells is not None and subset.shape[0] < min_cells:\n",
    "            subset = subset.sample(min_cells, replace = True, random_state = rng)\n",
    "        # Resample without replacement if the number of cells is greater than max_cells and max_cells is defined\n",
    "        elif max_cells is not None and subset.shape[0] > max_cells:\n",
    "            subset = subset.sample(max_cells, replace = False, random_state = rng)\n",
    "    \n",
    "        subsets.append(subset)\n",
    "\n",
    "    # Concatenate all subsets\n",
    "    resampled_obs = pd.concat(subsets)\n",
    "    \n",
    "    resampled_adata = adata[resampled_obs.index]\n",
    "    resampled_adata.obs_names_make_unique()\n",
    "\n",
    "    return resampled_adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "025a1a8c-9a27-425d-856f-d57d06085150",
   "metadata": {},
   "outputs": [],
   "source": [
    "label_column = 'AIFI_L1'\n",
    "max_cell_number = 100000"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f5d2a81-4c8b-41a6-887a-903305be8e41",
   "metadata": {},
   "source": [
    "## Read clean, annotated dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "402ba1b7-74b9-45d3-b840-f4e5ea967fdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "h5ad_uuid = '6e8972a5-9463-4230-84b4-a20de055b9c3'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d19d07d9-c602-4c16-ada7-16091f43ef4c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "downloading fileID: 6e8972a5-9463-4230-84b4-a20de055b9c3\n",
      "Files have been successfully downloaded!\n"
     ]
    }
   ],
   "source": [
    "adata = read_adata_uuid(h5ad_uuid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "0e7cc125-7921-4fb0-a3de-c8d5ca493d97",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1823666, 1261)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "02304406-4c05-459e-aad3-aae00f909516",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIFI_L1\n",
       "T cell             1152286\n",
       "Monocyte            327919\n",
       "B cell              160632\n",
       "NK cell             147761\n",
       "DC                   23287\n",
       "Platelet              7903\n",
       "Progenitor cell       1526\n",
       "Erythrocyte           1508\n",
       "ILC                    844\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.obs[label_column].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afd7084d-0f4e-4aa8-b538-af3b316e4992",
   "metadata": {},
   "source": [
    "## Sample and prepare reference data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "139d3a45-9492-4ffb-af7a-79b28722a034",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_subset = resample_anndata_min_max(\n",
    "    adata, \n",
    "    label_column, \n",
    "    max_cells = max_cell_number,\n",
    "    random_state = 3030\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "bc027836-a9ce-44d2-95ec-50b90f4a74e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(435068, 1261)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "bb48435e-91ec-4e20-9ff6-1459c785d784",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIFI_L1\n",
       "B cell             100000\n",
       "Monocyte           100000\n",
       "NK cell            100000\n",
       "T cell             100000\n",
       "DC                  23287\n",
       "Platelet             7903\n",
       "Progenitor cell      1526\n",
       "Erythrocyte          1508\n",
       "ILC                   844\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset.obs[label_column].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8d8c478d-2cf3-425e-9327-61af4db59d2c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(435068, 33538)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset = adata_subset.raw.to_adata()\n",
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "2596500e-62c7-4fa5-bd16-63cacc8331b0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata_subset, target_sum=1e4)\n",
    "sc.pp.log1p(adata_subset)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "786af4bd-2ac4-4d48-a362-f697ab441da7",
   "metadata": {},
   "source": [
    "### Limit to genes available in 10x Flex Probes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "fadeb31f-c31e-42fb-91f0-39913cc77667",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18329"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_var = adata_subset.var.index.isin(flex_genes)\n",
    "sum(keep_var)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a93e4d35-ddc8-49d2-9058-414af91b4ea3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(435068, 18329)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset = adata_subset[:,keep_var]\n",
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "800223ae-e51d-4547-9e89-5b6b5022844a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata_subset, target_sum=1e4)\n",
    "sc.pp.log1p(adata_subset)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "846d818e-27d8-46d1-8079-cddc37a1830f",
   "metadata": {},
   "source": [
    "## Generate Initial model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "c0451b10-b41c-4567-8807-26baba781431",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "🍳 Preparing data before training\n",
      "✂️ 1223 non-expressed genes are filtered out\n",
      "🔬 Input data has 435068 cells and 17106 genes\n",
      "⚖️ Scaling input data\n",
      "🏋️ Training data using SGD logistic regression\n",
      "⚠️ Warning: it may take a long time to train this dataset with 435068 cells and 17106 genes, try to downsample cells and/or restrict genes to a subset (e.g., hvgs)\n",
      "✅ Model training done!\n"
     ]
    }
   ],
   "source": [
    "model_fs = multicelltypist.train(\n",
    "    adata_subset, \n",
    "    label_column, \n",
    "    n_jobs = 60, \n",
    "    max_iter = 10, \n",
    "    multi_class = 'ovr', \n",
    "    use_SGD = True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bee3b1e1-f085-45a4-91b3-c14e3775bee9",
   "metadata": {},
   "source": [
    "## Identify top features used for the model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e841c3de-c87b-49fc-93c0-a92a34aafc42",
   "metadata": {},
   "source": [
    "Detected genes:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "879de263-fcc0-4c85-92ba-501a8d15848d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = adata_subset.X.toarray()\n",
    "flag = df.sum(axis = 0) == 0\n",
    "gene = adata_subset.var_names[ ~flag]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d26bc4ef-63a3-49f4-9b0c-80c5b1b6b140",
   "metadata": {},
   "source": [
    "Features with high absolute classifier coefficients for each cell type class\n",
    "\n",
    "`np.argpartition` will take the coefficient scores for each class, and retrieve the positions of the highest absolute coefficient scores to the end of an array of positions. We then select the `top_n` positions from the end of our array of positions, which allow us to retrieve genes with the highest absolute coefficients for each class.\n",
    "\n",
    "We can then combine these to get a unique list of genes that are important for our model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9eff950b-8272-4c52-9fce-f2a530ded5c4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of genes selected: 1102\n"
     ]
    }
   ],
   "source": [
    "top_n = 200\n",
    "\n",
    "gene_index = np.argpartition(\n",
    "    np.abs(model_fs.classifier.coef_),\n",
    "    -top_n,\n",
    "    axis = 1\n",
    ")\n",
    "gene_index = gene_index[:, -top_n:]\n",
    "gene_index = np.unique(gene_index)\n",
    "\n",
    "print('Number of genes selected: {n}'.format(n = len(gene_index)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "0ce86088-ecca-4571-8047-822125da39e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_genes = gene[gene_index.tolist()]\n",
    "selected_df = pd.DataFrame({'gene': selected_genes})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "259ed4e9-9d0a-4bff-93c5-a531ea7a3722",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>HES4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TTLL10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TNFRSF18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TNFRSF4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TNFRSF25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       gene\n",
       "0      HES4\n",
       "1    TTLL10\n",
       "2  TNFRSF18\n",
       "3   TNFRSF4\n",
       "4  TNFRSF25"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9fcd1074-f9f9-4225-8f0b-66781474fea3",
   "metadata": {},
   "source": [
    "## Generate full model using selected features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "76482001-3514-40f8-84ba-ac49ee8de99b",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata.raw.to_adata()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "8ed94738-b085-4013-b2a0-af8f8eec304b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata, target_sum=1e4)\n",
    "sc.pp.log1p(adata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "2a83bcfb-e70d-42b8-98e2-1ca4e28769ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata[:, adata.var_names.isin(selected_genes)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "25f7d232-e933-4a1c-8d37-672347804bad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1823666, 1102)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "187251e5-d64f-4c94-acf7-41405f0c4587",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "🍳 Preparing data before training\n",
      "🔬 Input data has 1823666 cells and 1102 genes\n",
      "⚖️ Scaling input data\n",
      "🏋️ Training data using logistic regression\n",
      "✅ Model training done!\n"
     ]
    }
   ],
   "source": [
    "model_fs = multicelltypist.train(\n",
    "    adata, \n",
    "    label_column, \n",
    "    n_jobs = 60,\n",
    "    max_iter = 100,\n",
    "    multi_class = 'ovr',\n",
    "    check_expression = False\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "acaad207-8a4c-438f-8b7f-7790282a55f2",
   "metadata": {},
   "source": [
    "## Write outputs for storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "e3504c04-3764-4b63-ad0a-e358958924fc",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "out_dir = 'output'\n",
    "if not os.path.isdir(out_dir):\n",
    "    os.makedirs(out_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "7032c10f-2aed-4f92-82ff-08fee9d72711",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_genes = 'output/ref_pbmc_clean_celltypist_top{n}_flex-features_{l}_{d}.csv'.format(\n",
    "    n = top_n,\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")\n",
    "\n",
    "selected_df.to_csv(out_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "0ea6d0e1-5f66-4b44-82c1-cef218e73770",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_model = 'output/ref_pbmc_clean_celltypist_model_flex-features_{l}_{d}.pkl'.format(\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")\n",
    "\n",
    "model_fs.write(out_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1a1f474-2902-4476-9728-1639e3e6eeea",
   "metadata": {},
   "source": [
    "## Upload model to HISE\n",
    "\n",
    "Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE to use for downstream analysis steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "63fe4465-3209-461d-9ce6-1d83b7d940cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'\n",
    "title = 'PBMC Reference {l} CellTypist Model Flex Features {d}'.format(\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "13757b6a-266a-4952-8375-675a85af41c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "in_files = [h5ad_uuid]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "d20e6385-6353-4f83-ae5d-7aca61904b7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['6e8972a5-9463-4230-84b4-a20de055b9c3']"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "in_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "7165f511-e640-4a88-b0cb-3e302f5097eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_files = [out_genes, out_model]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "cfb825fe-dde6-449c-9a7e-d9a8946f4f7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L1_2024-03-11.csv',\n",
       " 'output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L1_2024-03-11.pkl']"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "298b7444-d517-4203-95a2-96e364f43ed1",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L1_2024-03-11.csv\n",
      "output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L1_2024-03-11.pkl\n",
      "you are trying to upload file_ids... ['output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L1_2024-03-11.csv', 'output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L1_2024-03-11.pkl']. Do you truly want to proceed?\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "(y/n) y\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'trace_id': '3768e951-13f8-4aac-9c46-908d128c6e3c',\n",
       " 'files': ['output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L1_2024-03-11.csv',\n",
       "  'output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L1_2024-03-11.pkl']}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hisepy.upload.upload_files(\n",
    "    files = out_files,\n",
    "    study_space_id = study_space_uuid,\n",
    "    title = title,\n",
    "    input_file_ids = in_files\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "745e9f83-122c-4445-beb5-db6c20e31eed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<details>\n",
       "<summary>Click to view session information</summary>\n",
       "<pre>\n",
       "-----\n",
       "anndata             0.10.3\n",
       "hisepy              0.3.0\n",
       "multicelltypist     1.6.2\n",
       "numpy               1.25.2\n",
       "pandas              2.1.4\n",
       "scanpy              1.9.6\n",
       "session_info        1.0.0\n",
       "-----\n",
       "</pre>\n",
       "<details>\n",
       "<summary>Click to view modules imported as dependencies</summary>\n",
       "<pre>\n",
       "PIL                         10.0.1\n",
       "anyio                       NA\n",
       "arrow                       1.3.0\n",
       "asttokens                   NA\n",
       "attr                        23.2.0\n",
       "attrs                       23.2.0\n",
       "babel                       2.14.0\n",
       "beatrix_jupyterlab          NA\n",
       "brotli                      NA\n",
       "cachetools                  5.3.1\n",
       "certifi                     2024.02.02\n",
       "cffi                        1.16.0\n",
       "charset_normalizer          3.3.2\n",
       "cloudpickle                 2.2.1\n",
       "colorama                    0.4.6\n",
       "comm                        0.1.4\n",
       "cryptography                41.0.7\n",
       "cycler                      0.10.0\n",
       "cython_runtime              NA\n",
       "dateutil                    2.8.2\n",
       "db_dtypes                   1.1.1\n",
       "debugpy                     1.8.0\n",
       "decorator                   5.1.1\n",
       "defusedxml                  0.7.1\n",
       "deprecated                  1.2.14\n",
       "exceptiongroup              1.2.0\n",
       "executing                   2.0.1\n",
       "fastjsonschema              NA\n",
       "fqdn                        NA\n",
       "google                      NA\n",
       "greenlet                    2.0.2\n",
       "grpc                        1.58.0\n",
       "grpc_status                 NA\n",
       "h5py                        3.10.0\n",
       "idna                        3.6\n",
       "igraph                      0.10.8\n",
       "importlib_metadata          NA\n",
       "ipykernel                   6.28.0\n",
       "ipython_genutils            0.2.0\n",
       "ipywidgets                  8.1.1\n",
       "isoduration                 NA\n",
       "jedi                        0.19.1\n",
       "jinja2                      3.1.2\n",
       "joblib                      1.3.2\n",
       "json5                       NA\n",
       "jsonpointer                 2.4\n",
       "jsonschema                  4.20.0\n",
       "jsonschema_specifications   NA\n",
       "jupyter_events              0.9.0\n",
       "jupyter_server              2.12.1\n",
       "jupyterlab_server           2.25.2\n",
       "jwt                         2.8.0\n",
       "kiwisolver                  1.4.5\n",
       "leidenalg                   0.10.1\n",
       "llvmlite                    0.41.0\n",
       "lz4                         4.3.2\n",
       "markupsafe                  2.1.3\n",
       "matplotlib                  3.8.0\n",
       "matplotlib_inline           0.1.6\n",
       "mpl_toolkits                NA\n",
       "mpmath                      1.3.0\n",
       "natsort                     8.4.0\n",
       "nbformat                    5.9.2\n",
       "numba                       0.58.0\n",
       "opentelemetry               NA\n",
       "overrides                   NA\n",
       "packaging                   23.2\n",
       "parso                       0.8.3\n",
       "pexpect                     4.8.0\n",
       "pickleshare                 0.7.5\n",
       "pkg_resources               NA\n",
       "platformdirs                4.1.0\n",
       "plotly                      5.18.0\n",
       "prettytable                 3.9.0\n",
       "prometheus_client           NA\n",
       "prompt_toolkit              3.0.42\n",
       "proto                       NA\n",
       "psutil                      NA\n",
       "ptyprocess                  0.7.0\n",
       "pure_eval                   0.2.2\n",
       "pyarrow                     13.0.0\n",
       "pydev_ipython               NA\n",
       "pydevconsole                NA\n",
       "pydevd                      2.9.5\n",
       "pydevd_file_utils           NA\n",
       "pydevd_plugins              NA\n",
       "pydevd_tracing              NA\n",
       "pygments                    2.17.2\n",
       "pynvml                      NA\n",
       "pyparsing                   3.1.1\n",
       "pyreadr                     0.5.0\n",
       "pythonjsonlogger            NA\n",
       "pytz                        2023.3.post1\n",
       "referencing                 NA\n",
       "requests                    2.31.0\n",
       "rfc3339_validator           0.1.4\n",
       "rfc3986_validator           0.1.1\n",
       "rpds                        NA\n",
       "scipy                       1.11.4\n",
       "send2trash                  NA\n",
       "shapely                     1.8.5.post1\n",
       "six                         1.16.0\n",
       "sklearn                     1.3.2\n",
       "sniffio                     1.3.0\n",
       "socks                       1.7.1\n",
       "sql                         NA\n",
       "sqlalchemy                  2.0.21\n",
       "sqlparse                    0.4.4\n",
       "stack_data                  0.6.2\n",
       "sympy                       1.12\n",
       "termcolor                   NA\n",
       "texttable                   1.7.0\n",
       "threadpoolctl               3.2.0\n",
       "torch                       2.1.2+cu121\n",
       "torchgen                    NA\n",
       "tornado                     6.3.3\n",
       "tqdm                        4.66.1\n",
       "traitlets                   5.9.0\n",
       "typing_extensions           NA\n",
       "uri_template                NA\n",
       "urllib3                     1.26.18\n",
       "wcwidth                     0.2.12\n",
       "webcolors                   1.13\n",
       "websocket                   1.7.0\n",
       "wrapt                       1.15.0\n",
       "xarray                      2023.12.0\n",
       "yaml                        6.0.1\n",
       "zipp                        NA\n",
       "zmq                         25.1.2\n",
       "zoneinfo                    NA\n",
       "zstandard                   0.22.0\n",
       "</pre>\n",
       "</details> <!-- seems like this ends pre, so might as well be explicit -->\n",
       "<pre>\n",
       "-----\n",
       "IPython             8.19.0\n",
       "jupyter_client      8.6.0\n",
       "jupyter_core        5.6.1\n",
       "jupyterlab          4.1.2\n",
       "notebook            6.5.4\n",
       "-----\n",
       "Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]\n",
       "Linux-5.15.0-1053-gcp-x86_64-with-glibc2.31\n",
       "-----\n",
       "Session information updated at 2024-03-11 19:30\n",
       "</pre>\n",
       "</details>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import session_info\n",
    "session_info.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6824f32d-6603-4e9e-b1a8-abc87d0f7908",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}