{
 "cells": [
  {
   "cell_type": "raw",
   "id": "7f0c5dc2-90b4-4a57-bc2c-687eb90c576f",
   "metadata": {},
   "source": [
    "!pip install -q https://github.com/aifimmunology/multicelltypist/archive/main.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "660c6563-9bc8-47e0-a39a-5caea64b2b18",
   "metadata": {},
   "outputs": [],
   "source": [
    "import multicelltypist\n",
    "from datetime import date\n",
    "import hisepy\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2611fcb9-1cc9-4b22-a3b2-c4a6c4d7c4ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_adata_uuid(h5ad_uuid):\n",
    "    h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)\n",
    "    if not os.path.isdir(h5ad_path):\n",
    "        hise_res = hisepy.reader.cache_files([h5ad_uuid])\n",
    "    h5ad_filename = os.listdir(h5ad_path)[0]\n",
    "    h5ad_file = '{p}/{f}'.format(p = h5ad_path, f = h5ad_filename)\n",
    "    adata = sc.read_h5ad(h5ad_file)\n",
    "    return adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b26381f4-75ce-4776-9859-9f7775c198d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def resample_anndata_min_max(adata, label_column, max_cells=None, min_cells=None, random_state = 3030):\n",
    "    \"\"\"\n",
    "    Resamples an AnnData object based on the cell labels, with the option to resample with \n",
    "    replacement for classes below a specified threshold.\n",
    "\n",
    "    Parameters:\n",
    "    ad (AnnData): The AnnData object to be resampled.\n",
    "    label_column (str): The column in ad.obs where the labels are stored.\n",
    "    max_cells (int, optional): The maximum number of cells to keep per label. If None, no upper limit is applied.\n",
    "    min_cells (int, optional): The minimum number of cells below which resampling with replacement occurs. If None, no lower limit is applied.\n",
    "    random_state (int, default = 3030): An integer used to set the state of the numpy.random.Generator\n",
    "    \n",
    "    Returns:\n",
    "    AnnData: The resampled AnnData object.\n",
    "    \"\"\"\n",
    "    \n",
    "    labels = adata.obs[label_column].unique()\n",
    "\n",
    "    subsets = []\n",
    "\n",
    "    rng = np.random.default_rng(random_state)\n",
    "        \n",
    "    for label in labels:\n",
    "        # Subset AnnData object for the current label\n",
    "        subset = adata.obs[adata.obs[label_column] == label]\n",
    "    \n",
    "        # Resample with replacement if the number of cells is below min_cells and min_cells is defined\n",
    "        if min_cells is not None and subset.shape[0] < min_cells:\n",
    "            subset = subset.sample(min_cells, replace = True, random_state = rng)\n",
    "        # Resample without replacement if the number of cells is greater than max_cells and max_cells is defined\n",
    "        elif max_cells is not None and subset.shape[0] > max_cells:\n",
    "            subset = subset.sample(max_cells, replace = False, random_state = rng)\n",
    "    \n",
    "        subsets.append(subset)\n",
    "\n",
    "    # Concatenate all subsets\n",
    "    resampled_obs = pd.concat(subsets)\n",
    "    \n",
    "    resampled_adata = adata[resampled_obs.index]\n",
    "    resampled_adata.obs_names_make_unique()\n",
    "\n",
    "    return resampled_adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "025a1a8c-9a27-425d-856f-d57d06085150",
   "metadata": {},
   "outputs": [],
   "source": [
    "label_column = 'AIFI_L2'\n",
    "max_cell_number = 80000"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f5d2a81-4c8b-41a6-887a-903305be8e41",
   "metadata": {},
   "source": [
    "## Read clean, annotated dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "402ba1b7-74b9-45d3-b840-f4e5ea967fdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "h5ad_uuid = '6e8972a5-9463-4230-84b4-a20de055b9c3'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d19d07d9-c602-4c16-ada7-16091f43ef4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = read_adata_uuid(h5ad_uuid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0e7cc125-7921-4fb0-a3de-c8d5ca493d97",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1823666, 1261)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "02304406-4c05-459e-aad3-aae00f909516",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIFI_L2\n",
       "Naive CD4 T cell         378071\n",
       "Memory CD4 T cell        321788\n",
       "CD14 monocyte            269328\n",
       "Memory CD8 T cell        183096\n",
       "CD56dim NK cell          133881\n",
       "Naive CD8 T cell         121167\n",
       "Naive B cell              86711\n",
       "gdT                       50587\n",
       "MAIT                      48084\n",
       "Memory B cell             47886\n",
       "CD16 monocyte             45920\n",
       "Treg                      39087\n",
       "cDC2                      14235\n",
       "Intermediate monocyte     12671\n",
       "Transitional B cell       12555\n",
       "Effector B cell           11329\n",
       "CD56bright NK cell        11055\n",
       "Platelet                   7903\n",
       "pDC                        7587\n",
       "CD8aa                      5737\n",
       "Proliferating NK cell      2825\n",
       "DN T cell                  2349\n",
       "Proliferating T cell       2320\n",
       "Plasma cell                2151\n",
       "Progenitor cell            1526\n",
       "Erythrocyte                1508\n",
       "cDC1                        943\n",
       "ILC                         844\n",
       "ASDC                        522\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.obs[label_column].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afd7084d-0f4e-4aa8-b538-af3b316e4992",
   "metadata": {},
   "source": [
    "## Sample and prepare reference data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "139d3a45-9492-4ffb-af7a-79b28722a034",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_subset = resample_anndata_min_max(\n",
    "    adata, \n",
    "    label_column, \n",
    "    max_cells = max_cell_number,\n",
    "    random_state = 3030\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "bc027836-a9ce-44d2-95ec-50b90f4a74e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(889624, 1261)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bb48435e-91ec-4e20-9ff6-1459c785d784",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIFI_L2\n",
       "CD14 monocyte            80000\n",
       "CD56dim NK cell          80000\n",
       "Naive B cell             80000\n",
       "Memory CD8 T cell        80000\n",
       "Memory CD4 T cell        80000\n",
       "Naive CD4 T cell         80000\n",
       "Naive CD8 T cell         80000\n",
       "gdT                      50587\n",
       "MAIT                     48084\n",
       "Memory B cell            47886\n",
       "CD16 monocyte            45920\n",
       "Treg                     39087\n",
       "cDC2                     14235\n",
       "Intermediate monocyte    12671\n",
       "Transitional B cell      12555\n",
       "Effector B cell          11329\n",
       "CD56bright NK cell       11055\n",
       "Platelet                  7903\n",
       "pDC                       7587\n",
       "CD8aa                     5737\n",
       "Proliferating NK cell     2825\n",
       "DN T cell                 2349\n",
       "Proliferating T cell      2320\n",
       "Plasma cell               2151\n",
       "Progenitor cell           1526\n",
       "Erythrocyte               1508\n",
       "cDC1                       943\n",
       "ILC                        844\n",
       "ASDC                       522\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset.obs[label_column].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "8d8c478d-2cf3-425e-9327-61af4db59d2c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(889624, 33538)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset = adata_subset.raw.to_adata()\n",
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "2596500e-62c7-4fa5-bd16-63cacc8331b0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata_subset, target_sum=1e4)\n",
    "sc.pp.log1p(adata_subset)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b5fccae-79fd-4eb9-a4a8-f68536db9dda",
   "metadata": {},
   "source": [
    "## Generate Initial model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7ed68f41-5230-477e-b57f-3bcf1ad6bba8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "🍳 Preparing data before training\n",
      "✂️ 4486 non-expressed genes are filtered out\n",
      "🔬 Input data has 889624 cells and 29052 genes\n",
      "⚖️ Scaling input data\n",
      "🏋️ Training data using SGD logistic regression\n",
      "⚠️ Warning: it may take a long time to train this dataset with 889624 cells and 29052 genes, try to downsample cells and/or restrict genes to a subset (e.g., hvgs)\n",
      "✅ Model training done!\n"
     ]
    }
   ],
   "source": [
    "model_fs = multicelltypist.train(\n",
    "    adata_subset, \n",
    "    label_column, \n",
    "    n_jobs = 60, \n",
    "    max_iter = 10, \n",
    "    multi_class = 'ovr', \n",
    "    use_SGD = True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0b4b82a-787e-41bd-a2a9-b847b0e84770",
   "metadata": {},
   "source": [
    "## Identify top features used for the model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "265dd0e4-14f4-4114-98fc-09af92018bf5",
   "metadata": {},
   "source": [
    "Detected genes:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "f98c3839-7e99-4643-82c8-254a2bd89ce1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = adata_subset.X.toarray()\n",
    "flag = df.sum(axis = 0) == 0\n",
    "gene = adata_subset.var_names[ ~flag]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce3de9a5-dcf9-4d02-85f4-034143633098",
   "metadata": {},
   "source": [
    "Features with high absolute classifier coefficients for each cell type class\n",
    "\n",
    "`np.argpartition` will take the coefficient scores for each class, and retrieve the positions of the highest absolute coefficient scores to the end of an array of positions. We then select the `top_n` positions from the end of our array of positions, which allow us to retrieve genes with the highest absolute coefficients for each class.\n",
    "\n",
    "We can then combine these to get a unique list of genes that are important for our model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "bcb18ca4-360c-40de-a3c8-233deff3d7ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of genes selected: 1931\n"
     ]
    }
   ],
   "source": [
    "top_n = 200\n",
    "\n",
    "gene_index = np.argpartition(\n",
    "    np.abs(model_fs.classifier.coef_),\n",
    "    -top_n,\n",
    "    axis = 1\n",
    ")\n",
    "gene_index = gene_index[:, -top_n:]\n",
    "gene_index = np.unique(gene_index)\n",
    "\n",
    "print('Number of genes selected: {n}'.format(n = len(gene_index)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "5e6a0dc7-418e-4400-8cc2-4ca286b393fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_genes = gene[gene_index.tolist()]\n",
    "selected_df = pd.DataFrame({'gene': selected_genes})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "76116481-6d8c-4539-9cc8-095cc31d390e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>HES4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ISG15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TTLL10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TNFRSF18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TNFRSF4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       gene\n",
       "0      HES4\n",
       "1     ISG15\n",
       "2    TTLL10\n",
       "3  TNFRSF18\n",
       "4   TNFRSF4"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "398634dc-57eb-4c6f-884a-226e1af3ad02",
   "metadata": {},
   "source": [
    "## Generate full model using selected features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "030648e4-e48c-4c69-82e6-16b598ecaa13",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata.raw.to_adata()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "13377112-3e01-4956-a1f0-0b0819fa6aa7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata, target_sum=1e4)\n",
    "sc.pp.log1p(adata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "9890237b-6705-47ab-88ad-879392432998",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata[:, adata.var_names.isin(selected_genes)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "a6c9d518-7ab1-48ae-94b5-f3d668954c7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1823666, 1931)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "14ee09d1-76ea-4c4e-b2dc-7ddab9913955",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "🍳 Preparing data before training\n",
      "🔬 Input data has 1823666 cells and 1931 genes\n",
      "⚖️ Scaling input data\n",
      "🏋️ Training data using logistic regression\n",
      "✅ Model training done!\n"
     ]
    }
   ],
   "source": [
    "model_fs = multicelltypist.train(\n",
    "    adata, \n",
    "    label_column, \n",
    "    n_jobs = 60,\n",
    "    max_iter = 100,\n",
    "    multi_class = 'ovr',\n",
    "    check_expression = False\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eef272aa-a2c2-48bd-9fd7-ead244edcfc7",
   "metadata": {},
   "source": [
    "## Write outputs for storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "70bb4514-c6aa-43cd-96e7-4b295064dad3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "out_dir = 'output'\n",
    "if not os.path.isdir(out_dir):\n",
    "    os.makedirs(out_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "59fb4a55-52b5-492b-a16a-b81c1d8b66b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_genes = 'output/ref_pbmc_clean_celltypist_top{n}_features_{l}_{d}.csv'.format(\n",
    "    n = top_n,\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")\n",
    "\n",
    "selected_df.to_csv(out_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a4d082d0-9ab9-4cd0-96b3-a339739fc392",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_model = 'output/ref_pbmc_clean_celltypist_model_{l}_{d}.pkl'.format(\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")\n",
    "\n",
    "model_fs.write(out_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b1d5230-6d90-4780-938b-8e2f7401c59a",
   "metadata": {},
   "source": [
    "## Upload model to HISE\n",
    "\n",
    "Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE to use for downstream analysis steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "66ac5956-b921-415a-b370-77578b021891",
   "metadata": {},
   "outputs": [],
   "source": [
    "study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'\n",
    "title = 'PBMC Reference {l} CellTypist Model {d}'.format(\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "848357fc-9231-4cb4-a30d-89ce0246b898",
   "metadata": {},
   "outputs": [],
   "source": [
    "in_files = [h5ad_uuid]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "5d6df346-cb61-4a06-a7f6-5609e9bc6e69",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['6e8972a5-9463-4230-84b4-a20de055b9c3']"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "in_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "910d2458-b478-4b9f-9bac-2d56c4c7ea08",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_files = [out_genes, out_model]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "64e92a74-a95a-4f95-bc1a-01cdfa42cb13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['output/ref_pbmc_clean_celltypist_top200_features_AIFI_L2_2024-03-10.csv',\n",
       " 'output/ref_pbmc_clean_celltypist_model_AIFI_L2_2024-03-10.pkl']"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ffd9a398-1be4-461d-8c6e-b55245212b26",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "output/ref_pbmc_clean_celltypist_top200_features_AIFI_L2_2024-03-10.csv\n",
      "output/ref_pbmc_clean_celltypist_model_AIFI_L2_2024-03-10.pkl\n",
      "Cannot determine the current notebook.\n",
      "1) /home/jupyter/scRNA-Reference-IH-A/06-Modeling/30-Python_celltypist_L2_model.ipynb\n",
      "2) /home/jupyter/scRNA-Reference-IH-A/06-Modeling/31-Python_celltypist_L3_model.ipynb\n",
      "3) /home/jupyter/scRNA-Reference-IH-A/06-Modeling/29-Python_celltypist_L1_model.ipynb\n",
      "Please select (1-3) \n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "you are trying to upload file_ids... ['output/ref_pbmc_clean_celltypist_top200_features_AIFI_L2_2024-03-10.csv', 'output/ref_pbmc_clean_celltypist_model_AIFI_L2_2024-03-10.pkl']. Do you truly want to proceed?\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "(y/n) y\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'trace_id': 'da03c2e2-b965-48ab-89a6-89b653f95b7d',\n",
       " 'files': ['output/ref_pbmc_clean_celltypist_top200_features_AIFI_L2_2024-03-10.csv',\n",
       "  'output/ref_pbmc_clean_celltypist_model_AIFI_L2_2024-03-10.pkl']}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hisepy.upload.upload_files(\n",
    "    files = out_files,\n",
    "    study_space_id = study_space_uuid,\n",
    "    title = title,\n",
    "    input_file_ids = in_files\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "9684bc1e-4da6-442f-b135-ef39c4342be5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<details>\n",
       "<summary>Click to view session information</summary>\n",
       "<pre>\n",
       "-----\n",
       "anndata             0.10.3\n",
       "hisepy              0.3.0\n",
       "multicelltypist     1.6.2\n",
       "numpy               1.25.2\n",
       "pandas              2.1.4\n",
       "scanpy              1.9.6\n",
       "session_info        1.0.0\n",
       "-----\n",
       "</pre>\n",
       "<details>\n",
       "<summary>Click to view modules imported as dependencies</summary>\n",
       "<pre>\n",
       "PIL                         10.0.1\n",
       "anyio                       NA\n",
       "arrow                       1.3.0\n",
       "asttokens                   NA\n",
       "attr                        23.2.0\n",
       "attrs                       23.2.0\n",
       "babel                       2.14.0\n",
       "beatrix_jupyterlab          NA\n",
       "brotli                      NA\n",
       "cachetools                  5.3.1\n",
       "certifi                     2024.02.02\n",
       "cffi                        1.16.0\n",
       "charset_normalizer          3.3.2\n",
       "cloudpickle                 2.2.1\n",
       "colorama                    0.4.6\n",
       "comm                        0.1.4\n",
       "cryptography                41.0.7\n",
       "cycler                      0.10.0\n",
       "cython_runtime              NA\n",
       "dateutil                    2.8.2\n",
       "db_dtypes                   1.1.1\n",
       "debugpy                     1.8.0\n",
       "decorator                   5.1.1\n",
       "defusedxml                  0.7.1\n",
       "deprecated                  1.2.14\n",
       "exceptiongroup              1.2.0\n",
       "executing                   2.0.1\n",
       "fastjsonschema              NA\n",
       "fqdn                        NA\n",
       "google                      NA\n",
       "greenlet                    2.0.2\n",
       "grpc                        1.58.0\n",
       "grpc_status                 NA\n",
       "h5py                        3.10.0\n",
       "idna                        3.6\n",
       "igraph                      0.10.8\n",
       "importlib_metadata          NA\n",
       "ipykernel                   6.28.0\n",
       "ipython_genutils            0.2.0\n",
       "ipywidgets                  8.1.1\n",
       "isoduration                 NA\n",
       "jedi                        0.19.1\n",
       "jinja2                      3.1.2\n",
       "joblib                      1.3.2\n",
       "json5                       NA\n",
       "jsonpointer                 2.4\n",
       "jsonschema                  4.20.0\n",
       "jsonschema_specifications   NA\n",
       "jupyter_events              0.9.0\n",
       "jupyter_server              2.12.1\n",
       "jupyterlab_server           2.25.2\n",
       "jwt                         2.8.0\n",
       "kiwisolver                  1.4.5\n",
       "leidenalg                   0.10.1\n",
       "llvmlite                    0.41.0\n",
       "lz4                         4.3.2\n",
       "markupsafe                  2.1.3\n",
       "matplotlib                  3.8.0\n",
       "matplotlib_inline           0.1.6\n",
       "mpl_toolkits                NA\n",
       "mpmath                      1.3.0\n",
       "natsort                     8.4.0\n",
       "nbformat                    5.9.2\n",
       "numba                       0.58.0\n",
       "opentelemetry               NA\n",
       "overrides                   NA\n",
       "packaging                   23.2\n",
       "parso                       0.8.3\n",
       "pexpect                     4.8.0\n",
       "pickleshare                 0.7.5\n",
       "pkg_resources               NA\n",
       "platformdirs                4.1.0\n",
       "plotly                      5.18.0\n",
       "prettytable                 3.9.0\n",
       "prometheus_client           NA\n",
       "prompt_toolkit              3.0.42\n",
       "proto                       NA\n",
       "psutil                      NA\n",
       "ptyprocess                  0.7.0\n",
       "pure_eval                   0.2.2\n",
       "pyarrow                     13.0.0\n",
       "pydev_ipython               NA\n",
       "pydevconsole                NA\n",
       "pydevd                      2.9.5\n",
       "pydevd_file_utils           NA\n",
       "pydevd_plugins              NA\n",
       "pydevd_tracing              NA\n",
       "pygments                    2.17.2\n",
       "pynvml                      NA\n",
       "pyparsing                   3.1.1\n",
       "pyreadr                     0.5.0\n",
       "pythonjsonlogger            NA\n",
       "pytz                        2023.3.post1\n",
       "referencing                 NA\n",
       "requests                    2.31.0\n",
       "rfc3339_validator           0.1.4\n",
       "rfc3986_validator           0.1.1\n",
       "rpds                        NA\n",
       "scipy                       1.11.4\n",
       "send2trash                  NA\n",
       "shapely                     1.8.5.post1\n",
       "six                         1.16.0\n",
       "sklearn                     1.3.2\n",
       "sniffio                     1.3.0\n",
       "socks                       1.7.1\n",
       "sql                         NA\n",
       "sqlalchemy                  2.0.21\n",
       "sqlparse                    0.4.4\n",
       "stack_data                  0.6.2\n",
       "sympy                       1.12\n",
       "termcolor                   NA\n",
       "texttable                   1.7.0\n",
       "threadpoolctl               3.2.0\n",
       "torch                       2.1.2+cu121\n",
       "torchgen                    NA\n",
       "tornado                     6.3.3\n",
       "tqdm                        4.66.1\n",
       "traitlets                   5.9.0\n",
       "typing_extensions           NA\n",
       "uri_template                NA\n",
       "urllib3                     1.26.18\n",
       "wcwidth                     0.2.12\n",
       "webcolors                   1.13\n",
       "websocket                   1.7.0\n",
       "wrapt                       1.15.0\n",
       "xarray                      2023.12.0\n",
       "yaml                        6.0.1\n",
       "zipp                        NA\n",
       "zmq                         25.1.2\n",
       "zoneinfo                    NA\n",
       "zstandard                   0.22.0\n",
       "</pre>\n",
       "</details> <!-- seems like this ends pre, so might as well be explicit -->\n",
       "<pre>\n",
       "-----\n",
       "IPython             8.19.0\n",
       "jupyter_client      8.6.0\n",
       "jupyter_core        5.6.1\n",
       "jupyterlab          4.1.2\n",
       "notebook            6.5.4\n",
       "-----\n",
       "Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]\n",
       "Linux-5.15.0-1042-gcp-x86_64-with-glibc2.31\n",
       "-----\n",
       "Session information updated at 2024-03-10 03:22\n",
       "</pre>\n",
       "</details>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import session_info\n",
    "session_info.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6824f32d-6603-4e9e-b1a8-abc87d0f7908",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}