{
 "cells": [
  {
   "cell_type": "raw",
   "id": "85555ba6-7a27-4bc3-9dc4-8f75a6cc2b21",
   "metadata": {},
   "source": [
    "!pip install -q https://github.com/aifimmunology/multicelltypist/archive/main.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "660c6563-9bc8-47e0-a39a-5caea64b2b18",
   "metadata": {},
   "outputs": [],
   "source": [
    "import multicelltypist\n",
    "from datetime import date\n",
    "import hisepy\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2ceef7cd-6533-4e9e-a459-9e5f7ee53f3e",
   "metadata": {},
   "source": [
    "Retrieve 10x Genomics Flex Probe gene list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ec900744-16fb-41c1-885a-775b0419135b",
   "metadata": {},
   "outputs": [],
   "source": [
    "cache_res = hisepy.download_from_project_store(\n",
    "    store_name = 'rds', # Reference Data Sets Project Store\n",
    "    file_name = 'AIFI-2024-03-11T02:09:16.856602896Z/Chromium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.probe_metadata.tsv', # File from 10x Genomics\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e77b5a30-41bd-441c-93db-ec00143822bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "probe_file = 'rds/Chromium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.probe_metadata.tsv'\n",
    "flex_probes = pd.read_csv(probe_file, sep = '\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8c9d7492-55a9-464f-b33d-1241d8cdd3ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "flex_genes = flex_probes['gene_name'].unique().tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "60e60a02-8cdf-4c20-8c34-381a6c5bf264",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18529"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(flex_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "2611fcb9-1cc9-4b22-a3b2-c4a6c4d7c4ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_adata_uuid(h5ad_uuid):\n",
    "    h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)\n",
    "    if not os.path.isdir(h5ad_path):\n",
    "        hise_res = hisepy.reader.cache_files([h5ad_uuid])\n",
    "    h5ad_filename = os.listdir(h5ad_path)[0]\n",
    "    h5ad_file = '{p}/{f}'.format(p = h5ad_path, f = h5ad_filename)\n",
    "    adata = sc.read_h5ad(h5ad_file)\n",
    "    return adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b26381f4-75ce-4776-9859-9f7775c198d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def resample_anndata_min_max(adata, label_column, max_cells=None, min_cells=None, random_state = 3030):\n",
    "    \"\"\"\n",
    "    Resamples an AnnData object based on the cell labels, with the option to resample with \n",
    "    replacement for classes below a specified threshold.\n",
    "\n",
    "    Parameters:\n",
    "    ad (AnnData): The AnnData object to be resampled.\n",
    "    label_column (str): The column in ad.obs where the labels are stored.\n",
    "    max_cells (int, optional): The maximum number of cells to keep per label. If None, no upper limit is applied.\n",
    "    min_cells (int, optional): The minimum number of cells below which resampling with replacement occurs. If None, no lower limit is applied.\n",
    "    random_state (int, default = 3030): An integer used to set the state of the numpy.random.Generator\n",
    "    \n",
    "    Returns:\n",
    "    AnnData: The resampled AnnData object.\n",
    "    \"\"\"\n",
    "    \n",
    "    labels = adata.obs[label_column].unique()\n",
    "\n",
    "    subsets = []\n",
    "\n",
    "    rng = np.random.default_rng(random_state)\n",
    "        \n",
    "    for label in labels:\n",
    "        # Subset AnnData object for the current label\n",
    "        subset = adata.obs[adata.obs[label_column] == label]\n",
    "    \n",
    "        # Resample with replacement if the number of cells is below min_cells and min_cells is defined\n",
    "        if min_cells is not None and subset.shape[0] < min_cells:\n",
    "            subset = subset.sample(min_cells, replace = True, random_state = rng)\n",
    "        # Resample without replacement if the number of cells is greater than max_cells and max_cells is defined\n",
    "        elif max_cells is not None and subset.shape[0] > max_cells:\n",
    "            subset = subset.sample(max_cells, replace = False, random_state = rng)\n",
    "    \n",
    "        subsets.append(subset)\n",
    "\n",
    "    # Concatenate all subsets\n",
    "    resampled_obs = pd.concat(subsets)\n",
    "    \n",
    "    resampled_adata = adata[resampled_obs.index]\n",
    "    resampled_adata.obs_names_make_unique()\n",
    "\n",
    "    return resampled_adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "025a1a8c-9a27-425d-856f-d57d06085150",
   "metadata": {},
   "outputs": [],
   "source": [
    "label_column = 'AIFI_L3'\n",
    "max_cell_number = 20000"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f5d2a81-4c8b-41a6-887a-903305be8e41",
   "metadata": {},
   "source": [
    "## Read clean, annotated dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "402ba1b7-74b9-45d3-b840-f4e5ea967fdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "h5ad_uuid = '6e8972a5-9463-4230-84b4-a20de055b9c3'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d19d07d9-c602-4c16-ada7-16091f43ef4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = read_adata_uuid(h5ad_uuid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "0e7cc125-7921-4fb0-a3de-c8d5ca493d97",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1823666, 1261)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "02304406-4c05-459e-aad3-aae00f909516",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIFI_L3\n",
       "Core naive CD4 T cell      341521\n",
       "Core CD14 monocyte         217576\n",
       "CM CD4 T cell              161769\n",
       "Core naive CD8 T cell      115126\n",
       "GZMK- CD56dim NK cell      102908\n",
       "                            ...  \n",
       "ASDC                          522\n",
       "GZMK+ memory CD4 Treg         467\n",
       "Activated memory B cell       433\n",
       "CLP cell                      373\n",
       "BaEoMaP cell                   78\n",
       "Name: count, Length: 72, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.obs[label_column].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afd7084d-0f4e-4aa8-b538-af3b316e4992",
   "metadata": {},
   "source": [
    "## Sample and prepare reference data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "139d3a45-9492-4ffb-af7a-79b28722a034",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata_subset = resample_anndata_min_max(\n",
    "    adata, \n",
    "    label_column, \n",
    "    max_cells = max_cell_number,\n",
    "    random_state = 3030\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "bc027836-a9ce-44d2-95ec-50b90f4a74e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(636082, 1261)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "bb48435e-91ec-4e20-9ff6-1459c785d784",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIFI_L3\n",
       "CM CD4 T cell                       20000\n",
       "CM CD8 T cell                       20000\n",
       "KLRF1+ GZMB+ CD27- EM CD8 T cell    20000\n",
       "KLRF1- GZMB+ CD27- EM CD8 T cell    20000\n",
       "Core naive CD4 T cell               20000\n",
       "                                    ...  \n",
       "ASDC                                  522\n",
       "GZMK+ memory CD4 Treg                 467\n",
       "Activated memory B cell               433\n",
       "CLP cell                              373\n",
       "BaEoMaP cell                           78\n",
       "Name: count, Length: 72, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset.obs[label_column].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8d8c478d-2cf3-425e-9327-61af4db59d2c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(636082, 33538)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset = adata_subset.raw.to_adata()\n",
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "97f82c68-0aa5-4cc1-a9c2-c3d26d5248fd",
   "metadata": {},
   "source": [
    "### Limit to genes available in 10x Flex Probes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "81b69db8-f4e2-4c54-a56c-375a10c357c2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18329"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_var = adata_subset.var.index.isin(flex_genes)\n",
    "sum(keep_var)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c33e4f12-48ed-4343-a4af-19c8a930e2d1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(636082, 18329)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata_subset = adata_subset[:,keep_var]\n",
    "adata_subset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2596500e-62c7-4fa5-bd16-63cacc8331b0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata_subset, target_sum=1e4)\n",
    "sc.pp.log1p(adata_subset)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b5fccae-79fd-4eb9-a4a8-f68536db9dda",
   "metadata": {},
   "source": [
    "## Generate Initial model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "7ed68f41-5230-477e-b57f-3bcf1ad6bba8",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "🍳 Preparing data before training\n",
      "✂️ 1096 non-expressed genes are filtered out\n",
      "🔬 Input data has 636082 cells and 17233 genes\n",
      "⚖️ Scaling input data\n",
      "🏋️ Training data using SGD logistic regression\n",
      "⚠️ Warning: it may take a long time to train this dataset with 636082 cells and 17233 genes, try to downsample cells and/or restrict genes to a subset (e.g., hvgs)\n",
      "✅ Model training done!\n"
     ]
    }
   ],
   "source": [
    "model_fs = multicelltypist.train(\n",
    "    adata_subset, \n",
    "    label_column, \n",
    "    n_jobs = 60, \n",
    "    max_iter = 10, \n",
    "    multi_class = 'ovr', \n",
    "    use_SGD = True\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0b4b82a-787e-41bd-a2a9-b847b0e84770",
   "metadata": {},
   "source": [
    "## Identify top features used for the model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "265dd0e4-14f4-4114-98fc-09af92018bf5",
   "metadata": {},
   "source": [
    "Detected genes:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "f98c3839-7e99-4643-82c8-254a2bd89ce1",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = adata_subset.X.toarray()\n",
    "flag = df.sum(axis = 0) == 0\n",
    "gene = adata_subset.var_names[ ~flag]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce3de9a5-dcf9-4d02-85f4-034143633098",
   "metadata": {},
   "source": [
    "Features with high absolute classifier coefficients for each cell type class\n",
    "\n",
    "`np.argpartition` will take the coefficient scores for each class, and retrieve the positions of the highest absolute coefficient scores to the end of an array of positions. We then select the `top_n` positions from the end of our array of positions, which allow us to retrieve genes with the highest absolute coefficients for each class.\n",
    "\n",
    "We can then combine these to get a unique list of genes that are important for our model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "bcb18ca4-360c-40de-a3c8-233deff3d7ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of genes selected: 2452\n"
     ]
    }
   ],
   "source": [
    "top_n = 200\n",
    "\n",
    "gene_index = np.argpartition(\n",
    "    np.abs(model_fs.classifier.coef_),\n",
    "    -top_n,\n",
    "    axis = 1\n",
    ")\n",
    "gene_index = gene_index[:, -top_n:]\n",
    "gene_index = np.unique(gene_index)\n",
    "\n",
    "print('Number of genes selected: {n}'.format(n = len(gene_index)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "5e6a0dc7-418e-4400-8cc2-4ca286b393fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_genes = gene[gene_index.tolist()]\n",
    "selected_df = pd.DataFrame({'gene': selected_genes})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "76116481-6d8c-4539-9cc8-095cc31d390e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>gene</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>HES4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ISG15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TTLL10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TNFRSF18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>TNFRSF4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       gene\n",
       "0      HES4\n",
       "1     ISG15\n",
       "2    TTLL10\n",
       "3  TNFRSF18\n",
       "4   TNFRSF4"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "selected_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "398634dc-57eb-4c6f-884a-226e1af3ad02",
   "metadata": {},
   "source": [
    "## Generate full model using selected features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "030648e4-e48c-4c69-82e6-16b598ecaa13",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata.raw.to_adata()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "13377112-3e01-4956-a1f0-0b0819fa6aa7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING: adata.X seems to be already log-transformed.\n"
     ]
    }
   ],
   "source": [
    "sc.pp.normalize_total(adata, target_sum=1e4)\n",
    "sc.pp.log1p(adata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "9890237b-6705-47ab-88ad-879392432998",
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata[:, adata.var_names.isin(selected_genes)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "a6c9d518-7ab1-48ae-94b5-f3d668954c7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1823666, 2452)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14ee09d1-76ea-4c4e-b2dc-7ddab9913955",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "🍳 Preparing data before training\n",
      "🔬 Input data has 1823666 cells and 2452 genes\n",
      "⚖️ Scaling input data\n",
      "🏋️ Training data using logistic regression\n"
     ]
    }
   ],
   "source": [
    "model_fs = multicelltypist.train(\n",
    "    adata, \n",
    "    label_column, \n",
    "    n_jobs = 60,\n",
    "    max_iter = 100,\n",
    "    multi_class = 'multinomial',\n",
    "    check_expression = False\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eef272aa-a2c2-48bd-9fd7-ead244edcfc7",
   "metadata": {},
   "source": [
    "## Write outputs for storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "70bb4514-c6aa-43cd-96e7-4b295064dad3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "out_dir = 'output'\n",
    "if not os.path.isdir(out_dir):\n",
    "    os.makedirs(out_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "59fb4a55-52b5-492b-a16a-b81c1d8b66b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_genes = 'output/ref_pbmc_clean_celltypist_top{n}_flex-features_{l}_{d}.csv'.format(\n",
    "    n = top_n,\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")\n",
    "\n",
    "selected_df.to_csv(out_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "a4d082d0-9ab9-4cd0-96b3-a339739fc392",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_model = 'output/ref_pbmc_clean_celltypist_model_flex-features_{l}_{d}.pkl'.format(\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")\n",
    "\n",
    "model_fs.write(out_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b1d5230-6d90-4780-938b-8e2f7401c59a",
   "metadata": {},
   "source": [
    "## Upload model to HISE\n",
    "\n",
    "Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE to use for downstream analysis steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "66ac5956-b921-415a-b370-77578b021891",
   "metadata": {},
   "outputs": [],
   "source": [
    "study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'\n",
    "title = 'PBMC Reference {l} CellTypist Model Flex Features {d}'.format(\n",
    "    l = label_column,\n",
    "    d = date.today()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "848357fc-9231-4cb4-a30d-89ce0246b898",
   "metadata": {},
   "outputs": [],
   "source": [
    "in_files = [h5ad_uuid]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "5d6df346-cb61-4a06-a7f6-5609e9bc6e69",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['6e8972a5-9463-4230-84b4-a20de055b9c3']"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "in_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "910d2458-b478-4b9f-9bac-2d56c4c7ea08",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_files = [out_genes, out_model]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "64e92a74-a95a-4f95-bc1a-01cdfa42cb13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L3_2024-03-12.csv',\n",
       " 'output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L3_2024-03-12.pkl']"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "ffd9a398-1be4-461d-8c6e-b55245212b26",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L3_2024-03-12.csv\n",
      "output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L3_2024-03-12.pkl\n",
      "Cannot determine the current notebook.\n",
      "1) /home/jupyter/scRNA-Reference-IH-A/06-Modeling/30-Python_celltypist_L1_model.ipynb\n",
      "2) /home/jupyter/scRNA-Reference-IH-A/06-Modeling/32a-Python_celltypist_L3_model_flex-features.ipynb\n",
      "3) /home/jupyter/scRNA-Reference-IH-A/05-Assembly/29-Python_clean_all_genes_normalized.ipynb\n",
      "Please select (1-3) \n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " 2\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "you are trying to upload file_ids... ['output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L3_2024-03-12.csv', 'output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L3_2024-03-12.pkl']. Do you truly want to proceed?\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "(y/n) y\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'trace_id': '2d7a5017-bc44-4e0a-80f3-0869f569c952',\n",
       " 'files': ['output/ref_pbmc_clean_celltypist_top200_flex-features_AIFI_L3_2024-03-12.csv',\n",
       "  'output/ref_pbmc_clean_celltypist_model_flex-features_AIFI_L3_2024-03-12.pkl']}"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hisepy.upload.upload_files(\n",
    "    files = out_files,\n",
    "    study_space_id = study_space_uuid,\n",
    "    title = title,\n",
    "    input_file_ids = in_files\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "9684bc1e-4da6-442f-b135-ef39c4342be5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<details>\n",
       "<summary>Click to view session information</summary>\n",
       "<pre>\n",
       "-----\n",
       "anndata             0.10.3\n",
       "hisepy              0.3.0\n",
       "multicelltypist     1.6.2\n",
       "numpy               1.25.2\n",
       "pandas              2.1.4\n",
       "scanpy              1.9.6\n",
       "session_info        1.0.0\n",
       "-----\n",
       "</pre>\n",
       "<details>\n",
       "<summary>Click to view modules imported as dependencies</summary>\n",
       "<pre>\n",
       "PIL                         10.0.1\n",
       "anyio                       NA\n",
       "arrow                       1.3.0\n",
       "asttokens                   NA\n",
       "attr                        23.2.0\n",
       "attrs                       23.2.0\n",
       "babel                       2.14.0\n",
       "beatrix_jupyterlab          NA\n",
       "brotli                      NA\n",
       "cachetools                  5.3.1\n",
       "certifi                     2024.02.02\n",
       "cffi                        1.16.0\n",
       "charset_normalizer          3.3.2\n",
       "cloudpickle                 2.2.1\n",
       "colorama                    0.4.6\n",
       "comm                        0.1.4\n",
       "cryptography                41.0.7\n",
       "cycler                      0.10.0\n",
       "cython_runtime              NA\n",
       "dateutil                    2.8.2\n",
       "db_dtypes                   1.1.1\n",
       "debugpy                     1.8.0\n",
       "decorator                   5.1.1\n",
       "defusedxml                  0.7.1\n",
       "deprecated                  1.2.14\n",
       "exceptiongroup              1.2.0\n",
       "executing                   2.0.1\n",
       "fastjsonschema              NA\n",
       "fqdn                        NA\n",
       "google                      NA\n",
       "greenlet                    2.0.2\n",
       "grpc                        1.58.0\n",
       "grpc_status                 NA\n",
       "h5py                        3.10.0\n",
       "idna                        3.6\n",
       "igraph                      0.10.8\n",
       "importlib_metadata          NA\n",
       "ipykernel                   6.28.0\n",
       "ipython_genutils            0.2.0\n",
       "ipywidgets                  8.1.1\n",
       "isoduration                 NA\n",
       "jedi                        0.19.1\n",
       "jinja2                      3.1.2\n",
       "joblib                      1.3.2\n",
       "json5                       NA\n",
       "jsonpointer                 2.4\n",
       "jsonschema                  4.20.0\n",
       "jsonschema_specifications   NA\n",
       "jupyter_events              0.9.0\n",
       "jupyter_server              2.12.1\n",
       "jupyterlab_server           2.25.2\n",
       "jwt                         2.8.0\n",
       "kiwisolver                  1.4.5\n",
       "leidenalg                   0.10.1\n",
       "llvmlite                    0.41.0\n",
       "lz4                         4.3.2\n",
       "markupsafe                  2.1.3\n",
       "matplotlib                  3.8.0\n",
       "matplotlib_inline           0.1.6\n",
       "mpl_toolkits                NA\n",
       "mpmath                      1.3.0\n",
       "natsort                     8.4.0\n",
       "nbformat                    5.9.2\n",
       "numba                       0.58.0\n",
       "opentelemetry               NA\n",
       "overrides                   NA\n",
       "packaging                   23.2\n",
       "parso                       0.8.3\n",
       "pexpect                     4.8.0\n",
       "pickleshare                 0.7.5\n",
       "pkg_resources               NA\n",
       "platformdirs                4.1.0\n",
       "plotly                      5.18.0\n",
       "prettytable                 3.9.0\n",
       "prometheus_client           NA\n",
       "prompt_toolkit              3.0.42\n",
       "proto                       NA\n",
       "psutil                      NA\n",
       "ptyprocess                  0.7.0\n",
       "pure_eval                   0.2.2\n",
       "pyarrow                     13.0.0\n",
       "pydev_ipython               NA\n",
       "pydevconsole                NA\n",
       "pydevd                      2.9.5\n",
       "pydevd_file_utils           NA\n",
       "pydevd_plugins              NA\n",
       "pydevd_tracing              NA\n",
       "pygments                    2.17.2\n",
       "pynvml                      NA\n",
       "pyparsing                   3.1.1\n",
       "pyreadr                     0.5.0\n",
       "pythonjsonlogger            NA\n",
       "pytz                        2023.3.post1\n",
       "referencing                 NA\n",
       "requests                    2.31.0\n",
       "rfc3339_validator           0.1.4\n",
       "rfc3986_validator           0.1.1\n",
       "rpds                        NA\n",
       "scipy                       1.11.4\n",
       "send2trash                  NA\n",
       "shapely                     1.8.5.post1\n",
       "six                         1.16.0\n",
       "sklearn                     1.3.2\n",
       "sniffio                     1.3.0\n",
       "socks                       1.7.1\n",
       "sql                         NA\n",
       "sqlalchemy                  2.0.21\n",
       "sqlparse                    0.4.4\n",
       "stack_data                  0.6.2\n",
       "sympy                       1.12\n",
       "termcolor                   NA\n",
       "texttable                   1.7.0\n",
       "threadpoolctl               3.2.0\n",
       "torch                       2.1.2+cu121\n",
       "torchgen                    NA\n",
       "tornado                     6.3.3\n",
       "tqdm                        4.66.1\n",
       "traitlets                   5.9.0\n",
       "typing_extensions           NA\n",
       "uri_template                NA\n",
       "urllib3                     1.26.18\n",
       "wcwidth                     0.2.12\n",
       "webcolors                   1.13\n",
       "websocket                   1.7.0\n",
       "wrapt                       1.15.0\n",
       "xarray                      2023.12.0\n",
       "yaml                        6.0.1\n",
       "zipp                        NA\n",
       "zmq                         25.1.2\n",
       "zoneinfo                    NA\n",
       "zstandard                   0.22.0\n",
       "</pre>\n",
       "</details> <!-- seems like this ends pre, so might as well be explicit -->\n",
       "<pre>\n",
       "-----\n",
       "IPython             8.19.0\n",
       "jupyter_client      8.6.0\n",
       "jupyter_core        5.6.1\n",
       "jupyterlab          4.1.2\n",
       "notebook            6.5.4\n",
       "-----\n",
       "Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]\n",
       "Linux-5.15.0-1053-gcp-x86_64-with-glibc2.31\n",
       "-----\n",
       "Session information updated at 2024-03-12 15:52\n",
       "</pre>\n",
       "</details>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import session_info\n",
    "session_info.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6824f32d-6603-4e9e-b1a8-abc87d0f7908",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}