{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-overview",
   "metadata": {},
   "source": [
    "# T cell annotation: collect finalized labels across clustering rounds\n",
    "\n",
    "For every clustering round this notebook\n",
    "\n",
    "1. reads the cluster-level annotation CSV and keeps the clusters marked final at both AIFI L2 and L3,\n",
    "2. joins those labels onto the cell-level `obs` table of the matching `.h5ad` object,\n",
    "3. writes the cells that received a final label to `Annotation_Part_<n>.parquet`.\n",
    "\n",
    "It then checks that the parts add up to the number of cells in the first-round object and bundles every CSV in the working directory into one Excel workbook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb2092aa-e082-4827-a3ec-0e5c2af07bef",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import concurrent.futures\n",
    "import copy\n",
    "import gc\n",
    "import os\n",
    "import re\n",
    "from concurrent.futures import ProcessPoolExecutor\n",
    "from pathlib import Path\n",
    "\n",
    "import anndata\n",
    "import celltypist\n",
    "import h5py\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "import scanpy.external as sce\n",
    "import scipy.sparse as scs\n",
    "from celltypist import models"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "notebook-config-header",
   "metadata": {},
   "source": [
    "# Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "notebook-config",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Directory holding the processed AnnData (.h5ad) objects.\n",
    "DATA_DIR = Path('/home/jupyter/reference_generating_new/T')\n",
    "# Directory holding the annotation CSVs; also scanned for the Excel export.\n",
    "CSV_DIRECTORY = '.'\n",
    "OUTPUT_EXCEL = 'combined.xlsx'\n",
    "# A cluster annotation is used only when both levels are marked final.\n",
    "FINAL_QUERY = \"AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'\"\n",
    "\n",
    "# One entry per clustering round.\n",
    "#   part        -> suffix of the Annotation_Part_<part>.parquet output\n",
    "#   pending     -> clusters expected to have no final annotation in this round;\n",
    "#                  [] means every cell is expected to be final,\n",
    "#                  None means no per-round check is made.\n",
    "ROUNDS = [\n",
    "    {\n",
    "        'part': 1,\n",
    "        'label': 'Round 1',\n",
    "        'csv': 'T_Cells_Res1.5_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_2023-11-23.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_1.5',\n",
    "        'pending': None,\n",
    "    },\n",
    "    {\n",
    "        'part': 2,\n",
    "        'label': 'C5+C12',\n",
    "        'csv': 'T_Cells_Res3_C5_C12_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C5_C12_2023-11-28_redo.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_3_C5_C12',\n",
    "        'pending': ['8', '14'],\n",
    "    },\n",
    "    {\n",
    "        'part': 3,\n",
    "        'label': 'C11',\n",
    "        'csv': 'T_Cells_Res1.5_C11_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C11_2023-11-27.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_1.5_C11',\n",
    "        'pending': ['8'],\n",
    "    },\n",
    "    {\n",
    "        'part': 4,\n",
    "        'label': 'C13',\n",
    "        'csv': 'T_Cells_Res1.5_C13_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C13_2023-11-27_retry.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_1.5_C13',\n",
    "        'pending': ['1', '11'],\n",
    "    },\n",
    "    {\n",
    "        'part': 5,\n",
    "        'label': 'C14',\n",
    "        'csv': 'T_Cells_Res1.5_C14_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C14_2023-11-27.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_1.5_C14',\n",
    "        'pending': [],\n",
    "    },\n",
    "    {\n",
    "        'part': 6,\n",
    "        'label': 'C15 gdT',\n",
    "        'csv': 'T_Cells_Res1.5_C15_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C15_with_gdT_from_C5_C12_C11_C13_2023-11-29.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_1.5_C15_with_gdT_from_C5_C12_C11_C13',\n",
    "        'pending': [],\n",
    "    },\n",
    "    {\n",
    "        'part': 7,\n",
    "        'label': 'C19',\n",
    "        'csv': 'T_Cells_Res2_C19_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C19_2023-12-05.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_2_C19',\n",
    "        'pending': [],\n",
    "    },\n",
    "    {\n",
    "        'part': 8,\n",
    "        'label': 'C21',\n",
    "        'csv': 'T_Cells_Res1.5_C21_Annotation.csv',\n",
    "        'h5ad': 'Tcells_processed_LV2_C21_2023-11-27.h5ad',\n",
    "        'cluster_col': 'leiden_resolution_1.5_C21',\n",
    "        'pending': None,\n",
    "    },\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "notebook-helpers-header",
   "metadata": {},
   "source": [
    "# Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "notebook-helpers",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def load_final_annotation(csv_path, cluster_col):\n",
    "    \"\"\"Read a cluster annotation CSV and return only the rows marked final.\n",
    "\n",
    "    The values of `cluster_col` are converted to `str` before filtering\n",
    "    (presumably so they match the cluster labels stored in `obs` - confirm).\n",
    "    Rows are kept when they satisfy `FINAL_QUERY`.\n",
    "    \"\"\"\n",
    "    annotation = pd.read_csv(csv_path)\n",
    "    annotation[cluster_col] = annotation[cluster_col].map(str)\n",
    "    return annotation.query(FINAL_QUERY)\n",
    "\n",
    "\n",
    "def annotate_round(h5ad_path, annotation, cluster_col):\n",
    "    \"\"\"Join `annotation` onto the obs table of the AnnData file at `h5ad_path`.\n",
    "\n",
    "    Returns `(obs, finalized)`: the merged obs table for every cell, and the\n",
    "    subset of cells whose `cluster_col` value is listed in `annotation`.\n",
    "    \"\"\"\n",
    "    adata = sc.read_h5ad(h5ad_path)\n",
    "    # Loading and merging in one step keeps this idempotent: re-running never\n",
    "    # merges the annotation columns a second time.\n",
    "    # validate= fails fast if a cluster id is listed more than once in the CSV.\n",
    "    # NOTE(review): DataFrame.merge does not keep the left index, so obs_names\n",
    "    # are not carried over by this assignment - confirm that is intended.\n",
    "    adata.obs = adata.obs.merge(\n",
    "        annotation, on=[cluster_col], how='left', validate='many_to_one'\n",
    "    )\n",
    "    obs = adata.obs\n",
    "    finalized = obs[obs[cluster_col].isin(annotation[cluster_col])]\n",
    "    return obs, finalized"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab970439-e31a-46d7-8f82-a60acf9210f4",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Get cells with a final annotation from every clustering round\n",
    "\n",
    "Round 1 is the first round of clustering on all T cells; the remaining rounds (C5+C12, C11, C13, C14, C15 gdT, C19, C21) are re-clusterings listed in `ROUNDS`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "notebook-run-rounds",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "summary_records = []\n",
    "for spec in ROUNDS:\n",
    "    cluster_col = spec['cluster_col']\n",
    "    annotation = load_final_annotation(\n",
    "        os.path.join(CSV_DIRECTORY, spec['csv']), cluster_col\n",
    "    )\n",
    "    obs, finalized = annotate_round(DATA_DIR / spec['h5ad'], annotation, cluster_col)\n",
    "    finalized.to_parquet('Annotation_Part_{}.parquet'.format(spec['part']))\n",
    "\n",
    "    pending = spec['pending']\n",
    "    summary_records.append({\n",
    "        'part': spec['part'],\n",
    "        'label': spec['label'],\n",
    "        'n_cells': len(obs),\n",
    "        'n_final': len(finalized),\n",
    "        # Cells sitting in the clusters that are expected to be non-final.\n",
    "        'n_pending': np.nan if pending is None else int(obs[cluster_col].isin(pending).sum()),\n",
    "    })\n",
    "\n",
    "    # Only the counts are needed from here on; release the large tables.\n",
    "    del obs, finalized\n",
    "    gc.collect()\n",
    "\n",
    "summary = pd.DataFrame(summary_records).set_index('part')\n",
    "summary"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "536de8e4-c1d9-46df-841b-78f9c3c93dca",
   "metadata": {},
   "source": [
    "# Check if cell number is same with original adata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b058d2a5-eab1-4dde-8b07-1c3d75d96a8c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Per round: cells with a final annotation plus cells in the clusters listed\n",
    "# as pending must account for every cell of that round.\n",
    "checked = summary.dropna(subset=['n_pending'])\n",
    "assert (checked['n_final'] + checked['n_pending'] == checked['n_cells']).all(), checked\n",
    "\n",
    "# Overall: the finalized parts together must match the first-round cell count.\n",
    "assert summary.loc[1, 'n_cells'] == summary['n_final'].sum(), summary"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8f5236b-93e3-46de-827d-271526911007",
   "metadata": {},
   "source": [
    "# Combine annotation tables\n",
    "\n",
    "Every `.csv` file in `CSV_DIRECTORY` becomes one sheet of `OUTPUT_EXCEL`, named after the file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4d68292-1760-4275-b903-9038b32f6dce",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Sorted so the sheet order does not depend on the directory listing order.\n",
    "csv_files = sorted(\n",
    "    name for name in os.listdir(CSV_DIRECTORY) if name.endswith('.csv')\n",
    ")\n",
    "\n",
    "# A single writer, opened by the context manager so it is always closed.\n",
    "with pd.ExcelWriter(OUTPUT_EXCEL, engine='openpyxl') as writer:\n",
    "    for csv_file in csv_files:\n",
    "        table = pd.read_csv(os.path.join(CSV_DIRECTORY, csv_file))\n",
    "        sheet_name = os.path.splitext(csv_file)[0]\n",
    "        table.to_excel(writer, sheet_name=sheet_name, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}