{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from sksurv.linear_model import CoxPHSurvivalAnalysis\n",
    "from sklearn.model_selection import train_test_split\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pkgutil\n",
    "import io\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "sys.path.append('../auton-survival/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_seer_csv(csv_file, drop_cols=None):\n",
    "    \"\"\"Load a SEER CSV through ``pkgutil`` and drop unwanted columns.\n",
    "\n",
    "    The 'Patient ID' column is always removed when it is present.\n",
    "\n",
    "    Args:\n",
    "        csv_file: Resource path passed to ``pkgutil.get_data`` together\n",
    "            with ``__name__``.\n",
    "        drop_cols: Optional column name, or iterable of column names, to\n",
    "            drop. The object passed in by the caller is never modified.\n",
    "\n",
    "    Returns:\n",
    "        A ``pandas.DataFrame`` without the dropped columns.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If a requested column does not exist in the CSV.\n",
    "    \"\"\"\n",
    "    raw = pkgutil.get_data(__name__, csv_file)\n",
    "    data = pd.read_csv(io.BytesIO(raw), low_memory=False)\n",
    "\n",
    "    # Work on a private copy: appending to the caller's list would leak\n",
    "    # 'Patient ID' into it and accumulate duplicates across calls.\n",
    "    if drop_cols is None:\n",
    "        to_drop = []\n",
    "    elif isinstance(drop_cols, str):\n",
    "        to_drop = [drop_cols]\n",
    "    else:\n",
    "        to_drop = list(drop_cols)\n",
    "\n",
    "    if 'Patient ID' in data.columns and 'Patient ID' not in to_drop:\n",
    "        to_drop.append('Patient ID')\n",
    "\n",
    "    # An empty list is a no-op, so the drop_cols=None case needs no branch.\n",
    "    return data.drop(columns=to_drop)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = read_seer_csv('datasets/ConditionalER_2_breast condition_ga_modified.csv', drop_cols=['State'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Structured (event, time) target array for the scikit-survival estimator.\n",
    "col_event = x['death'].map({0: False, 1: True})\n",
    "col_time = x['d.time']\n",
    "y = np.empty(dtype=[('col_event', bool), ('col_time', np.float64)], shape=x.shape[0])\n",
    "\n",
    "y['col_event'] = col_event.values\n",
    "y['col_time'] = col_time.values\n",
    "\n",
    "# Build the one-hot feature matrix without mutating `x`, so this cell\n",
    "# can be re-run without failing on the already-removed columns.\n",
    "x_one_h = pd.get_dummies(x.drop(columns=['d.time', 'death']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed random_state keeps the 80/20 split reproducible.\n",
    "x_train, x_test, y_train, y_test = train_test_split(x_one_h, y, test_size=0.2, random_state=101)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
"# Build the Cox proportional-hazards model, then fit it on the training split.\n",
    "estimator = CoxPHSurvivalAnalysis(alpha=0.0001)\n",
    "estimator.fit(x_train, y_train)\n",
    "\n",
    "# The training score is printed; the test score is the cell's displayed value.\n",
    "print(estimator.score(x_train, y_train))\n",
    "estimator.score(x_test, y_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.6 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}