{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Section 4 of \"Piecewise exponential models for survival data with covariates\" by Michael Friedman."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "from lifelines import CoxPHFitter, PiecewiseExponentialRegressionFitter\n",
    "from lifelines.datasets import load_lupus\n",
    "\n",
    "df = load_lupus()\n",
    "\n",
    "# preprocessing\n",
    "T_col = 'time_between_diagnosis_and_last_observation_(years)'\n",
    "E_col = 'dead'\n",
    "# flag patients whose onset-to-diagnosis delay is at most 2 years (24 months)\n",
    "df['time_elapsed_between_estimated_onset_and_diagnosis_binary'] = df['time_elapsed_between_estimated_onset_and_diagnosis_(months)'] <= 2*12\n",
    "# NOTE(review): 'recent' is True for diagnoses *before* 1951-07 -- confirm the direction against the paper.\n",
    "df['recent'] = df['year_month_of_diagnosis'] < '1951-07'\n",
    "\n",
    "columns = ['is_male', 'is_white',\n",
    "           'age_at_diagnosis',\n",
    "           'time_elapsed_between_estimated_onset_and_diagnosis_binary',\n",
    "           'recent'] + [T_col, E_col]\n",
    "\n",
    "df = df[columns]\n",
    "df = df.dropna() # drop the individual with NaN\n",
    "\n",
    "# these models cannot naturally handle 0 durations, so we fudge a bit.\n",
    "df.loc[df[T_col] == 0, T_col] = 0.000001\n",
    "# add a constant column (only needed for non-Cox models.)\n",
    "df['constant'] = 1.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one list of breakpoints per piecewise exponential model to fit\n",
    "breakpoints = [\n",
    "    [0.5, 0.8, 1.1, 1.7, 2.5, 3.1],\n",
    "    [0.3, 0.8, 1.0, 2.0, 3.0],\n",
    "    [0.4, 0.9, 1.5, 2.5],\n",
    "    [0.3, 1.0, 2.0, 3.0],\n",
    "    [0.4],\n",
    "    [0.3]\n",
    "]\n",
    "\n",
    "# model label -> fitted coefficients; kept separate from `results` so the next cell can be re-run safely\n",
    "coefs_by_model = dict()\n",
    "for i, bp in enumerate(breakpoints, start=1):\n",
    "    # by forcing the penalizer to be 1000, the coefs between periods are constrained to be identical.\n",
    "    pcf = PiecewiseExponentialRegressionFitter(penalizer=1000., breakpoints=bp)\n",
    "    pcf.fit(df, T_col, E_col)\n",
    "    # Note the negative sign. We use a different parameterization than the paper.\n",
    "    coefs_by_model[\"model %d\" % i] = -pcf.params_['lambda_0_'].drop('constant')\n",
    "\n",
    "# the Cox model does not use the constant column\n",
    "cph = CoxPHFitter().fit(df.drop('constant', axis=1), T_col, E_col)\n",
    "coefs_by_model['Cox'] = cph.params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one row per model, one column per covariate\n",
    "results = pd.DataFrame(coefs_by_model).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
| \n", " | is_male | \n", "is_white | \n", "age_at_diagnosis | \n", "time_elapsed_between_estimated_onset_and_diagnosis_binary | \n", "recent | \n", "
|---|---|---|---|---|---|
| model 1 | \n", "-0.454013 | \n", "-0.660882 | \n", "0.001334 | \n", "0.472303 | \n", "1.096200 | \n", "
| model 2 | \n", "-0.449296 | \n", "-0.664789 | \n", "0.001070 | \n", "0.435175 | \n", "1.174195 | \n", "
| model 3 | \n", "-0.415715 | \n", "-0.667667 | \n", "0.000182 | \n", "0.497052 | \n", "1.086471 | \n", "
| model 4 | \n", "-0.423635 | \n", "-0.637827 | \n", "0.001707 | \n", "0.409149 | \n", "1.247443 | \n", "
| model 5 | \n", "-0.455248 | \n", "-0.647714 | \n", "0.001018 | \n", "0.462002 | \n", "1.145437 | \n", "
| model 6 | \n", "-0.445260 | \n", "-0.638205 | \n", "0.001007 | \n", "0.456981 | \n", "1.189638 | \n", "
| Cox | \n", "-0.444981 | \n", "-0.619252 | \n", "0.000664 | \n", "0.475423 | \n", "1.144530 | \n", "