%matplotlib inline
import numpy
import pandas
from lifelines import KaplanMeierFitter
import matplotlib
from lifelines import CoxPHFitter
from lifelines.datasets import load_rossi
The Kaplan-Meier estimator is a non-parametric method for estimating the survival function of a dataset that records the duration of each lifetime and whether the event of interest occurred.
# Load the survival table and preview its first rows.
df = pandas.read_csv('UNC_GEO_survival.csv')
print(df.head())

# Durations and event indicators used for the fit.
# NOTE(review): 'OS T' / 'OS E' presumably mean overall-survival time and
# event flag -- confirm against the dataset description.
T, E = df['OS T'], df['OS E']

# Fit a single Kaplan-Meier curve over the whole cohort.
kmf = KaplanMeierFitter()
kmf.fit(durations=T, event_observed=E)

# Bare expressions: a notebook displays them when they end a cell.
kmf.survival_function_
# NOTE(review): recent lifelines releases name this attribute
# `median_survival_time_` -- confirm against the installed version.
kmf.median_
kmf.plot();
# One boolean row mask per value of the 'Type' column.
groups = df['Type']
Basal, Her2, LumA, LumB = (
    groups == subtype for subtype in ('Basal', 'Her2', 'LumA', 'LumB')
)

# Refit the shared estimator for each subtype and overlay all curves on the
# same axes; the first plot call (ax=None) creates the axes that the later
# calls reuse.
ax = None
for subtype, mask in (
    ('Basal', Basal),
    ('Her2', Her2),
    ('LumA', LumA),
    ('LumB', LumB),
):
    kmf.fit(T[mask], E[mask], label=subtype)
    ax = kmf.plot(ax=ax)
The Cox proportional hazards model is a widely used tool in survival analysis for studying how survival time depends on predictor variables.
# Load the regression table, then expand it with indicator (dummy) columns
# via pandas.get_dummies.
# NOTE(review): a full set of dummies per categorical column can be collinear
# in a Cox model; consider drop_first=True once the CSV schema is confirmed.
cox_d = pandas.read_csv('UNC_GEO_cox_reg.csv')
cox_d = pandas.get_dummies(cox_d)

# Fit the Cox proportional hazards model with 'RFS T' as the duration column
# and 'RFS E' as the event column, then report the fitted summary.
cf = CoxPHFitter()
cf.fit(cox_d, duration_col='RFS T', event_col='RFS E')
cf.print_summary()

# Preview the encoded design matrix (displayed when last in a notebook cell).
cox_d.head()