From b1d36b892645732d36a9336f67d0d06f482e9a10 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:08:58 +0100 Subject: [PATCH 1/5] cox_ph add all arguments --- ehrapy/tools/_sa.py | 161 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 149 insertions(+), 12 deletions(-) diff --git a/ehrapy/tools/_sa.py b/ehrapy/tools/_sa.py index fed63b9e..241e5dee 100644 --- a/ehrapy/tools/_sa.py +++ b/ehrapy/tools/_sa.py @@ -3,7 +3,6 @@ import warnings from typing import TYPE_CHECKING, Literal -import numpy as np # This package is implicitly used import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf @@ -23,6 +22,7 @@ if TYPE_CHECKING: from collections.abc import Iterable + import numpy as np from anndata import AnnData from statsmodels.genmod.generalized_linear_model import GLMResultsWrapper @@ -347,9 +347,7 @@ def anova_glm(result_1: GLMResultsWrapper, result_2: GLMResultsWrapper, formula_ return dataframe -def _regression_model( - model_class, adata: AnnData, duration_col: str, event_col: str, entry_col: str = None, accept_zero_duration=True -): +def _regression_model_data_frame_preparation(adata: AnnData, duration_col: str, accept_zero_duration=True): """Convenience function for regression models.""" df = anndata_to_df(adata) df = df.dropna() @@ -357,13 +355,35 @@ def _regression_model( if not accept_zero_duration: df.loc[df[duration_col] == 0, duration_col] += 1e-5 - model = model_class() - model.fit(df, duration_col, event_col, entry_col=entry_col) - - return model + return df -def cox_ph(adata: AnnData, duration_col: str, event_col: str, entry_col: str = None) -> CoxPHFitter: +def cox_ph( + adata: AnnData, + duration_col: str, + *, + inplace: bool = True, + key_added_prefix: str | None = None, + alpha: float = 0.05, + label: str | None = None, + baseline_estimation_method: Literal["breslow", "spline", "piecewise"] = "breslow", + penalizer: float | np.ndarray = 
0.0, + l1_ratio: float = 0.0, + strata: list[str] | str | None = None, + n_baseline_knots: int = 4, + knots: list[float] | None = None, + breakpoints: list[float] | None = None, + event_col: str = None, + weights_col: str | None = None, + cluster_col: str | None = None, + entry_col: str = None, + robust: bool = False, + formula: str = None, + batch_mode: bool = None, + show_progress: bool = False, + initial_point: np.ndarray | None = None, + fit_options: dict | None = None, +) -> CoxPHFitter: """Fit the Cox’s proportional hazard for the survival function. The Cox proportional hazards model (CoxPH) examines the relationship between the survival time of subjects and one or more predictor variables. @@ -376,7 +396,26 @@ def cox_ph(adata: AnnData, duration_col: str, event_col: str, entry_col: str = N duration_col: The name of the column in the AnnData objects that contains the subjects’ lifetimes. event_col: The name of the column in anndata that contains the subjects’ death observation. If left as None, assume all individuals are uncensored. + inplace: Whether to modify the AnnData object in place. + alpha: The alpha value in the confidence intervals. + label: A string to name the column of the estimate. + baseline_estimation_method: The method used to estimate the baseline hazard. Options are 'breslow', 'spline', and 'piecewise'. + penalizer: Attach a penalty to the size of the coefficients during regression. This improves stability of the estimates and controls for high correlation between covariates. + l1_ratio: Specify what ratio to assign to a L1 vs L2 penalty. Same as scikit-learn. See penalizer above. + strata: specify a list of columns to use in stratification. This is useful if a categorical covariate does not obey the proportional hazard assumption. This is used similar to the strata expression in R. See http://courses.washington.edu/b515/l17.pdf. + n_baseline_knots: Used when baseline_estimation_method="spline". 
Set the number of knots (interior & exterior) in the baseline hazard, which will be placed evenly along the time axis. Should be at least 2. Royston et al., the authors of this model, suggest 4 to start, but any values between 2 and 8 are reasonable. If you need to customize the timestamps used to calculate the curve, use the knots parameter instead. + knots: When baseline_estimation_method="spline", this allows customizing the points in the time axis for the baseline hazard curve. To use evenly-spaced points in time, the n_baseline_knots parameter can be employed instead. + breakpoints: Used when baseline_estimation_method="piecewise". Set the positions of the baseline hazard breakpoints. + event_col: The name of the column in DataFrame that contains the subjects’ death observation. If left as None, assume all individuals are uncensored. + weights_col: The name of the column in DataFrame that contains the weights for each subject. + cluster_col: The name of the column in DataFrame that contains the cluster variable. Using this forces the sandwich estimator (robust variance estimator) to be used. entry_col: Column denoting when a subject entered the study, i.e. left-truncation. + robust: Compute the robust errors using the Huber sandwich estimator, aka Wei-Lin estimate. This does not handle ties, so if there are high number of ties, results may significantly differ. + formula: a Wilkinson formula, like in R and statsmodels, for the right-hand-side. If left as None, all columns not assigned as durations, weights, etc. are used. Uses the library Formulaic for parsing. + batch_mode: enabling batch_mode can be faster for datasets with a large number of ties. If left as None, lifelines will choose the best option. + show_progress: since the fitter is iterative, show convergence diagnostics. Useful if convergence is failing. + initial_point: set the starting point for the iterative solver. + fit_options: Additional keyword arguments to pass into the estimator. 
Returns: Fitted CoxPHFitter. @@ -388,10 +427,80 @@ def cox_ph(adata: AnnData, duration_col: str, event_col: str, entry_col: str = N >>> adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0) >>> cph = ep.tl.cox_ph(adata, "mort_day_censored", "censor_flg") """ - return _regression_model(CoxPHFitter, adata, duration_col, event_col, entry_col) + df = _regression_model_data_frame_preparation(adata, duration_col) + cox_ph = CoxPHFitter( + alpha=alpha, + label=label, + strata=strata, + baseline_estimation_method=baseline_estimation_method, + penalizer=penalizer, + l1_ratio=l1_ratio, + n_baseline_knots=n_baseline_knots, + knots=knots, + breakpoints=breakpoints, + ) + cox_ph.fit( + df, + duration_col=duration_col, + event_col=event_col, + entry_col=entry_col, + robust=robust, + initial_point=initial_point, + weights_col=weights_col, + cluster_col=cluster_col, + batch_mode=batch_mode, + formula=formula, + fit_options=fit_options, + show_progress=show_progress, + ) + + # Add the results to the AnnData object + if inplace: + if key_added_prefix is None: + key_added_prefix = "" + else: + key_added_prefix = key_added_prefix + "_" + + cox_ph_summary = cox_ph.summary + print(cox_ph_summary) + + full_results = pd.DataFrame(index=adata.var.index) + + # Populate with CoxPH summary data + for key in cox_ph_summary.columns: + full_results[key_added_prefix + key] = cox_ph_summary[key] + + # Add a boolean column indicating rows populated by this function + full_results[key_added_prefix + "cox_ph_populated"] = full_results.notna().any(axis=1) + + # Assign results back to adata.var + for col in full_results.columns: + adata.var[col] = full_results[col] + + return cox_ph -def weibull_aft(adata: AnnData, duration_col: str, event_col: str, entry_col: str = None) -> WeibullAFTFitter: +def weibull_aft( + adata: AnnData, + duration_col: str, + *, + inplace: bool = True, + key_added_prefix: str | None = None, + alpha: float = 0.05, + fit_intercept: bool = True, + penalizer: 
float | np.ndarray = 0.0, + l1_ratio: float = 0.0, + model_ancillary: bool = True, + event_col: str | None = None, + ancillary: bool | pd.DataFrame | None = None, + show_progress: bool = False, + weights_col: str | None = None, + robust: bool = False, + initial_point=None, + entry_col: str | None = None, + formula: str | None = None, + fit_options: dict | None = None, +) -> WeibullAFTFitter: """Fit the Weibull accelerated failure time regression for the survival function. The Weibull Accelerated Failure Time (AFT) survival regression model is a statistical method used to analyze time-to-event data, @@ -417,10 +526,22 @@ def weibull_aft(adata: AnnData, duration_col: str, event_col: str, entry_col: st >>> adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0) >>> aft = ep.tl.weibull_aft(adata, "mort_day_censored", "censor_flg") """ + return _regression_model(WeibullAFTFitter, adata, duration_col, event_col, entry_col, accept_zero_duration=False) -def log_logistic_aft(adata: AnnData, duration_col: str, event_col: str, entry_col: str = None) -> LogLogisticAFTFitter: +def log_logistic_aft( + adata: AnnData, + duration_col: str, + *, + alpha: float = 0.05, + fit_intercept: bool = True, + penalizer: float | np.ndarray = 0.0, + l1_ratio: float = 0.0, + model_ancillary: bool = False, + event_col: str = None, + entry_col: str = None, +) -> LogLogisticAFTFitter: """Fit the log logistic accelerated failure time regression for the survival function. The Log-Logistic Accelerated Failure Time (AFT) survival regression model is a powerful statistical tool employed in the analysis of time-to-event data. This model operates under the assumption that the logarithm of survival time adheres to a log-logistic distribution, offering a flexible framework for understanding the impact of covariates on survival times. 
@@ -450,6 +571,22 @@ def log_logistic_aft(adata: AnnData, duration_col: str, event_col: str, entry_co ) +def _regression_model( + model_class, adata: AnnData, duration_col: str, event_col: str, entry_col: str = None, accept_zero_duration=True +): + """Convenience function for regression models.""" + df = anndata_to_df(adata) + df = df.dropna() + + if not accept_zero_duration: + df.loc[df[duration_col] == 0, duration_col] += 1e-5 + + model = model_class() + model.fit(df, duration_col, event_col, entry_col=entry_col) + + return model + + def _univariate_model( adata: AnnData, duration_col: str, From 35dbacf085a7ae3d7965b05254d7aa16cbe91ef3 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:29:57 +0100 Subject: [PATCH 2/5] updated test to use keywords --- tests/tools/test_sa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tools/test_sa.py b/tests/tools/test_sa.py index 48d85b36..e2bfec29 100644 --- a/tests/tools/test_sa.py +++ b/tests/tools/test_sa.py @@ -92,7 +92,7 @@ def _sa_function_assert(self, model, model_class): def _sa_func_test(self, sa_function, sa_class, mimic_2_sa): adata, duration_col, event_col = mimic_2_sa - sa = sa_function(adata, duration_col, event_col) + sa = sa_function(adata, duration_col=duration_col, event_col=event_col) self._sa_function_assert(sa, sa_class) def test_kmf(self, mimic_2_sa): From 22d190a19c07aa0b8fab49cc5bfa36e33f4bacce Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:47:09 +0100 Subject: [PATCH 3/5] weibull_aft arguments update --- ehrapy/tools/_sa.py | 102 +++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 25 deletions(-) diff --git a/ehrapy/tools/_sa.py b/ehrapy/tools/_sa.py index 241e5dee..13b74a8f 100644 --- a/ehrapy/tools/_sa.py +++ b/ehrapy/tools/_sa.py @@ -358,6 +358,26 @@ def _regression_model_data_frame_preparation(adata: 
AnnData, duration_col: str, return df +def _regression_model_populate_adata(adata: AnnData, model_summary: pd.DataFrame, key_added_prefix: str = None): + if key_added_prefix is None: + key_added_prefix = "" + else: + key_added_prefix = key_added_prefix + "_" + + full_results = pd.DataFrame(index=adata.var.index) + + # Populate with CoxPH summary data + for key in model_summary.columns: + full_results[key_added_prefix + key] = model_summary[key] + + # Add a boolean column indicating rows populated by this function + full_results[key_added_prefix + "cox_ph_populated"] = full_results.notna().any(axis=1) + + # Assign results back to adata.var + for col in full_results.columns: + adata.var[col] = full_results[col] + + def cox_ph( adata: AnnData, duration_col: str, @@ -397,6 +417,7 @@ def cox_ph( event_col: The name of the column in anndata that contains the subjects’ death observation. If left as None, assume all individuals are uncensored. inplace: Whether to modify the AnnData object in place. + key_added_prefix: Prefix to add to the column names in the AnnData object. An underscore will be added between the prefix and the column alpha: The alpha value in the confidence intervals. label: A string to name the column of the estimate. baseline_estimation_method: The method used to estimate the baseline hazard. Options are 'breslow', 'spline', and 'piecewise'. 
@@ -456,26 +477,7 @@ def cox_ph( # Add the results to the AnnData object if inplace: - if key_added_prefix is None: - key_added_prefix = "" - else: - key_added_prefix = key_added_prefix + "_" - - cox_ph_summary = cox_ph.summary - print(cox_ph_summary) - - full_results = pd.DataFrame(index=adata.var.index) - - # Populate with CoxPH summary data - for key in cox_ph_summary.columns: - full_results[key_added_prefix + key] = cox_ph_summary[key] - - # Add a boolean column indicating rows populated by this function - full_results[key_added_prefix + "cox_ph_populated"] = full_results.notna().any(axis=1) - - # Assign results back to adata.var - for col in full_results.columns: - adata.var[col] = full_results[col] + _regression_model_populate_adata(adata, cox_ph.summary, key_added_prefix) return cox_ph @@ -492,7 +494,7 @@ def weibull_aft( l1_ratio: float = 0.0, model_ancillary: bool = True, event_col: str | None = None, - ancillary: bool | pd.DataFrame | None = None, + ancillary: bool | pd.DataFrame | str | None = None, show_progress: bool = False, weights_col: str | None = None, robust: bool = False, @@ -512,9 +514,29 @@ def weibull_aft( Args: adata: AnnData object with necessary columns `duration_col` and `event_col`. duration_col: Name of the column in the AnnData objects that contains the subjects’ lifetimes. - event_col: Name of the column in anndata that contains the subjects’ death observation. + inplace: Whether to modify the AnnData object in place. + key_added_prefix: Prefix to add to the column names in the AnnData object. An underscore will be added between the prefix and the column name. + alpha: The alpha value in the confidence intervals. + fit_intercept: Whether to fit an intercept term in the model. + penalizer: Attach a penalty to the size of the coefficients during regression. This improves stability of the estimates and controls for high correlation between covariates. + l1_ratio: Specify what ratio to assign to a L1 vs L2 penalty. Same as scikit-learn. 
See penalizer above. + model_ancillary: set the model instance to always model the ancillary parameter with the supplied Dataframe. This is useful for grid-search optimization. + event_col: Name of the column in anndata that contains the subjects’ death observation. 1 if observed, 0 else (censored). If left as None, assume all individuals are uncensored. + ancillary: Choose to model the ancillary parameters. + If None or False, explicitly do not fit the ancillary parameters using any covariates. + If True, model the ancillary parameters with the same covariates as ``df``. + If DataFrame, provide covariates to model the ancillary parameters. Must be the same row count as ``df``. + If str, should be a formula + show_progress: since the fitter is iterative, show convergence diagnostics. Useful if convergence is failing. + weights_col: The name of the column in DataFrame that contains the weights for each subject. + robust: Compute the robust errors using the Huber sandwich estimator, aka Wei-Lin estimate. This does not handle ties, so if there are high number of ties, results may significantly differ. + initial_point: set the starting point for the iterative solver. entry_col: Column denoting when a subject entered the study, i.e. left-truncation. + formula: Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/ + If a formula is not provided, all variables in the dataframe are used (minus those used for other purposes like event_col, etc.) + fit_options: Additional keyword arguments to pass into the estimator. + Returns: Fitted WeibullAFTFitter. 
@@ -522,12 +544,41 @@ def weibull_aft( Examples: >>> import ehrapy as ep >>> adata = ep.dt.mimic_2(encoded=False) - >>> # Flip 'censor_fl' because 0 = death and 1 = censored >>> adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0) - >>> aft = ep.tl.weibull_aft(adata, "mort_day_censored", "censor_flg") + >>> adata = adata[:, ["mort_day_censored", "censor_flg"]] + >>> aft = ep.tl.weibull_aft(adata, duration_col="mort_day_censored", event_col="censor_flg") + >>> aft.print_summary() """ - return _regression_model(WeibullAFTFitter, adata, duration_col, event_col, entry_col, accept_zero_duration=False) + df = _regression_model_data_frame_preparation(adata, duration_col, accept_zero_duration=False) + + weibull_aft = WeibullAFTFitter( + alpha=alpha, + fit_intercept=fit_intercept, + penalizer=penalizer, + l1_ratio=l1_ratio, + model_ancillary=model_ancillary, + ) + + weibull_aft.fit( + df, + duration_col=duration_col, + event_col=event_col, + entry_col=entry_col, + ancillary=ancillary, + show_progress=show_progress, + weights_col=weights_col, + robust=robust, + initial_point=initial_point, + formula=formula, + fit_options=fit_options, + ) + + # Add the results to the AnnData object + if inplace: + _regression_model_populate_adata(adata, weibull_aft.summary, key_added_prefix) + + return weibull_aft def log_logistic_aft( @@ -566,6 +617,7 @@ def log_logistic_aft( >>> adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0) >>> llf = ep.tl.log_logistic_aft(adata, "mort_day_censored", "censor_flg") """ + return _regression_model( LogLogisticAFTFitter, adata, duration_col, event_col, entry_col, accept_zero_duration=False ) From 742d38ceb16519bbf41b8bbe15c1c46bf7453ebb Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:14:51 +0100 Subject: [PATCH 4/5] log_logistic update --- ehrapy/tools/_sa.py | 74 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 
insertions(+), 18 deletions(-) diff --git a/ehrapy/tools/_sa.py b/ehrapy/tools/_sa.py index 13b74a8f..395a54bf 100644 --- a/ehrapy/tools/_sa.py +++ b/ehrapy/tools/_sa.py @@ -585,13 +585,22 @@ def log_logistic_aft( adata: AnnData, duration_col: str, *, + inplace: bool = True, + key_added_prefix: str | None = None, alpha: float = 0.05, fit_intercept: bool = True, penalizer: float | np.ndarray = 0.0, l1_ratio: float = 0.0, model_ancillary: bool = False, - event_col: str = None, - entry_col: str = None, + event_col: str | None = None, + ancillary: bool | pd.DataFrame | str | None = None, + show_progress: bool = False, + weights_col: str | None = None, + robust: bool = False, + initial_point=None, + entry_col: str | None = None, + formula: str | None = None, + fit_options: dict | None = None, ) -> LogLogisticAFTFitter: """Fit the log logistic accelerated failure time regression for the survival function. The Log-Logistic Accelerated Failure Time (AFT) survival regression model is a powerful statistical tool employed in the analysis of time-to-event data. @@ -603,9 +612,29 @@ def log_logistic_aft( Args: adata: AnnData object with necessary columns `duration_col` and `event_col`. duration_col: Name of the column in the AnnData objects that contains the subjects’ lifetimes. - event_col: Name of the column in anndata that contains the subjects’ death observation. + inplace: Whether to modify the AnnData object in place. + key_added_prefix: Prefix to add to the column names in the AnnData object. An underscore will be added between the prefix and the column + alpha: The alpha value in the confidence intervals. + alpha: The alpha value in the confidence intervals. + fit_intercept: Whether to fit an intercept term in the model. + penalizer: Attach a penalty to the size of the coefficients during regression. This improves stability of the estimates and controls for high correlation between covariates. + l1_ratio: Specify what ratio to assign to a L1 vs L2 penalty. 
Same as scikit-learn. See penalizer above. + model_ancillary: set the model instance to always model the ancillary parameter with the supplied Dataframe. This is useful for grid-search optimization. + event_col: Name of the column in anndata that contains the subjects’ death observation. 1 if observed, 0 else (censored). If left as None, assume all individuals are uncensored. + ancillary: Choose to model the ancillary parameters. + If None or False, explicitly do not fit the ancillary parameters using any covariates. + If True, model the ancillary parameters with the same covariates as ``df``. + If DataFrame, provide covariates to model the ancillary parameters. Must be the same row count as ``df``. + If str, should be a formula + show_progress: since the fitter is iterative, show convergence diagnostics. Useful if convergence is failing. + weights_col: The name of the column in DataFrame that contains the weights for each subject. + robust: Compute the robust errors using the Huber sandwich estimator, aka Wei-Lin estimate. This does not handle ties, so if there are high number of ties, results may significantly differ. + initial_point: set the starting point for the iterative solver. entry_col: Column denoting when a subject entered the study, i.e. left-truncation. + formula: Use an R-style formula for modeling the dataset. See formula syntax: https://matthewwardrop.github.io/formulaic/basic/grammar/ + If a formula is not provided, all variables in the dataframe are used (minus those used for other purposes like event_col, etc.) + fit_options: Additional keyword arguments to pass into the estimator. Returns: Fitted LogLogisticAFTFitter. 
@@ -617,26 +646,35 @@ def log_logistic_aft( >>> adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0) >>> llf = ep.tl.log_logistic_aft(adata, "mort_day_censored", "censor_flg") """ + df = _regression_model_data_frame_preparation(adata, duration_col, accept_zero_duration=False) - return _regression_model( - LogLogisticAFTFitter, adata, duration_col, event_col, entry_col, accept_zero_duration=False + log_logistic_aft = LogLogisticAFTFitter( + alpha=alpha, + fit_intercept=fit_intercept, + penalizer=penalizer, + l1_ratio=l1_ratio, + model_ancillary=model_ancillary, ) + log_logistic_aft.fit( + df, + duration_col=duration_col, + event_col=event_col, + entry_col=entry_col, + ancillary=ancillary, + show_progress=show_progress, + weights_col=weights_col, + robust=robust, + initial_point=initial_point, + formula=formula, + fit_options=fit_options, + ) -def _regression_model( - model_class, adata: AnnData, duration_col: str, event_col: str, entry_col: str = None, accept_zero_duration=True -): - """Convenience function for regression models.""" - df = anndata_to_df(adata) - df = df.dropna() - - if not accept_zero_duration: - df.loc[df[duration_col] == 0, duration_col] += 1e-5 - - model = model_class() - model.fit(df, duration_col, event_col, entry_col=entry_col) + # Add the results to the AnnData object + if inplace: + _regression_model_populate_adata(adata, log_logistic_aft.summary, key_added_prefix) - return model + return log_logistic_aft def _univariate_model( From 02e343de3675138ae65b76498eea0948562e37b8 Mon Sep 17 00:00:00 2001 From: Carl Buchholz <32228189+aGuyLearning@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:28:55 +0100 Subject: [PATCH 5/5] updated log logistic example --- ehrapy/tools/_sa.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ehrapy/tools/_sa.py b/ehrapy/tools/_sa.py index 395a54bf..b8e23f03 100644 --- a/ehrapy/tools/_sa.py +++ b/ehrapy/tools/_sa.py @@ -644,7 +644,8 @@ def log_logistic_aft( >>> adata = 
ep.dt.mimic_2(encoded=False) >>> # Flip 'censor_fl' because 0 = death and 1 = censored >>> adata[:, ["censor_flg"]].X = np.where(adata[:, ["censor_flg"]].X == 0, 1, 0) - >>> llf = ep.tl.log_logistic_aft(adata, "mort_day_censored", "censor_flg") + >>> adata = adata[:, ["mort_day_censored", "censor_flg"]] + >>> llf = ep.tl.log_logistic_aft(adata, duration_col="mort_day_censored", event_col="censor_flg") """ df = _regression_model_data_frame_preparation(adata, duration_col, accept_zero_duration=False)