From 5393e364be9f4d52728e78525b24f611f79cea3a Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Thu, 5 Mar 2020 11:51:11 -0500 Subject: [PATCH] the spline scipy options were not propagating through --- README.md | 44 ++---------------------------- lifelines/fitters/coxph_fitter.py | 10 +++---- lifelines/fitters/mixins.py | 3 -- lifelines/fitters/spline_fitter.py | 2 ++ lifelines/tests/test_estimation.py | 2 +- perf_tests/cp_perf_test.py | 2 +- 6 files changed, 12 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 8ace08768..7c1a87f3e 100644 --- a/README.md +++ b/README.md @@ -15,52 +15,16 @@ But outside of medicine and actuarial science, there are many other interesting - SaaS providers are interested in measuring subscriber lifetimes, or time to some first action - inventory stock out is a censoring event for true "demand" of a good. - sociologists are interested in measuring political parties' lifetimes, or relationships, or marriages -- analyzing [Godwin's law](https://raw.githubusercontent.com/lukashalim/GODWIN/master/Kaplan-Meier-Godwin.png) in Reddit comments - A/B tests to determine how long it takes different groups to perform an action. *lifelines* is a pure Python implementation of the best parts of survival analysis. We'd love to hear if you are using *lifelines*, please leave an Issue and let us know your thoughts on the library. -## Installation: -You can install *lifelines* using +## Documentation and intro to survival analysis - pip install lifelines +If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please read the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html) -or conda install: - - conda install -c conda-forge lifelines - -Or getting the bleeding edge version with: - - pip install --upgrade --no-deps git+https://github.com/CamDavidsonPilon/lifelines.git - -from the command line. - -### Installation Issues? 
- -See the common [problems/solutions for installing lifelines](https://github.com/CamDavidsonPilon/lifelines/issues?utf8=%E2%9C%93&q=label%3Ainstallation+). - - -## *lifelines* documentation and an intro to survival analysis - -If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please check out the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html) - -Example: -```python -from lifelines import KaplanMeierFitter - -durations = [11, 74, 71, 76, 28, 92, 89, 48, 90, 39, 63, 36, 54, 64, 34, 73, 94, 37, 56, 76] -event_observed = [True, True, False, True, True, True, True, False, False, True, True, - True, True, True, True, True, False, True, False, True] - -kmf = KaplanMeierFitter() -kmf.fit(durations, event_observed) -kmf.plot() -``` - - - -## Contacting & troubleshooting +## Contact - There is a [Gitter](https://gitter.im/python-lifelines/) channel available. - Some users have posted common questions at [stats.stackexchange.com](https://stats.stackexchange.com/search?tab=votes&q=%22lifelines%22%20is%3aquestion) - creating an issue in the [Github repository](https://github.com/camdavidsonpilon/lifelines). @@ -72,8 +36,6 @@ You can find the roadmap for lifelines [here](https://www.notion.so/camdp/6e2965 See our [Contributing](https://github.com/CamDavidsonPilon/lifelines/blob/master/.github/CONTRIBUTING.md) guidelines. 
-------------------------------------------------------------------------------- - ## Citing lifelines You can use this badge below to generate a DOI and reference text for the latest related version of lifelines: diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index 019c90424..537ecf88b 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -62,6 +62,8 @@ class _PHSplineFitter(ParametricRegressionFitter, SplineFitterMixin, Proportiona """ _KNOWN_MODEL = True + _scipy_fit_method = "SLSQP" + _scipy_fit_options = {"maxiter": 1000, "iprint": 100} def __init__(self, n_baseline_knots=1, *args, **kwargs): self.n_baseline_knots = n_baseline_knots @@ -69,7 +71,7 @@ def __init__(self, n_baseline_knots=1, *args, **kwargs): super(_PHSplineFitter, self).__init__(*args, **kwargs) def set_knots(self, T, E): - self.knots = np.percentile(T[E.astype(bool).values], np.linspace(20, 80, self.n_baseline_knots + 2)) + self.knots = np.percentile(T[E.astype(bool).values], np.linspace(5, 95, self.n_baseline_knots + 2)) return def _pre_fit_model(self, Ts, E, df): @@ -78,10 +80,9 @@ def _pre_fit_model(self, Ts, E, df): def _create_initial_point(self, Ts, E, entries, weights, Xs): return [ { - **{"beta_": np.zeros(len(Xs.mappings["beta_"])), "phi1_": np.array([0.5]), "phi2_": np.array([-0.75])}, + **{"beta_": np.zeros(len(Xs.mappings["beta_"])), "phi1_": np.array([0.05]), "phi2_": np.array([-0.05])}, **{"phi%d_" % i: np.array([0.0]) for i in range(3, self.n_baseline_knots + 2)}, - }, - super(_PHSplineFitter, self)._create_initial_point(Ts, E, entries, weights, Xs), + } ] def _cumulative_hazard(self, params, T, Xs): @@ -724,7 +725,6 @@ def _fit_model_spline( df, "T", "E", weights_col="weights", show_progress=show_progress, robust=self.robust, regressors=regressors ) self._ll_null_ = cph._ll_null - cph.print_summary() baseline_hazard_ = cph.predict_hazard(df.mean()).rename(columns={0: "baseline hazard"}) 
baseline_cumulative_hazard_ = cph.predict_cumulative_hazard(df.mean()).rename( columns={0: "baseline cumulative hazard"} diff --git a/lifelines/fitters/mixins.py b/lifelines/fitters/mixins.py index a0b9a0eb3..ce761ec28 100644 --- a/lifelines/fitters/mixins.py +++ b/lifelines/fitters/mixins.py @@ -10,9 +10,6 @@ class SplineFitterMixin: - # TODO: this should implement the knot choosing logic. - _scipy_fit_method = "SLSQP" - @staticmethod def relu(x: np.array): return anp.maximum(0, x) diff --git a/lifelines/fitters/spline_fitter.py b/lifelines/fitters/spline_fitter.py index 376a94a83..dccb88a94 100644 --- a/lifelines/fitters/spline_fitter.py +++ b/lifelines/fitters/spline_fitter.py @@ -73,6 +73,8 @@ class SplineFitter(KnownModelParametricUnivariateFitter, SplineFitterMixin): The locations of the cubic breakpoints. """ + _scipy_fit_method = "SLSQP" + _scipy_fit_options = {"maxiter": 1000} def __init__(self, knot_locations: np.array, *args, **kwargs): self.knot_locations = knot_locations diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index 5202a70c9..a2a73080e 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -1747,7 +1747,7 @@ def test_duration_vector_can_be_normalized_up_to_an_intercept(self, regression_m check_less_precise=2, ) else: - assert_series_equal(hazards, hazards_norm, check_less_precise=2) + assert_series_equal(hazards, hazards_norm, check_less_precise=1) def test_prediction_methods_respect_index(self, regression_models, rossi): X = rossi.iloc[:4].sort_index(ascending=False) diff --git a/perf_tests/cp_perf_test.py b/perf_tests/cp_perf_test.py index 62f515e84..c0265201d 100644 --- a/perf_tests/cp_perf_test.py +++ b/perf_tests/cp_perf_test.py @@ -13,7 +13,7 @@ reps = 1 df = load_rossi() df = pd.concat([df] * reps) - cp_breslow = CoxPHFitter(penalizer=0.01, l1_ratio=0.0, baseline_estimation_method="spline") + cp_breslow = CoxPHFitter(penalizer=0.1, l1_ratio=1.0, 
baseline_estimation_method="spline") start_time = time.time() cp_breslow.fit(df, duration_col="week", event_col="arrest", show_progress=True) print("--- %s seconds ---" % (time.time() - start_time))