the spline scipy options were not propagating through

CamDavidsonPilon · Mar 6, 2020 · 5393e36 · 5393e36
1 parent 479fb78
commit 5393e36
Show file tree

Hide file tree

Showing 6 changed files with 12 additions and 51 deletions.
diff --git a/README.md b/README.md
@@ -15,52 +15,16 @@ But outside of medicine and actuarial science, there are many other interesting
 - SaaS providers are interested in measuring subscriber lifetimes, or time to some first action
 - inventory stock out is a censoring event for true "demand" of a good.
 - sociologists are interested in measuring political parties' lifetimes, or relationships, or marriages
-- analyzing [Godwin's law](https://raw.githubusercontent.com/lukashalim/GODWIN/master/Kaplan-Meier-Godwin.png) in Reddit comments
 - A/B tests to determine how long it takes different groups to perform an action.
 
 *lifelines* is a pure Python implementation of the best parts of survival analysis. We'd love to hear if you are using *lifelines*, please leave an Issue and let us know your thoughts on the library.
 
-## Installation:
 
-You can install *lifelines* using
+## Documentation and intro to survival analysis
 
-       pip install lifelines
+If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please read the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html)
 
-or conda install:
-
-       conda install -c conda-forge lifelines
-
-Or getting the bleeding edge version with:
-
-       pip install --upgrade --no-deps git+https://github.com/CamDavidsonPilon/lifelines.git
-
-from the command line.
-
-### Installation Issues?
-
-See the common [problems/solutions for installing lifelines](https://github.com/CamDavidsonPilon/lifelines/issues?utf8=%E2%9C%93&q=label%3Ainstallation+).
-
-
-## *lifelines* documentation and an intro to survival analysis
-
-If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please check out the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html)
-
-Example:
-```python
-from lifelines import KaplanMeierFitter
-
-durations = [11, 74, 71, 76, 28, 92, 89, 48, 90, 39, 63, 36, 54, 64, 34, 73, 94, 37, 56, 76]
-event_observed = [True, True, False, True, True, True, True, False, False, True, True,
-                  True, True, True, True, True, False, True, False, True]
-
-kmf = KaplanMeierFitter()
-kmf.fit(durations, event_observed)
-kmf.plot()
-```
-
-<img src="https://imgur.com/d4Gi5J0.png" width="600">
-
-## Contacting & troubleshooting
+## Contact
  - There is a [Gitter](https://gitter.im/python-lifelines/) channel available.
  - Some users have posted common questions at [stats.stackexchange.com](https://stats.stackexchange.com/search?tab=votes&q=%22lifelines%22%20is%3aquestion)
  - creating an issue in the [Github repository](https://github.com/camdavidsonpilon/lifelines).
@@ -72,8 +36,6 @@ You can find the roadmap for lifelines [here](https://www.notion.so/camdp/6e2965
 
 See our [Contributing](https://github.com/CamDavidsonPilon/lifelines/blob/master/.github/CONTRIBUTING.md) guidelines.
 
--------------------------------------------------------------------------------
-
 ## Citing lifelines
 
 You can use this badge below to generate a DOI and reference text for the latest related version of lifelines:

diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py
@@ -62,14 +62,16 @@ class _PHSplineFitter(ParametricRegressionFitter, SplineFitterMixin, Proportiona
     """
 
     _KNOWN_MODEL = True
+    _scipy_fit_method = "SLSQP"
+    _scipy_fit_options = {"maxiter": 1000, "iprint": 100}
 
     def __init__(self, n_baseline_knots=1, *args, **kwargs):
         self.n_baseline_knots = n_baseline_knots
         self._fitted_parameter_names = ["beta_"] + ["phi%d_" % i for i in range(1, self.n_baseline_knots + 2)]
         super(_PHSplineFitter, self).__init__(*args, **kwargs)
 
     def set_knots(self, T, E):
-        self.knots = np.percentile(T[E.astype(bool).values], np.linspace(20, 80, self.n_baseline_knots + 2))
+        self.knots = np.percentile(T[E.astype(bool).values], np.linspace(5, 95, self.n_baseline_knots + 2))
         return
 
     def _pre_fit_model(self, Ts, E, df):
@@ -78,10 +80,9 @@ def _pre_fit_model(self, Ts, E, df):
     def _create_initial_point(self, Ts, E, entries, weights, Xs):
         return [
             {
-                **{"beta_": np.zeros(len(Xs.mappings["beta_"])), "phi1_": np.array([0.5]), "phi2_": np.array([-0.75])},
+                **{"beta_": np.zeros(len(Xs.mappings["beta_"])), "phi1_": np.array([0.05]), "phi2_": np.array([-0.05])},
                 **{"phi%d_" % i: np.array([0.0]) for i in range(3, self.n_baseline_knots + 2)},
-            },
-            super(_PHSplineFitter, self)._create_initial_point(Ts, E, entries, weights, Xs),
+            }
         ]
 
     def _cumulative_hazard(self, params, T, Xs):
@@ -724,7 +725,6 @@ def _fit_model_spline(
             df, "T", "E", weights_col="weights", show_progress=show_progress, robust=self.robust, regressors=regressors
         )
         self._ll_null_ = cph._ll_null
-        cph.print_summary()
         baseline_hazard_ = cph.predict_hazard(df.mean()).rename(columns={0: "baseline hazard"})
         baseline_cumulative_hazard_ = cph.predict_cumulative_hazard(df.mean()).rename(
             columns={0: "baseline cumulative hazard"}

diff --git a/lifelines/fitters/mixins.py b/lifelines/fitters/mixins.py
@@ -10,9 +10,6 @@
 
 
 class SplineFitterMixin:
-    # TODO: this should implement the knot choosing logic.
-    _scipy_fit_method = "SLSQP"
-
     @staticmethod
     def relu(x: np.array):
         return anp.maximum(0, x)

diff --git a/lifelines/fitters/spline_fitter.py b/lifelines/fitters/spline_fitter.py
@@ -73,6 +73,8 @@ class SplineFitter(KnownModelParametricUnivariateFitter, SplineFitterMixin):
         The locations of the cubic breakpoints.
 
     """
+    _scipy_fit_method = "SLSQP"
+    _scipy_fit_options = {"maxiter": 1000}
 
     def __init__(self, knot_locations: np.array, *args, **kwargs):
         self.knot_locations = knot_locations

diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py
@@ -1747,7 +1747,7 @@ def test_duration_vector_can_be_normalized_up_to_an_intercept(self, regression_m
                         check_less_precise=2,
                     )
                 else:
-                    assert_series_equal(hazards, hazards_norm, check_less_precise=2)
+                    assert_series_equal(hazards, hazards_norm, check_less_precise=1)
 
     def test_prediction_methods_respect_index(self, regression_models, rossi):
         X = rossi.iloc[:4].sort_index(ascending=False)

diff --git a/perf_tests/cp_perf_test.py b/perf_tests/cp_perf_test.py
@@ -13,7 +13,7 @@
     reps = 1
     df = load_rossi()
     df = pd.concat([df] * reps)
-    cp_breslow = CoxPHFitter(penalizer=0.01, l1_ratio=0.0, baseline_estimation_method="spline")
+    cp_breslow = CoxPHFitter(penalizer=0.1, l1_ratio=1.0, baseline_estimation_method="spline")
     start_time = time.time()
     cp_breslow.fit(df, duration_col="week", event_col="arrest", show_progress=True)
     print("--- %s seconds ---" % (time.time() - start_time))