Skip to content

Commit

Permalink
the spline scipy options were not propagating through
Browse files Browse the repository at this point in the history
  • Loading branch information
CamDavidsonPilon committed Mar 6, 2020
1 parent 479fb78 commit 5393e36
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 51 deletions.
44 changes: 3 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,52 +15,16 @@ But outside of medicine and actuarial science, there are many other interesting
- SaaS providers are interested in measuring subscriber lifetimes, or time to some first action
- inventory stock out is a censoring event for true "demand" of a good.
- sociologists are interested in measuring political parties' lifetimes, or relationships, or marriages
- analyzing [Godwin's law](https://raw.githubusercontent.com/lukashalim/GODWIN/master/Kaplan-Meier-Godwin.png) in Reddit comments
- A/B tests to determine how long it takes different groups to perform an action.

*lifelines* is a pure Python implementation of the best parts of survival analysis. We'd love to hear if you are using *lifelines*; please leave an Issue and let us know your thoughts on the library.

## Installation:

You can install *lifelines* using
## Documentation and intro to survival analysis

pip install lifelines
If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please read the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html)

or conda install:

conda install -c conda-forge lifelines

Or getting the bleeding edge version with:

pip install --upgrade --no-deps git+https://github.com/CamDavidsonPilon/lifelines.git

from the command line.

### Installation Issues?

See the common [problems/solutions for installing lifelines](https://github.com/CamDavidsonPilon/lifelines/issues?utf8=%E2%9C%93&q=label%3Ainstallation+).


## *lifelines* documentation and an intro to survival analysis

If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please check out the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html)

Example:
```python
from lifelines import KaplanMeierFitter

durations = [11, 74, 71, 76, 28, 92, 89, 48, 90, 39, 63, 36, 54, 64, 34, 73, 94, 37, 56, 76]
event_observed = [True, True, False, True, True, True, True, False, False, True, True,
True, True, True, True, True, False, True, False, True]

kmf = KaplanMeierFitter()
kmf.fit(durations, event_observed)
kmf.plot()
```

<img src="https://imgur.com/d4Gi5J0.png" width="600">

## Contacting & troubleshooting
## Contact
- There is a [Gitter](https://gitter.im/python-lifelines/) channel available.
- Some users have posted common questions at [stats.stackexchange.com](https://stats.stackexchange.com/search?tab=votes&q=%22lifelines%22%20is%3aquestion)
- You can also create an issue in the [GitHub repository](https://github.com/camdavidsonpilon/lifelines).
Expand All @@ -72,8 +36,6 @@ You can find the roadmap for lifelines [here](https://www.notion.so/camdp/6e2965

See our [Contributing](https://github.com/CamDavidsonPilon/lifelines/blob/master/.github/CONTRIBUTING.md) guidelines.

-------------------------------------------------------------------------------

## Citing lifelines

You can use this badge below to generate a DOI and reference text for the latest related version of lifelines:
Expand Down
10 changes: 5 additions & 5 deletions lifelines/fitters/coxph_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,16 @@ class _PHSplineFitter(ParametricRegressionFitter, SplineFitterMixin, Proportiona
"""

_KNOWN_MODEL = True
_scipy_fit_method = "SLSQP"
_scipy_fit_options = {"maxiter": 1000, "iprint": 100}

def __init__(self, n_baseline_knots=1, *args, **kwargs):
self.n_baseline_knots = n_baseline_knots
self._fitted_parameter_names = ["beta_"] + ["phi%d_" % i for i in range(1, self.n_baseline_knots + 2)]
super(_PHSplineFitter, self).__init__(*args, **kwargs)

def set_knots(self, T, E):
self.knots = np.percentile(T[E.astype(bool).values], np.linspace(20, 80, self.n_baseline_knots + 2))
self.knots = np.percentile(T[E.astype(bool).values], np.linspace(5, 95, self.n_baseline_knots + 2))
return

def _pre_fit_model(self, Ts, E, df):
Expand All @@ -78,10 +80,9 @@ def _pre_fit_model(self, Ts, E, df):
def _create_initial_point(self, Ts, E, entries, weights, Xs):
return [
{
**{"beta_": np.zeros(len(Xs.mappings["beta_"])), "phi1_": np.array([0.5]), "phi2_": np.array([-0.75])},
**{"beta_": np.zeros(len(Xs.mappings["beta_"])), "phi1_": np.array([0.05]), "phi2_": np.array([-0.05])},
**{"phi%d_" % i: np.array([0.0]) for i in range(3, self.n_baseline_knots + 2)},
},
super(_PHSplineFitter, self)._create_initial_point(Ts, E, entries, weights, Xs),
}
]

def _cumulative_hazard(self, params, T, Xs):
Expand Down Expand Up @@ -724,7 +725,6 @@ def _fit_model_spline(
df, "T", "E", weights_col="weights", show_progress=show_progress, robust=self.robust, regressors=regressors
)
self._ll_null_ = cph._ll_null
cph.print_summary()
baseline_hazard_ = cph.predict_hazard(df.mean()).rename(columns={0: "baseline hazard"})
baseline_cumulative_hazard_ = cph.predict_cumulative_hazard(df.mean()).rename(
columns={0: "baseline cumulative hazard"}
Expand Down
3 changes: 0 additions & 3 deletions lifelines/fitters/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@


class SplineFitterMixin:
# TODO: this should implement the knot choosing logic.
_scipy_fit_method = "SLSQP"

@staticmethod
def relu(x: np.array):
return anp.maximum(0, x)
Expand Down
2 changes: 2 additions & 0 deletions lifelines/fitters/spline_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class SplineFitter(KnownModelParametricUnivariateFitter, SplineFitterMixin):
The locations of the cubic breakpoints.
"""
_scipy_fit_method = "SLSQP"
_scipy_fit_options = {"maxiter": 1000}

def __init__(self, knot_locations: np.array, *args, **kwargs):
self.knot_locations = knot_locations
Expand Down
2 changes: 1 addition & 1 deletion lifelines/tests/test_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1747,7 +1747,7 @@ def test_duration_vector_can_be_normalized_up_to_an_intercept(self, regression_m
check_less_precise=2,
)
else:
assert_series_equal(hazards, hazards_norm, check_less_precise=2)
assert_series_equal(hazards, hazards_norm, check_less_precise=1)

def test_prediction_methods_respect_index(self, regression_models, rossi):
X = rossi.iloc[:4].sort_index(ascending=False)
Expand Down
2 changes: 1 addition & 1 deletion perf_tests/cp_perf_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
reps = 1
df = load_rossi()
df = pd.concat([df] * reps)
cp_breslow = CoxPHFitter(penalizer=0.01, l1_ratio=0.0, baseline_estimation_method="spline")
cp_breslow = CoxPHFitter(penalizer=0.1, l1_ratio=1.0, baseline_estimation_method="spline")
start_time = time.time()
cp_breslow.fit(df, duration_col="week", event_col="arrest", show_progress=True)
print("--- %s seconds ---" % (time.time() - start_time))
Expand Down

0 comments on commit 5393e36

Please sign in to comment.