Skip to content

Commit

Permalink
Merge pull request #141 from washingtonpost/release/2.2.5
Browse files Browse the repository at this point in the history
Release/2.2.5 🎉
  • Loading branch information
dmnapolitano authored Nov 7, 2024
2 parents f141f7b + e616ecf commit 917df4e
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

# 2.2.5 (11/7/2024)
- fix: hot fixes for the extrapolation step + using the presidential margins to infer a ticket splitting estimate in each house / senate race [#140](https://github.com/washingtonpost/elex-live-model/pull/140)

# 2.2.4 (11/5/2024)
- fix: truncation can fail catastrophically when % reporting is too low [#138](https://github.com/washingtonpost/elex-live-model/pull/138)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
LONG_DESCRIPTION = f.read()

# The full version, including alpha/beta/rc tags
RELEASE = "2.2.4"
RELEASE = "2.2.5"
# The short X.Y version
VERSION = ".".join(RELEASE.split(".")[:2])

Expand Down
30 changes: 29 additions & 1 deletion src/elexmodel/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import defaultdict
from io import StringIO

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -339,6 +340,31 @@ def get_estimates(
versioned_data_handler = None
else:
versioned_data_handler = None

if model_parameters.get("correct_from_presidential", False):
s3_client = s3.S3CsvUtil(TARGET_BUCKET)
baseline_path = f"{S3_FILE_PATH}/{self.election_id}/data/P/data_county.csv"
results_path = f"{S3_FILE_PATH}/{self.election_id}/results/P/county/current.csv"
predictions_path = f"{S3_FILE_PATH}/{self.election_id}/predictions/P/county/unit_data/current.csv"
pres_baseline = pd.read_csv(StringIO(s3_client.get(baseline_path)), dtype={"geographic_unit_fips": str})
pres_baseline["baseline_normalized_margin"] = (pres_baseline.baseline_dem - pres_baseline.baseline_gop) / (
pres_baseline.baseline_dem + pres_baseline.baseline_gop
)
pres_results = pd.read_csv(StringIO(s3_client.get(results_path)), dtype={"geographic_unit_fips": str})
pres_predictions = pd.read_csv(
StringIO(s3_client.get(predictions_path)), dtype={"geographic_unit_fips": str}
)
pres_predictions = pres_predictions.merge(
pres_results[["geographic_unit_fips", "results_weights"]], on="geographic_unit_fips", how="left"
)
pres_predictions = pres_predictions.merge(
pres_baseline[["geographic_unit_fips", "baseline_normalized_margin"]],
on="geographic_unit_fips",
how="left",
)
else:
pres_predictions = None

LOG.info("Running model for %s", self.election_id)
LOG.info(
"Model parameters: \n prediction intervals: %s, percent reporting threshold: %s, \
Expand All @@ -359,7 +385,9 @@ def get_estimates(
self.model = GaussianElectionModel(model_settings=model_settings)
elif pi_method == "bootstrap":
self.model = BootstrapElectionModel(
model_settings=model_settings, versioned_data_handler=versioned_data_handler
model_settings=model_settings,
versioned_data_handler=versioned_data_handler,
pres_predictions=pres_predictions,
)

minimum_reporting_units_max = 0
Expand Down
8 changes: 6 additions & 2 deletions src/elexmodel/handlers/data/VersionedData.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,11 @@ def compute_estimated_margin(df):
# because the AP adjusted its model after the fact. We correct for this here.
# we recompute the percent_expected_vote using the last reported value as the max
perc_expected_vote_corr = np.divide(
results_turnout, results_turnout[-1], out=np.zeros_like(results_turnout), where=results_turnout[-1] != 0
results_turnout,
results_turnout[-1],
out=np.zeros_like(results_turnout),
where=results_turnout[-1] != 0,
casting="unsafe",
)

# check if perc_expected_vote_corr is monotone increasing (if not, give up and don't try to estimate a margin)
Expand Down Expand Up @@ -190,7 +194,7 @@ def compute_estimated_margin(df):

est_margins = observed_norm_margin * observed_vote + observed_batch_margin * (percs - observed_vote)
est_margins = np.divide(
est_margins, percs, where=percs != 0, out=np.zeros_like(est_margins)
est_margins, percs, where=percs != 0, out=np.zeros_like(est_margins), casting="unsafe"
) # Handle div-by-zero

# Return a DataFrame with the multi-index (geographic_unit_fips, perc)
Expand Down
50 changes: 49 additions & 1 deletion src/elexmodel/models/BootstrapElectionModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class BootstrapElectionModel(BaseElectionModel):
and the epsilons are contest (state/district) level random effects.
"""

def __init__(self, model_settings={}, versioned_data_handler=None):
def __init__(self, model_settings={}, versioned_data_handler=None, pres_predictions=None):
super().__init__(model_settings)
self.B = model_settings.get("B", 500) # number of bootstrap samples
self.strata = model_settings.get("strata", ["county_classification"]) # columns to stratify the data by
Expand All @@ -61,6 +61,7 @@ def __init__(self, model_settings={}, versioned_data_handler=None):
"agg_model_hard_threshold", True
) # use sigmoid or hard threshold when calculating agg model
self.district_election = model_settings.get("district_election", False)

self.lambda_ = model_settings.get("lambda_", None) # regularization parameter for OLS

# save versioned data for later use
Expand All @@ -70,6 +71,10 @@ def __init__(self, model_settings={}, versioned_data_handler=None):
self.extrapolate_std_method = model_settings.get("extrapolate_std_method", "std")
self.max_dist_to_observed = model_settings.get("max_dist_to_observed", 5)

# save presidential predictions for later use
self.pres_predictions = pres_predictions
self.correct_from_presidential = model_settings.get("correct_from_presidential", False)

# upper and lower bounds for the quantile regression which define the strata distributions
# these make sure that we can control the worst cases for the distributions in case we
# haven't seen enough data yet
Expand Down Expand Up @@ -1283,6 +1288,49 @@ def compute_bootstrap_errors(
extrap_filter
]

if self.correct_from_presidential:
nonreporting_units["geographic_unit_fips_p"] = nonreporting_units.geographic_unit_fips.apply(
lambda x: x.split("_")[1]
)
nonreporting_units = nonreporting_units.merge(
self.pres_predictions,
left_on="geographic_unit_fips_p",
right_on="geographic_unit_fips",
how="left",
suffixes=("", "_pres"),
)

# adjust results_normalized_margin_pres to account for split counties

nonreporting_units["margin_adj"] = (
nonreporting_units.baseline_normalized_margin - nonreporting_units.baseline_normalized_margin_pres
)

nonreporting_units["results_normalized_margin_pres"] = (
nonreporting_units.results_margin_pres / nonreporting_units.results_weights_pres
+ nonreporting_units.margin_adj
)
nonreporting_units["pred_normalized_margin_pres"] = (
nonreporting_units.pred_margin / nonreporting_units.pred_turnout + nonreporting_units.margin_adj
)

nonreporting_units["pred_normalized_margin"] = np.mean(
y_test_pred_B.clip(min=y_partial_reporting_lower, max=y_partial_reporting_upper), axis=1
)

nonreporting_units["margin_gap"] = (
nonreporting_units.results_normalized_margin - nonreporting_units.results_normalized_margin_pres
)

nonreporting_units["pred_normalized_margin_new"] = (
nonreporting_units.pred_normalized_margin_pres + nonreporting_units.margin_gap
)
adjustment = (
nonreporting_units["pred_normalized_margin_new"].values
- nonreporting_units["pred_normalized_margin"].values
)
y_test_pred_B[~np.isnan(adjustment)] += adjustment[~np.isnan(adjustment)].reshape(-1, 1)

y_test_pred_B = y_test_pred_B.clip(min=y_partial_reporting_lower, max=y_partial_reporting_upper)

# \tilde{y_i}^{b} * \tilde{z_i}^{b}
Expand Down

0 comments on commit 917df4e

Please sign in to comment.