From 3fd7851708623e16fae6b1875a8e0a1d64d725b9 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 11 Oct 2024 17:10:01 -0700 Subject: [PATCH] adding survey choice to choices_df in interaction_sample --- activitysim/abm/models/location_choice.py | 35 ----------- activitysim/abm/models/trip_destination.py | 14 +++-- .../abm/models/util/tour_destination.py | 33 ----------- activitysim/core/configuration/logit.py | 18 +++--- activitysim/core/estimation.py | 58 +++++++++++++++++++ activitysim/core/interaction_sample.py | 46 +++++++++++++++ 6 files changed, 121 insertions(+), 83 deletions(-) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 98c01288c..26fa42187 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -491,41 +491,6 @@ def run_location_sample( trace_label=trace_label, ) - # adding observed choice to alt set when running in estimation mode - if estimator: - # grabbing survey values - survey_persons = estimation.manager.get_survey_table("persons") - if "school_location" in trace_label: - survey_choices = survey_persons["school_zone_id"].reset_index() - elif ("workplace_location" in trace_label) and ("external" not in trace_label): - survey_choices = survey_persons["workplace_zone_id"].reset_index() - else: - return choices - survey_choices.columns = ["person_id", "alt_dest"] - survey_choices = survey_choices[ - survey_choices["person_id"].isin(choices.index) - & (survey_choices.alt_dest > 0) - ] - # merging survey destination into table if not available - joined_data = survey_choices.merge( - choices, on=["person_id", "alt_dest"], how="left", indicator=True - ) - missing_rows = joined_data[joined_data["_merge"] == "left_only"] - missing_rows["pick_count"] = 1 - if len(missing_rows) > 0: - new_choices = missing_rows[ - ["person_id", "alt_dest", "prob", "pick_count"] - ].set_index("person_id") - choices = choices.append(new_choices, 
ignore_index=False).sort_index() - # making prob 0 for missing rows so it does not influence model decision - choices["prob"] = choices["prob"].fillna(0) - # sort by person_id and alt_dest - choices = ( - choices.reset_index() - .sort_values(by=["person_id", "alt_dest"]) - .set_index("person_id") - ) - return choices diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 2b6b5a2ff..95b6ccde2 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -173,15 +173,17 @@ def _destination_sample( ) sample_size = model_settings.SAMPLE_SIZE - if state.settings.disable_destination_sampling or ( - estimator and estimator.want_unsampled_alternatives - ): - # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count + if estimator: + sample_size = model_settings.ESTIMATION_SAMPLE_SIZE logger.info( - f"Estimation mode for {trace_label} using " - f"unsampled alternatives short_circuit_choices" + f"Estimation mode for {trace_label} using sample size of {sample_size}" ) + + if state.settings.disable_destination_sampling: sample_size = 0 + logger.info( + f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set" + ) locals_dict = state.get_global_constants().copy() locals_dict.update(model_settings.CONSTANTS) diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index 8feb6d55d..d07f27e4e 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -181,39 +181,6 @@ def destination_sample( trace_label=trace_label, ) - # adding observed choice to alt set when running in estimation mode - if estimator: - # grabbing survey values - survey_tours = estimation.manager.get_survey_table("tours") - survey_choices = survey_tours[["destination", "person_id"]].reset_index() - survey_choices.columns = ["tour_id", 
alt_dest_col_name, "person_id"] - survey_choices = survey_choices[ - survey_choices["tour_id"].isin(choices.index) - & (survey_choices[alt_dest_col_name] > 0) - ] - # merging survey destination into table if not available - joined_data = survey_choices.merge( - choices, - on=["tour_id", alt_dest_col_name, "person_id"], - how="left", - indicator=True, - ) - missing_rows = joined_data[joined_data["_merge"] == "left_only"] - missing_rows["pick_count"] = 1 - if len(missing_rows) > 0: - new_choices = missing_rows[ - ["tour_id", alt_dest_col_name, "prob", "pick_count", "person_id"] - ].set_index("tour_id") - choices = choices.append(new_choices, ignore_index=False).sort_index() - # making prob 0 for missing rows so it does not influence model decision - choices["prob"] = choices["prob"].fillna(0) - # sort by tour_id and alt_dest - choices = ( - choices.reset_index() - .sort_values(by=["tour_id", alt_dest_col_name]) - .set_index("tour_id") - ) - return choices diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index d03bcab77..94233db26 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -187,6 +187,15 @@ class LocationComponentSettings(BaseLogitComponentSettings): SAMPLE_SIZE: int """This many candidate alternatives will be sampled for each choice.""" + ESTIMATION_SAMPLE_SIZE: int = 0 + """ + The number of alternatives to sample for estimation mode. + If zero, then all alternatives are used. + Truth alternative will be included in the sample. + Larch does not yet support sampling alternatives for estimation, + but this setting is still helpful for estimation mode runtime. 
+ """ + LOGSUM_SETTINGS: Path """Settings for the logsum computation.""" @@ -233,15 +242,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): ORIG_ZONE_ID: str | None = None """This setting appears to do nothing...""" - ESTIMATION_SAMPLE_SIZE: int = 0 - """ - The number of alternatives to sample for estimation mode. - If zero, then all alternatives are used. - Truth alternative will be included in the sample. - Larch does not yet support sampling alternatives for estimation, - but this setting is still helpful for estimation mode runtime. - """ - class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid"): MODE_CHOICE_LOGSUM_COLUMN_NAME: str | None = None diff --git a/activitysim/core/estimation.py b/activitysim/core/estimation.py index b79618509..73c65087c 100644 --- a/activitysim/core/estimation.py +++ b/activitysim/core/estimation.py @@ -933,5 +933,63 @@ def get_survey_values(self, model_values, table_name, column_names): return values[column_name] if column_name else values + def get_survey_destination_chocies(self, state, choosers, trace_label): + """ + Returning the survey choices for the destination choice model. + This gets called from inside interaction_sample and is used to + ensure the choices include the override choices when sampling alternatives. + + Parameters + ---------- + state : workflow.State + trace_label : str + The model name. + + Returns + ------- + pd.Series : The survey choices for the destination choice model. 
+ """ + model = trace_label.split(".")[0] + if model == "school_location": + survey_choices = manager.get_survey_values( + choosers.index, "persons", "school_zone_id" + ) + elif model == "workplace_location": + survey_choices = manager.get_survey_values( + choosers.index, "persons", "workplace_zone_id" + ) + elif model in [ + "joint_tour_destination", + "atwork_subtour_destination", + "non_mandatory_tour_destination", + ]: + survey_choices = manager.get_survey_values( + choosers.index, "tours", "destination" + ) + elif model == "trip_destination": + survey_choices = manager.get_survey_values( + choosers.index, "trips", "destination" + ) + elif model == "parking_location": + # need to grab parking location column name from its settings + from activitysim.abm.models.parking_location_choice import ( + ParkingLocationSettings, + ) + + model_settings = ParkingLocationSettings.read_settings_file( + state.filesystem, + "parking_location_choice.yaml", + ) + survey_choices = manager.get_survey_values( + choosers.index, "trips", model_settings.ALT_DEST_COL_NAME + ) + else: + # since this function is called from inside interaction_sample, + # we don't want to return anything for other models that aren't destination choice + # not implemented models include scheduling models and tour_od_choice + logger.debug(f"Not grabbing survey choices for {model}.") + return None + return survey_choices + manager = EstimationManager() diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index cabcbeb64..83ed39480 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -8,6 +8,7 @@ import pandas as pd from activitysim.core import ( + estimation, chunk, interaction_simulate, logit, @@ -501,6 +502,51 @@ def _interaction_sample( chunk_sizer.log_df(trace_label, "choices_df", choices_df) + if estimation.manager.enabled and sample_size > 0: + # we need to ensure chosen alternative is included in the sample + 
survey_choices = estimation.manager.get_survey_destination_chocies( + state, choosers, trace_label + ) + if survey_choices is not None: + survey_choices.name = alt_col_name + survey_choices = survey_choices.dropna().astype( + choices_df[alt_col_name].dtype + ) + comparison = pd.merge( + survey_choices, + choices_df, + on=[choosers.index.name, alt_col_name], + how="left", + indicator=True, + ) + missing_choices = comparison[comparison["_merge"] == "left_only"] + # need to get prob of missing choices and add them to choices_df + if not missing_choices.empty: + probs_df = probs.reset_index().melt( + id_vars=[choosers.index.name], + var_name=alt_col_name, + value_name="prob", + ) + # probs are numbered 0..n-1 so we need to map back to alt ids + zone_map = pd.Series(alternatives.index).to_dict() + probs_df[alt_col_name] = probs_df[alt_col_name].map(zone_map) + # merge the probs onto the missing choices + missing_choices = pd.merge( + missing_choices.drop(columns=["prob", "_merge"]), + probs_df, + on=[choosers.index.name, alt_col_name], + how="left", + ) + assert ( + missing_choices.prob.isna().sum() == 0 + ), f"survey choices with no probs: {missing_choices[missing_choices.prob.isna()]}" + del probs_df + # random number is not important, filling with 0 + missing_choices["rand"] = 0 + # merge survey choices back into choices_df and sort by chooser + choices_df = pd.concat([choices_df, missing_choices], ignore_index=True) + choices_df.sort_values(by=[choosers.index.name], inplace=True) + del probs chunk_sizer.log_df(trace_label, "probs", None)