From 3fd7851708623e16fae6b1875a8e0a1d64d725b9 Mon Sep 17 00:00:00 2001
From: David Hensle <hensle93@gmail.com>
Date: Fri, 11 Oct 2024 17:10:01 -0700
Subject: [PATCH] adding survey choice to choices_df in interaction_sample

---
 activitysim/abm/models/location_choice.py     | 35 -----------
 activitysim/abm/models/trip_destination.py    | 14 +++--
 .../abm/models/util/tour_destination.py       | 33 -----------
 activitysim/core/configuration/logit.py       | 18 +++---
 activitysim/core/estimation.py                | 58 +++++++++++++++++++
 activitysim/core/interaction_sample.py        | 46 +++++++++++++++
 6 files changed, 121 insertions(+), 83 deletions(-)

diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
index 98c01288c..26fa42187 100644
--- a/activitysim/abm/models/location_choice.py
+++ b/activitysim/abm/models/location_choice.py
@@ -491,41 +491,6 @@ def run_location_sample(
             trace_label=trace_label,
         )
 
-    # adding observed choice to alt set when running in estimation mode
-    if estimator:
-        # grabbing survey values
-        survey_persons = estimation.manager.get_survey_table("persons")
-        if "school_location" in trace_label:
-            survey_choices = survey_persons["school_zone_id"].reset_index()
-        elif ("workplace_location" in trace_label) and ("external" not in trace_label):
-            survey_choices = survey_persons["workplace_zone_id"].reset_index()
-        else:
-            return choices
-        survey_choices.columns = ["person_id", "alt_dest"]
-        survey_choices = survey_choices[
-            survey_choices["person_id"].isin(choices.index)
-            & (survey_choices.alt_dest > 0)
-        ]
-        # merging survey destination into table if not available
-        joined_data = survey_choices.merge(
-            choices, on=["person_id", "alt_dest"], how="left", indicator=True
-        )
-        missing_rows = joined_data[joined_data["_merge"] == "left_only"]
-        missing_rows["pick_count"] = 1
-        if len(missing_rows) > 0:
-            new_choices = missing_rows[
-                ["person_id", "alt_dest", "prob", "pick_count"]
-            ].set_index("person_id")
-            choices = choices.append(new_choices, ignore_index=False).sort_index()
-            # making prob 0 for missing rows so it does not influence model decision
-            choices["prob"] = choices["prob"].fillna(0)
-            # sort by person_id and alt_dest
-            choices = (
-                choices.reset_index()
-                .sort_values(by=["person_id", "alt_dest"])
-                .set_index("person_id")
-            )
-
     return choices
 
 
diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py
index 2b6b5a2ff..95b6ccde2 100644
--- a/activitysim/abm/models/trip_destination.py
+++ b/activitysim/abm/models/trip_destination.py
@@ -173,15 +173,17 @@ def _destination_sample(
     )
 
     sample_size = model_settings.SAMPLE_SIZE
-    if state.settings.disable_destination_sampling or (
-        estimator and estimator.want_unsampled_alternatives
-    ):
-        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
+    if estimator:
+        sample_size = model_settings.ESTIMATION_SAMPLE_SIZE
         logger.info(
-            f"Estimation mode for {trace_label} using "
-            f"unsampled alternatives short_circuit_choices"
+            f"Estimation mode for {trace_label} using sample size of {sample_size}"
         )
+
+    if state.settings.disable_destination_sampling:
         sample_size = 0
+        logger.info(
+            f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set"
+        )
 
     locals_dict = state.get_global_constants().copy()
     locals_dict.update(model_settings.CONSTANTS)
diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py
index 8feb6d55d..d07f27e4e 100644
--- a/activitysim/abm/models/util/tour_destination.py
+++ b/activitysim/abm/models/util/tour_destination.py
@@ -181,39 +181,6 @@ def destination_sample(
         trace_label=trace_label,
     )
 
-    # adding observed choice to alt set when running in estimation mode
-    if estimator:
-        # grabbing survey values
-        survey_tours = estimation.manager.get_survey_table("tours")
-        survey_choices = survey_tours[["destination", "person_id"]].reset_index()
-        survey_choices.columns = ["tour_id", alt_dest_col_name, "person_id"]
-        survey_choices = survey_choices[
-            survey_choices["tour_id"].isin(choices.index)
-            & (survey_choices[alt_dest_col_name] > 0)
-        ]
-        # merging survey destination into table if not available
-        joined_data = survey_choices.merge(
-            choices,
-            on=["tour_id", alt_dest_col_name, "person_id"],
-            how="left",
-            indicator=True,
-        )
-        missing_rows = joined_data[joined_data["_merge"] == "left_only"]
-        missing_rows["pick_count"] = 1
-        if len(missing_rows) > 0:
-            new_choices = missing_rows[
-                ["tour_id", alt_dest_col_name, "prob", "pick_count", "person_id"]
-            ].set_index("tour_id")
-            choices = choices.append(new_choices, ignore_index=False).sort_index()
-            # making prob 0 for missing rows so it does not influence model decision
-            choices["prob"] = choices["prob"].fillna(0)
-            # sort by tour_id and alt_dest
-            choices = (
-                choices.reset_index()
-                .sort_values(by=["tour_id", alt_dest_col_name])
-                .set_index("tour_id")
-            )
-
     return choices
 
 
diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py
index d03bcab77..94233db26 100644
--- a/activitysim/core/configuration/logit.py
+++ b/activitysim/core/configuration/logit.py
@@ -187,6 +187,15 @@ class LocationComponentSettings(BaseLogitComponentSettings):
     SAMPLE_SIZE: int
     """This many candidate alternatives will be sampled for each choice."""
 
+    ESTIMATION_SAMPLE_SIZE: int = 0
+    """
+    The number of alternatives to sample for estimation mode.
+    If zero, then all alternatives are used.
+    Truth alternative will be included in the sample.
+    Larch does not yet support sampling alternatives for estimation,
+    but this setting is still helpful for estimation mode runtime.
+    """
+
     LOGSUM_SETTINGS: Path
     """Settings for the logsum computation."""
 
@@ -233,15 +242,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"):
     ORIG_ZONE_ID: str | None = None
     """This setting appears to do nothing..."""
 
-    ESTIMATION_SAMPLE_SIZE: int = 0
-    """
-    The number of alternatives to sample for estimation mode.
-    If zero, then all alternatives are used.
-    Truth alternative will be included in the sample.
-    Larch does not yet support sampling alternatives for estimation,
-    but this setting is still helpful for estimation mode runtime.
-    """
-
 
 class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid"):
     MODE_CHOICE_LOGSUM_COLUMN_NAME: str | None = None
diff --git a/activitysim/core/estimation.py b/activitysim/core/estimation.py
index b79618509..73c65087c 100644
--- a/activitysim/core/estimation.py
+++ b/activitysim/core/estimation.py
@@ -933,5 +933,63 @@ def get_survey_values(self, model_values, table_name, column_names):
 
         return values[column_name] if column_name else values
 
+    def get_survey_destination_chocies(self, state, choosers, trace_label):
+        """
+        Return the survey choices for a destination choice model.
+        Called from inside interaction_sample so the sample includes the override choices.
+
+        Parameters
+        ----------
+        state : workflow.State
+        choosers : pd.DataFrame (only its index is used)
+        trace_label : str
+            Dotted label whose first component is the model name.
+
+        Returns
+        -------
+        pd.Series or None : The survey choices, or None for unsupported models.
+        """
+        model = trace_label.split(".")[0]
+        if model == "school_location":
+            survey_choices = manager.get_survey_values(
+                choosers.index, "persons", "school_zone_id"
+            )
+        elif model == "workplace_location":
+            survey_choices = manager.get_survey_values(
+                choosers.index, "persons", "workplace_zone_id"
+            )
+        elif model in [
+            "joint_tour_destination",
+            "atwork_subtour_destination",
+            "non_mandatory_tour_destination",
+        ]:
+            survey_choices = manager.get_survey_values(
+                choosers.index, "tours", "destination"
+            )
+        elif model == "trip_destination":
+            survey_choices = manager.get_survey_values(
+                choosers.index, "trips", "destination"
+            )
+        elif model == "parking_location":
+            # need to grab parking location column name from its settings
+            from activitysim.abm.models.parking_location_choice import (
+                ParkingLocationSettings,
+            )
+
+            model_settings = ParkingLocationSettings.read_settings_file(
+                state.filesystem,
+                "parking_location_choice.yaml",
+            )
+            survey_choices = manager.get_survey_values(
+                choosers.index, "trips", model_settings.ALT_DEST_COL_NAME
+            )
+        else:
+            # since this function is called from inside interaction_sample,
+            # we don't want to return anything for other models that aren't destination choice
+            # not implemented models include scheduling models and tour_od_choice
+            logger.debug(f"Not grabbing survey choices for {model}.")
+            return None
+        return survey_choices
+
 
 manager = EstimationManager()
diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py
index cabcbeb64..83ed39480 100644
--- a/activitysim/core/interaction_sample.py
+++ b/activitysim/core/interaction_sample.py
@@ -8,6 +8,7 @@
 import pandas as pd
 
 from activitysim.core import (
+    estimation,
     chunk,
     interaction_simulate,
     logit,
@@ -501,6 +502,51 @@ def _interaction_sample(
 
     chunk_sizer.log_df(trace_label, "choices_df", choices_df)
 
+    if estimation.manager.enabled and sample_size > 0:
+        # we need to ensure chosen alternative is included in the sample
+        survey_choices = estimation.manager.get_survey_destination_chocies(
+            state, choosers, trace_label
+        )
+        if survey_choices is not None:
+            survey_choices.name = alt_col_name
+            survey_choices = survey_choices.dropna().astype(
+                choices_df[alt_col_name].dtype
+            )
+            comparison = pd.merge(
+                survey_choices,
+                choices_df,
+                on=[choosers.index.name, alt_col_name],
+                how="left",
+                indicator=True,
+            )
+            missing_choices = comparison[comparison["_merge"] == "left_only"]
+            # need to get prob of missing choices and add them to choices_df
+            if not missing_choices.empty:
+                probs_df = probs.reset_index().melt(
+                    id_vars=[choosers.index.name],
+                    var_name=alt_col_name,
+                    value_name="prob",
+                )
+                # probs are numbered 0..n-1 so we need to map back to alt ids
+                zone_map = pd.Series(alternatives.index).to_dict()
+                probs_df[alt_col_name] = probs_df[alt_col_name].map(zone_map)
+                # merge the probs onto the missing choices
+                missing_choices = pd.merge(
+                    missing_choices.drop(columns=["prob", "_merge"]),
+                    probs_df,
+                    on=[choosers.index.name, alt_col_name],
+                    how="left",
+                )
+                assert (
+                    missing_choices.prob.isna().sum() == 0
+                ), f"survey choices with no probs: {missing_choices[missing_choices.prob.isna()]}"
+                del probs_df
+                # random number is not important, filling with 0
+                missing_choices["rand"] = 0
+                # merge survey choices back into choices_df and sort by chooser
+                choices_df = pd.concat([choices_df, missing_choices], ignore_index=True)
+                choices_df.sort_values(by=[choosers.index.name], inplace=True)
+
     del probs
     chunk_sizer.log_df(trace_label, "probs", None)