Skip to content

Commit

Permalink
adding survey choice to choices_df in interaction_sample
Browse files Browse the repository at this point in the history
  • Loading branch information
dhensle committed Oct 12, 2024
1 parent 1459e48 commit 3fd7851
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 83 deletions.
35 changes: 0 additions & 35 deletions activitysim/abm/models/location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,41 +491,6 @@ def run_location_sample(
trace_label=trace_label,
)

# adding observed choice to alt set when running in estimation mode
if estimator:
# grabbing survey values
survey_persons = estimation.manager.get_survey_table("persons")
if "school_location" in trace_label:
survey_choices = survey_persons["school_zone_id"].reset_index()
elif ("workplace_location" in trace_label) and ("external" not in trace_label):
survey_choices = survey_persons["workplace_zone_id"].reset_index()
else:
return choices
survey_choices.columns = ["person_id", "alt_dest"]
survey_choices = survey_choices[
survey_choices["person_id"].isin(choices.index)
& (survey_choices.alt_dest > 0)
]
# merging survey destination into table if not available
joined_data = survey_choices.merge(
choices, on=["person_id", "alt_dest"], how="left", indicator=True
)
missing_rows = joined_data[joined_data["_merge"] == "left_only"]
missing_rows["pick_count"] = 1
if len(missing_rows) > 0:
new_choices = missing_rows[
["person_id", "alt_dest", "prob", "pick_count"]
].set_index("person_id")
choices = choices.append(new_choices, ignore_index=False).sort_index()
# making prob 0 for missing rows so it does not influence model decision
choices["prob"] = choices["prob"].fillna(0)
# sort by person_id and alt_dest
choices = (
choices.reset_index()
.sort_values(by=["person_id", "alt_dest"])
.set_index("person_id")
)

return choices


Expand Down
14 changes: 8 additions & 6 deletions activitysim/abm/models/trip_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,17 @@ def _destination_sample(
)

sample_size = model_settings.SAMPLE_SIZE
if state.settings.disable_destination_sampling or (
estimator and estimator.want_unsampled_alternatives
):
# FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
if estimator:
sample_size = model_settings.ESTIMATION_SAMPLE_SIZE
logger.info(
f"Estimation mode for {trace_label} using "
f"unsampled alternatives short_circuit_choices"
f"Estimation mode for {trace_label} using sample size of {sample_size}"
)

if state.settings.disable_destination_sampling:
sample_size = 0
logger.info(
f"SAMPLE_SIZE set to 0 for {trace_label} because disable_destination_sampling is set"
)

locals_dict = state.get_global_constants().copy()
locals_dict.update(model_settings.CONSTANTS)
Expand Down
33 changes: 0 additions & 33 deletions activitysim/abm/models/util/tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,39 +181,6 @@ def destination_sample(
trace_label=trace_label,
)

# adding observed choice to alt set when running in estimation mode
if estimator:
# grabbing survey values
survey_tours = estimation.manager.get_survey_table("tours")
survey_choices = survey_tours[["destination", "person_id"]].reset_index()
survey_choices.columns = ["tour_id", alt_dest_col_name, "person_id"]
survey_choices = survey_choices[
survey_choices["tour_id"].isin(choices.index)
& (survey_choices[alt_dest_col_name] > 0)
]
# merging survey destination into table if not available
joined_data = survey_choices.merge(
choices,
on=["tour_id", alt_dest_col_name, "person_id"],
how="left",
indicator=True,
)
missing_rows = joined_data[joined_data["_merge"] == "left_only"]
missing_rows["pick_count"] = 1
if len(missing_rows) > 0:
new_choices = missing_rows[
["tour_id", alt_dest_col_name, "prob", "pick_count", "person_id"]
].set_index("tour_id")
choices = choices.append(new_choices, ignore_index=False).sort_index()
# making prob 0 for missing rows so it does not influence model decision
choices["prob"] = choices["prob"].fillna(0)
# sort by tour_id and alt_dest
choices = (
choices.reset_index()
.sort_values(by=["tour_id", alt_dest_col_name])
.set_index("tour_id")
)

return choices


Expand Down
18 changes: 9 additions & 9 deletions activitysim/core/configuration/logit.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,15 @@ class LocationComponentSettings(BaseLogitComponentSettings):
SAMPLE_SIZE: int
"""This many candidate alternatives will be sampled for each choice."""

ESTIMATION_SAMPLE_SIZE: int = 0
"""
The number of alternatives to sample for estimation mode.
If zero, then all alternatives are used.
Truth alternative will be included in the sample.
Larch does not yet support sampling alternatives for estimation,
but this setting is still helpful for estimation mode runtime.
"""

LOGSUM_SETTINGS: Path
"""Settings for the logsum computation."""

Expand Down Expand Up @@ -233,15 +242,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"):
ORIG_ZONE_ID: str | None = None
"""This setting appears to do nothing..."""

ESTIMATION_SAMPLE_SIZE: int = 0
"""
The number of alternatives to sample for estimation mode.
If zero, then all alternatives are used.
Truth alternative will be included in the sample.
Larch does not yet support sampling alternatives for estimation,
but this setting is still helpful for estimation mode runtime.
"""


class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid"):
MODE_CHOICE_LOGSUM_COLUMN_NAME: str | None = None
Expand Down
58 changes: 58 additions & 0 deletions activitysim/core/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,5 +933,63 @@ def get_survey_values(self, model_values, table_name, column_names):

return values[column_name] if column_name else values

def get_survey_destination_chocies(self, state, choosers, trace_label):
    """
    Return the survey choices for a destination choice model.

    This gets called from inside interaction_sample and is used to
    ensure the choices include the override choices when sampling alternatives.

    NOTE: the misspelling in the method name ("chocies") is kept on purpose
    so that existing callers keep working.

    Parameters
    ----------
    state : workflow.State
        Only used to read the parking location settings file when the
        model is ``parking_location``.
    choosers : table of choosers
        Its ``index`` is passed to ``get_survey_values`` to select the
        survey rows (presumably a pandas DataFrame -- confirm with caller).
    trace_label : str
        The model name is the text before the first "." in this label.

    Returns
    -------
    pd.Series or None
        The survey choices returned by ``get_survey_values`` for the
        destination choice model, or None when the model is not one of
        the destination choice models handled here.
    """
    # (survey table, survey column) holding the observed choice of each model
    survey_columns = {
        "school_location": ("persons", "school_zone_id"),
        "workplace_location": ("persons", "workplace_zone_id"),
        "joint_tour_destination": ("tours", "destination"),
        "atwork_subtour_destination": ("tours", "destination"),
        "non_mandatory_tour_destination": ("tours", "destination"),
        "trip_destination": ("trips", "destination"),
    }

    model = trace_label.split(".")[0]

    if model == "parking_location":
        # need to grab parking location column name from its settings;
        # imported here so the import only happens when this model runs
        from activitysim.abm.models.parking_location_choice import (
            ParkingLocationSettings,
        )

        model_settings = ParkingLocationSettings.read_settings_file(
            state.filesystem,
            "parking_location_choice.yaml",
        )
        table_name, column_name = "trips", model_settings.ALT_DEST_COL_NAME
    elif model in survey_columns:
        table_name, column_name = survey_columns[model]
    else:
        # since this function is called from inside interaction_sample,
        # we don't want to return anything for other models that aren't destination choice
        # not implemented models include scheduling models and tour_od_choice
        logger.debug(f"Not grabbing survey choices for {model}.")
        return None

    # use this instance rather than the module-level ``manager`` singleton so
    # the method works on whichever manager it is called on
    return self.get_survey_values(choosers.index, table_name, column_name)


manager = EstimationManager()
46 changes: 46 additions & 0 deletions activitysim/core/interaction_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd

from activitysim.core import (
estimation,
chunk,
interaction_simulate,
logit,
Expand Down Expand Up @@ -501,6 +502,51 @@ def _interaction_sample(

chunk_sizer.log_df(trace_label, "choices_df", choices_df)

if estimation.manager.enabled and sample_size > 0:
# we need to ensure chosen alternative is included in the sample
survey_choices = estimation.manager.get_survey_destination_chocies(
state, choosers, trace_label
)
if survey_choices is not None:
survey_choices.name = alt_col_name
survey_choices = survey_choices.dropna().astype(
choices_df[alt_col_name].dtype
)
comparison = pd.merge(
survey_choices,
choices_df,
on=[choosers.index.name, alt_col_name],
how="left",
indicator=True,
)
missing_choices = comparison[comparison["_merge"] == "left_only"]
# need to get prob of missing choices and add them to choices_df
if not missing_choices.empty:
probs_df = probs.reset_index().melt(
id_vars=[choosers.index.name],
var_name=alt_col_name,
value_name="prob",
)
# probs are numbered 0..n-1 so we need to map back to alt ids
zone_map = pd.Series(alternatives.index).to_dict()
probs_df[alt_col_name] = probs_df[alt_col_name].map(zone_map)
# merge the probs onto the missing choices
missing_choices = pd.merge(
missing_choices.drop(columns=["prob", "_merge"]),
probs_df,
on=[choosers.index.name, alt_col_name],
how="left",
)
assert (
missing_choices.prob.isna().sum() == 0
), f"survey choices with no probs: {missing_choices[missing_choices.prob.isna()]}"
del probs_df
# random number is not important, filling with 0
missing_choices["rand"] = 0
# merge survey choices back into choices_df and sort by chooser
choices_df = pd.concat([choices_df, missing_choices], ignore_index=True)
choices_df.sort_values(by=[choosers.index.name], inplace=True)

del probs
chunk_sizer.log_df(trace_label, "probs", None)

Expand Down

0 comments on commit 3fd7851

Please sign in to comment.