RSGInc · dhensle · Sep 30, 2024 · Aug 17, 2024 · Aug 17, 2024 · Sep 6, 2024
diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml
@@ -438,6 +438,59 @@ jobs:
       - name: Test Estimation Mode
         run: |
           python -m pytest activitysim/estimation/test/test_larch_estimation.py --durations=0
+
+
+  estimation_edb_creation:  # runs test_edb_formation.py in its own conda environment
+    needs: foundation  # starts only after the `foundation` job succeeds
+    env:
+      python-version: "3.10"  # quoted so YAML does not read it as the float 3.1
+      label: linux-64  # prefix of the conda cache key below
+    defaults:
+      run:
+        shell: bash -l {0}  # login shell, so the activated conda env is used by every step
+    name: estimation_edb_creation_test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Miniforge
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-variant: Miniforge3  # Mambaforge is retired upstream; Miniforge3 bundles mamba
+          miniforge-version: latest
+          activate-environment: asim-test
+          use-mamba: true
+          python-version: ${{ env.python-version }}
+
+      - name: Set cache date for year and month  # DATE (YYYYMM) is part of the cache key
+        run: echo "DATE=$(date +'%Y%m')" >> "$GITHUB_ENV"
+
+      - uses: actions/cache@v4  # restores/saves the conda envs directory
+        with:  # NOTE(review): CACHE_NUMBER is not set in this job; presumably workflow-level - confirm
+          path: ${{ env.CONDA }}/envs
+          key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
+        id: cache  # referenced by the cache-hit condition on the next step
+
+      - name: Update environment  # skipped when the cache step reports an exact key hit
+        run: |
+          mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
+        if: steps.cache.outputs.cache-hit != 'true'
+
+      - name: Install activitysim
+        # Installing without dependencies is faster; we trust that all needed
+        # dependencies are in the conda environment defined above. This also avoids
+        # pip getting confused and reinstalling tables (pytables).
+        run: |
+          python -m pip install -e . --no-deps
+
+      - name: Conda checkup  # prints conda configuration and installed packages to the log
+        run: |
+          mamba info -a
+          mamba list
+
+      - name: Test Estimation EDB Creation
+        run: |
+          python -m pytest activitysim/estimation/test/test_edb_creation/test_edb_formation.py --durations=0
 
   develop-docbuild:
     needs: foundation

diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py
@@ -180,7 +180,7 @@ def cdap_simulate(
         estimator.write_coefficients(coefficients_df, model_settings)
         estimator.write_table(
             cdap_interaction_coefficients,
-            "interaction_coefficients",
+            "cdap_interaction_coefficients",
             index=False,
             append=False,
         )

diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
@@ -517,10 +517,13 @@ def run_location_sample(
                 ["person_id", "alt_dest", "prob", "pick_count"]
             ].set_index("person_id")
             choices = choices.append(new_choices, ignore_index=False).sort_index()
-            # making probability the mean of all other sampled destinations by person
-            # FIXME is there a better way to do this? Does this even matter for estimation?
-            choices["prob"] = choices["prob"].fillna(
-                choices.groupby("person_id")["prob"].transform("mean")
+            # making prob 0 for missing rows so it does not influence model decision
+            choices["prob"] = choices["prob"].fillna(0)
+            # sort by person_id and alt_dest
+            choices = (
+                choices.reset_index()
+                .sort_values(by=["person_id", "alt_dest"])
+                .set_index("person_id")
             )
 
     return choices

diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py
@@ -288,14 +288,22 @@ def non_mandatory_tour_frequency(
         )
 
         if estimator:
-            estimator.write_spec(model_settings, bundle_directory=True)
+            bundle_directory = True
+            # writing to a separate subdirectory for each segment if multiprocessing
+            if state.settings.multiprocess:
+                bundle_directory = False
+            estimator.write_spec(model_settings, bundle_directory=bundle_directory)
             estimator.write_model_settings(
-                model_settings, model_settings_file_name, bundle_directory=True
+                model_settings,
+                model_settings_file_name,
+                bundle_directory=bundle_directory,
             )
             # preserving coefficients file name makes bringing back updated coefficients more straightforward
             estimator.write_coefficients(coefficients_df, segment_settings)
             estimator.write_choosers(chooser_segment)
-            estimator.write_alternatives(alternatives, bundle_directory=True)
+            estimator.write_alternatives(
+                alternatives, bundle_directory=bundle_directory
+            )
 
             # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
             #  shuold we do it here or have interaction_simulate do it?

diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py
@@ -493,7 +493,10 @@ def school_escorting(
                 coefficients_df, file_name=stage.upper() + "_COEFFICIENTS"
             )
             estimator.write_choosers(choosers)
-            estimator.write_alternatives(alts, bundle_directory=True)
+            if state.settings.multiprocess:
+                estimator.write_alternatives(alts, bundle_directory=False)
+            else:
+                estimator.write_alternatives(alts, bundle_directory=True)
 
             # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
             #  shuold we do it here or have interaction_simulate do it?

diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py
@@ -197,9 +197,15 @@ def stop_frequency(
 
         if estimator:
             estimator.write_spec(segment_settings, bundle_directory=False)
-            estimator.write_model_settings(
-                model_settings, model_settings_file_name, bundle_directory=True
-            )
+            # writing to a separate subdirectory for each segment if multiprocessing
+            if state.settings.multiprocess:
+                estimator.write_model_settings(
+                    model_settings, model_settings_file_name, bundle_directory=False
+                )
+            else:
+                estimator.write_model_settings(
+                    model_settings, model_settings_file_name, bundle_directory=True
+                )
             estimator.write_coefficients(coefficients_df, segment_settings)
             estimator.write_choosers(chooser_segment)