handling multiple columns for parquet write

RSGInc · Nov 12, 2024 · 36dfb45
1 parent 4af3fa9
commit 36dfb45
Showing 1 changed file with 10 additions and 10 deletions.
diff --git a/activitysim/core/estimation.py b/activitysim/core/estimation.py
@@ -277,26 +277,26 @@ def write_parquet(self, df, file_path, index, append=False):
         ), f"file already exists: {file_path}"
 
         # Explicitly set the data types of the columns
-        for col in df.columns:
-            if "int" in str(df[col].dtype):
+        for col_name, col_data in df.iteritems():
+            if "int" in str(col_data.dtype):
                 pass
             elif (
-                df[col].dtype == "float16"
+                col_data.dtype == "float16"
             ):  # Handle halffloat type not allowed in parquet
-                df[col] = df[col].astype("float32")
-            elif "float" in str(df[col].dtype):
+                df[col_name] = col_data.astype("float32")
+            elif "float" in str(col_data.dtype):
                 pass
-            elif df[col].dtype == "bool":
+            elif col_data.dtype == "bool":
                 pass
-            elif df[col].dtype == "object":
+            elif col_data.dtype == "object":
                 # first try converting to numeric, if that fails, convert to string
                 try:
-                    df[col] = pd.to_numeric(df[col], errors="raise")
+                    df[col_name] = pd.to_numeric(col_data, errors="raise")
                 except ValueError:
-                    df[col] = df[col].astype(str)
+                    df[col_name] = col_data.astype(str)
             else:
                 # Convert any other unsupported types to string
-                df[col] = df[col].astype(str)
+                df[col_name] = col_data.astype(str)
 
         self.debug(f"writing table: {file_path}")
         # want parquet file to be exactly the same as df read from csv