Skip to content

Commit

Permalink
handling multiple columns for parquet write
Browse files: browse the repository at this point in the history
  • Loading branch information
dhensle committed Nov 12, 2024
1 parent 4af3fa9 commit 36dfb45
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions activitysim/core/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,26 +277,26 @@ def write_parquet(self, df, file_path, index, append=False):
), f"file already exists: {file_path}"

# Explicitly set the data types of the columns
for col in df.columns:
if "int" in str(df[col].dtype):
for col_name, col_data in df.iteritems():
if "int" in str(col_data.dtype):
pass
elif (
df[col].dtype == "float16"
col_data.dtype == "float16"
): # Handle halffloat type not allowed in parquet
df[col] = df[col].astype("float32")
elif "float" in str(df[col].dtype):
df[col_name] = col_data.astype("float32")
elif "float" in str(col_data.dtype):
pass
elif df[col].dtype == "bool":
elif col_data.dtype == "bool":
pass
elif df[col].dtype == "object":
elif col_data.dtype == "object":
# first try converting to numeric, if that fails, convert to string
try:
df[col] = pd.to_numeric(df[col], errors="raise")
df[col_name] = pd.to_numeric(col_data, errors="raise")
except ValueError:
df[col] = df[col].astype(str)
df[col_name] = col_data.astype(str)
else:
# Convert any other unsupported types to string
df[col] = df[col].astype(str)
df[col_name] = col_data.astype(str)

self.debug(f"writing table: {file_path}")
# want parquet file to be exactly the same as df read from csv
Expand Down

0 comments on commit 36dfb45

Please sign in to comment.