
Commit

Merge pull request #333 from justin-richling/adf-file-attributes
Add global attrs to all ADF generated files
justin-richling authored Sep 24, 2024
2 parents 41beaac + 167db5e commit 031e636
Showing 4 changed files with 122 additions and 19 deletions.
72 changes: 58 additions & 14 deletions lib/adf_diag.py
@@ -514,14 +514,17 @@ def call_ncrcat(cmd):

# Loop over CAM history variables:
list_of_commands = []
list_of_ncattend_commands = []
list_of_hist_commands = []
vars_to_derive = []
# create copy of var list that can be modified for derivable variables
diag_var_list = self.diag_var_list

# Aerosol Calcs
# --------------
# Always make sure PMID is made if aerosols are desired in config file
# Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent:
# Since there's no requirement for `aerosol_zonal_list` to be included,
# allow it to be absent:

azl = res.get("aerosol_zonal_list", [])
if "PMID" not in diag_var_list:
@@ -567,7 +570,7 @@ def call_ncrcat(cmd):
constit_list = vres["derivable_from_cam_chem"]
if constit_list:
if all(item in hist_file_ds.data_vars for item in constit_list):
# Set check to look for regular CAM constituents in variable defaults
# Set check to look for regular CAM constituents
try_cam_constits = False
derive = True
msg = f"create time series for {case_name}:"
@@ -606,12 +609,12 @@ def call_ncrcat(cmd):
# Add constituent list to variable key in dictionary
constit_dict[var] = constit_list
continue
# Log if this variable can be derived but is missing list of constituents
# Log if variable can be derived but is missing list of constituents
elif (derive) and (not constit_list):
self.debug_log(constit_errmsg)
continue
# Lastly, raise error if the variable is not a derived quantity but is also not
# in the history file(s)
# Lastly, raise error if the variable is not a derived quantity
# but is also not in the history file(s)
else:
msg = f"WARNING: {var} is not in the file {hist_files[0]} "
msg += "nor can it be derived.\n"
@@ -654,7 +657,7 @@ def call_ncrcat(cmd):
if has_lev and vert_coord_type:
# For now, only add these variables if using CAM:
if "cam" in hist_str:
# PS might be in a different history file. If so, continue without error.
# PS might be in a different history file. If so, continue w/o error.
ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi"

if "PS" in hist_file_var_list:
@@ -691,20 +694,62 @@ def call_ncrcat(cmd):
+ ["-o", ts_outfil_str]
)

# Example ncatted command (you can modify it with the specific attribute changes you need)
#cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str]
# Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string
hist_files_str = ', '.join(str(f.name) for f in hist_files)
#3parent
#hist_locs = []
#for f in hist_files:
hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs)

# Step 2: Create the ncatted command to add the global attributes
cmd_ncatted = [
"ncatted", "-O",
"-a", "adf_user,global,a,c," + f"{self.user}",
"-a", "hist_file_locs,global,a,c," + f"{hist_locs_str}",
"-a", "hist_file_list,global,a,c," + f"{hist_files_str}",
ts_outfil_str
]

# Step 3: Create the ncatted command to remove the history attribute
cmd_remove_history = [
"ncatted", "-O", "-h",
"-a", "history,global,d,,",
ts_outfil_str
]

# Add to command list for use in multi-processing pool:
# -----------------------------------------------------
# generate time series files
list_of_commands.append(cmd)
# Add global attributes: user, original hist file loc(s) and all filenames
list_of_ncattend_commands.append(cmd_ncatted)
# Remove the `history` attr that gets tacked on (for clean up)
# NOTE: this may not be best practice, but the history attr repeats
# the file attrs, so the global attrs become obtrusive...
list_of_hist_commands.append(cmd_remove_history)

# End variable loop

# Now run the "ncrcat" subprocesses in parallel:
with mp.Pool(processes=self.num_procs) as mpool:
_ = mpool.map(call_ncrcat, list_of_commands)
# End with

# Run ncatted commands after ncrcat is done
with mp.Pool(processes=self.num_procs) as mpool:
_ = mpool.map(call_ncrcat, list_of_ncattend_commands)

# Run ncatted command to remove history attribute after the global attributes are set
with mp.Pool(processes=self.num_procs) as mpool:
_ = mpool.map(call_ncrcat, list_of_hist_commands)

if vars_to_derive:
self.derive_variables(
res=res, hist_str=hist_str, vars_to_derive=vars_to_derive,
constit_dict=constit_dict, ts_dir=ts_dir[case_idx]
)
# End with
# End for hist_str
# End cases loop
@@ -1333,7 +1378,6 @@ def move_tsfiles_for_mdtf(self, verbose):
freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values
freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict


hist_str_list = self.get_cam_info("hist_str")
case_names = self.get_cam_info("cam_case_name", required=True)
var_list = self.diag_var_list
@@ -1410,7 +1454,7 @@ def move_tsfiles_for_mdtf(self, verbose):
continue
freq = freq_string_dict.get(found_strings[0])
print(f"Translated {found_strings[0]} to {freq}")

#
# Destination file is MDTF directory and name structure
#
@@ -1481,4 +1525,4 @@ def my_formatwarning(msg, *args, **kwargs):
return xr.open_dataset(fils[0])
#End if
# End def
########
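
As a stand-alone reference, the two ncatted calls queued for each time-series file boil down to the sketch below; the attribute names and ncatted flags match the diff, while the file path, user name, and history-file strings are placeholders.

import subprocess

# Placeholder inputs; in the ADF these come from the case configuration.
ts_file = "T.cam.h0.197901-198912.nc"
user = "jdoe"
hist_locs_str = "/archive/mycase/atm/hist"
hist_files_str = "mycase.cam.h0.1979-01.nc, mycase.cam.h0.1979-02.nc"

# Append the provenance attributes as global NetCDF attributes
# ('a' = append mode, 'c' = character type).
subprocess.run(
    ["ncatted", "-O",
     "-a", f"adf_user,global,a,c,{user}",
     "-a", f"hist_file_locs,global,a,c,{hist_locs_str}",
     "-a", f"hist_file_list,global,a,c,{hist_files_str}",
     ts_file],
    check=True,
)

# Delete the NCO 'history' attribute; '-h' keeps this deletion itself out of 'history'.
subprocess.run(
    ["ncatted", "-O", "-h", "-a", "history,global,d,,", ts_file],
    check=True,
)
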
12 changes: 11 additions & 1 deletion lib/adf_info.py
@@ -31,6 +31,7 @@
from pathlib import Path
import copy
import os
import getpass

#+++++++++++++++++++++++++++++++++++++++++++++++++
#import non-standard python modules, including ADF
@@ -94,6 +95,9 @@ def __init__(self, config_file, debug=False):
self.expand_references(self.__mdtf_info)
# End if

# Get the current system user
self.__user = getpass.getuser()

# Check if inputs are of the correct type:
# -------------------------------------------

@@ -569,6 +573,12 @@ def hist_str_to_list(self, conf_var, conf_val):

#########

# Create property needed to return "user" name to user:
@property
def user(self):
"""Return the "user" name if requested."""
return self.__user

# Create property needed to return "compare_obs" logical to user:
@property
def compare_obs(self):
@@ -832,4 +842,4 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):

#++++++++++++++++++++
#End Class definition
#++++++++++++++++++++
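
Condensed, the new user bookkeeping amounts to the pattern below; this is a stripped-down sketch, and the class name is only a stand-in for the real ADF info class, which carries far more state.

import getpass

class AdfInfo:  # illustrative stand-in for the real ADF info/config class
    def __init__(self):
        # Record the system user once at initialization
        self.__user = getpass.getuser()

    @property
    def user(self):
        '''Return the "user" name if requested.'''
        return self.__user

adf = AdfInfo()
print(adf.user)  # e.g. "jdoe"
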
17 changes: 14 additions & 3 deletions scripts/averaging/create_climo_files.py
@@ -178,7 +178,7 @@ def create_climo_files(adf, clobber=False, search=None):
warnings.warn(errmsg)
continue

list_of_arguments.append((ts_files, syr, eyr, output_file))
list_of_arguments.append((adf, ts_files, syr, eyr, output_file))


#End of var_list loop
@@ -198,7 +198,7 @@ def create_climo_files(adf, clobber=False, search=None):
#
# Local functions
#
def process_variable(ts_files, syr, eyr, output_file):
def process_variable(adf, ts_files, syr, eyr, output_file):
'''
Compute and save the climatology file.
'''
@@ -227,6 +227,17 @@ def process_variable(ts_files, syr, eyr, output_file):
enc_c = {xname: {'_FillValue': None} for xname in cam_climo_data.coords}
enc = {**enc_c, **enc_dv}

# Convert the list of time series files to a string (join with commas)
ts_files_str = [str(path) for path in ts_files]
ts_files_str = ', '.join(ts_files_str)
# Create a dictionary of attributes
attrs_dict = {
"adf_user": adf.user,
"climo_yrs": f"{syr}-{eyr}",
"time_series_files": ts_files_str,
}
cam_climo_data = cam_climo_data.assign_attrs(attrs_dict)

#Output variable climatology to NetCDF-4 file:
cam_climo_data.to_netcdf(output_file, format='NETCDF4', encoding=enc)
return 1 # All funcs return something. Could do error checking with this if needed.
@@ -273,4 +284,4 @@ def check_averaging_interval(syear_in, eyear_in):
else:
eyr = None
#End if
return syr, eyr
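
The assign_attrs step can be exercised on its own; below is a minimal sketch with a dummy dataset standing in for the real climatology and placeholder values for the user, years, and file list.

import xarray as xr

# Dummy climatology standing in for cam_climo_data
cam_climo_data = xr.Dataset({"T": ("time", [273.0, 274.5])})

ts_files = ["/path/to/ts/T.000101-001012.nc", "/path/to/ts/T.001101-002012.nc"]
attrs_dict = {
    "adf_user": "jdoe",
    "climo_yrs": "1-20",
    "time_series_files": ', '.join(str(p) for p in ts_files),
}

# assign_attrs returns a new dataset with the global attributes attached
cam_climo_data = cam_climo_data.assign_attrs(attrs_dict)
print(cam_climo_data.attrs["climo_yrs"])  # "1-20"
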
40 changes: 39 additions & 1 deletion scripts/regridding/regrid_and_vert_interp.py
@@ -59,6 +59,10 @@ def regrid_and_vert_interp(adf):
case_names = adf.get_cam_info("cam_case_name", required=True)
input_climo_locs = adf.get_cam_info("cam_climo_loc", required=True)

#Grab case years
syear_cases = adf.climo_yrs["syears"]
eyear_cases = adf.climo_yrs["eyears"]

#Check if mid-level pressure, ocean fraction or land fraction exist
#in the variable list:
for var in ["PMID", "OCNFRAC", "LANDFRAC"]:
@@ -91,6 +95,9 @@ def regrid_and_vert_interp(adf):
#Regrid target variables (either obs or a baseline run):
if adf.compare_obs:

#Set obs name to match baseline (non-obs)
target_list = ["Obs"]

#Extract variable-obs dictionary:
var_obs_dict = adf.var_obs_dict

@@ -108,6 +115,13 @@
target_list = [adf.get_baseline_info("cam_case_name", required=True)]
#End if

#Grab baseline years (which may be empty strings if using Obs):
syear_baseline = adf.climo_yrs["syear_baseline"]
eyear_baseline = adf.climo_yrs["eyear_baseline"]

#Set attributes dictionary for climo years to save in the file attributes
base_climo_yrs_attr = f"{target_list[0]}: {syear_baseline}-{eyear_baseline}"

#-----------------------------------------

#Set output/target data path variables:
@@ -137,6 +151,10 @@ def regrid_and_vert_interp(adf):
ps_loc_dict = {}
pmid_loc_dict = {}

#Get climo years for case
syear = syear_cases[case_idx]
eyear = eyear_cases[case_idx]

# probably want to do this one variable at a time:
for var in var_list:

@@ -274,6 +292,15 @@ def regrid_and_vert_interp(adf):
#End if

#Finally, write re-gridded data to output file:
#Convert the list of Path objects to a list of strings
climatology_files_str = [str(path) for path in mclim_fils]
climatology_files_str = ', '.join(climatology_files_str)
test_attrs_dict = {
"adf_user": adf.user,
"climo_yrs": f"{case_name}: {syear}-{eyear}",
"climatology_files": climatology_files_str,
}
rgdata_interp = rgdata_interp.assign_attrs(test_attrs_dict)
save_to_nc(rgdata_interp, regridded_file_loc)
rgdata_interp.close() # bpm: we are completely done with this data

@@ -339,6 +366,17 @@ def regrid_and_vert_interp(adf):
#End if
#End if

# Convert the list to a string (join with commas or another separator)
climatology_files_str = [str(path) for path in tclim_fils]
climatology_files_str = ', '.join(climatology_files_str)
# Create a dictionary of attributes
base_attrs_dict = {
"adf_user": adf.user,
"climo_yrs": f"{case_name}: {syear}-{eyear}; {base_climo_yrs_attr}",
"climatology_files": climatology_files_str,
}
tgdata_interp = tgdata_interp.assign_attrs(base_attrs_dict)

#Write interpolated baseline climatology to file:
save_to_nc(tgdata_interp, interp_bl_file)
#End if
@@ -658,4 +696,4 @@ def regrid_data(fromthis, tothis, method=1):
return result
#End if

#####
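
For the baseline branch, the climo_yrs attribute carries both the test and baseline periods; the sketch below uses placeholder case names, years, and file names to show the resulting string.

# Placeholder values; in the script these come from adf.climo_yrs and the case loop
case_name = "test_case"
syear, eyear = 1979, 1989
target_list = ["baseline_case"]
syear_baseline, eyear_baseline = 1979, 1998

base_climo_yrs_attr = f"{target_list[0]}: {syear_baseline}-{eyear_baseline}"
base_attrs_dict = {
    "adf_user": "jdoe",
    "climo_yrs": f"{case_name}: {syear}-{eyear}; {base_climo_yrs_attr}",
    "climatology_files": "T_baseline_climo.nc, PS_baseline_climo.nc",
}
print(base_attrs_dict["climo_yrs"])
# -> test_case: 1979-1989; baseline_case: 1979-1998
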
