From bf28785160e45a50f8f3cf33d2eab49e41c2502a Mon Sep 17 00:00:00 2001
From: justin-richling
Date: Tue, 24 Sep 2024 12:10:25 -0600
Subject: [PATCH 1/2] Add global attrs to all ADF generated files

This will add attributes for:
- timeseries files: ADF user, history files location, and history file name(s)
- climo files: ADF user, climo years, and time series file name(s)
- regridded files: ADF user, climo years, and climo file name(s)
---
 lib/adf_diag.py                              | 56 +++++++++++++++++---
 lib/adf_info.py                              | 12 ++++-
 scripts/averaging/create_climo_files.py      | 17 ++++--
 scripts/regridding/regrid_and_vert_interp.py | 40 +++++++++++++-
 4 files changed, 114 insertions(+), 11 deletions(-)

diff --git a/lib/adf_diag.py b/lib/adf_diag.py
index bf87cb498..de4d3228d 100644
--- a/lib/adf_diag.py
+++ b/lib/adf_diag.py
@@ -514,6 +514,8 @@ def call_ncrcat(cmd):

             # Loop over CAM history variables:
             list_of_commands = []
+            list_of_ncattend_commands = []
+            list_of_hist_commands = []
             vars_to_derive = []
             # create copy of var list that can be modified for derivable variables
             diag_var_list = self.diag_var_list
@@ -691,20 +693,62 @@ def call_ncrcat(cmd):
                    + ["-o", ts_outfil_str]
                )

+                # Example ncatted command (you can modify it with the specific attribute changes you need)
+                #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str]
+                # Step 1: Convert Path objects to strings and concatenate the list of history files into a single string
+                hist_files_str = ', '.join(str(f.name) for f in hist_files)
+                #3parent
+                #hist_locs = []
+                #for f in hist_files:
+                hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs)
+
+                # Step 2: Create the ncatted command to add the global attributes
+                cmd_ncatted = [
+                    "ncatted", "-O",
+                    "-a", "adf_user,global,a,c," + f"{self.user}",
+                    "-a", "hist_file_locs,global,a,c," + f"{hist_locs_str}",
+                    "-a", "hist_file_list,global,a,c," + f"{hist_files_str}",
+                    ts_outfil_str
+                ]
+
+                # Step 3: Create the ncatted command to remove the history attribute
+                cmd_remove_history = [
+                    "ncatted", "-O", "-h",
+                    "-a", "history,global,d,,",
+                    ts_outfil_str
+                ]
+
                 # Add to command list for use in multi-processing pool:
+                # -----------------------------------------------------
+                # generate time series files
                 list_of_commands.append(cmd)
+                # Add global attributes: user, original hist file loc(s) and all filenames
+                list_of_ncattend_commands.append(cmd_ncatted)
+                # Remove the `history` attr that gets tacked on (for clean up)
+                # NOTE: this may not be best practice, but the history attr repeats
+                #       the file attrs, so the global attrs become obtrusive...
+                list_of_hist_commands.append(cmd_remove_history)

             # End variable loop

             # Now run the "ncrcat" subprocesses in parallel:
             with mp.Pool(processes=self.num_procs) as mpool:
                 _ = mpool.map(call_ncrcat, list_of_commands)
+            # End with

-            if vars_to_derive:
-                self.derive_variables(
-                    res=res, hist_str=hist_str, vars_to_derive=vars_to_derive,
-                    constit_dict=constit_dict, ts_dir=ts_dir[case_idx]
-                )
+            # Run ncatted commands after ncrcat is done
+            with mp.Pool(processes=self.num_procs) as mpool:
+                _ = mpool.map(call_ncrcat, list_of_ncattend_commands)
+
+            # Run ncatted command to remove history attribute after the global attributes are set
+            with mp.Pool(processes=self.num_procs) as mpool:
+                _ = mpool.map(call_ncrcat, list_of_hist_commands)
+
+            if vars_to_derive:
+                self.derive_variables(
+                    res=res, hist_str=hist_str, vars_to_derive=vars_to_derive,
+                    constit_dict=constit_dict, ts_dir=ts_dir[case_idx]
+                )
             # End with
         # End for hist_str
     # End cases loop
@@ -1481,4 +1525,4 @@ def my_formatwarning(msg, *args, **kwargs):
         return xr.open_dataset(fils[0])
     #End if
 # End def
-########
+########
\ No newline at end of file

diff --git a/lib/adf_info.py b/lib/adf_info.py
index f5b667b09..e8f285f3c 100644
--- a/lib/adf_info.py
+++ b/lib/adf_info.py
@@ -31,6 +31,7 @@
 from pathlib import Path
 import copy
 import os
+import getpass

 #+++++++++++++++++++++++++++++++++++++++++++++++++
 #import non-standard python modules, including ADF
@@ -94,6 +95,9 @@ def __init__(self, config_file, debug=False):
             self.expand_references(self.__mdtf_info)
         # End if

+        # Get the current system user
+        self.__user = getpass.getuser()
+
         # Check if inputs are of the correct type:
         # -------------------------------------------
@@ -569,6 +573,12 @@ def hist_str_to_list(self, conf_var, conf_val):

     #########

+    # Create property needed to return "user" name to user:
+    @property
+    def user(self):
+        """Return the "user" name if requested."""
+        return self.__user
+
     # Create property needed to return "compare_obs" logical to user:
     @property
     def compare_obs(self):
@@ -832,4 +842,4 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):

 #++++++++++++++++++++
 #End Class definition
-#++++++++++++++++++++
+#++++++++++++++++++++
\ No newline at end of file

diff --git a/scripts/averaging/create_climo_files.py b/scripts/averaging/create_climo_files.py
index 11844e189..d90bfbe52 100644
--- a/scripts/averaging/create_climo_files.py
+++ b/scripts/averaging/create_climo_files.py
@@ -178,7 +178,7 @@ def create_climo_files(adf, clobber=False, search=None):
                     warnings.warn(errmsg)
                     continue

-                list_of_arguments.append((ts_files, syr, eyr, output_file))
+                list_of_arguments.append((adf, ts_files, syr, eyr, output_file))

             #End of var_list loop

@@ -198,7 +198,7 @@ def create_climo_files(adf, clobber=False, search=None):
 #
 # Local functions
 #
-def process_variable(ts_files, syr, eyr, output_file):
+def process_variable(adf, ts_files, syr, eyr, output_file):
     '''
     Compute and save the climatology file.
     '''
@@ -227,6 +227,17 @@ def process_variable(ts_files, syr, eyr, output_file):
     enc_c = {xname: {'_FillValue': None} for xname in cam_climo_data.coords}
     enc = {**enc_c, **enc_dv}

+    # Create a dictionary of attributes
+    # Convert the list to a string (join with commas)
+    ts_files_str = [str(path) for path in ts_files]
+    ts_files_str = ', '.join(ts_files_str)
+    attrs_dict = {
+        "adf_user": adf.user,
+        "climo_yrs": f"{syr}-{eyr}",
+        "time_series_files": ts_files_str,
+    }
+    cam_climo_data = cam_climo_data.assign_attrs(attrs_dict)
+
     #Output variable climatology to NetCDF-4 file:
     cam_climo_data.to_netcdf(output_file, format='NETCDF4', encoding=enc)
     return 1  # All funcs return something. Could do error checking with this if needed.
@@ -273,4 +284,4 @@ def check_averaging_interval(syear_in, eyear_in):
     else:
         eyr = None
     #End if
-    return syr, eyr
+    return syr, eyr
\ No newline at end of file

diff --git a/scripts/regridding/regrid_and_vert_interp.py b/scripts/regridding/regrid_and_vert_interp.py
index a501de16d..fab356b0e 100644
--- a/scripts/regridding/regrid_and_vert_interp.py
+++ b/scripts/regridding/regrid_and_vert_interp.py
@@ -59,6 +59,10 @@ def regrid_and_vert_interp(adf):
     case_names = adf.get_cam_info("cam_case_name", required=True)
     input_climo_locs = adf.get_cam_info("cam_climo_loc", required=True)

+    #Grab case years
+    syear_cases = adf.climo_yrs["syears"]
+    eyear_cases = adf.climo_yrs["eyears"]
+
     #Check if mid-level pressure, ocean fraction or land fraction exist
     #in the variable list:
     for var in ["PMID", "OCNFRAC", "LANDFRAC"]:
@@ -91,6 +95,9 @@ def regrid_and_vert_interp(adf):

     #Regrid target variables (either obs or a baseline run):
     if adf.compare_obs:
+        #Set obs name to match baseline (non-obs)
+        target_list = ["Obs"]
+
         #Extract variable-obs dictionary:
         var_obs_dict = adf.var_obs_dict
@@ -108,6 +115,13 @@ def regrid_and_vert_interp(adf):
         target_list = [adf.get_baseline_info("cam_case_name", required=True)]
     #End if

+    #Grab baseline years (which may be empty strings if using Obs):
+    syear_baseline = adf.climo_yrs["syear_baseline"]
+    eyear_baseline = adf.climo_yrs["eyear_baseline"]
+
+    #Set climo-years attribute string to save in the file attributes
+    base_climo_yrs_attr = f"{target_list[0]}: {syear_baseline}-{eyear_baseline}"
+
     #-----------------------------------------

     #Set output/target data path variables:
@@ -137,6 +151,10 @@ def regrid_and_vert_interp(adf):
         ps_loc_dict = {}
         pmid_loc_dict = {}

+        #Get climo years for case
+        syear = syear_cases[case_idx]
+        eyear = eyear_cases[case_idx]
+
         # probably want to do this one variable at a time:
         for var in var_list:

@@ -274,6 +292,15 @@ def regrid_and_vert_interp(adf):
                 #End if

                 #Finally, write re-gridded data to output file:
+                #Convert the list of Path objects to a list of strings
+                climatology_files_str = [str(path) for path in mclim_fils]
+                climatology_files_str = ', '.join(climatology_files_str)
+                test_attrs_dict = {
+                    "adf_user": adf.user,
+                    "climo_yrs": f"{case_name}: {syear}-{eyear}",
+                    "climatology_files": climatology_files_str,
+                }
+                rgdata_interp = rgdata_interp.assign_attrs(test_attrs_dict)
                 save_to_nc(rgdata_interp, regridded_file_loc)
                 rgdata_interp.close()  # bpm: we are completely done with this data

@@ -339,6 +366,17 @@ def regrid_and_vert_interp(adf):
                     #End if
                 #End if

+                # Convert the list to a string (join with commas or another separator)
+                climatology_files_str = [str(path) for path in tclim_fils]
+                climatology_files_str = ', '.join(climatology_files_str)
+                # Create a dictionary of attributes
+                base_attrs_dict = {
"adf_user": adf.user, + "climo_yrs": f"{case_name}: {syear}-{eyear}; {base_climo_yrs_attr}", + "climatology_files": climatology_files_str, + } + tgdata_interp = tgdata_interp.assign_attrs(base_attrs_dict) + #Write interpolated baseline climatology to file: save_to_nc(tgdata_interp, interp_bl_file) #End if @@ -658,4 +696,4 @@ def regrid_data(fromthis, tothis, method=1): return result #End if -##### +##### \ No newline at end of file From 167db5ea8b8cf4c88e10be34417d2746c83de3b5 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 24 Sep 2024 12:21:31 -0600 Subject: [PATCH 2/2] github clean up --- lib/adf_diag.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index de4d3228d..27d7866f6 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -523,7 +523,8 @@ def call_ncrcat(cmd): # Aerosol Calcs # -------------- # Always make sure PMID is made if aerosols are desired in config file - # Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent: + # Since there's no requirement for `aerosol_zonal_list` to be included, + # allow it to be absent: azl = res.get("aerosol_zonal_list", []) if "PMID" not in diag_var_list: @@ -569,7 +570,7 @@ def call_ncrcat(cmd): constit_list = vres["derivable_from_cam_chem"] if constit_list: if all(item in hist_file_ds.data_vars for item in constit_list): - # Set check to look for regular CAM constituents in variable defaults + # Set check to look for regular CAM constituents try_cam_constits = False derive = True msg = f"create time series for {case_name}:" @@ -608,12 +609,12 @@ def call_ncrcat(cmd): # Add constituent list to variable key in dictionary constit_dict[var] = constit_list continue - # Log if this variable can be derived but is missing list of constituents + # Log if variable can be derived but is missing list of constituents elif (derive) and (not constit_list): self.debug_log(constit_errmsg) continue - # Lastly, raise error if the variable is not a derived quanitity but is also not - # in the history file(s) + # Lastly, raise error if the variable is not a derived quanitity + # but is also not in the history file(s) else: msg = f"WARNING: {var} is not in the file {hist_files[0]} " msg += "nor can it be derived.\n" @@ -656,7 +657,7 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS might be in a different history file. If so, continue without error. + # PS might be in a different history file. If so, continue w/o error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: @@ -1377,7 +1378,6 @@ def move_tsfiles_for_mdtf(self, verbose): freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict - hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) var_list = self.diag_var_list @@ -1454,7 +1454,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue freq = freq_string_dict.get(found_strings[0]) print(f"Translated {found_strings[0]} to {freq}") - + # # Destination file is MDTF directory and name structure #