Skip to content

Commit

Permalink
Merge branch 'main' into adf_case_dataclass
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-richling authored Jun 26, 2024
2 parents 2d5a7dc + 911bea9 commit de213fc
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 52 deletions.
20 changes: 19 additions & 1 deletion lib/adf_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#++++++++++++++++++++++++++++++

import logging
from datetime import datetime

#+++++++++++++++++++++++++
# ADF Error-handling class
Expand Down Expand Up @@ -47,15 +48,32 @@ def __init__(self, debug = False):
if not isinstance(debug, bool):
raise TypeError("'debug' must be a boolean type (True or False)")

self.__debug_fname = ''

# Create debug log, if requested:
if debug:
logging.basicConfig(filename="ADF_debug.log", level=logging.DEBUG)
# Get the current date and time
current_timestamp = datetime.now()
# Format the datetime object to a string without microseconds
dt_str = current_timestamp.strftime('%Y-%m-%d %H:%M:%S')
ext = f'{str(dt_str).replace(" ","-")}'
debug_fname = f"ADF_debug_{ext}.log"
self.__debug_fname = debug_fname
logging.basicConfig(filename=debug_fname, level=logging.DEBUG)
self.__debug_log = logging.getLogger("ADF")
else:
self.__debug_log = None



#########

# Create property needed to return the name of the debug log file (debug_fname) to user:
@property
def debug_fname(self):
"""Return the debug log file name, or an empty string if debug logging was not requested."""
return self.__debug_fname

def debug_log(self, msg: str):

"""
Expand Down
188 changes: 154 additions & 34 deletions lib/adf_diag.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

"""
Location of the "AdfDiag" object, which
is used to store all relevant data and
Expand Down Expand Up @@ -541,7 +542,11 @@ def call_ncrcat(cmd):
# Aerosol Calcs
#--------------
#Always make sure PMID is made if aerosols are desired in config file
# Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent:
azl = res.get("aerosol_zonal_list", [])
if "PMID" not in diag_var_list:
if any(item in azl for item in diag_var_list):
Expand All @@ -551,21 +556,95 @@ def call_ncrcat(cmd):
diag_var_list += ["T"]
#End aerosol calcs

#Initialize dictionary for derived variable with needed list of constituents
constit_dict = {}

for var in diag_var_list:
# Notify user of new time series file:
print(f"\t - time series for {var}")

# Set error messages for printing/debugging
# Derived variable, but missing constituent list
constit_errmsg = f"create time series for {case_name}:"
constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable"
constit_errmsg += " is flagged for derivation, but is missing list of constiuents."
constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' "
constit_errmsg += f"for {var} in variable defaults yaml file."

#Check if current variable is a derived quantity
if var not in hist_file_var_list:
vres = res.get(var, {})
if "derivable_from" in vres:
constit_list = vres["derivable_from"]

#Initialize list for constituents
#NOTE: This is if the variable is NOT derivable but needs
# an empty list as a check later
constit_list = []

#Initialize boolean to check if variable is derivable
derive = False # assume it can't be derived and update if it can

#Initialize boolean for regular CAM variable constituents
try_cam_constits = True

#Check first if variable is potentially part of a CAM-CHEM run
if "derivable_from_cam_chem" in vres:
constit_list = vres["derivable_from_cam_chem"]
if constit_list:
if all(item in hist_file_ds.data_vars for item in constit_list):
#Set check to look for regular CAM constituents in variable defaults
try_cam_constits = False
derive = True
msg = f"create time series for {case_name}:"
msg += "\n\tLooks like this a CAM-CHEM run, "
msg += f"checking constituents for '{var}'"
self.debug_log(msg)
else:
self.debug_log(constit_errmsg)
#End if
#End if

#If not CAM-CHEM, check regular CAM runs
if try_cam_constits:
if "derivable_from" in vres:
derive = True
constit_list = vres["derivable_from"]
else:
# Missing variable or missing derivable_from argument
der_from_msg = f"create time series for {case_name}:"
der_from_msg += f"\n Can't create time series for {var}.\n\tEither "
der_from_msg += "the variable is missing from CAM output or it is a "
der_from_msg += "derived quantity and is missing the 'derivable_from' "
der_from_msg += "config argument.\n\tPlease add variable to CAM run "
der_from_msg += "or set appropriate argument in variable "
der_from_msg += "defaults yaml file."
self.debug_log(der_from_msg)
#End if
#End if

#Check if this variable can be derived
if (derive) and (constit_list):
for constit in constit_list:
if constit not in diag_var_list:
diag_var_list.append(constit)
#Add variable to list to derive
vars_to_derive.append(var)
#Add constituent list to variable key in dictionary
constit_dict[var] = constit_list
continue
#Log if this variable can be derived but is missing list of constituents
elif (derive) and (not constit_list):
self.debug_log(constit_errmsg)
continue
#Lastly, warn the user if the variable is not a derived quantity but is also not
#in the history file(s)
else:
msg = f"WARNING: {var} is not in the file {hist_files[0]}."
msg += " No time series will be generated."
msg = f"WARNING: {var} is not in the file {hist_files[0]} "
msg += "nor can it be derived.\n"
msg += "\t ** No time series will be generated."
print(msg)
continue
#End if
#End if

# Check if variable has a "lev" dimension according to first file:
has_lev = bool("lev" in hist_file_ds[var].dims)
Expand All @@ -588,9 +667,6 @@ def call_ncrcat(cmd):
# If not, then simply skip this variable:
continue

# Notify user of new time series file:
print(f"\t - time series for {var}")

# Variable list starts with just the variable
ncrcat_var_list = f"{var}"

Expand Down Expand Up @@ -651,7 +727,8 @@ def call_ncrcat(cmd):

if vars_to_derive:
self.derive_variables(
res=res, vars_to_derive=vars_to_derive, ts_dir=ts_dir[case_idx]
res=res, hist_str=hist_str, vars_to_derive=vars_to_derive,
ts_dir=ts_dir[case_idx], constit_dict=constit_dict
)
# End with

Expand Down Expand Up @@ -904,6 +981,12 @@ def setup_run_cvdp(self):
)
# End if

#Initialize objects that might not be declared later
case_name_baseline = None
baseline_ts_loc = None
syears_baseline = None
eyears_baseline = None

# check to see if there is a CAM baseline case. If there is, read in relevant information.
if not self.get_basic_info("compare_obs"):
case_name_baseline = self.get_baseline_info("cam_case_name")
Expand Down Expand Up @@ -1025,7 +1108,8 @@ def setup_run_cvdp(self):

#########

def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite=None):
def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=None,
constit_dict=None, overwrite=None):
"""
Derive variables according to steps given here. Since derivations will depend on the
variable, each variable to derive will need its own set of steps below.
Expand All @@ -1037,32 +1121,55 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
"""

#Loop through derived variables
for var in vars_to_derive:
print(f"\t - deriving time series for {var}")

#Check whether there are parts to derive from and if there is an associated equation
vres = res.get(var, {})
if "derivable_from" in vres:
constit_list = vres['derivable_from']
else:
print("WARNING: No constituents listed in defaults config file, moving on")
continue
#Grab list of constituents for this variable
constit_list = constit_dict[var]

#Grab all required time series files for derived var
#Grab all required time series files for derived variable
constit_files = []
for constit in constit_list:
if glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")):
constit_files.append(glob.glob(os.path.join(ts_dir, f"*.{constit}.*"))[0])
#Check if the constituent file is present, if so add it to list
if hist_str:
const_glob_str = f"*{hist_str}*.{constit}.*.nc"
else:
const_glob_str = f"*.{constit}.*.nc"
#end if
if glob.glob(os.path.join(ts_dir, const_glob_str)):
constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0])

#Check if all the constituent files were found
#Check if all the necessary constituent files were found
if len(constit_files) != len(constit_list):
ermsg = f"Not all constituent files present; {var} cannot be calculated."
ermsg += f" Please remove {var} from diag_var_list or find the relevant CAM files."
ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated."
ermsg += f" Please remove {var} from 'diag_var_list' or find the "
ermsg += "relevant CAM files.\n"
print(ermsg)
if constit_files:
#Add what's missing to debug log
dmsg = "create time series:"
dmsg += f"\n\tneeded constituents for derivation of "
dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in "
dmsg += f"{Path(constit_files[0]).parent}:\n\t\t"
dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}"
self.debug_log(dmsg)
else:
dmsg = "create time series:"
dmsg += f"\n\tneeded constituents for derivation of "
dmsg += f"{var}:\n\t\t- {constit_list}\n"
dmsg += f"\tNo constituent(s) found in history files"
self.debug_log(dmsg)

else:
#Open a new dataset with all the constituent files/variables
<<<<<<< adf_case_dataclass
ds = xr.open_mfdataset(constit_files).compute()

=======
ds = xr.open_mfdataset(constit_files)

>>>>>>> main
# create new file name for derived variable
derived_file = constit_files[0].replace(constit_list[0], var)

Expand All @@ -1071,9 +1178,9 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
if overwrite:
Path(derived_file).unlink()
else:
print(
f"[{__name__}] Warning: '{var}' file was found and overwrite is False. Will use existing file."
)
msg = f"[{__name__}] Warning: '{var}' file was found "
msg += "and overwrite is False. Will use existing file."
print(msg)
continue

#NOTE: this will need to be changed when derived equations are more complex! - JR
Expand All @@ -1084,33 +1191,44 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
der_val = 0
for v in constit_list:
der_val += ds[v]

#Set derived variable name and add to dataset
der_val.name = var
ds[var] = der_val

#Aerosol Calculations - used for zonal plots
#Aerosol Calculations
#----------------------------------------------------------------------------------
#These will be multiplied by rho (density of dry air)
ds_pmid_done = False
ds_t_done = False
<<<<<<< adf_case_dataclass
azl = res.get("aerosol_zonal_list", []) # User-defined defaults might not include aerosol zonal list
if var in azl:

=======

# User-defined defaults might not include aerosol zonal list
azl = res.get("aerosol_zonal_list", [])
if var in azl:

>>>>>>> main
#Only calculate once for all aerosol vars
if not ds_pmid_done:
ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0])
ds_pmid_done = True
if not ds_pmid:
errmsg = f"Missing necessary files for dry air density (rho) calculation.\n"
errmsg += "Please make sure 'PMID' is in the CAM run for aerosol calculations"
errmsg = "Missing necessary files for dry air density (rho) "
errmsg += "calculation.\nPlease make sure 'PMID' is in the CAM "
errmsg += "run for aerosol calculations"
print(errmsg)
continue
if not ds_t_done:
ds_t = _load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0])
ds_t_done = True
if not ds_t:
errmsg = f"Missing necessary files for dry air density (rho) calculation.\n"
errmsg += "Please make sure 'T' is in the CAM run for aerosol calculations"
errmsg = "Missing necessary files for dry air density (rho) "
errmsg += "calculation.\nPlease make sure 'T' is in the CAM "
errmsg += "run for aerosol calculations"
print(errmsg)
continue

Expand All @@ -1120,6 +1238,7 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
#Sulfate conversion factor
if var == "SO4":
ds[var] = ds[var]*(96./115.)
#----------------------------------------------------------------------------------

#Drop all constituents from final saved dataset
#These are not necessary because they have their own time series files
Expand All @@ -1131,7 +1250,8 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite
#Helper Function(s)
def _load_dataset(fils):
"""
This method exists to get an xarray Dataset from input file information that can be passed into the plotting methods.
This method exists to get an xarray Dataset from input file information that
can be passed into the plotting methods.
Parameters
----------
Expand All @@ -1157,10 +1277,10 @@ def my_formatwarning(msg, *args, **kwargs):
if len(fils) == 0:
warnings.warn("Input file list is empty.")
return None
elif len(fils) > 1:
if len(fils) > 1:
return xr.open_mfdataset(fils, combine='by_coords')
else:
return xr.open_dataset(fils[0])
#End if
#End def
########
########
4 changes: 2 additions & 2 deletions lib/adf_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def __init__(self, config_file, debug=False):

#Read hist_str (component.hist_num) from the yaml file, or set to default
hist_str = self.get_basic_info('hist_str')
#If hist_str is not present, then default to 'cam.h0':
#If hist_str is not present, then default to 'cam.h0a':
if not hist_str:
hist_str = 'cam.h0'
hist_str = 'cam.h0a'
#End if
self.__hist_str = hist_str

Expand Down
Loading

0 comments on commit de213fc

Please sign in to comment.