From b0bf039fffbe6586578441a42d6a1dd314264cff Mon Sep 17 00:00:00 2001
From: Brandon Paul
Date: Thu, 7 Sep 2023 15:23:49 -0700
Subject: [PATCH] fix formatting for black and pylint

---
 .../ML_AI_Plugin/generate_gradient_data.py | 166 ++++++++++--------
 1 file changed, 88 insertions(+), 78 deletions(-)

diff --git a/examples/other_files/ML_AI_Plugin/generate_gradient_data.py b/examples/other_files/ML_AI_Plugin/generate_gradient_data.py
index 2405dfe3f..481e9229e 100644
--- a/examples/other_files/ML_AI_Plugin/generate_gradient_data.py
+++ b/examples/other_files/ML_AI_Plugin/generate_gradient_data.py
@@ -24,33 +24,34 @@
 from sklearn.model_selection import train_test_split

 # Neural Net modules
-from keras import Input
-from keras.models import Model
-from keras.layers import Dense
-from keras.callbacks import EarlyStopping
+from tensorflow.keras import Input
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.callbacks import EarlyStopping
+

 def finite_difference(m1, m2, y1, y2, n_x):
     """
-    Calculate the first-order gradient between provided sample points m1 and
-    m2, where each point is assumed to be a vector with one or more input
+    Calculate the first-order gradient between provided sample points m1 and
+    m2, where each point is assumed to be a vector with one or more input
     variables x and exactly one output variable y. y1 is the value of y at
     m1, and y2 is the value of y at m2.
-
-    The total graident is calculated via chain rule assuming a multivariate
-    function y(x1, x2, x3, ...). In the notation below, D/D denotes a total
-    derivative and d/d denotes a partial derivative. Total derivatives are
-    functions of all (x1, x2, x3, ...) whereas partial derivatives are
+
+    The total gradient is calculated via the chain rule assuming a multivariate
+    function y(x1, x2, x3, ...). In the notation below, D/D denotes a total
+    derivative and d/d denotes a partial derivative. Total derivatives are
+    functions of all (x1, x2, x3, ...) whereas partial derivatives are
     functions of one input (e.g. x1) holding (x2, x3, ...) constant:
-
+
     Dy/Dx1 = (dy/dx1)(dx1/dx1) + (dy/dx2)(dx2/dx1) + (dy/dx3)(dx3/dx1) +...
-
-    Note that (dx1/dx1) = 1. The partial derivatives dv2/dv1 are estimated
+
+    Note that (dx1/dx1) = 1. The partial derivatives dv2/dv1 are estimated
     between sample points m1 and m2 as:
-
+
     dv2/dv1 at (m1+m2)/2 = [v2 at m2 - v2 at m1]/[v1 at m2 - v1 at m1]
-
-    The method assumes that m1 is the first point and m2 is the second point,
-    and returns a vector dy_dm that is the same length as m1 and m2; m1 and m2
+
+    The method assumes that m1 is the first point and m2 is the second point,
+    and returns a vector dy_dm that is the same length as m1 and m2; m1 and m2
     must be the same length. y1 and y2 must be float or integer values.
     """

@@ -58,7 +59,7 @@ def diff(y2, y1, x2, x1):
         """
         Calculate derivative of y w.r.t. x.
""" - dv2_dv1 = (y2 - y1)/(x2 - x1) + dv2_dv1 = (y2 - y1) / (x2 - x1) return dv2_dv1 @@ -67,26 +68,26 @@ def diff(y2, y1, x2, x1): for i in range(n_x): # for each input xi dy_dm[i] = sum( - diff(y2, y1, m2[j], m1[j]) * # dy/dxj - diff(m2[j], m1[j], m2[i], m1[i]) # dxj/dxi + diff(y2, y1, m2[j], m1[j]) + * diff(m2[j], m1[j], m2[i], m1[i]) # dy/dxj # dxj/dxi for j in range(n_x) - ) # for each input xj + ) # for each input xj mid_m[i] = m2[i] - m1[i] return mid_m, dy_dm + def predict_gradients(midpoints, gradients_midpoints, x, n_m, n_x): """ - Train MLP regression model with data normalization on gradients at + Train MLP regression model with data normalization on gradients at midpoints to predict gradients at sample point. """ # split and normalize data print("Splitting data into training and test sets...") - X_train, X_test, y_train, y_test = train_test_split(midpoints, - gradients_midpoints, - test_size=0.2, - random_state=123) + X_train, X_test, y_train, y_test = train_test_split( + midpoints, gradients_midpoints, test_size=0.2, random_state=123 + ) print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) # use minMax scaler @@ -95,48 +96,54 @@ def predict_gradients(midpoints, gradients_midpoints, x, n_m, n_x): X_train = min_max_scaler.fit_transform(X_train) X_test = min_max_scaler.transform(X_test) - print("Training gradient prediction model...") + print("Training gradient prediction model...") inputs = Input(shape=X_train.shape[1]) # input node, layer for x1, x2, ... - h1 = Dense(6, activation='relu')(inputs) - h2 = Dense(6, activation='relu')(h1) - outputs = Dense(n_x, activation='linear')(h2) # output node, layer for dy/dx1, dy/dx2, ... + h1 = Dense(6, activation="relu")(inputs) + h2 = Dense(6, activation="relu")(h1) + outputs = Dense(n_x, activation="linear")( + h2 + ) # output node, layer for dy/dx1, dy/dx2, ... model = Model(inputs=inputs, outputs=outputs) - model.summary() # see what your model looks like - + model.summary() # see what your model looks like + # compile the model - model.compile(optimizer='rmsprop', loss='mse', metrics=['mae']) + model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"]) # early stopping callback - es = EarlyStopping(monitor='val_loss', - mode='min', - patience=50, - restore_best_weights = True) + es = EarlyStopping( + monitor="val_loss", mode="min", patience=50, restore_best_weights=True + ) # fit the model! # attach it to a new variable called 'history' in case # to look at the learning curves - history = model.fit(X_train, y_train, - validation_data = (X_test, y_test), - callbacks=[es], - epochs=100, - batch_size=50, - verbose=1) - if len(history.history['loss']) == 100: + history = model.fit( + X_train, + y_train, + validation_data=(X_test, y_test), + callbacks=[es], + epochs=100, + batch_size=50, + verbose=1, + ) + if len(history.history["loss"]) == 100: print("Successfully completed, 100 epochs run.") else: - print("Validation loss stopped improving after ", - len(history.history['loss']), - "epochs. Successfully completed after early stopping.") + print( + "Validation loss stopped improving after ", + len(history.history["loss"]), + "epochs. Successfully completed after early stopping.", + ) history_dict = history.history - loss_values = history_dict['loss'] # you can change this - val_loss_values = history_dict['val_loss'] # you can also change this - epochs = range(1, len(loss_values) + 1) # range of X (no. 
-    plt.plot(epochs, loss_values, 'bo', label='Training loss')
-    plt.plot(epochs, val_loss_values, 'orange', label='Validation loss')
-    plt.title('Training and validation loss')
-    plt.xlabel('Epochs')
-    plt.ylabel('Loss')
+    loss_values = history_dict["loss"]  # you can change this
+    val_loss_values = history_dict["val_loss"]  # you can also change this
+    epochs = range(1, len(loss_values) + 1)  # range of X (no. of epochs)
+    plt.plot(epochs, loss_values, "bo", label="Training loss")
+    plt.plot(epochs, val_loss_values, "orange", label="Validation loss")
+    plt.title("Training and validation loss")
+    plt.xlabel("Epochs")
+    plt.ylabel("Loss")
     plt.legend()
     plt.show()

@@ -144,28 +151,29 @@ def predict_gradients(midpoints, gradients_midpoints, x, n_m, n_x):

     return gradients

+
 def generate_gradients(xy_data, n_x):
     """
-    This method implements finite difference approximation and NN regression
-    to estimate the first-order derivatives of a given dataset with columns
-    (x1, x2, ...., xN, y1, y2, ..., yM) where N is the number of input
+    This method implements finite difference approximation and NN regression
+    to estimate the first-order derivatives of a given dataset with columns
+    (x1, x2, ..., xN, y1, y2, ..., yM) where N is the number of input
     variables and M is the number of output variables. The method takes an
-    array of size (m, n_x + n_y) where m is the number of samples, n_x is the
-    number of input variables, and n_y is the number of output variables. The
-    method returns an array of size (m, n_x, n_y) where the first dimension
+    array of size (m, n_x + n_y) where m is the number of samples, n_x is the
+    number of input variables, and n_y is the number of output variables. The
+    method returns an array of size (m, n_x, n_y) where the first dimension
     spans samples, the second dimension spans gradients dy/dx for each x, and
     the third dimension spans gradients dy/dx for each y.
-
+
     For example, passing an array with 100 samples, 8 inputs and 2 outputs
     will return an array of size (100, 8, 2) where (:, :, 0) contains all
     dy1/dx and (:, :, 1) contains all dy2/dx.
-
+
     The workflow of this method is as follows:
     1. Import xy data in array of size (m, n_x + n_y) and split into x, y
-    2. Generate dy in n_y arrays of size (m-1, n_x) which correspond to
+    2. Generate dy in n_y arrays of size (m-1, n_x) which correspond to
     points between samples
     3. Normalize x, dy on [0, 1] and train MLP model dy(x) for each dy
-    4. Predict dy(x) for m samples from xy data to generate n_y arrays of
+    4. Predict dy(x) for m samples from xy data to generate n_y arrays of
     size (m, n_x) which correspond to sample points
     5. Concatenate predicted gradients into array of size (m, n_x, n_y)
     """
@@ -180,32 +188,34 @@ def generate_gradients(xy_data, n_x):
     # between the sample points, i.e. len(y) - len(dy_midpoints) = 1.
     # in both midpoints and gradients_midpoints, each column corresponds to an
     # input variable xi and each row corresponds to a point between two samples
-    midpoints = np.empty((n_m-1, n_x))
-    gradients_midpoints = np.empty((n_m-1, n_x))
+    midpoints = np.empty((n_m - 1, n_x))
+    gradients_midpoints = np.empty((n_m - 1, n_x))

     # get midpoint gradients for one pair of samples at a time and save
-    for m in range(n_m-1):  # we have (n_m - 1) adjacent sample pairs
-        print("Midpoint gradient ", m+1, " of ", n_m-1, " generated.")
+    for m in range(n_m - 1):  # we have (n_m - 1) adjacent sample pairs
+        print("Midpoint gradient ", m + 1, " of ", n_m - 1, " generated.")
         midpoints[m], gradients_midpoints[m] = finite_difference(
-            m1 = x[m,:],
-            m2 = x[m+1,:],
-            y1 = y[m][0],  # each entry in y is an array somehow
-            y2 = y[m+1][0],  # each entry in y is an array somehow
-            n_x = n_x
-            )
+            m1=x[m, :],
+            m2=x[m + 1, :],
+            y1=y[m][0],  # each entry in y is an array somehow
+            y2=y[m + 1][0],  # each entry in y is an array somehow
+            n_x=n_x,
+        )
     print("Midpoint gradient generation complete.")
     print()

     # leverage NN regression to predict gradients at sample points
     gradients = predict_gradients(
-        midpoints=midpoints,
+        midpoints=midpoints,
         gradients_midpoints=gradients_midpoints,
         x=x,
         n_m=n_m,
-        n_x=n_x,)
+        n_x=n_x,
+    )

     return gradients

+
 if __name__ == "__main__":
     data = pd.read_csv(r"MEA_carbon_capture_dataset_mimo.csv")
     data_array = np.array(data, ndmin=2)
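
A note for reviewers on the chain-rule bookkeeping in the finite_difference docstring above: the standalone sketch below (not part of the patch) reproduces the same Dy/Dxi = sum_j (dy/dxj)(dxj/dxi) sum for a two-input case, with made-up sample values, so the reformatted generator expression can be checked against a hand calculation.

# Standalone illustration of the chain-rule sum used in finite_difference.
# The sample points and outputs below are made up for illustration only.
import numpy as np

def finite_difference_demo(m1, m2, y1, y2, n_x):
    dy_dm = np.empty(n_x)
    mid_m = np.empty(n_x)
    for i in range(n_x):
        # Dy/Dxi = sum_j (dy/dxj) * (dxj/dxi), estimated between m1 and m2
        dy_dm[i] = sum(
            (y2 - y1) / (m2[j] - m1[j])          # dy/dxj
            * (m2[j] - m1[j]) / (m2[i] - m1[i])  # dxj/dxi
            for j in range(n_x)
        )
        mid_m[i] = m2[i] - m1[i]  # spacing m2 - m1, as stored by the patch
    return mid_m, dy_dm

m1, m2 = np.array([1.0, 2.0]), np.array([1.5, 3.0])
y1, y2 = 4.0, 7.0
mid, grad = finite_difference_demo(m1, m2, y1, y2, n_x=2)
print(mid, grad)  # mid = [0.5, 1.0], grad = [12.0, 6.0]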
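The early-stopping block in predict_gradients stops training once val_loss has not improved for 50 epochs and rolls the weights back to the best epoch seen. Below is a minimal standalone sketch of the same callback pattern on a toy regression; the toy data, layer sizes, and the use of validation_split are illustrative assumptions, not values from the patch.

# Toy regression using the same EarlyStopping pattern as predict_gradients.
# Data, layer sizes, and validation_split are assumptions for illustration.
import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

rng = np.random.default_rng(0)
X = rng.uniform(size=(200, 3))
y = X @ np.array([1.0, -2.0, 0.5])  # linear target for the toy example

inputs = Input(shape=(3,))
h = Dense(6, activation="relu")(inputs)
outputs = Dense(1, activation="linear")(h)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

# stop once val_loss has not improved for 50 consecutive epochs,
# then restore the weights from the best epoch
es = EarlyStopping(monitor="val_loss", mode="min", patience=50, restore_best_weights=True)
history = model.fit(
    X, y, validation_split=0.2, callbacks=[es], epochs=100, batch_size=50, verbose=0
)
print(len(history.history["loss"]), "epochs run")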
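Finally, a hypothetical driver showing how generate_gradients might be exercised end to end after this formatting pass. The function name and its (xy_data, n_x) signature come from the patch; the synthetic dataset and the assumption that generate_gradient_data.py is importable from the working directory are mine. The script itself instead reads MEA_carbon_capture_dataset_mimo.csv in its __main__ block.

# Hypothetical driver for the reformatted script; synthetic data stands in for
# the CSV used in the patch, and generate_gradient_data.py is assumed to be
# importable from the current working directory.
import numpy as np
from generate_gradient_data import generate_gradients

rng = np.random.default_rng(123)
n_m, n_x = 100, 8                      # samples, input variables
x = rng.uniform(size=(n_m, n_x))
y = (x**2).sum(axis=1, keepdims=True)  # single output column
xy_data = np.hstack([x, y])            # shape (n_m, n_x + 1)

gradients = generate_gradients(xy_data=xy_data, n_x=n_x)
print(np.shape(gradients))             # per the docstring: samples x inputs x outputs

Note that this trains the small MLP in predict_gradients and opens its loss plot via plt.show(), so it is best run interactively.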