
Commit

fix formatting for black and pylint
bpaul4 committed Sep 7, 2023
1 parent 4ebb2a2 commit b0bf039
Showing 1 changed file with 88 additions and 78 deletions.
166 changes: 88 additions & 78 deletions examples/other_files/ML_AI_Plugin/generate_gradient_data.py
@@ -24,41 +24,42 @@
from sklearn.model_selection import train_test_split

# Neural Net modules
-from keras import Input
-from keras.models import Model
-from keras.layers import Dense
-from keras.callbacks import EarlyStopping
+from tensorflow.keras import Input
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.callbacks import EarlyStopping


def finite_difference(m1, m2, y1, y2, n_x):
"""
-Calculate the first-order gradient between provided sample points m1 and
-m2, where each point is assumed to be a vector with one or more input
+Calculate the first-order gradient between provided sample points m1 and
+m2, where each point is assumed to be a vector with one or more input
variables x and exactly one output variable y. y1 is the value of y at m1,
and y2 is the value of y at m2.
-The total gradient is calculated via chain rule assuming a multivariate
-function y(x1, x2, x3, ...). In the notation below, D/D denotes a total
-derivative and d/d denotes a partial derivative. Total derivatives are
-functions of all (x1, x2, x3, ...) whereas partial derivatives are
+The total gradient is calculated via chain rule assuming a multivariate
+function y(x1, x2, x3, ...). In the notation below, D/D denotes a total
+derivative and d/d denotes a partial derivative. Total derivatives are
+functions of all (x1, x2, x3, ...) whereas partial derivatives are
functions of one input (e.g. x1) holding (x2, x3, ...) constant:
Dy/Dx1 = (dy/dx1)(dx1/dx1) + (dy/dx2)(dx2/dx1) + (dy/dx3)(dx3/dx1) +...
-Note that (dx1/dx1) = 1. The partial derivatives dv2/dv1 are estimated
+Note that (dx1/dx1) = 1. The partial derivatives dv2/dv1 are estimated
between sample points m1 and m2 as:
dv2/dv1 at (m1+m2)/2 = [v2 at m2 - v2 at m1]/[v1 at m2 - v1 at m1]
-The method assumes that m1 is the first point and m2 is the second point,
-and returns a vector dy_dm that is the same length as m1 and m2; m1 and m2
+The method assumes that m1 is the first point and m2 is the second point,
+and returns a vector dy_dm that is the same length as m1 and m2; m1 and m2
must be the same length. y1 and y2 must be float or integer values.
"""

def diff(y2, y1, x2, x1):
"""
Calculate derivative of y w.r.t. x.
"""
-dv2_dv1 = (y2 - y1)/(x2 - x1)
+dv2_dv1 = (y2 - y1) / (x2 - x1)

return dv2_dv1
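
A quick numeric check of the total-derivative formula in the docstring above, using an assumed toy function y = x1*x2 (the function and sample values are illustrative only, not part of this file):

# Illustration only: hand evaluation of
#   Dy/Dxi = sum_j (dy/dxj) * (dxj/dxi)
# with each partial estimated as dv2/dv1 = (v2 at m2 - v2 at m1) / (v1 at m2 - v1 at m1).
m1, y1 = (1.0, 2.0), 2.0  # y = x1*x2 = 2.0 at m1
m2, y2 = (1.5, 3.0), 4.5  # y = x1*x2 = 4.5 at m2

dy_dx1 = (y2 - y1) / (m2[0] - m1[0])  # 2.5 / 0.5 = 5.0
dy_dx2 = (y2 - y1) / (m2[1] - m1[1])  # 2.5 / 1.0 = 2.5
dx2_dx1 = (m2[1] - m1[1]) / (m2[0] - m1[0])  # 1.0 / 0.5 = 2.0
dx1_dx2 = (m2[0] - m1[0]) / (m2[1] - m1[1])  # 0.5

Dy_Dx1 = dy_dx1 * 1.0 + dy_dx2 * dx2_dx1  # 5.0 + 5.0 = 10.0
Dy_Dx2 = dy_dx1 * dx1_dx2 + dy_dx2 * 1.0  # 2.5 + 2.5 = 5.0
print(Dy_Dx1, Dy_Dx2)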

@@ -67,26 +68,26 @@ def diff(y2, y1, x2, x1):

for i in range(n_x): # for each input xi
dy_dm[i] = sum(
-diff(y2, y1, m2[j], m1[j]) *  # dy/dxj
-diff(m2[j], m1[j], m2[i], m1[i])  # dxj/dxi
+diff(y2, y1, m2[j], m1[j])
+* diff(m2[j], m1[j], m2[i], m1[i])  # dy/dxj  # dxj/dxi
for j in range(n_x)
-)  # for each input xj
+)  # for each input xj

mid_m[i] = m2[i] - m1[i]

return mid_m, dy_dm
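
As a minimal usage sketch of finite_difference (sample values invented for illustration, and assuming NumPy arrays as the indexing above suggests), the routine takes one pair of adjacent samples and returns two vectors of length n_x:

import numpy as np

# Two adjacent samples with n_x = 2 inputs and one scalar output value each.
m1 = np.array([1.0, 2.0])
m2 = np.array([1.5, 3.0])
y1, y2 = 2.0, 4.5

mid_m, dy_dm = finite_difference(m1=m1, m2=m2, y1=y1, y2=y2, n_x=2)
print(mid_m)  # per-input differences m2 - m1, as computed in the loop above
print(dy_dm)  # chain-rule finite-difference gradient estimate per input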


def predict_gradients(midpoints, gradients_midpoints, x, n_m, n_x):
"""
-Train MLP regression model with data normalization on gradients at
+Train MLP regression model with data normalization on gradients at
midpoints to predict gradients at sample point.
"""
# split and normalize data
print("Splitting data into training and test sets...")
-X_train, X_test, y_train, y_test = train_test_split(midpoints,
-gradients_midpoints,
-test_size=0.2,
-random_state=123)
+X_train, X_test, y_train, y_test = train_test_split(
+midpoints, gradients_midpoints, test_size=0.2, random_state=123
+)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# use minMax scaler
@@ -95,77 +96,84 @@ def predict_gradients(midpoints, gradients_midpoints, x, n_m, n_x):
X_train = min_max_scaler.fit_transform(X_train)
X_test = min_max_scaler.transform(X_test)

-print("Training gradient prediction model...")
+print("Training gradient prediction model...")
inputs = Input(shape=X_train.shape[1]) # input node, layer for x1, x2, ...
-h1 = Dense(6, activation='relu')(inputs)
-h2 = Dense(6, activation='relu')(h1)
-outputs = Dense(n_x, activation='linear')(h2)  # output node, layer for dy/dx1, dy/dx2, ...
+h1 = Dense(6, activation="relu")(inputs)
+h2 = Dense(6, activation="relu")(h1)
+outputs = Dense(n_x, activation="linear")(
+h2
+)  # output node, layer for dy/dx1, dy/dx2, ...
model = Model(inputs=inputs, outputs=outputs)
-model.summary() # see what your model looks like
+model.summary()  # see what your model looks like

# compile the model
-model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
+model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

# early stopping callback
-es = EarlyStopping(monitor='val_loss',
-mode='min',
-patience=50,
-restore_best_weights = True)
+es = EarlyStopping(
+monitor="val_loss", mode="min", patience=50, restore_best_weights=True
+)

# fit the model!
# attach it to a new variable called 'history'
# in case we want to look at the learning curves
-history = model.fit(X_train, y_train,
-validation_data = (X_test, y_test),
-callbacks=[es],
-epochs=100,
-batch_size=50,
-verbose=1)
-if len(history.history['loss']) == 100:
+history = model.fit(
+X_train,
+y_train,
+validation_data=(X_test, y_test),
+callbacks=[es],
+epochs=100,
+batch_size=50,
+verbose=1,
+)
+if len(history.history["loss"]) == 100:
print("Successfully completed, 100 epochs run.")
else:
-print("Validation loss stopped improving after ",
-len(history.history['loss']),
-"epochs. Successfully completed after early stopping.")
+print(
+"Validation loss stopped improving after ",
+len(history.history["loss"]),
+"epochs. Successfully completed after early stopping.",
+)

history_dict = history.history
-loss_values = history_dict['loss'] # you can change this
-val_loss_values = history_dict['val_loss'] # you can also change this
-epochs = range(1, len(loss_values) + 1) # range of X (no. of epochs)
-plt.plot(epochs, loss_values, 'bo', label='Training loss')
-plt.plot(epochs, val_loss_values, 'orange', label='Validation loss')
-plt.title('Training and validation loss')
-plt.xlabel('Epochs')
-plt.ylabel('Loss')
+loss_values = history_dict["loss"]  # you can change this
+val_loss_values = history_dict["val_loss"]  # you can also change this
+epochs = range(1, len(loss_values) + 1)  # range of X (no. of epochs)
+plt.plot(epochs, loss_values, "bo", label="Training loss")
+plt.plot(epochs, val_loss_values, "orange", label="Validation loss")
+plt.title("Training and validation loss")
+plt.xlabel("Epochs")
+plt.ylabel("Loss")
plt.legend()
plt.show()

gradients = model.predict(x) # predict against original sample points

return gradients
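
A hedged usage sketch of predict_gradients with synthetic inputs (shapes and values are invented for illustration): it trains the small Keras model above, plots the loss curves (plt.show() blocks until the window is closed), and returns one predicted gradient row per sample in x:

import numpy as np

rng = np.random.default_rng(0)
n_m, n_x = 100, 3
x = rng.random((n_m, n_x))  # original sample points
midpoints = rng.random((n_m - 1, n_x))  # stand-in for finite-difference midpoints
gradients_midpoints = rng.random((n_m - 1, n_x))  # stand-in for midpoint gradients

grads = predict_gradients(midpoints, gradients_midpoints, x, n_m, n_x)
print(grads.shape)  # expected (n_m, n_x), i.e. (100, 3)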


def generate_gradients(xy_data, n_x):
"""
-This method implements finite difference approximation and NN regression
-to estimate the first-order derivatives of a given dataset with columns
-(x1, x2, ...., xN, y1, y2, ..., yM) where N is the number of input
+This method implements finite difference approximation and NN regression
+to estimate the first-order derivatives of a given dataset with columns
+(x1, x2, ...., xN, y1, y2, ..., yM) where N is the number of input
variables and M is the number of output variables. The method takes an
-array of size (m, n_x + n_y) where m is the number of samples, n_x is the
-number of input variables, and n_y is the number of output variables. The
-method returns an array of size (m, n_x, n_y) where the first dimension
+array of size (m, n_x + n_y) where m is the number of samples, n_x is the
+number of input variables, and n_y is the number of output variables. The
+method returns an array of size (m, n_x, n_y) where the first dimension
spans samples, the second dimension spans gradients dy/dx for each x, and
the third dimension spans gradients dy/dx for each y.
For example, passing an array with 100 samples, 8 inputs and 2 outputs will
return an array of size (100, 8, 2) where (:, :, 0) contains all dy1/dx and
(:, :, 1) contains all dy2/dx.
The workflow of this method is as follows:
1. Import xy data in array of size (m, n_x + n_y) and split into x, y
-2. Generate dy in n_y arrays of size (m-1, n_x) which correspond to
+2. Generate dy in n_y arrays of size (m-1, n_x) which correspond to
points between samples
3. Normalize x, dy on [0, 1] and train MLP model dy(x) for each dy
-4. Predict dy(x) for m samples from xy data to generate n_y arrays of
+4. Predict dy(x) for m samples from xy data to generate n_y arrays of
size (m, n_x) which correspond to sample points
5. Concatenate predicted gradients into array of size (m, n_x, n_y)
"""
@@ -180,32 +188,34 @@ def generate_gradients(xy_data, n_x):
# between the sample points, i.e. len(y) - len(dy_midpoints) = 1.
# in both midpoints and gradients_midpoints, each column corresponds to an
# input variable xi and each row corresponds to a point between two samples
-midpoints = np.empty((n_m-1, n_x))
-gradients_midpoints = np.empty((n_m-1, n_x))
+midpoints = np.empty((n_m - 1, n_x))
+gradients_midpoints = np.empty((n_m - 1, n_x))

# get midpoint gradients for one pair of samples at a time and save
-for m in range(n_m-1): # we have (n_m - 1) adjacent sample pairs
-print("Midpoint gradient ", m+1, " of ", n_m-1, " generated.")
+for m in range(n_m - 1):  # we have (n_m - 1) adjacent sample pairs
+print("Midpoint gradient ", m + 1, " of ", n_m - 1, " generated.")
midpoints[m], gradients_midpoints[m] = finite_difference(
-m1 = x[m,:],
-m2 = x[m+1,:],
-y1 = y[m][0], # each entry in y is an array somehow
-y2 = y[m+1][0], # each entry in y is an array somehow
-n_x = n_x
-)
+m1=x[m, :],
+m2=x[m + 1, :],
+y1=y[m][0],  # each entry in y is an array somehow
+y2=y[m + 1][0],  # each entry in y is an array somehow
+n_x=n_x,
+)
print("Midpoint gradient generation complete.")
print()

# leverage NN regression to predict gradients at sample points
gradients = predict_gradients(
-midpoints=midpoints,
+midpoints=midpoints,
gradients_midpoints=gradients_midpoints,
x=x,
n_m=n_m,
-n_x=n_x,)
+n_x=n_x,
+)

return gradients
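
A minimal sketch of calling generate_gradients on synthetic data (array sizes chosen to match the docstring's own example; the random values are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
m, n_x, n_y = 100, 8, 2
xy_data = rng.random((m, n_x + n_y))  # columns ordered x1..x8, y1, y2

gradients = generate_gradients(xy_data, n_x=n_x)
print(gradients.shape)  # (100, 8, 2) per the docstring above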


if __name__ == "__main__":
data = pd.read_csv(r"MEA_carbon_capture_dataset_mimo.csv")
data_array = np.array(data, ndmin=2)

