diff --git a/openfasoc/MLoptimization/README.md b/openfasoc/MLoptimization/README.md
deleted file mode 100644
index 7e661dbc3..000000000
--- a/openfasoc/MLoptimization/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# Machine Learning Optimization
-Code for a reinforcement learning loop that drives the OpenFASOC generators to optimize circuit metrics.
-
-## Code Setup
-The code is set up as follows:
-
-The top level directory contains the following files:
-* model.py: top-level RL script, used to set hyperparameters and run training
-* run_training.py: contains the OpenAI Gym environment used in the RL loop; it defines the parameter space, the valid action steps, and the reward
-* eval.py: contains all of the code for evaluation
-* gen_spec.py: contains all of the random specification generation
-
-## Training
-Make sure that you have Gymnasium (OpenAI Gym) and Ray RLlib installed before running the scripts below.
-
-To generate the design specifications that the agent trains on, run:
-```
-python3.10 gen_spec.py
-```
-The result is a yaml file written to ../generators/gdsfactory-gen/.
-
-To train the agent, run the following from the top level directory:
-```
-python3.10 model.py
-```
-The training checkpoints will be saved in your home directory under ray\_results. TensorBoard can be used to load reward and loss plots using the command:
-
-```
-tensorboard --logdir path/to/checkpoint
-```
-
-## Validation
-The evaluation script takes the trained agent and gives it new specs that the agent has never seen before. To generate new design specs, run gen_spec.py again with your desired number of specs to validate on. To run validation, point the script at a trained checkpoint:
-
-```
-python3.10 eval.py --checkpoint_dir path/to/checkpoint
-```
-
-The evaluation result will be saved to ../generators/gdsfactory-gen/.
-
-## Results
-Please note that results vary greatly based on random seed and spec generation (both for testing and validation). An example spec file is provided that was used to generate the results below.
-
-
-
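For orientation before the deleted sources that follow: model.py and eval.py both drive the `Envir` Gym environment defined in run_training.py. Below is a minimal sketch of constructing and stepping that environment by hand, mirroring the `main()` at the bottom of run_training.py. It assumes the gdsfactory generator directory is reachable at `../generators/gdsfactory-gen/` and that the training spec file `new_spec_1.yaml` referenced by the environment has already been generated.

```
import sys
sys.path.append('../generators/gdsfactory-gen/')  # location of sky130_nist_tapeout

from run_training import Envir

# Same env_config that model.py passes to RLlib for training.
env = Envir(env_config={"generalize": True, "run_valid": False, "horizon": 20})

# reset() picks a target spec and runs one op-amp build + simulation.
obs, info = env.reset()

# The action space is a tuple of ten Discrete(3) choices, one per tunable
# parameter; action_meaning = [-1, 0, 1], so index 1 leaves a parameter unchanged.
action = [1] * 10
obs, reward, terminated, truncated, info = env.step(action)
print(reward)
```

Each `step()` re-runs the op-amp build and simulation, so a single episode can take a while; the RLlib configuration in model.py parallelizes this across 32 rollout workers.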
diff --git a/openfasoc/MLoptimization/eval.py b/openfasoc/MLoptimization/eval.py
deleted file mode 100644
index cb617e57a..000000000
--- a/openfasoc/MLoptimization/eval.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#evaluation imports
-import numpy as np
-import gym
-import ray
-import ray.tune as tune
-from ray.rllib.algorithms.ppo import PPO
-import sys
-sys.path.append('../generators/gdsfactory-gen/')  # location of sky130_nist_tapeout
-from run_training import Envir
-from sky130_nist_tapeout import single_build_and_simulation
-import pickle
-import yaml
-from pathlib import Path
-import argparse
-
-# inverse of Envir.lookup(): recover a raw spec value from its normalized form
-def unlookup(norm_spec, goal_spec):
-    spec = -1*np.multiply((norm_spec+1), goal_spec)/(norm_spec-1)
-    return spec
-
-specs = yaml.safe_load(Path('newnew_eval_3.yaml').read_text())
-
-#
-#evaluation set up
-env_config = {
-    "generalize":True,
-    "num_valid":2,
-    "save_specs":False,
-    "inputspec":specs,
-    "run_valid":True,
-    "horizon":25,
-    }
-
-config_eval = {
-    #"sample_batch_size": 200,
-    "env": Envir,
-    "env_config":{
-        "generalize":True,
-        "num_valid":2,
-        "save_specs":False,
-        "inputspec":specs,
-        "run_valid":True,
-        "horizon":25,
-        },
-    }
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--checkpoint_dir', '-cpd', type=str)
-args = parser.parse_args()
-env = Envir(env_config=env_config)
-
-# restore the trained PPO agent from the checkpoint passed on the command line
-agent = PPO.from_checkpoint(args.checkpoint_dir)
-
-norm_spec_ref = env.global_g
-spec_num = len(env.specs)
-
-rollouts = []
-next_states = []
-obs_reached = []
-obs_nreached = []
-action_array = []
-action_arr_comp = []
-rollout_steps = 0
-reached_spec = 0
-f = open("newnewnew_eval__3.txt", "a")
-
-while rollout_steps < 100:
-    rollout_num = []
-    state, info = env.reset()
-
-    done = False
-    truncated = False
-    reward_total = 0.0
-    steps=0
-    f.write('new----------------------------------------')
-    while not done and not truncated:
-        action = agent.compute_single_action(state)
-        action_array.append(action)
-
-        next_state, reward, done, truncated, info = env.step(action)
-        f.write(str(action)+'\n')
-        f.write(str(reward)+'\n')
-        f.write(str(done)+'\n')
-        print(next_state)
-        print(action)
-        print(reward)
-        print(done)
-        reward_total += reward
-
-        rollout_num.append(reward)
-        next_states.append(next_state)
-
-        state = next_state
-
-    norm_ideal_spec = state[spec_num:spec_num+spec_num]
-    ideal_spec = unlookup(norm_ideal_spec, norm_spec_ref)
-    if done == True:
-        reached_spec += 1
-        obs_reached.append(ideal_spec)
-        action_arr_comp.append(action_array)
-        action_array = []
-        pickle.dump(action_arr_comp, open("action_arr_test", "wb"))
-    else:
-        obs_nreached.append(ideal_spec)  #save unreached observation
-        action_array=[]
-    f.write('done----------------------------------------')
-    rollouts.append(rollout_num)
-    print("Episode reward", reward_total)
-    rollout_steps+=1
-    #if out is not None:
-    #pickle.dump(rollouts, open(str(out)+'reward', "wb"))
-    pickle.dump(obs_reached, open("opamp_obs_reached_test","wb"))
-    pickle.dump(obs_nreached, open("opamp_obs_nreached_test","wb"))
-
-    f.write("Specs reached: " + str(reached_spec) + "/" + str(len(obs_nreached)))
-    print("Specs reached: " + str(reached_spec) + "/" + str(len(obs_nreached)))
-
-print("Num specs reached: " + str(reached_spec) + "/" + str(rollout_steps))
\ No newline at end of file
diff --git a/openfasoc/MLoptimization/gen_spec.py b/openfasoc/MLoptimization/gen_spec.py
deleted file mode 100755
index 60c5023f2..000000000
--- a/openfasoc/MLoptimization/gen_spec.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python3
-## Generate the design specifications and save them to a yaml file
-
-import numpy as np
-import random
-import yaml
-import os
-import argparse
-
-def gen_data(env, num_specs):
-    # env: path of the yaml file to write; num_specs: number of random design points
-
-    specs_range = {
-        "gain_min" : [float(1000338000.0), float(3000338000.0)],
-        "FOM" : [float(5*10**11), float(5*10**11)]
-        }
-    specs_range_vals = list(specs_range.values())
-    specs_valid = []
-    for spec in specs_range_vals:
-        if isinstance(spec[0],int):
-            list_val = [random.randint(int(spec[0]),int(spec[1])) for x in range(0,num_specs)]
-        else:
-            list_val = [random.uniform(float(spec[0]),float(spec[1])) for x in range(0,num_specs)]
-        specs_valid.append(tuple(list_val))
-    i=0
-    for key,value in specs_range.items():
-        specs_range[key] = specs_valid[i]
-        i+=1
-
-    output = str(specs_range)
-    with open(env, 'w') as f:
-        f.write(output.replace('(','[').replace(')',']').replace(',',',\n'))
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--num_specs', type=int, default=50)
-    args = parser.parse_args()
-
-    gen_data("newnew_eval_3.yaml", args.num_specs)
-
-if __name__=="__main__":
-    main()
\ No newline at end of file
diff --git a/openfasoc/MLoptimization/image1.png b/openfasoc/MLoptimization/image1.png
deleted file mode 100644
index b94630ef7..000000000
Binary files a/openfasoc/MLoptimization/image1.png and /dev/null differ
diff --git a/openfasoc/MLoptimization/image2.png b/openfasoc/MLoptimization/image2.png
deleted file mode 100644
index 24dca66ae..000000000
Binary files a/openfasoc/MLoptimization/image2.png and /dev/null differ
diff --git a/openfasoc/MLoptimization/model.py b/openfasoc/MLoptimization/model.py
deleted file mode 100644
index 30defd612..000000000
--- a/openfasoc/MLoptimization/model.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#training import
-import gym
-import ray
-import ray.tune as tune
-import sys
-from ray.rllib.algorithms.ppo import PPO
-sys.path.append('../generators/gdsfactory-gen/')  # location of sky130_nist_tapeout
-from run_training import Envir
-from sky130_nist_tapeout import single_build_and_simulation
-
-import argparse
-#
-#training set up
-parser = argparse.ArgumentParser()
-parser.add_argument('--checkpoint_dir', '-cpd', type=str)
-args = parser.parse_args()
-ray.init(num_cpus=33, num_gpus=0,include_dashboard=True, ignore_reinit_error=True)
-
-#configures training of the agent with associated hyperparameters
-config_train = {
-    #"sample_batch_size": 200,
-    "env": Envir,
-    "train_batch_size": 1000,
-    #"sgd_minibatch_size": 1200,
-    #"num_sgd_iter": 3,
-    #"lr":1e-3,
-    #"vf_loss_coeff": 0.5,
-    #"rollout_fragment_length": 63,
-    "model":{"fcnet_hiddens": [64, 64]},
-    "num_workers": 32,
-    "env_config":{"generalize":True, "run_valid":False, "horizon":20},
-    }
-
-#Runs training and saves the result in ~/ray_results/brandnewBound_1 (the tune.run name below)
-#If training fails for any reason, it can be restored from a checkpoint
-trials = tune.run(
-    "PPO", #You can replace this string with ppo.PPOTrainer if you want / have customized it
-    name="brandnewBound_1", # The name can be different.
-    stop={"episode_reward_mean": 12, "training_iteration": 15},
-    checkpoint_freq=1,
-    config=config_train,
-    #restore="/home/wentian/ray_results/brandnewBound/PPO_Envir_cc8be_00000_0_2023-08-16_01-11-16/checkpoint_000002",
-    #restore="/home/wentian/ray_results/brandnewBound/PPO_Envir_f6236_00000_0_2023-08-16_04-40-01/checkpoint_000003",
-    #restore="/home/wentian/ray_results/brandnewBound/PPO_Envir_4615a_00000_0_2023-08-16_06-58-15/checkpoint_000006"
-    #restore="/home/wentian/ray_results/brandnewBound/PPO_Envir_d8b02_00000_0_2023-08-17_02-07-41/checkpoint_000012",
-    #restore="/home/wentian/ray_results/brandnewBound_1/PPO_Envir_d6a0f_00000_0_2023-08-18_05-19-43/checkpoint_000012",
-    # pass --checkpoint_dir to resume training from an earlier run's checkpoint
-    restore=args.checkpoint_dir,
-)
-#
\ No newline at end of file
diff --git a/openfasoc/MLoptimization/run_training.py b/openfasoc/MLoptimization/run_training.py
deleted file mode 100644
index ced833866..000000000
--- a/openfasoc/MLoptimization/run_training.py
+++ /dev/null
@@ -1,286 +0,0 @@
-#env import
-import sys
-import gymnasium as gym
-from gymnasium import spaces
-from gymnasium.spaces import Discrete
-from gymnasium.wrappers import EnvCompatibility
-from ray.rllib.env.wrappers.multi_agent_env_compatibility import MultiAgentEnvCompatibility
-sys.path.append('../generators/gdsfactory-gen/')  # location of sky130_nist_tapeout
-from sky130_nist_tapeout import single_build_and_simulation
-import numpy as np
-import random
-import psutil
-
-from multiprocessing import Pool
-from collections import OrderedDict
-import yaml
-import yaml.constructor
-import statistics
-import os
-import itertools
-import pickle
-from pathlib import Path
-
-#
-#environment set up
-class Envir(gym.Env):
-    metadata = {'render.modes': ['human']}
-
-    PERF_LOW = -1
-    PERF_HIGH = 1
-
-    def __init__(self, env_config):
-        PERF_LOW = -1
-        PERF_HIGH = 1
-        self.multi_goal = env_config.get("multi_goal",False)
-        self.generalize = env_config.get("generalize",False)
-        num_valid = env_config.get("num_valid",50)
-        self.specs_save = env_config.get("save_specs", False)
-        self.valid = env_config.get("run_valid", False)
-        self.horizon = env_config.get("horizon", 100)
-        inputspec = env_config.get("inputspec",{})
-
-        self.env_steps = 0
-        #data = np.load('./training_params.npy')
-        #result = np.load('./training_results.npy')
-        #self.result = result
-        self.epi_steps = 0
-
-        specs = {}
-        print("horizon: " + str(self.horizon))
-        if(self.valid):
-            print("running in validation mode")
-        # design specs
-        if self.generalize == True:
-            if self.valid == False:
-                specs = yaml.safe_load(Path('new_spec_1.yaml').read_text())
-            else:
-                specs = inputspec
-                print(inputspec)
-
-
-        self.specs = specs
-
-        self.specs_ideal = []
-        self.specs_id = list(self.specs.keys())
-        self.fixed_goal_idx = -1
-        self.num_os = len(list(self.specs.values())[0])
-
-        # [10, 0, 0, 0, 0, 0, 0, 0, 10, 0, 5, 0, 10, 0, 0, 0, 0, 10]
-        #[10, 4, 10, 0, 0, 0, 10, 0, 10, 0, 10, 0, 0, 0, 0, 0, 0, 0]
-        #[9. 2. 6. 6. 2. 4. 9. 1. 2. 3. 10. 0.3 6. 3. 12. 12. 2. 1.]
-        #[9. 1. 6. 6. 2. 4. 9. 1. 6. 3. 10. 0.3 6. 3. 12. 12. 2. 1.]
-
-        # param array
-        params = {
-            "diffpair_params0" : [3, 9.6, 0.6],
-            "diffpair_params1" : [0.3, 2.17, 0.17],
-            "diffpair_params2" : [2, 7, 1],
-            "houtput_bias0" : [3, 9.6, 0.6],
-            "houtput_bias2" : [2, 7, 1],
-            "pamp_hparams0" : [4, 10.6, 0.6],
-            "pamp_hparams1" : [0.3, 2.1, 0.1],
-            "pamp_hparams2" : [6, 15, 1],
-            "mim_cap_rows" : [2, 4, 1],
-            "rmult" : [1, 3, 1],
-        }
-        self.params = []
-        self.params_id = list(params.keys())
-
-        # each entry above is [start, stop, step]; build the discrete sweep for every parameter
-        for value in params.values():
-            param_vec = np.arange(value[0], value[1], value[2])
-            self.params.append(param_vec)
-
-        #params = data
-        #self.params = data
-        #self.params_id = ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18']
-
-
-        #initialize sim environment
-        self.action_meaning = [-1,0,1]
-        self.action_space = spaces.Tuple([spaces.Discrete(len(self.action_meaning))]*len(self.params_id))
-        #self.action_space = spaces.Discrete(len(self.action_meaning)**len(self.params_id))
-        self.observation_space = spaces.Box(
-            low=np.array([PERF_LOW]*2*len(self.specs_id)+len(self.params_id)*[0]),
-            high=np.array([PERF_HIGH]*2*len(self.specs_id)+len(self.params_id)*[100]), dtype=np.float32)
-
-        #initialize current param/spec observations
-        self.cur_specs = np.zeros(len(self.specs_id), dtype=np.float32)
-        self.cur_params_idx = np.zeros(len(self.params_id), dtype=np.int32)
-
-        #Get the g* (overall design spec) you want to reach
-        self.global_g = []
-        for spec in list(self.specs.values()):
-            self.global_g.append(float(spec[self.fixed_goal_idx]))
-        self.g_star = np.array(self.global_g)
-        self.global_g = np.array([3000338000.0, 1.0*10**13])  # normalization reference for [gain_min, FOM]
-
-        #objective number (used for validation)
-        self.obj_idx = 0
-
-    def reset(self, *, seed=None, options=None):
-        #if multi-goal is selected, every time reset occurs, it will select a different design spec as objective
-        if self.generalize == True:
-            if self.valid == True:
-                if self.obj_idx > self.num_os-1:
-                    self.obj_idx = 0
-                idx = self.obj_idx
-                self.obj_idx += 1
-            else:
-                idx = random.randint(0,self.num_os-1)
-            self.specs_ideal = []
-            for spec in list(self.specs.values()):
-                self.specs_ideal.append(spec[idx])
-            self.specs_ideal = np.array(self.specs_ideal)
-        else:
-            if self.multi_goal == False:
-                self.specs_ideal = self.g_star
-            else:
-                idx = random.randint(0,self.num_os-1)
-                self.specs_ideal = []
-                for spec in list(self.specs.values()):
-                    self.specs_ideal.append(spec[idx])
-                self.specs_ideal = np.array(self.specs_ideal)
-        print("reset: " + str(self.num_os) + " candidate target specs")
-
-        #applicable only when you have multiple goals, normalizes everything to some global_g
-        self.specs_ideal_norm = self.lookup(self.specs_ideal, self.global_g)
-
-        #initialize current parameters
-        #self.cur_params_idx = np.array([3.0, 0.3, 2.0, 6.0, 2.0, 4.0, 3.0,1.0,2.0,3.0,4.0,0.3,6.0,3.0, 12.0, 12.0, 2.0, 1.0])
-        # self.cur_params_idx = np.array([10, 4, 10, 10, 10, 10, 0, 0, 0, 0])
-        self.cur_params_idx = np.array([10, 10, 4, 5, 0, 5, 7, 0, 1, 1])
-        self.cur_specs = self.update(self.cur_params_idx)
-        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
-        reward = self.reward(self.cur_specs, self.specs_ideal)
-        self.epi_steps = 0
-        #observation is a combination of current specs distance from ideal, ideal spec, and current param vals
-        self.ob = np.concatenate([cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx])
-        return self.ob, {}
-
-    def step(self, action):
-        """
-        :param action: one Discrete(3) choice per parameter; each choice is mapped through action_meaning = [-1, 0, 1] to a step on the corresponding parameter index
-        :return: (observation, reward, terminated, truncated, info)
-        """
-
-        #Take action that the RL agent returns to change current params
-        prevreward = self.reward(self.cur_specs, self.specs_ideal)
-        action = list(np.reshape(np.array(action),(np.array(action).shape[0],)))
-        self.cur_params_idx = self.cur_params_idx + np.array([self.action_meaning[a] for a in action])
-
-#        self.cur_params_idx = self.cur_params_idx + np.array(self.action_arr[int(action)])
-        self.cur_params_idx = np.clip(self.cur_params_idx, [0]*len(self.params_id), [(len(param_vec)-1) for param_vec in self.params])
-        #Get current specs and normalize
-        self.cur_specs = self.update(self.cur_params_idx)
-        cur_spec_norm = self.lookup(self.cur_specs, self.global_g)
-        reward = self.reward(self.cur_specs, self.specs_ideal)
-        terminated = False
-        #f = open("newnew_5.txt", "a")
-        f = open("newnewnew_eval_3.txt", "a")
-        #incentivize reaching goal state
-        if(prevreward >= 2.0 and reward < 2.0):
-            terminated = True
-        if (reward >= 2.0):
-            if (reward < prevreward):
-                terminated = True
-            f.write('-'*10 +'\n')
-            f.write('params = '+str(self.cur_params_idx)+'\n')
-            f.write('specs:'+str(self.cur_specs)+'\n')
-            f.write('ideal specs:'+str(self.specs_ideal)+'\n')
-            f.write('re:'+str(reward)+'\n')
-            f.write('-'*10+'\n')
-            print('-'*10)
-            print('params = ', self.cur_params_idx)
-            print('specs:', self.cur_specs)
-            print('ideal specs:', self.specs_ideal)
-            print('re:', reward)
-            print('-'*10)
-
-        self.ob = np.concatenate([cur_spec_norm, self.specs_ideal_norm, self.cur_params_idx])
-        self.env_steps = self.env_steps + 1
-        self.epi_steps = self.epi_steps + 1
-
-        truncated = self.epi_steps >= self.horizon
-        f.write('params: ' + str(self.cur_params_idx) +'\n')
-        f.write('cur ob:' + str(self.cur_specs) +'\n')
-        f.write('ideal spec:' + str(self.specs_ideal)+'\n')
-        f.write('cur reward:' + str(reward)+'\n')
-        f.write('epi step:' + str(self.epi_steps)+'\n')
-        f.write('env steps:' + str(self.env_steps)+'\n')
-        f.close()
-
-        print('cur ob:' + str(self.cur_specs))
-        print('ideal spec:' + str(self.specs_ideal))
-        print('cur reward:' + str(reward))
-        print('epi step:' + str(self.epi_steps))
-        print('env steps:' + str(self.env_steps))
-        return self.ob, reward, terminated, truncated, {}
-
-    def lookup(self, spec, goal_spec):
-        #normalize a spec against its goal: (spec - goal) / (goal + spec), clipped below at -1
-        goal_spec = [float(e) for e in goal_spec]
-        norm_spec = (spec-goal_spec)/(goal_spec+spec)
-        for i in range(len(spec)):
-            if spec[i] <= -1:
-                norm_spec[i] = -1
-            #if(norm_spec[i] > 0):
-                #norm_spec[i] = 0
-        return norm_spec
-
-    def reward(self, spec, goal_spec):
-        #sum of negative normalized errors for unmet specs (FOM also adds when exceeded); +2.0 bonus once every normalized error is above -0.02
-        rel_specs = self.lookup(spec, goal_spec)
-        pos_val = []
-        reward = 0.0
-        for i,rel_spec in enumerate(rel_specs):
-            #if(self.specs_id[i] == 'ibias_max'):
-                #rel_spec = rel_spec*-1.0#/10.0
-            if rel_spec < 0:
-                reward += rel_spec
-                pos_val.append(0)
-            else:
-                if(self.specs_id[i] == 'FOM'):
-                    reward += rel_spec
-                pos_val.append(1)
-
-        trueorfalse = True
-        for i,rel_spec in enumerate(rel_specs):
-            if rel_spec < -0.02:
-                trueorfalse = False
-
-        if trueorfalse:
-            return 2.0+reward
-        else:
-            return reward
-
-    def update(self, params_idx):
-        #impose constraint tail1 = in
-        #params_idx[0] = params_idx[3]
-        params = np.array([self.params[i][params_idx[i]] for i in range(len(self.params_id))])
-        #param_val = np.array[OrderedDict(list(zip(self.params_id,params)))]
-
-        #run param vals and simulate
-        #cur_specs = OrderedDict(sorted(self.sim_env.create_design_and_simulate(param_val[0])[1].items(), key=lambda k:k[0]))
-        inputparam = np.array([0.0, 0.0, 0.0, 6.0, 2.0, 4.0, 0.0, 1.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0, 12.0, 12.0, 0.0, 0.0])
-        inputparam[0:3] = params[0:3]
-        inputparam[6] = params[3]
inputparam[8] = params[4] - inputparam[10:13] = params[5:8] - inputparam[16:18] = params[8:10] - result = single_build_and_simulation(inputparam) - specs = np.array([0.0 , 0.0]) - specs[0] = result[0] - specs[1] = result[0]/result[6] - cur_specs = specs - - return cur_specs -#env end - -def main(): - env_config = {"generalize":True, "valid":True} - env = Envir(env_config) - env.reset() - env.step([2,2,2,2,2,2,2,2,2,2]) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/openfasoc/MLoptimization/sample_spec.yaml b/openfasoc/MLoptimization/sample_spec.yaml deleted file mode 100644 index 1a47d10cc..000000000 --- a/openfasoc/MLoptimization/sample_spec.yaml +++ /dev/null @@ -1,100 +0,0 @@ -{'gain_min': [2486222284.8408804, - 2595551369.5626364, - 2309495771.9680934, - 1145774876.7498627, - 2079516450.1469443, - 2391451213.479562, - 2676893053.18551, - 2675006145.1962533, - 2015194984.3624444, - 2278788513.705864, - 2205636091.187187, - 1506492988.8309956, - 1112077088.809704, - 2707199150.1559224, - 2903609807.9496045, - 2062437443.5261226, - 1403056317.139003, - 2835219051.771353, - 2925021136.524023, - 1886275826.821474, - 2157488694.831938, - 2641455527.072985, - 2402196770.75093, - 1199232764.3421414, - 2976991031.6658754, - 1646354535.6840703, - 2704204845.6734324, - 2716618295.460021, - 2413570061.996591, - 2121921633.2690156, - 2303933548.53714, - 2028867678.9123795, - 2719297024.9283357, - 2173990079.053155, - 1445459741.8651175, - 2472330865.96015, - 1001989471.0531384, - 2759910539.938794, - 1536809631.1164005, - 1135029016.3512793, - 1588011527.4173698, - 2049869876.6713438, - 2427789734.566804, - 1047544300.167047, - 1914004908.9098907, - 1393488128.8226607, - 2559241796.7458553, - 1565752673.493743, - 1295775028.0897965, - 1918300980.2757072], - 'FOM': [500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0, - 500000000000.0]} \ No newline at end of file
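The spec file above has the same structure that gen_spec.py writes and that eval.py loads with `yaml.safe_load`. A minimal sketch of inspecting such a file (the file name is whichever one was generated, e.g. this sample):

```
from pathlib import Path
import yaml

# Load a generated spec file; each key maps to one list of target values per design point.
specs = yaml.safe_load(Path('sample_spec.yaml').read_text())

print(list(specs.keys()))      # ['gain_min', 'FOM']
print(len(specs['gain_min']))  # number of target design points (50 in this sample)
```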