forked from jjalcaraz-upct/network-slicing

experiments_rl.py
129 lines (113 loc) · 4.59 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: juanjosealcaraz
This script evaluates the RL baseline algorithms provided by Stable Baselines
(PPO1, PPO2, TRPO, SAC, A2C, TD3, DDPG) in 3 network-slicing scenarios.
For each scenario and each algorithm, the script launches 30 simulation runs.
Each run is divided into a learning phase of 40000 steps and an inference phase of 10000 steps.
The results of the K-th run of algorithm ALG on scenario N are stored in:
./results/scenario_N/ALG/history_K.npz
"""
import os
from numpy.random import default_rng
from itertools import product
import concurrent.futures as cf
from scenario_creator import create_env
from wrapper import ReportWrapper
from stable_baselines import PPO1, PPO2, TRPO, SAC, A2C, TD3, DDPG
from stable_baselines.common.cmd_util import make_vec_env
from tensorflow import set_random_seed
RUNS = 30
PROCESSES = 4 # 30 if enough threads
TRAIN_STEPS = 39936 # must be a multiple of 256
EVALUATION_STEPS = 10500
CONTROL_STEPS = 60000
PENALTY = 1000
VERBOSE = False
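
# experiment grid: RUNS independent runs for each of the 3 scenarios (see docstring)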
run_list = list(range(RUNS))
scenarios = [0,1,2]
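# Stable Baselines algorithms evaluated in each scenario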
algorithms = {
    'SAC': SAC,
    'PPO1': PPO1,
    'PPO2': PPO2,
    'TRPO': TRPO,
    'A2C': A2C,
    'TD3': TD3,
    'DDPG': DDPG
}
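# whether each algorithm uses deterministic actions during the inference phase (passed to model.predict)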
deterministic = {
    'SAC': True,
    'PPO1': True,
    'PPO2': False,
    'TRPO': False,
    'A2C': False,
    'TD3': False,
    'DDPG': False
}
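
# RLEvaluator handles one (scenario, algorithm) pair: evaluate(i) trains the agent,
# saves the model, and then runs the inference phase for run index i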
class RLEvaluator:
    def __init__(self, scenario, algo_name, algorithm):
        self.scenario = scenario
        self.algo_name = algo_name
        self.algorithm = algorithm
        # directory for the per-run result files (history_K.npz)
        self.path = './results/scenario_{}/{}/'.format(scenario, algo_name)
        if not os.path.isdir(self.path):
            try:
                os.makedirs(self.path)
            except OSError:
                print("Creation of the directory %s failed" % self.path)
            else:
                print("Successfully created the directory %s" % self.path)
        # directory for the trained agents
        self.model_path = './trained_models/scenario_{}/{}/'.format(scenario, algo_name)
        if not os.path.isdir(self.model_path):
            try:
                os.makedirs(self.model_path)
            except OSError:
                print("Creation of the directory %s failed" % self.model_path)
            else:
                print("Successfully created the directory %s" % self.model_path)
    def evaluate(self, i):
        print('start evaluation of scenario {} run {} algorithm {}'.format(self.scenario, i, self.algo_name))
        rng = default_rng(seed=i)  # environment seed
        set_random_seed(i)  # tensorflow seed
        node_env = create_env(rng, self.scenario, penalty=PENALTY)
        print('environment created')
        node_env = ReportWrapper(node_env, steps=TRAIN_STEPS,
                                 control_steps=CONTROL_STEPS,
                                 env_id=i,
                                 path=self.path,
                                 verbose=VERBOSE)
        print('wrapped environment created')
        env = make_vec_env(lambda: node_env, n_envs=1)
        print('vectorised environment created')
        model = self.algorithm('MlpPolicy', env, verbose=0)
        print('scenario {}: run {} of algorithm {} ... '.format(self.scenario, i, self.algo_name))
        # learning phase
        model.learn(total_timesteps=TRAIN_STEPS)
        print('training done!')
        node_env.save_results()
        model_path = '{}{}_agent_{}'.format(self.model_path, self.algo_name, i)
        model.save(model_path)
        print('model saved')
        # inference phase
        node_env.set_evaluation(EVALUATION_STEPS)
        obs = node_env.obs
        det = deterministic[self.algo_name]
        action, state = model.predict(obs, deterministic=det)
        for _ in range(EVALUATION_STEPS):  # avoid shadowing the run index i
            action, state = model.predict(obs, state=state, deterministic=det)
            obs, _, _, _ = node_env.step(action)
        print('evaluation done')
        node_env.save_results()
        print('results saved')
if __name__ == '__main__':
    for scenario, (alg_name, alg) in product(scenarios, algorithms.items()):
        evaluator = RLEvaluator(scenario, alg_name, alg)

        # ################################################################
        # # use this code for sequential execution
        # for run in run_list:
        #     evaluator.evaluate(run)
        # ################################################################

        # ################################################################
        # use this code for parallel execution
        with cf.ProcessPoolExecutor(PROCESSES) as E:
            results = E.map(evaluator.evaluate, run_list)
        # ################################################################