I am trying to create a GPU-based environment in which a model (say, resnet18) is trained. I am following PointChasingEnv: https://github.com/AI4Finance-Foundation/ElegantRL/blob/master/elegantrl/envs/PointChasingEnv.py. Does that imply that I need to create several models (one per num_envs) and several train loaders too? This is an example of what I have been able to come up with:
import numpy as np
import torch
import torch.nn.functional as F
import wandb
from torch import Tensor
from torch.utils.data import DataLoader

# load_dataset(name) -> (train_dataset, val_dataset, test_dataset) and
# load_models(architecture, dataset) -> model class are my own helpers (defined elsewhere).


class AdaptiveLearningRateOptimizer:
    """
    Optimization environment. Can be used to learn an adaptive learning rate schedule.

    Observations (7):
        0: Training loss
        1: Validation loss
        2: Variance of predictions
        3: Variance of prediction changes
        4: Mean of output weight matrix
        5: Variance of output weight matrix
        6: Learning rate

    Actions - Discrete (3):
        0: Increase the learning rate
        1: Decrease the learning rate
        2: No-op

    Actions - Continuous (1):
        0: Scaling factor for the learning rate
    """

    def __init__(self, dataset, architecture, batch_size, max_steps, initial_lr, prefix,
                 num_envs=10, device_id=0, discrete=True, action_range=1.05, lr_noise=True):
        super().__init__()

        class SpecDummy:
            def __init__(self, id):
                self.id = id

        self.spec = SpecDummy(id='AdaptiveLearningRate-v0' if discrete else 'AdaptiveLearningRateContinuous-v0')
        self.dataset = dataset
        self.architecture = architecture
        self.device = torch.device(f'cuda:{device_id}' if torch.cuda.is_available() else 'cpu')
        self.num_envs = num_envs

        # One model, one optimizer and one set of data loaders per sub-environment.
        self.train_dataset, self.val_dataset, self.test_dataset = load_dataset(dataset)
        self.net = [load_models(architecture, dataset)().to(self.device) for _ in range(self.num_envs)]
        self.optimizers = [torch.optim.Adam(net.parameters(), lr=initial_lr) for net in self.net]

        self.batch_size = batch_size
        self.max_steps = max_steps
        self.initial_lr = initial_lr
        self.ep_initial_lr = torch.tensor([initial_lr], dtype=torch.float32).repeat(self.num_envs, 1)
        self.discrete = discrete
        self.action_range = action_range
        self.lr_noise = lr_noise
        self.prefix = prefix
        self.reward_tolerance = 1e-7
        self.env_name = "AdaptiveLearningRateVecEnv"

        # Bookkeeping tensors are kept on the CPU; only the networks and batches live on the GPU.
        self.last_network_predictions = [[] for _ in range(self.num_envs)]
        self.latest_end_val = torch.empty(self.num_envs, 1)
        self.prev_reward = torch.zeros(self.num_envs, 1)
        self.episode_mean_reward = torch.zeros(self.num_envs, 1)
        # Reward histories grow at different rates per environment, so they are Python lists of 1-D tensors.
        self.reward_buffer = [torch.zeros(0, dtype=torch.float32) for _ in range(self.num_envs)]
        self.episode_reward_buffer = [torch.zeros(0, dtype=torch.float32) for _ in range(self.num_envs)]

        self.lr = torch.tensor([self.initial_lr], dtype=torch.float32).repeat(self.num_envs, 1)
        self.avg_train_loss = torch.zeros(self.num_envs, 1)
        self.avg_val_loss = torch.zeros(self.num_envs, 1)
        self.network_prediction_var = torch.zeros(self.num_envs, 1)
        self.network_prediction_change_var = torch.zeros(self.num_envs, 1)
        self.output_layer_weights_mean = torch.zeros(self.num_envs, 1)
        self.output_layer_weights_var = torch.zeros(self.num_envs, 1)
        self.current_step = torch.zeros(self.num_envs, 1)

        # Loss function shared by all sub-environments.
        self.criterion = torch.nn.CrossEntropyLoss()

        self.train_generators = [DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True)
                                 for _ in range(self.num_envs)]
        self.val_generators = [DataLoader(self.val_dataset, batch_size=batch_size, shuffle=True)
                               for _ in range(self.num_envs)]
        self.test_generators = [DataLoader(self.test_dataset, batch_size=batch_size, shuffle=True)
                                for _ in range(self.num_envs)]

        self.state = torch.zeros(self.num_envs, 7)

    def get_state(self) -> Tensor:
        return torch.cat([self.avg_train_loss, self.avg_val_loss, self.network_prediction_var,
                          self.network_prediction_change_var, self.output_layer_weights_mean,
                          self.output_layer_weights_var, self.lr], dim=1)

    def _clip_lr(self, idx):
        self.lr[idx] = torch.clamp(self.lr[idx], min=1e-6, max=1e-1)

    def _add_lr_noise(self, idx, std=None, clip=True):
        if std is None:
            std = 1e-5
        # Perturb only this environment's learning rate.
        noise = torch.normal(mean=0.0, std=std, size=self.lr[idx].shape)
        self.lr[idx] = self.lr[idx] + noise
        if clip:
            self._clip_lr(idx=idx)

    def _update_lr(self, action, idx, clip=True):
        # Ensure the action is a CPU scalar tensor.
        if not isinstance(action, torch.Tensor):
            action = torch.tensor(action)
        action = action.detach().float().cpu()
        if action.dim() > 0:
            action = action[0]

        # Update the learning rate based on the action.
        if self.discrete:
            if action == 0:
                self.lr[idx] *= self.action_range
            elif action == 1:
                self.lr[idx] /= self.action_range
        else:
            self.lr[idx] *= action

        # Only apply noise and clipping after this environment's first step.
        if self.current_step[idx] != 0:
            if self.lr_noise:
                self._add_lr_noise(clip=clip, idx=idx)
            if clip:
                self._clip_lr(idx)

        # Apply the new learning rate by rebuilding this environment's optimizer.
        self.optimizers[idx] = torch.optim.Adam(self.net[idx].parameters(), lr=self.lr[idx].item())

    def test(self, idx):
        self.net[idx].eval()  # evaluation mode
        total_test_loss = 0.0
        total_correct = 0
        total_test_samples = 0
        with torch.no_grad():
            for x, y in self.test_generators[idx]:
                x, y = x.to(self.device), y.to(self.device)
                outputs = self.net[idx](x)
                loss = F.cross_entropy(outputs, y)
                total_test_loss += loss.item() * x.size(0)
                # Predicted class = argmax of the logits.
                _, predicted = torch.max(outputs, 1)
                total_correct += (predicted == y).sum().item()
                total_test_samples += x.size(0)
        avg_loss = total_test_loss / total_test_samples
        accuracy = total_correct / total_test_samples
        return avg_loss, accuracy

    def train_epoch(self, idx):
        self.net[idx].train()  # training mode
        total_train_loss = 0.0
        total_train_samples = 0
        for x, y in self.train_generators[idx]:
            x, y = x.to(self.device), y.to(self.device)
            self.optimizers[idx].zero_grad()
            outputs = self.net[idx](x)
            loss = F.cross_entropy(outputs, y)
            loss.backward()
            self.optimizers[idx].step()
            total_train_loss += loss.item() * x.size(0)
            total_train_samples += x.size(0)
        return total_train_loss / total_train_samples

    def eval_epoch(self, idx):
        self.net[idx].eval()  # evaluation mode
        running_loss = 0.0
        network_predictions = []
        network_prediction_var = 0.0
        with torch.no_grad():
            for inputs, targets in self.val_generators[idx]:
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.net[idx](inputs)
                network_prediction_var += outputs.var().item()
                network_predictions.append(outputs)
                loss = self.criterion(outputs, targets)
                running_loss += loss.item()
        avg_val_loss = running_loss / len(self.val_generators[idx])
        return avg_val_loss, network_predictions, network_prediction_var

    def _prediction_change_var(self, idx, network_predictions):
        # Variance of (current - previous) predictions, batch by batch; zeros when there is no history yet.
        env_change_vars = []
        for i, pred in enumerate(network_predictions):
            last_pred = (self.last_network_predictions[idx][i]
                         if len(self.last_network_predictions[idx]) > i else torch.zeros_like(pred))
            env_change_vars.append((pred - last_pred).var())
        return torch.stack(env_change_vars).mean().item()

    def reset_idx(self, idx, seed=None):
        # setproctitle(f'PPO-ALRS-v0-{idx}')
        self.train_generators[idx] = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
        self.val_generators[idx] = DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=True)
        self.last_network_predictions[idx] = []
        self.current_step[idx] = 0
        self.prev_reward[idx] = 0.0
        self.reward_buffer[idx] = torch.zeros(0, dtype=torch.float32)
        self.episode_reward_buffer[idx] = torch.zeros(0, dtype=torch.float32)

        if self.initial_lr is None:
            self.lr[idx] = float(np.random.choice([1e-2, 1e-3, 1e-4]))
        else:
            self.lr[idx] = self.initial_lr
        self.optimizers[idx] = torch.optim.Adam(self.net[idx].parameters(), lr=self.lr[idx].item())

        # One epoch of training and evaluation to build the initial observation.
        avg_train_loss = self.train_epoch(idx)
        avg_val_loss, network_predictions, network_prediction_var = self.eval_epoch(idx)
        network_prediction_change_var = self._prediction_change_var(idx, network_predictions)

        output_layer_weights = list(self.net[idx].parameters())[-2].detach()
        reward = -avg_val_loss
        self.reward_buffer[idx] = torch.tensor([reward], dtype=torch.float32)
        self.episode_reward_buffer[idx] = torch.tensor([reward], dtype=torch.float32)

        self.avg_train_loss[idx] = avg_train_loss
        self.avg_val_loss[idx] = avg_val_loss
        self.network_prediction_var[idx] = network_prediction_var
        self.network_prediction_change_var[idx] = network_prediction_change_var
        self.output_layer_weights_mean[idx] = output_layer_weights.mean().item()
        self.output_layer_weights_var[idx] = output_layer_weights.var().item()

        self.state[idx] = torch.tensor([
            avg_train_loss, avg_val_loss, network_prediction_var, network_prediction_change_var,
            output_layer_weights.mean().item(), output_layer_weights.var().item(), self.lr[idx].item()
        ], dtype=torch.float32)
        return self.state[idx], {}

    def step(self, actions):
        for idx in range(self.num_envs):
            self._update_lr(actions[idx], idx)

        # Training (one epoch per sub-environment).
        self.avg_train_loss = torch.tensor([self.train_epoch(i) for i in range(self.num_envs)],
                                           dtype=torch.float32).unsqueeze(1)

        # Evaluation.
        eval_results = [self.eval_epoch(i) for i in range(self.num_envs)]
        self.avg_val_loss = torch.tensor([res[0] for res in eval_results], dtype=torch.float32).unsqueeze(1)
        network_predictions = [res[1] for res in eval_results]
        self.network_prediction_var = torch.tensor([res[2] for res in eval_results],
                                                   dtype=torch.float32).unsqueeze(1)

        # Variance of prediction changes per environment.
        self.network_prediction_change_var = torch.tensor(
            [self._prediction_change_var(i, network_predictions[i]) for i in range(self.num_envs)],
            dtype=torch.float32).unsqueeze(1)
        self.last_network_predictions = network_predictions

        # Output-layer weight statistics.
        output_layer_weights = torch.stack([list(self.net[i].parameters())[-2]
                                            for i in range(self.num_envs)]).detach()
        self.output_layer_weights_mean = output_layer_weights.mean(dim=[1, 2]).unsqueeze(1).cpu()
        self.output_layer_weights_var = output_layer_weights.var(dim=[1, 2]).unsqueeze(1).cpu()

        rewards = -self.avg_val_loss  # shape (num_envs, 1)

        # Reward buffers: keep only the last 5 rewards for the plateau check.
        for i in range(self.num_envs):
            self.reward_buffer[i] = torch.cat([self.reward_buffer[i], rewards[i]])[-5:]
            self.episode_reward_buffer[i] = torch.cat([self.episode_reward_buffer[i], rewards[i]])

        self.state = torch.cat([
            self.avg_train_loss, self.avg_val_loss, self.network_prediction_var,
            self.network_prediction_change_var, self.output_layer_weights_mean,
            self.output_layer_weights_var, self.lr
        ], dim=1)

        self.current_step += 1  # increments the step counter of every sub-environment at once

        info = [{'train_loss': self.avg_train_loss[i].item(),
                 'val_loss': self.avg_val_loss[i].item(),
                 'lr': self.lr[i].item()} for i in range(self.num_envs)]

        truncated = self.current_step >= self.max_steps  # (num_envs, 1) bool
        # Terminate an environment when its recent rewards have plateaued
        # (only checked once 5 rewards have accumulated, otherwise every env would terminate after its first step).
        terminated = torch.tensor([[buf.numel() >= 5 and (buf.max() - buf.min()).item() < self.reward_tolerance]
                                   for buf in self.reward_buffer])

        if self.prefix != 'train_agent':
            wandb.log({
                f'{self.prefix}_learning_rate': self.lr.mean().item(),
                f'{self.prefix}_final_val_loss': self.avg_val_loss.mean().item(),
                f'{self.prefix}_final_train_loss': self.avg_train_loss.mean().item(),
                'action': actions.mean().item()
            })

        self.prev_reward = rewards
        episode_ended = truncated | terminated
        for i in range(self.num_envs):
            if episode_ended[i]:
                self.episode_mean_reward[i] = self.episode_reward_buffer[i].mean()
                wandb.log({f'{self.prefix}_episode_mean_reward_{i}': self.episode_mean_reward[i].item()})
                # Reset the episode-specific reward history for this environment.
                self.episode_reward_buffer[i] = torch.zeros(0, dtype=torch.float32)

        return self.state, rewards, episode_ended, info

    def reset(self):
        for idx in range(self.num_envs):
            self.reset_idx(idx)
        return self.get_state()

    def render(self, mode='human'):
        pass


def check_adaptive_lr_vec_env():
    env = AdaptiveLearningRateOptimizer(max_steps=200, dataset='cifar10', batch_size=256,
                                        initial_lr=0.001, prefix='PPO_LR', discrete=False,
                                        num_envs=4, architecture='resnet18')
    reward_sums = [0.0] * env.num_envs
    reward_sums_list = [[] for _ in range(env.num_envs)]  # independent lists, not aliases of one list
    states = env.reset()
    for _ in range(env.max_steps * 4):
        # Random continuous actions (scaling factors around 1.0); a trained agent would supply these.
        actions = 1.0 + 0.05 * torch.randn(env.num_envs, 1)
        states, rewards, dones, _ = env.step(actions)
        for env_i in range(env.num_envs):
            reward_sums[env_i] += rewards[env_i].item()
            if dones[env_i]:
                print(f"{rewards[env_i].item():8.4f} {actions[env_i].detach().cpu().numpy().round(2)}")
                reward_sums_list[env_i].append(reward_sums[env_i])
                reward_sums[env_i] = 0.0
    reward_sums_list = np.array(reward_sums_list)
    print("shape:", reward_sums_list.shape)
    print("mean: ", np.mean(reward_sums_list, axis=1))
    print("std: ", np.std(reward_sums_list, axis=1))


if __name__ == '__main__':
    print("Running test")
    check_adaptive_lr_vec_env()
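One part I am unsure about: _update_lr rebuilds the Adam optimizer on every step, which throws away Adam's running moment estimates. A minimal sketch of the alternative I am considering (untested; _apply_lr is just a hypothetical helper name), which updates the learning rate of the existing optimizer in place via its param_groups:

    def _apply_lr(self, idx):
        # Write the current per-env learning rate into the existing optimizer
        # instead of constructing a new Adam, so its moment estimates are kept.
        new_lr = float(self.lr[idx])
        for param_group in self.optimizers[idx].param_groups:
            param_group['lr'] = new_lr

Would that be the preferred approach here, or is re-creating the optimizer each step acceptable for this kind of environment?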
Any suggestions?