From 6951a8bbb92c84f08881eddc537cf2a28b0a42ea Mon Sep 17 00:00:00 2001 From: Aarsh2001 Date: Fri, 6 Oct 2023 20:27:48 +0100 Subject: [PATCH] minor modifications to gcp_auth --- .github/auth/vm_auth.py | 54 ++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/.github/auth/vm_auth.py b/.github/auth/vm_auth.py index 32742f14..9accf1a8 100644 --- a/.github/auth/vm_auth.py +++ b/.github/auth/vm_auth.py @@ -9,12 +9,11 @@ def authenticate_vm(path): credentials = Credentials.from_service_account_file(path) - return discovery.build('compute', 'v1', credentials=credentials) + return discovery.build("compute", "v1", credentials=credentials) -def _start_ssh_session(compute, creds, username, passphrase): - response = compute.instances().get(project="gpu-insatnce", zone='us-central1-a', instance='demos-tests').execute() - external_ip = response['networkInterfaces'][0]['accessConfigs'][0]['natIP'] +def _start_ssh_session(response, creds, username, passphrase): + external_ip = response["networkInterfaces"][0]["accessConfigs"][0]["natIP"] ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) @@ -31,35 +30,62 @@ def _start_ssh_session(compute, creds, username, passphrase): channel = transport.open_session() # Execute the command on the instance in the background - command = 'cd actions-runner; nohup ./run.sh &' + command = "cd actions-runner; nohup ./run.sh &" channel.exec_command(command) # Close the SSH channel and session immediately - channel.close() ssh.close() - -def start_runner(creds, ssh_creds, ssh_user, key_passphrase, id="gpu-insatnce", zone='us-central1-a', - instance='demos-tests'): +def start_runner( + creds, + ssh_creds, + ssh_user, + key_passphrase, + id="gpu-insatnce", + zone="us-central1-a", + instance="demos-tests", +): compute = authenticate_vm(creds) request = compute.instances().start(project=id, zone=zone, instance=instance) request.execute() - time.sleep(60) - _start_ssh_session(compute, ssh_creds, ssh_user, key_passphrase) + max_wait_time = 600 + wait_interval = 10 + waited_time = 0 + response = None + + while waited_time < max_wait_time: + response = ( + compute.instances().get(project=id, zone=zone, instance=instance).execute() + ) + status = response.get("status") + + if status == "RUNNING": + break + + time.sleep(wait_interval) + waited_time += wait_interval + + if waited_time >= max_wait_time: + raise Exception(f"Instance {instance} did not start within the expected time.") + + # Once the instance is running, start the SSH session + _start_ssh_session(response, ssh_creds, ssh_user, key_passphrase) def stop_runner(creds): compute = authenticate_vm(creds) - request = compute.instances().start(project="gpu-insatnce", zone='us-central1-a', instance='demos-tests') + request = compute.instances().start( + project="gpu-insatnce", zone="us-central1-a", instance="demos-tests" + ) request.execute() if __name__ == "__main__": ssh_user, key_passphrase, stop_vm = sys.argv[1], sys.argv[2], sys.argv[3] - gcp_credentials = 'gcp_auth.json' - ssh_credentials = '~/.ssh/id_rsa' + gcp_credentials = "gcp_auth.json" + ssh_credentials = "~/.ssh/id_rsa" if stop_vm == "true": # Stop the instance