Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor how jobs are logged in the project manager #3740

Merged
merged 4 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/dashboard/app/controllers/projects_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def job_details
cluster = OodAppkit.clusters[cluster_str.to_sym]
render(:status => 404) if cluster.nil?

hpc_job = project.job_from_id(job_details_params[:jobid].to_s, cluster_str)
hpc_job = project.job(job_details_params[:jobid].to_s, cluster_str)

render(partial: 'job_details', locals: { job: hpc_job })
end
Expand Down
50 changes: 50 additions & 0 deletions apps/dashboard/app/models/concerns/job_logger.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

module JobLogger

def upsert_job!(directory, job)
existing_jobs = jobs(directory)
stored_job = existing_jobs.detect { |j| j.id == job.id && j.cluster == job.cluster }

if stored_job.nil?
new_jobs = (existing_jobs + [job.to_h]).map(&:to_h)
else
Comment on lines +6 to +10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like completed jobs are not being saved with clusters, which is causing there to be duplicate entries in the job log that appear to never load in the projects#show page.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for checking! I thought there was something buggy here but couldn't quite put my finger on it.

new_jobs = existing_jobs.map(&:to_h)
idx = existing_jobs.index(stored_job)
new_jobs[idx].merge!(job.to_h) { |_key, old_val, new_val| new_val.nil? ? old_val : new_val }
end

JobLoggerHelper.write_log(directory, new_jobs)
end

# def write_job_log!(directory, jobs)
# JobLoggerHelper.write_log!(directory, jobs)
# endd

def jobs(directory)
file = JobLoggerHelper.log_file(directory)
begin
data = YAML.safe_load(File.read(file.to_s), permitted_classes: [Time]).to_a
data.map { |job_data| HpcJob.new(**job_data) }
rescue StandardError => e
Rails.logger.error("Cannot read job log file '#{file}' due to error: #{e}")
[]
end
end

# helper methods here are located here so that they don't
# bleed into the class that uses this module
class JobLoggerHelper
class << self
def log_file(directory)
Pathname.new("#{directory}/.ondemand/job_log.yml").tap do |path|
FileUtils.touch(path.to_s) unless path.exist?
end
end

def write_log(directory, jobs)
file = log_file(directory)
File.write(file.to_s, jobs.to_yaml)
end
end
end
end
50 changes: 2 additions & 48 deletions apps/dashboard/app/models/launcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

class Launcher
include ActiveModel::Model
include JobLogger

class ClusterNotFound < StandardError; end

Expand Down Expand Up @@ -196,28 +197,6 @@ def submit(options)
nil
end

def active_jobs
@active_jobs ||= jobs.reject { |job| job.completed? }
end

def jobs
@jobs ||= begin
data = YAML.safe_load(File.read(job_log_file.to_s), permitted_classes: [Time]).to_a
data.map do |job_data|
HpcJob.new(**job_data)
end
end
end

# When a job is requested, update before returning
def job_from_id(job_id, cluster)
job = stored_job_from_id(job_id, cluster)
unless job.nil? || job.status.to_s == 'completed'
update_job_log(job_id, cluster)
end
stored_job_from_id(job_id, cluster)
end

def create_default_script
return false if Launcher.scripts?(project_dir) || default_script_path.exist?

Expand Down Expand Up @@ -310,37 +289,12 @@ def cached_values
end
end

def most_recent_job
jobs.sort_by do |data|
data['submit_time']
end.reverse.first.to_h
end

def stored_job_from_id(job_id, cluster)
jobs.detect { |j| j.id == job_id && j.cluster == cluster }
end

def update_job_log(job_id, cluster)
adapter = adapter(cluster).job_adapter
info = adapter.info(job_id)
job = HpcJob.from_core_info(info: info, cluster: cluster)
existing_jobs = jobs
stored_job = stored_job_from_id(job_id, cluster)
if stored_job.nil?
new_jobs = (jobs + [job.to_h]).map(&:to_h)
else
new_jobs = existing_jobs.map(&:to_h)
idx = existing_jobs.index(stored_job)
new_jobs[idx].merge!(job.to_h) { |key, old_val, new_val| new_val.nil? ? old_val : new_val }
end

File.write(job_log_file.to_s, new_jobs.to_yaml)
end

def job_log_file
@job_log_file ||= Pathname.new(File.join(Launcher.script_path(project_dir, id), "job_history.log")).tap do |path|
FileUtils.touch(path.to_s)
end
upsert_job!(project_dir, job)
end

def submit_opts(options, render_format)
Expand Down
25 changes: 15 additions & 10 deletions apps/dashboard/app/models/project.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class Project
include ActiveModel::Validations
include ActiveModel::Validations::Callbacks
include IconWithUri
extend JobLogger

class << self
def lookup_file
Expand Down Expand Up @@ -197,18 +198,22 @@ def size
end

def jobs
launchers = Launcher.all(directory)
launchers.map do |launcher|
launcher.jobs
end.flatten
Project.jobs(directory)
end

def job_from_id(job_id, cluster)
launchers = Launcher.all(directory)
launchers.each do |launcher|
job = launcher.job_from_id(job_id, cluster)
return job unless job.nil?
end
def job(job_id, cluster)
cached_job = jobs.detect { |j| j.id == job_id && j.cluster == cluster }
return cached_job if cached_job.completed?

active_job = adapter(cluster).info(job_id)
active_job = HpcJob.new(**active_job.to_h)
Project.upsert_job!(directory, active_job)
active_job
end

def adapter(cluster_id)
cluster = OodAppkit.clusters[cluster_id] || raise(StandardError, "Job specifies nonexistent '#{cluster_id}' cluster id.")
cluster.job_adapter
end

def readme_path
Expand Down
25 changes: 13 additions & 12 deletions apps/dashboard/test/system/project_manager_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -337,15 +337,16 @@ def add_auto_environment_variable(project_id, script_id, save: true)
project_id = setup_project(dir)
script_id = setup_script(project_id)
project_dir = File.join(dir, 'projects', project_id)
script_dir = File.join(project_dir, '.ondemand', 'scripts', script_id)
ondemand_dir = File.join(project_dir, '.ondemand')
script_dir = File.join(ondemand_dir, 'scripts', script_id)

# ASSERT SCRIPT DIRECTORY IS CREATED
assert_equal true, File.directory?(script_dir)

expected_script_files = ["#{script_dir}/form.yml", "#{script_dir}/job_history.log"]
expected_script_files = ["#{script_dir}/form.yml", "#{ondemand_dir}/job_log.yml"]
# ASSERT EXPECTED SCRIPT FILES
expected_script_files.each do |file_path|
assert_equal true, File.exist?(file_path)
assert_equal true, File.exist?(file_path), "#{file_path} does not exist"
end

accept_confirm do
Expand All @@ -363,7 +364,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)
project_id = setup_project(dir)
script_id = setup_script(project_id)
project_dir = File.join(dir, 'projects', project_id)
script_dir = File.join(project_dir, '.ondemand', 'scripts', script_id)
ondemand_dir = File.join(project_dir, '.ondemand')
add_account(project_id, script_id)

launcher_path = project_launcher_path(project_id, script_id)
Expand All @@ -374,7 +375,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)
assert_equal 'oakley', find('#launcher_auto_batch_clusters').value
assert_equal 'pzs0715', find('#launcher_auto_accounts').value
assert_equal "#{project_dir}/my_cool_script.sh", find('#launcher_auto_scripts').value
assert_nil YAML.safe_load(File.read("#{script_dir}/job_history.log"))
assert_nil YAML.safe_load(File.read("#{ondemand_dir}/job_log.yml"))

select('owens', from: 'launcher_auto_batch_clusters')
select('pas2051', from: 'launcher_auto_accounts')
Expand All @@ -391,7 +392,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)

click_on 'Launch'
assert_selector('.alert-success', text: 'job-id-123')
jobs = YAML.safe_load(File.read("#{script_dir}/job_history.log"), permitted_classes: [Time])
jobs = YAML.safe_load(File.read("#{ondemand_dir}/job_log.yml"), permitted_classes: [Time])

assert_equal(1, jobs.size)
assert_equal('job-id-123', jobs[0]['id'])
Expand All @@ -404,7 +405,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)
project_id = setup_project(dir)
script_id = setup_script(project_id)
project_dir = File.join(dir, 'projects', project_id)
script_dir = File.join(project_dir, '.ondemand', 'scripts', script_id)
ondemand_dir = File.join(project_dir, '.ondemand')
add_account(project_id, script_id, save: false)

click_on('Add new option')
Expand All @@ -421,7 +422,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)
assert_equal 'oakley', find('#launcher_auto_batch_clusters').value
assert_equal 'pzs0715', find('#launcher_auto_accounts').value
assert_equal "#{project_dir}/my_cool_script.sh", find('#launcher_auto_scripts').value
assert_nil YAML.safe_load(File.read("#{script_dir}/job_history.log"))
assert_nil YAML.safe_load(File.read("#{ondemand_dir}/job_log.yml"))

select('owens', from: 'launcher_auto_batch_clusters')
select('pas2051', from: 'launcher_auto_accounts')
Expand All @@ -439,7 +440,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)

click_on 'Launch'
assert_selector('.alert-success', text: 'job-id-123')
jobs = YAML.safe_load(File.read("#{script_dir}/job_history.log"), permitted_classes: [Time])
jobs = YAML.safe_load(File.read("#{ondemand_dir}/job_log.yml"), permitted_classes: [Time])

assert_equal(1, jobs.size)
assert_equal('job-id-123', jobs[0]['id'])
Expand All @@ -451,7 +452,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)
project_id = setup_project(dir)
script_id = setup_script(project_id)
project_dir = File.join(dir, 'projects', project_id)
script_dir = File.join(project_dir, '.ondemand', 'scripts', script_id)
ondemand_dir = File.join(project_dir, '.ondemand')
add_account(project_id, script_id)

launcher_path = project_launcher_path(project_id, script_id)
Expand All @@ -462,7 +463,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)
assert_equal 'oakley', find('#launcher_auto_batch_clusters').value
assert_equal 'pzs0715', find('#launcher_auto_accounts').value
assert_equal "#{project_dir}/my_cool_script.sh", find('#launcher_auto_scripts').value
assert_nil YAML.safe_load(File.read("#{script_dir}/job_history.log"))
assert_nil YAML.safe_load(File.read("#{ondemand_dir}/job_log.yml"))

select('owens', from: 'launcher_auto_batch_clusters')
select('pas2051', from: 'launcher_auto_accounts')
Expand All @@ -476,7 +477,7 @@ def add_auto_environment_variable(project_id, script_id, save: true)

click_on 'Launch'
assert_selector('.alert-danger', text: "Close\nsome error message")
assert_nil YAML.safe_load(File.read("#{script_dir}/job_history.log"))
assert_nil YAML.safe_load(File.read("#{ondemand_dir}/job_log.yml"))
end
end

Expand Down
Loading