From be53bdc1bb9d0f394112211aa81de1401059015e Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Mon, 12 Jun 2023 14:32:23 -0400 Subject: [PATCH 1/5] Check metrics auth valid before perf_capture_queue Check that the EMS's metrics authentication is valid before queueing perf_capture --- .../container_manager/metrics_capture.rb | 30 +++++++++++++++---- .../container_manager/metrics_capture_spec.rb | 27 +++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb index 2772b9a05d..f536757621 100644 --- a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb +++ b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb @@ -49,6 +49,17 @@ def log_severity } } + def capture_ems_targets(_options = {}) + begin + verify_metrics_connection!(ems) + rescue TargetValidationError, TargetValidationWarning => e + _log.send(e.log_severity, e.message) + return [] + end + + super + end + def prometheus_capture_context(target, start_time, end_time) PrometheusCaptureContext.new(target, start_time, end_time, INTERVAL) end @@ -57,6 +68,19 @@ def metrics_connection(ems) ems.connection_configurations.prometheus end + def metrics_connection_valid?(ems) + metrics_connection(ems)&.authentication&.status == "Valid" + end + + def verify_metrics_connection!(ems) + t = target || ems + target_name = "#{t.class.name.demodulize}(#{t.id})" + raise TargetValidationError, "no provider for #{target_name}" if ems.nil? + + raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if metrics_connection(ems).nil? + raise TargetValidationWarning, "metrics authentication isn't valid for #{target_name}" unless metrics_connection_valid?(ems) + end + def capture_context(_ems, target, start_time, end_time) # make start_time align to minutes start_time = start_time.beginning_of_minute @@ -73,11 +97,7 @@ def perf_collect_metrics(interval_name, start_time = nil, end_time = nil) "[#{start_time}] [#{end_time}]") begin - raise TargetValidationError, "no provider for #{target_name}" if ems.nil? - - connection = metrics_connection(ems) - raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if connection.nil? - raise TargetValidationWarning, "metrics authentication isn't valid for #{target_name}" unless connection.authentication&.status == "Valid" + verify_metrics_connection!(ems) context = capture_context(ems, target, start_time, end_time) diff --git a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb index 45118ba9b8..61d99e2ecb 100644 --- a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb +++ b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb @@ -59,6 +59,33 @@ end end + context "#perf_capture_all_queue" do + it "returns the objects" do + expect(@ems_kubernetes.perf_capture_object.perf_capture_all_queue).to include("Container" => [@container], "ContainerNode" => [@node]) + end + + context "with a missing metrics endpoint" do + before do + @ems_kubernetes.endpoints.find_by(:role => "prometheus").destroy + end + + it "returns no objects" do + expect(@ems_kubernetes.perf_capture_object.perf_capture_all_queue).to be_empty + end + end + + context "with invalid authentication on the metrics endpoint" do + before do + @ems_kubernetes.authentications.find_by(:authtype => "prometheus").update!(:status => "Error") + @ems_kubernetes.reload + end + + it "returns no objects" do + expect(@ems_kubernetes.perf_capture_object.perf_capture_all_queue).to be_empty + end + end + end + context "#perf_collect_metrics" do it "fails when no ems is defined" do @node.ext_management_system = nil From 4df0a936ab5d57fe91764edc98e46ccc1f898e97 Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Mon, 12 Jun 2023 16:35:00 -0400 Subject: [PATCH 2/5] Refactor capture_context to check endpoints and auth --- .../container_manager/metrics_capture.rb | 14 +++++++------- .../container_manager/metrics_capture_spec.rb | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb index f536757621..0a2d824614 100644 --- a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb +++ b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb @@ -81,11 +81,15 @@ def verify_metrics_connection!(ems) raise TargetValidationWarning, "metrics authentication isn't valid for #{target_name}" unless metrics_connection_valid?(ems) end - def capture_context(_ems, target, start_time, end_time) + def build_capture_context!(ems, target, start_time, end_time) + verify_metrics_connection!(ems) # make start_time align to minutes start_time = start_time.beginning_of_minute - prometheus_capture_context(target, start_time, end_time) + context = prometheus_capture_context(target, start_time, end_time) + raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if context.nil? + + context end def perf_collect_metrics(interval_name, start_time = nil, end_time = nil) @@ -97,11 +101,7 @@ def perf_collect_metrics(interval_name, start_time = nil, end_time = nil) "[#{start_time}] [#{end_time}]") begin - verify_metrics_connection!(ems) - - context = capture_context(ems, target, start_time, end_time) - - raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if context.nil? + context = build_capture_context!(ems, target, start_time, end_time) rescue TargetValidationError, TargetValidationWarning => e _log.send(e.log_severity, "[#{target_name}] #{e.message}") ems.try(:update, diff --git a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb index 61d99e2ecb..77674ca0e2 100644 --- a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb +++ b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb @@ -45,10 +45,10 @@ end end - context "#capture_context" do + context "#build_capture_context!" do it "detect prometheus metrics provider" do metric_capture = described_class.new(@node) - context = metric_capture.capture_context( + context = metric_capture.build_capture_context!( @ems_kubernetes, @node, 5.minutes.ago, From b8a12a9ad3063bd6fc0ddf63446ce62a6ecff241 Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Tue, 13 Jun 2023 10:03:36 -0400 Subject: [PATCH 3/5] Refactor metrics_capture_spec --- spec/factories/container_group.rb | 4 + spec/factories/ext_management_system.rb | 13 +++ .../container_manager/metrics_capture_spec.rb | 84 +++++-------------- 3 files changed, 39 insertions(+), 62 deletions(-) create mode 100644 spec/factories/container_group.rb diff --git a/spec/factories/container_group.rb b/spec/factories/container_group.rb new file mode 100644 index 0000000000..1a470f4ab2 --- /dev/null +++ b/spec/factories/container_group.rb @@ -0,0 +1,4 @@ +FactoryBot.define do + factory :kubernetes_container_group, :parent => :container_group, + :class => "ManageIQ::Providers::Kubernetes::ContainerManager::ContainerGroup" +end diff --git a/spec/factories/ext_management_system.rb b/spec/factories/ext_management_system.rb index cca14918cc..d1d562d891 100644 --- a/spec/factories/ext_management_system.rb +++ b/spec/factories/ext_management_system.rb @@ -5,4 +5,17 @@ zone end end + + trait :with_metrics_endpoint do + after(:create) do |ems| + ems.endpoints << FactoryBot.create(:endpoint, :role => "prometheus") + ems.authentications << FactoryBot.create(:authentication, :authtype => "prometheus", :status => "Valid") + end + end + + trait :with_invalid_auth do + after(:create) do |ems| + ems.authentications.update_all(:status => "invalid") + end + end end diff --git a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb index 77674ca0e2..21e453dfa0 100644 --- a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb +++ b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb @@ -1,59 +1,24 @@ describe ManageIQ::Providers::Kubernetes::ContainerManager::MetricsCapture do - before do - # @miq_server is required for worker_settings to work - @miq_server = EvmSpecHelper.local_miq_server(:is_master => true) - @ems_kubernetes = FactoryBot.create( - :ems_kubernetes, - :connection_configurations => [{:endpoint => {:role => :prometheus}, - :authentication => {:role => :prometheus}}], - ).tap { |ems| ems.authentications.each { |auth| auth.update!(:status => "Valid") } } - @container_project = FactoryBot.create(:container_project, :ext_management_system => @ems_kubernetes) - - @node = FactoryBot.create( - :kubernetes_node, - :name => 'node', - :ext_management_system => @ems_kubernetes, - :ems_ref => 'target' - ) - - @node.computer_system.hardware = FactoryBot.create( - :hardware, - :cpu_total_cores => 2, - :memory_mb => 2048 - ) - - @group = FactoryBot.create( - :container_group, - :ext_management_system => @ems_kubernetes, - :container_node => @node, - :ems_ref => 'group' - ) - - @container = FactoryBot.create( - :kubernetes_container, - :name => 'container', - :container_group => @group, - :container_project => @container_project, - :ext_management_system => @ems_kubernetes, - :ems_ref => 'target' - ) + let(:ems) { FactoryBot.create(:ems_kubernetes_with_zone, :with_metrics_endpoint) } + let(:container_project) { FactoryBot.create(:container_project, :ext_management_system => ems) } + let!(:group) { FactoryBot.create(:kubernetes_container_group, :ext_management_system => ems, :container_node => node) } + let!(:container) { FactoryBot.create(:kubernetes_container, :ext_management_system => ems, :container_group => group, :container_project => container_project) } + let(:node) do + FactoryBot.create(:kubernetes_node, :name => 'node', :ext_management_system => ems, :ems_ref => 'target').tap do |node| + node.computer_system.hardware = FactoryBot.create(:hardware, :cpu_total_cores => 2, :memory_mb => 2_048) + end end context "#perf_capture_object" do it "returns the correct class" do - expect(@ems_kubernetes.perf_capture_object.class).to eq(described_class) + expect(ems.perf_capture_object.class).to eq(described_class) end end context "#build_capture_context!" do it "detect prometheus metrics provider" do - metric_capture = described_class.new(@node) - context = metric_capture.build_capture_context!( - @ems_kubernetes, - @node, - 5.minutes.ago, - 0.minutes.ago - ) + metric_capture = described_class.new(node) + context = metric_capture.build_capture_context!(ems, node, 5.minutes.ago, 0.minutes.ago) expect(context).to be_a(described_class::PrometheusCaptureContext) end @@ -61,45 +26,40 @@ context "#perf_capture_all_queue" do it "returns the objects" do - expect(@ems_kubernetes.perf_capture_object.perf_capture_all_queue).to include("Container" => [@container], "ContainerNode" => [@node]) + expect(ems.perf_capture_object.perf_capture_all_queue).to include("Container" => [container], "ContainerGroup" => [group], "ContainerNode" => [node]) end context "with a missing metrics endpoint" do - before do - @ems_kubernetes.endpoints.find_by(:role => "prometheus").destroy - end + let(:ems) { FactoryBot.create(:ems_kubernetes) } it "returns no objects" do - expect(@ems_kubernetes.perf_capture_object.perf_capture_all_queue).to be_empty + expect(ems.perf_capture_object.perf_capture_all_queue).to be_empty end end context "with invalid authentication on the metrics endpoint" do - before do - @ems_kubernetes.authentications.find_by(:authtype => "prometheus").update!(:status => "Error") - @ems_kubernetes.reload - end + let(:ems) { FactoryBot.create(:ems_kubernetes_with_zone, :with_metrics_endpoint, :with_invalid_auth) } it "returns no objects" do - expect(@ems_kubernetes.perf_capture_object.perf_capture_all_queue).to be_empty + expect(ems.perf_capture_object.perf_capture_all_queue).to be_empty end end end context "#perf_collect_metrics" do it "fails when no ems is defined" do - @node.ext_management_system = nil - expect { @node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError) + node.ext_management_system = nil + expect { node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError) end it "fails when no cpu cores are defined" do - @node.hardware.cpu_total_cores = nil - expect { @node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError) + node.hardware.cpu_total_cores = nil + expect { node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError) end it "fails when memory is not defined" do - @node.hardware.memory_mb = nil - expect { @node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError) + node.hardware.memory_mb = nil + expect { node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError) end end end From 5e0794840cd867805f937cc0671a83873bbcfb12 Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Tue, 13 Jun 2023 10:13:29 -0400 Subject: [PATCH 4/5] Extract target_name to a method --- .../container_manager/metrics_capture.rb | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb index 0a2d824614..63e3df3cf7 100644 --- a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb +++ b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb @@ -73,8 +73,6 @@ def metrics_connection_valid?(ems) end def verify_metrics_connection!(ems) - t = target || ems - target_name = "#{t.class.name.demodulize}(#{t.id})" raise TargetValidationError, "no provider for #{target_name}" if ems.nil? raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if metrics_connection(ems).nil? @@ -96,9 +94,7 @@ def perf_collect_metrics(interval_name, start_time = nil, end_time = nil) start_time ||= 15.minutes.ago.beginning_of_minute.utc ems = target.ext_management_system - target_name = "#{target.class.name.demodulize}(#{target.id})" - _log.info("Collecting metrics for #{target_name} [#{interval_name}] " \ - "[#{start_time}] [#{end_time}]") + _log.info("Collecting metrics for #{target_name} [#{interval_name}] [#{start_time}] [#{end_time}]") begin context = build_capture_context!(ems, target, start_time, end_time) @@ -130,5 +126,14 @@ def perf_collect_metrics(interval_name, start_time = nil, end_time = nil) [{target.ems_ref => VIM_STYLE_COUNTERS}, {target.ems_ref => context.ts_values}] end + + private + + def target_name + @target_name ||= begin + t = target || ems + target_name = "#{t.class.name.demodulize}(#{t.id})" + end + end end end From 0c258a3c6e8451afc3fe956fbb5a1d59a47d717e Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Tue, 13 Jun 2023 10:17:53 -0400 Subject: [PATCH 5/5] Add test for invalid capture context --- .../kubernetes/container_manager/metrics_capture.rb | 2 +- spec/factories/container_group.rb | 5 +++-- .../container_manager/metrics_capture_spec.rb | 10 ++++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb index 63e3df3cf7..7069c176c8 100644 --- a/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb +++ b/app/models/manageiq/providers/kubernetes/container_manager/metrics_capture.rb @@ -132,7 +132,7 @@ def perf_collect_metrics(interval_name, start_time = nil, end_time = nil) def target_name @target_name ||= begin t = target || ems - target_name = "#{t.class.name.demodulize}(#{t.id})" + "#{t.class.name.demodulize}(#{t.id})" end end end diff --git a/spec/factories/container_group.rb b/spec/factories/container_group.rb index 1a470f4ab2..4c54428ab6 100644 --- a/spec/factories/container_group.rb +++ b/spec/factories/container_group.rb @@ -1,4 +1,5 @@ FactoryBot.define do - factory :kubernetes_container_group, :parent => :container_group, - :class => "ManageIQ::Providers::Kubernetes::ContainerManager::ContainerGroup" + factory :kubernetes_container_group, + :parent => :container_group, + :class => "ManageIQ::Providers::Kubernetes::ContainerManager::ContainerGroup" end diff --git a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb index 21e453dfa0..aeab5ca2ce 100644 --- a/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb +++ b/spec/models/manageiq/providers/kubernetes/container_manager/metrics_capture_spec.rb @@ -22,6 +22,16 @@ expect(context).to be_a(described_class::PrometheusCaptureContext) end + + context "on an invalid target" do + let(:group) { FactoryBot.create(:kubernetes_container_group, :ext_management_system => ems) } + + it "raises an exception" do + metric_capture = described_class.new(group) + expect { metric_capture.build_capture_context!(ems, group, 5.minutes.ago, 0.minutes.ago) } + .to raise_error(described_class::TargetValidationWarning, "no associated node") + end + end end context "#perf_capture_all_queue" do