Skip to content

Commit

Permalink
Merge pull request #493 from agrare/check_metrics_connection_validity…
Browse files Browse the repository at this point in the history
…_before_queueing_perf_captures

Check metrics authentication validity before perf_capture_queue
  • Loading branch information
kbrock authored Jun 13, 2023
2 parents 7be0a73 + 0c258a3 commit ba04739
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ def log_severity
}
}

# Returns the targets to capture metrics for, or an empty list when the
# provider's metrics connection fails verification.
#
# A failed verification is logged at the severity carried by the raised
# validation error rather than being propagated to the caller.
def capture_ems_targets(_options = {})
  verify_metrics_connection!(ems)
rescue TargetValidationError, TargetValidationWarning => e
  _log.send(e.log_severity, e.message)
  []
else
  # Runs only when verification passed. Errors raised by the parent
  # implementation are not covered by the rescue clause above.
  super
end

# Builds a PrometheusCaptureContext for +target+ over the given time window,
# passing the INTERVAL constant as the fourth constructor argument.
def prometheus_capture_context(target, start_time, end_time)
  PrometheusCaptureContext.new(
    target,
    start_time,
    end_time,
    INTERVAL
  )
end
Expand All @@ -57,31 +68,36 @@ def metrics_connection(ems)
ems.connection_configurations.prometheus
end

def capture_context(_ems, target, start_time, end_time)
# True only when the provider's metrics connection exists, has an
# authentication record, and that record's status is exactly "Valid".
def metrics_connection_valid?(ems)
  authentication = metrics_connection(ems)&.authentication
  authentication&.status == "Valid"
end

# Checks that metrics can be collected through +ems+.
#
# Raises TargetValidationError when +ems+ is nil, and TargetValidationWarning
# when no metrics connection is found or its authentication is not valid.
# Returns nil when every check passes.
def verify_metrics_connection!(ems)
  if ems.nil?
    raise TargetValidationError, "no provider for #{target_name}"
  elsif metrics_connection(ems).nil?
    raise TargetValidationWarning, "no metrics endpoint found for #{target_name}"
  elsif !metrics_connection_valid?(ems)
    raise TargetValidationWarning, "metrics authentication isn't valid for #{target_name}"
  end
end

# Verifies the provider's metrics connection and builds the capture context
# for +target+.
#
# @param ems provider passed to verify_metrics_connection!
# @param target object the capture context is built for
# @param start_time start of the window; truncated to the beginning of its minute
# @param end_time end of the window, passed through unchanged
# @return the context produced by prometheus_capture_context
# @raise [TargetValidationWarning] when no context was produced; anything
#   raised by verify_metrics_connection! propagates unchanged
def build_capture_context!(ems, target, start_time, end_time)
  verify_metrics_connection!(ems)

  # make start_time align to minutes
  aligned_start = start_time.beginning_of_minute

  # Build the context exactly once and keep the result.
  context = prometheus_capture_context(target, aligned_start, end_time)
  raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if context.nil?

  context
end

def perf_collect_metrics(interval_name, start_time = nil, end_time = nil)
start_time ||= 15.minutes.ago.beginning_of_minute.utc
ems = target.ext_management_system

target_name = "#{target.class.name.demodulize}(#{target.id})"
_log.info("Collecting metrics for #{target_name} [#{interval_name}] " \
"[#{start_time}] [#{end_time}]")
_log.info("Collecting metrics for #{target_name} [#{interval_name}] [#{start_time}] [#{end_time}]")

begin
raise TargetValidationError, "no provider for #{target_name}" if ems.nil?

connection = metrics_connection(ems)
raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if connection.nil?
raise TargetValidationWarning, "metrics authentication isn't valid for #{target_name}" unless connection.authentication&.status == "Valid"

context = capture_context(ems, target, start_time, end_time)

raise TargetValidationWarning, "no metrics endpoint found for #{target_name}" if context.nil?
context = build_capture_context!(ems, target, start_time, end_time)
rescue TargetValidationError, TargetValidationWarning => e
_log.send(e.log_severity, "[#{target_name}] #{e.message}")
ems.try(:update,
Expand Down Expand Up @@ -110,5 +126,14 @@ def perf_collect_metrics(interval_name, start_time = nil, end_time = nil)
[{target.ems_ref => VIM_STYLE_COUNTERS},
{target.ems_ref => context.ts_values}]
end

private

# Memoized label of the form "ClassName(id)" used in log and error messages.
# Built from +target+ when present, otherwise from +ems+.
def target_name
  return @target_name if @target_name

  subject = target || ems
  @target_name = "#{subject.class.name.demodulize}(#{subject.id})"
end
end
end
5 changes: 5 additions & 0 deletions spec/factories/container_group.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Container group factory for the Kubernetes provider: declares
# :container_group as its parent and overrides the model class.
FactoryBot.define do
  factory :kubernetes_container_group,
          :class  => "ManageIQ::Providers::Kubernetes::ContainerManager::ContainerGroup",
          :parent => :container_group
end
13 changes: 13 additions & 0 deletions spec/factories/ext_management_system.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,17 @@
zone
end
end

# After creation, attaches a "prometheus" endpoint and a "prometheus"
# authentication whose status is "Valid" to the provider.
trait :with_metrics_endpoint do
  after(:create) do |ems|
    prometheus_endpoint = FactoryBot.create(:endpoint, :role => "prometheus")
    ems.endpoints << prometheus_endpoint

    prometheus_auth = FactoryBot.create(:authentication, :authtype => "prometheus", :status => "Valid")
    ems.authentications << prometheus_auth
  end
end

# After creation, sets the status of every authentication on the provider
# to "invalid".
trait :with_invalid_auth do
  after(:create) { |ems| ems.authentications.update_all(:status => "invalid") }
end
end
Original file line number Diff line number Diff line change
@@ -1,78 +1,75 @@
describe ManageIQ::Providers::Kubernetes::ContainerManager::MetricsCapture do
before do
# @miq_server is required for worker_settings to work
@miq_server = EvmSpecHelper.local_miq_server(:is_master => true)
@ems_kubernetes = FactoryBot.create(
:ems_kubernetes,
:connection_configurations => [{:endpoint => {:role => :prometheus},
:authentication => {:role => :prometheus}}],
).tap { |ems| ems.authentications.each { |auth| auth.update!(:status => "Valid") } }
@container_project = FactoryBot.create(:container_project, :ext_management_system => @ems_kubernetes)

@node = FactoryBot.create(
:kubernetes_node,
:name => 'node',
:ext_management_system => @ems_kubernetes,
:ems_ref => 'target'
)

@node.computer_system.hardware = FactoryBot.create(
:hardware,
:cpu_total_cores => 2,
:memory_mb => 2048
)

@group = FactoryBot.create(
:container_group,
:ext_management_system => @ems_kubernetes,
:container_node => @node,
:ems_ref => 'group'
)

@container = FactoryBot.create(
:kubernetes_container,
:name => 'container',
:container_group => @group,
:container_project => @container_project,
:ext_management_system => @ems_kubernetes,
:ems_ref => 'target'
)
let(:ems) { FactoryBot.create(:ems_kubernetes_with_zone, :with_metrics_endpoint) }
let(:container_project) { FactoryBot.create(:container_project, :ext_management_system => ems) }
let!(:group) { FactoryBot.create(:kubernetes_container_group, :ext_management_system => ems, :container_node => node) }
let!(:container) { FactoryBot.create(:kubernetes_container, :ext_management_system => ems, :container_group => group, :container_project => container_project) }
let(:node) do
FactoryBot.create(:kubernetes_node, :name => 'node', :ext_management_system => ems, :ems_ref => 'target').tap do |node|
node.computer_system.hardware = FactoryBot.create(:hardware, :cpu_total_cores => 2, :memory_mb => 2_048)
end
end

context "#perf_capture_object" do
it "returns the correct class" do
expect(@ems_kubernetes.perf_capture_object.class).to eq(described_class)
expect(ems.perf_capture_object.class).to eq(described_class)
end
end

context "#capture_context" do
context "#build_capture_context!" do
it "detect prometheus metrics provider" do
metric_capture = described_class.new(@node)
context = metric_capture.capture_context(
@ems_kubernetes,
@node,
5.minutes.ago,
0.minutes.ago
)
metric_capture = described_class.new(node)
context = metric_capture.build_capture_context!(ems, node, 5.minutes.ago, 0.minutes.ago)

expect(context).to be_a(described_class::PrometheusCaptureContext)
end

context "on an invalid target" do
  # Container group created without a :container_node.
  let(:group) { FactoryBot.create(:kubernetes_container_group, :ext_management_system => ems) }

  it "raises an exception" do
    capture = described_class.new(group)

    expect do
      capture.build_capture_context!(ems, group, 5.minutes.ago, 0.minutes.ago)
    end.to raise_error(described_class::TargetValidationWarning, "no associated node")
  end
end
end

context "#perf_capture_all_queue" do
  # Evaluated lazily per example, so it picks up whichever `ems` the
  # enclosing context defines.
  let(:queued_targets) { ems.perf_capture_object.perf_capture_all_queue }

  it "returns the objects" do
    expect(queued_targets).to include(
      "Container"      => [container],
      "ContainerGroup" => [group],
      "ContainerNode"  => [node]
    )
  end

  context "with a missing metrics endpoint" do
    # Provider built without the :with_metrics_endpoint trait.
    let(:ems) { FactoryBot.create(:ems_kubernetes) }

    it "returns no objects" do
      expect(queued_targets).to be_empty
    end
  end

  context "with invalid authentication on the metrics endpoint" do
    let(:ems) { FactoryBot.create(:ems_kubernetes_with_zone, :with_metrics_endpoint, :with_invalid_auth) }

    it "returns no objects" do
      expect(queued_targets).to be_empty
    end
  end
end

context "#perf_collect_metrics" do
it "fails when no ems is defined" do
@node.ext_management_system = nil
expect { @node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError)
node.ext_management_system = nil
expect { node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError)
end

it "fails when no cpu cores are defined" do
@node.hardware.cpu_total_cores = nil
expect { @node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError)
node.hardware.cpu_total_cores = nil
expect { node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError)
end

it "fails when memory is not defined" do
@node.hardware.memory_mb = nil
expect { @node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError)
node.hardware.memory_mb = nil
expect { node.perf_collect_metrics('interval_name') }.to raise_error(described_class::TargetValidationError)
end
end
end

0 comments on commit ba04739

Please sign in to comment.