Skip to content

Commit

Permalink
24-3: Add disable evict vdisks option to config (#9812) (#10339)
Browse files Browse the repository at this point in the history
  • Loading branch information
pixcc authored Oct 11, 2024
1 parent d81aef4 commit 780e345
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 2 deletions.
6 changes: 6 additions & 0 deletions ydb/core/cms/cms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,12 @@ bool TCms::CheckEvictVDisks(const TAction &action, TErrorInfo &error) const {
return false;
}

if (State->Config.SentinelConfig.EvictVDisksStatus.Empty()) {
error.Code = TStatus::ERROR;
error.Reason = "Evict vdisks is disabled in Sentinel (self heal)";
return false;
}

switch (action.GetType()) {
case TAction::RESTART_SERVICES:
case TAction::SHUTDOWN_HOST:
Expand Down
40 changes: 40 additions & 0 deletions ydb/core/cms/cms_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1951,6 +1951,46 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
env.CheckDonePermission("user", permission2.GetPermissions(0).GetId());
}

Y_UNIT_TEST(DisabledEvictVDisks)
{
    // Scenario: an evict-vdisks request is accepted while eviction is enabled,
    // is rejected with ERROR once eviction is switched off in the Sentinel
    // config, and eviction resumes when the option is switched back on.
    using TSentinelProto = NKikimrCms::TCmsConfig::TSentinelConfig;

    auto envOptions = TTestEnvOpts(8).WithSentinel();
    TCmsTestEnv env(envOptions);
    env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG);

    // Lower the default state limit so the test does not wait long for
    // Sentinel state transitions.
    auto config = env.GetCmsConfig();
    config.MutableSentinelConfig()->SetDefaultStateLimit(1);
    env.SetCmsConfig(config);

    // Writes the given eviction status into the local config copy and
    // pushes the whole config to CMS.
    auto applyEvictStatus = [&](const auto status) {
        config.MutableSentinelConfig()->SetEvictVDisksStatus(status);
        env.SetCmsConfig(config);
    };

    // Ask for a restart with VDisks eviction; DISALLOW_TEMP is the expected
    // answer while the VDisks are still being moved away.
    auto evictRequest = env.CheckPermissionRequest(
        MakePermissionRequest(
            TRequestOptions("user").WithEvictVDisks(),
            MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 600000000, "storage")
        ),
        TStatus::DISALLOW_TEMP
    );

    // With eviction enabled, a FAULTY update must be sent to BSC.
    env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY);

    // Turn eviction off ...
    applyEvictStatus(TSentinelProto::DISABLED);

    // ... which must produce an ACTIVE update to BSC ...
    env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::ACTIVE);

    // ... and make CMS answer the pending request with ERROR.
    env.CheckRequest("user", evictRequest.GetRequestId(), false, TStatus::ERROR, 0);

    // Turn eviction back on.
    applyEvictStatus(TSentinelProto::FAULTY);

    // The FAULTY update must be sent to BSC once more.
    env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY);
}

Y_UNIT_TEST(EmergencyDuringRollingRestart)
{
TCmsTestEnv env(8);
Expand Down
32 changes: 32 additions & 0 deletions ydb/core/cms/config.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#pragma once

#include "pdisk_state.h"
#include "pdisk_status.h"

#include <ydb/core/protos/cms.pb.h>

#include <util/datetime/base.h>
#include <util/generic/hash.h>
#include <util/generic/map.h>
#include <util/generic/maybe.h>

namespace NKikimr::NCms {

Expand All @@ -30,6 +32,8 @@ struct TCmsSentinelConfig {
ui32 RoomRatio;
ui32 RackRatio;

TMaybeFail<EPDiskStatus> EvictVDisksStatus;

void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
config.SetEnable(Enable);
config.SetDryRun(DryRun);
Expand All @@ -45,6 +49,7 @@ struct TCmsSentinelConfig {
config.SetRackRatio(RackRatio);

SaveStateLimits(config);
SaveEvictVDisksStatus(config);
}

void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) {
Expand All @@ -63,6 +68,8 @@ struct TCmsSentinelConfig {

auto newStateLimits = LoadStateLimits(config);
StateLimits.swap(newStateLimits);

EvictVDisksStatus = LoadEvictVDisksStatus(config);
}

void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
Expand Down Expand Up @@ -129,6 +136,31 @@ struct TCmsSentinelConfig {

return stateLimits;
}

// Translates the protobuf EvictVDisksStatus option into the in-memory form:
// DISABLED becomes an empty value, every other value becomes EPDiskStatus::FAULTY.
static TMaybeFail<EPDiskStatus> LoadEvictVDisksStatus(const NKikimrCms::TCmsConfig::TSentinelConfig &config) {
    // The enum constants are reached through the enclosing message class.
    using TSentinelProto = NKikimrCms::TCmsConfig::TSentinelConfig;

    switch (config.GetEvictVDisksStatus()) {
        case TSentinelProto::DISABLED:
            return Nothing();
        case TSentinelProto::UNKNOWN:
        case TSentinelProto::FAULTY:
            break;
    }

    // UNKNOWN, FAULTY and any value not listed above all resolve to FAULTY.
    return EPDiskStatus::FAULTY;
}

// Writes EvictVDisksStatus into the protobuf config: an empty value is stored
// as DISABLED and EPDiskStatus::FAULTY as FAULTY. Any other defined status
// leaves the protobuf field untouched.
void SaveEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
    // The enum constants are reached through the enclosing message class.
    using TSentinelProto = NKikimrCms::TCmsConfig::TSentinelConfig;

    if (EvictVDisksStatus.Defined()) {
        if (*EvictVDisksStatus == EPDiskStatus::FAULTY) {
            config.SetEvictVDisksStatus(TSentinelProto::FAULTY);
        }
    } else {
        config.SetEvictVDisksStatus(TSentinelProto::DISABLED);
    }
}
};

struct TCmsLogConfig {
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/cms/sentinel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,8 +895,8 @@ class TSentinel: public TActorBootstrapped<TSentinel> {
continue;
}

if (it->second.HasFaultyMarker()) {
info.SetForcedStatus(EPDiskStatus::FAULTY);
if (it->second.HasFaultyMarker() && Config.EvictVDisksStatus.Defined()) {
info.SetForcedStatus(*Config.EvictVDisksStatus);
} else {
info.ResetForcedStatus();
}
Expand Down
7 changes: 7 additions & 0 deletions ydb/core/protos/cms.proto
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,12 @@ message TCmsConfig {
optional uint32 Limit = 2;
}

// Controls what Sentinel does with PDisks of nodes whose VDisks are requested
// to be evicted. Loaded by TCmsSentinelConfig::LoadEvictVDisksStatus
// (ydb/core/cms/config.h).
enum EEvictVDisksStatus {
// Zero value; the loader treats it the same as FAULTY.
UNKNOWN = 0;
// Eviction is off: the loader yields an empty status, Sentinel does not force
// a PDisk status, and CMS rejects evict-vdisks requests with ERROR.
DISABLED = 1;
// Eviction is on: the loader yields the FAULTY PDisk status, which Sentinel
// forces on PDisks that carry a faulty marker.
FAULTY = 2;
}

optional bool Enable = 1 [default = true];
// Updater's config
optional uint64 UpdateConfigInterval = 2 [default = 3600000000];
Expand All @@ -449,6 +455,7 @@ message TCmsConfig {

optional bool DryRun = 13;
repeated TStateLimit StateLimits = 14;
optional EEvictVDisksStatus EvictVDisksStatus = 15;
}

message TLogConfig {
Expand Down

0 comments on commit 780e345

Please sign in to comment.