Skip to content

Commit

Permalink
Add issue about using sync spaces with wrong failover (#2131)
Browse files Browse the repository at this point in the history

Co-authored-by: Michael Filonenko <[email protected]>
  • Loading branch information
yngvar-antonsson and filonenko-mikhail authored Aug 2, 2023
1 parent e70b7ba commit 5e64f31
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ Added

- New Failover API function ``set_options`` to change failover internal params.

- Issue about sync spaces usage with a wrong failover setup.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Changed
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
3 changes: 3 additions & 0 deletions cartridge/confapplier.lua
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ local cluster_cookie = require('cartridge.cluster-cookie')
local ClusterwideConfig = require('cartridge.clusterwide-config')
local logging_whitelist = require('cartridge.logging_whitelist')
local invalid_format = require('cartridge.invalid-format')
local sync_spaces = require('cartridge.sync-spaces')

yaml.cfg({
encode_load_metatables = false,
Expand Down Expand Up @@ -558,12 +559,14 @@ local function boot_instance(clusterwide_config)
-- It recovers snapshot
-- Or bootstraps replication
invalid_format.start_check()
sync_spaces.start_check()
local snap1 = hotreload.snap_fibers()

box.cfg(box_opts)
local snap2 = hotreload.snap_fibers()
hotreload.whitelist_fibers(hotreload.diff(snap1, snap2))
invalid_format.end_check()
sync_spaces.end_check()
require('membership.options').SUSPICIOUSNESS = true

local username = cluster_cookie.username()
Expand Down
18 changes: 18 additions & 0 deletions cartridge/failover.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,14 @@ local function is_suppressed()
return vars.failover_suppressed
end

--- Check if failover synchro mode enabled.
-- @function is_synchro_mode_enabled
-- @local
-- @treturn boolean true / false
local function is_synchro_mode_enabled()
return vars.enable_synchro_mode
end

--- Check if current configuration implies consistent switchover.
-- @function consistency_needed
-- @local
Expand All @@ -1033,6 +1041,14 @@ local function consistency_needed()
return vars.consistency_needed
end

--- Get failover mode.
-- @function mode
-- @local
-- @treturn string
local function mode()
return vars.mode
end

--- Get current stateful failover coordinator
-- @function get_coordinator
-- @treturn[1] table coordinator
Expand Down Expand Up @@ -1182,6 +1198,8 @@ return {
is_rw = is_rw,
is_paused = is_paused,
is_suppressed = is_suppressed,
is_synchro_mode_enabled = is_synchro_mode_enabled,
mode = mode,

force_inconsistency = force_inconsistency,
wait_consistency = wait_consistency,
Expand Down
21 changes: 19 additions & 2 deletions cartridge/issues.lua
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ local failover = require('cartridge.failover')
local confapplier = require('cartridge.confapplier')
local lua_api_proxy = require('cartridge.lua-api.proxy')
local invalid_format = require('cartridge.invalid-format')
local sync_spaces = require('cartridge.sync-spaces')

local ValidateConfigError = errors.new_class('ValidateConfigError')

Expand Down Expand Up @@ -305,6 +306,23 @@ local function list_on_instance(opts)
})
end

local sync_spaces_list = sync_spaces.spaces_list_str()
if sync_spaces_list ~= ''
and (failover.is_leader()
and (failover.mode() == 'eventual'
or (failover.mode() == 'stateful' and not failover.is_synchro_mode_enabled()))) then
table.insert(ret, {
level = 'warning',
topic = 'failover',
instance_uuid = instance_uuid,
replicaset_uuid = replicaset_uuid,
message = 'Having sync spaces may cause failover errors. ' ..
'Consider to change failover type to stateful and enable synchro_mode or use ' ..
'raft failover mode. Sync spaces: ' .. sync_spaces_list
})
end


-- It should be a vclockkeeper, but it's not
if failover.consistency_needed()
and failover.get_active_leaders()[replicaset_uuid] == instance_uuid
Expand Down Expand Up @@ -557,8 +575,7 @@ local function list_on_cluster()

-- Check stateful failover issues

local failover_cfg = topology.get_failover_params(topology_cfg)
if failover_cfg.mode == 'stateful' then
if failover.mode() == 'stateful' then
local coordinator, err = failover.get_coordinator()

if err ~= nil then
Expand Down
75 changes: 75 additions & 0 deletions cartridge/sync-spaces.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
local log = require('log')
local vars = require('cartridge.vars').new('cartridge.sync-spaces')

vars:new('sync_spaces', {})

local function check_sync(old, new)
if new == nil then
vars.sync_spaces[old[3]] = nil
return
end
local name = new[3]

if new[6].is_sync then
vars.sync_spaces[name] = true
else
vars.sync_spaces[name] = nil
end
end

local function before_replace(old, new)
check_sync(old, new)
return new
end

local function on_schema_init()
box.space._space:before_replace(before_replace)
end

--- List sync spaces.
--
-- @function spaces_list_str
-- @treturn string String of spaces with invalid params, delimeted by comma
local function spaces_list_str()
local res = ''
for name, _ in pairs(vars.sync_spaces) do
res = res .. name .. ', '
end
return res:sub(1, -3)
end

--- Start check if spaces have invalid format in Tarantool 2.x.x.
-- In Tarantool 1.10 you can perform check by youself with function `run_check`.
--
-- @function start_check
local function start_check()
if box.ctl.on_schema_init ~= nil then
box.ctl.on_schema_init(on_schema_init)
end
end


--- Remove set triggers and write message to log.
--
-- @function end_check
local function end_check()
if box.ctl.on_schema_init ~= nil then
box.space._space:before_replace(nil, before_replace)
box.ctl.on_schema_init(nil, on_schema_init)
end

if next(vars.sync_spaces) then
log.warn(
"Having sync spaces may cause failover errors. " ..
"Consider to change failover type to stateful and enable synchro_mode or use " ..
"raft failover mode. Sync spaces: " ..
spaces_list_str()
)
end
end

return {
start_check = start_check,
end_check = end_check,
spaces_list_str = spaces_list_str,
}
33 changes: 33 additions & 0 deletions test/integration/failover_eventual_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,39 @@ g.after_test('test_sigstop', function()
cluster:wait_until_healthy()
end)


function g.test_sync_spaces_is_prohibited()
t.skip_if(not helpers.tarantool_version_ge('2.6.1'))
local master = cluster:server('storage-1')
master:exec(function()
box.schema.space.create('test', {if_not_exists = true, is_sync=true})
end)

master:restart()

helpers.retrying({}, function()
t.assert_items_equals(helpers.list_cluster_issues(master), {
{
level = 'warning',
topic = 'failover',
message = 'Having sync spaces may cause failover errors. ' ..
'Consider to change failover type to stateful and enable synchro_mode or use ' ..
'raft failover mode. Sync spaces: test',
instance_uuid = master.instance_uuid,
replicaset_uuid = master.replicaset_uuid,
},
})
end)
end

g.after_test('test_sync_spaces_is_prohibited', function()
cluster:server('storage-1'):exec(function()
box.space.test:drop()
end)
cluster:server('storage-1'):restart()
end)


local function failover_pause()
cluster.main_server:graphql({
query = [[
Expand Down

0 comments on commit 5e64f31

Please sign in to comment.