From 680d12f5df3069a8977e1f2a3d76e6a58b75fde9 Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Tue, 11 Jul 2023 15:44:48 +0300 Subject: [PATCH 1/6] node: Provide callback for morph reconnection handling Signed-off-by: Pavel Karpy --- cmd/neofs-node/main.go | 4 ++++ cmd/neofs-node/morph.go | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 6edca30b63..19cf490cb5 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -152,3 +152,7 @@ func shutdown(c *cfg) { func (c *cfg) onShutdown(f func()) { c.closers = append(c.closers, f) } + +func (c *cfg) restartMorph() error { + return nil +} diff --git a/cmd/neofs-node/morph.go b/cmd/neofs-node/morph.go index 35481d0f49..7a9f49ee77 100644 --- a/cmd/neofs-node/morph.go +++ b/cmd/neofs-node/morph.go @@ -37,6 +37,12 @@ func initMorphComponents(c *cfg) { client.WithEndpoints(addresses), client.WithReconnectionRetries(morphconfig.ReconnectionRetriesNumber(c.appCfg)), client.WithReconnectionsDelay(morphconfig.ReconnectionRetriesDelay(c.appCfg)), + client.WithConnSwitchCallback(func() { + err = c.restartMorph() + if err != nil { + c.internalErr <- fmt.Errorf("restarting after morph connection was lost: %w", err) + } + }), client.WithConnLostCallback(func() { c.internalErr <- errors.New("morph connection has been lost") }), From 12674bdc256419326975a495945ad01d3416f6ff Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Tue, 11 Jul 2023 16:02:24 +0300 Subject: [PATCH 2/6] node: Reset caches on morph switch Their values may be stale after reconnection. 
Signed-off-by: Pavel Karpy --- cmd/neofs-node/cache.go | 27 ++++++++++++++++++++++++++- cmd/neofs-node/config.go | 20 ++++++++++++++++++++ cmd/neofs-node/container.go | 4 ++++ cmd/neofs-node/main.go | 2 ++ cmd/neofs-node/morph.go | 4 +++- 5 files changed, 55 insertions(+), 2 deletions(-) diff --git a/cmd/neofs-node/cache.go b/cmd/neofs-node/cache.go index a7d9ada834..e1d7271c45 100644 --- a/cmd/neofs-node/cache.go +++ b/cmd/neofs-node/cache.go @@ -129,6 +129,11 @@ func (c *ttlNetCache[K, V]) remove(key K) { c.cache.Remove(key) } +// reset removes every cached value. +func (c *ttlNetCache[K, V]) reset() { + c.cache.Purge() +} + // entity that provides LRU cache interface. type lruNetCache struct { cache *lru.Cache[uint64, *netmapSDK.NetMap] @@ -168,6 +173,10 @@ func (c *lruNetCache) get(key uint64) (*netmapSDK.NetMap, error) { return val, nil } +func (c *lruNetCache) reset() { + c.cache.Purge() +} + // wrapper over TTL cache of values read from the network // that implements container storage. type ttlContainerStorage struct { @@ -199,6 +208,10 @@ func (s *ttlContainerStorage) Get(cnr cid.ID) (*container.Container, error) { return val, nil } +func (s *ttlContainerStorage) reset() { + s.tc.reset() +} + type ttlEACLStorage struct { tc ttlNetCache[cid.ID, *container.EACL] } @@ -229,13 +242,17 @@ func (s *ttlEACLStorage) InvalidateEACL(cnr cid.ID) { s.tc.remove(cnr) } +func (s *ttlEACLStorage) reset() { + s.tc.reset() +} + type lruNetmapSource struct { netState netmap.State cache *lruNetCache } -func newCachedNetmapStorage(s netmap.State, v netmap.Source) netmap.Source { +func newCachedNetmapStorage(s netmap.State, v netmap.Source) *lruNetmapSource { const netmapCacheSize = 10 return &lruNetmapSource{ @@ -267,6 +284,10 @@ func (s *lruNetmapSource) Epoch() (uint64, error) { return s.netState.CurrentEpoch(), nil } +func (s *lruNetmapSource) reset() { + s.cache.reset() +} + // wrapper over TTL cache of values read from the network // that implements container lister. 
type ttlContainerLister struct { @@ -378,6 +399,10 @@ func (s *ttlContainerLister) update(owner user.ID, cnr cid.ID, add bool) { item.mtx.Unlock() } +func (s *ttlContainerLister) reset() { + s.inner.reset() +} + type cachedIRFetcher struct { tc ttlNetCache[struct{}, [][]byte] } diff --git a/cmd/neofs-node/config.go b/cmd/neofs-node/config.go index 44882caa26..3bffcf0eb0 100644 --- a/cmd/neofs-node/config.go +++ b/cmd/neofs-node/config.go @@ -349,6 +349,11 @@ type shared struct { putClientCache *cache.ClientCache localAddr network.AddressGroup + containerCache *ttlContainerStorage + eaclCache *ttlEACLStorage + containerListCache *ttlContainerLister + netmapCache *lruNetmapSource + key *keys.PrivateKey binPublicKey []byte ownerIDFromKey user.ID // user ID calculated from key @@ -371,6 +376,21 @@ type shared struct { metricsCollector *metrics.NodeMetrics } +func (s shared) resetCaches() { + if s.containerCache != nil { + s.containerCache.reset() + } + if s.eaclCache != nil { + s.eaclCache.reset() + } + if s.containerListCache != nil { + s.containerListCache.reset() + } + if s.netmapCache != nil { + s.netmapCache.reset() + } +} + // dynamicConfiguration stores parameters of the // components that supports runtime reconfigurations. 
type dynamicConfiguration struct { diff --git a/cmd/neofs-node/container.go b/cmd/neofs-node/container.go index 5bc8129c9b..aa330a9ef6 100644 --- a/cmd/neofs-node/container.go +++ b/cmd/neofs-node/container.go @@ -76,6 +76,10 @@ func initContainerService(c *cfg) { cachedEACLStorage := newCachedEACLStorage(eACLFetcher, c.cfgMorph.cacheTTL) cachedContainerLister := newCachedContainerLister(wrap, c.cfgMorph.cacheTTL) + c.shared.containerCache = cachedContainerStorage + c.shared.eaclCache = cachedEACLStorage + c.shared.containerListCache = cachedContainerLister + subscribeToContainerCreation(c, func(e event.Event) { ev := e.(containerEvent.PutSuccess) diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 19cf490cb5..b0de633897 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -154,5 +154,7 @@ func (c *cfg) onShutdown(f func()) { } func (c *cfg) restartMorph() error { + c.shared.resetCaches() + return nil } diff --git a/cmd/neofs-node/morph.go b/cmd/neofs-node/morph.go index 7a9f49ee77..63f79f3ea3 100644 --- a/cmd/neofs-node/morph.go +++ b/cmd/neofs-node/morph.go @@ -90,8 +90,10 @@ func initMorphComponents(c *cfg) { if c.cfgMorph.cacheTTL < 0 { netmapSource = wrap } else { + c.shared.netmapCache = newCachedNetmapStorage(c.cfgNetmap.state, wrap) + // use RPC node as source of netmap (with caching) - netmapSource = newCachedNetmapStorage(c.cfgNetmap.state, wrap) + netmapSource = c.shared.netmapCache } c.netMapSource = netmapSource From 54e7e95355a4e5df52cda40757ab82e053e063ce Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Wed, 12 Jul 2023 23:26:13 +0300 Subject: [PATCH 3/6] node: Update network state on morph switch Signed-off-by: Pavel Karpy --- cmd/neofs-node/main.go | 7 +++++++ cmd/neofs-node/netmap.go | 25 ++++++++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index b0de633897..1310ecd2cf 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ 
-156,5 +156,12 @@ func (c *cfg) onShutdown(f func()) { func (c *cfg) restartMorph() error { c.shared.resetCaches() + epoch, ni, err := getNetworkState(c) + if err != nil { + return fmt.Errorf("getting network state: %w", err) + } + + updateLocalState(c, epoch, ni) + return nil } diff --git a/cmd/neofs-node/netmap.go b/cmd/neofs-node/netmap.go index 59cb0c3736..5a3a431c74 100644 --- a/cmd/neofs-node/netmap.go +++ b/cmd/neofs-node/netmap.go @@ -247,11 +247,8 @@ func setNetmapNotificationParser(c *cfg, sTyp string, p event.NotificationParser // initNetmapState inits current Network map state. // Must be called after Morph components initialization. func initNetmapState(c *cfg) { - epoch, err := c.cfgNetmap.wrapper.Epoch() - fatalOnErrDetails("could not initialize current epoch number", err) - - ni, err := c.netmapLocalNodeState(epoch) - fatalOnErrDetails("could not init network state", err) + epoch, ni, err := getNetworkState(c) + fatalOnErrDetails("getting network state", err) stateWord := "undefined" @@ -269,6 +266,24 @@ func initNetmapState(c *cfg) { zap.String("state", stateWord), ) + updateLocalState(c, epoch, ni) +} + +func getNetworkState(c *cfg) (uint64, *netmapSDK.NodeInfo, error) { + epoch, err := c.cfgNetmap.wrapper.Epoch() + if err != nil { + return 0, nil, fmt.Errorf("could not get current epoch number: %w", err) + } + + ni, err := c.netmapLocalNodeState(epoch) + if err != nil { + return 0, nil, fmt.Errorf("could not init network state: %w", err) + } + + return epoch, ni, nil +} + +func updateLocalState(c *cfg, epoch uint64, ni *netmapSDK.NodeInfo) { c.cfgNetmap.state.setCurrentEpoch(epoch) c.cfgNetmap.startEpoch = epoch c.handleLocalNodeInfo(ni) From c038f31e3a272a89fae3cd60a681e96bff9b3111 Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Wed, 12 Jul 2023 23:48:12 +0300 Subject: [PATCH 4/6] node: Re-bootstrap node after every morph switch Signed-off-by: Pavel Karpy --- cmd/neofs-node/main.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git 
a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 1310ecd2cf..56236fa37d 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -163,5 +163,19 @@ func (c *cfg) restartMorph() error { updateLocalState(c, epoch, ni) + // bootstrap node after every reconnection because the duration of + // a connection downtime is unpredictable and bootstrap TX is a + // way to make a heartbeat so nothing is wrong in making sure the + // node is online (if it should be) + + if !c.needBootstrap() || c.cfgNetmap.reBoostrapTurnedOff.Load() { + return nil + } + + err = c.bootstrap() + if err != nil { + c.log.Warn("failed to re-bootstrap", zap.Error(err)) + } + return nil } From 83ee23b04c74e3ba1f9904006f956d88eabd1bed Mon Sep 17 00:00:00 2001 From: Pavel Karpy Date: Thu, 13 Jul 2023 00:20:33 +0300 Subject: [PATCH 5/6] node: Drop expired sessions on morph switch Closes #1337. Signed-off-by: Pavel Karpy --- CHANGELOG.md | 2 +- cmd/neofs-node/main.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97631d3822..0b060a9daa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ Changelog for NeoFS Node - `renew-domain` command for adm ### Fixed -- `neo-go` RPC connection lost handling by IR (#1337) +- `neo-go` RPC connection loss handling (#1337) - Concurrent morph cache misses (#1248) ### Removed diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 56236fa37d..4ecedf29df 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -163,6 +163,9 @@ func (c *cfg) restartMorph() error { updateLocalState(c, epoch, ni) + // drop sessions that may have expired while the connection was down + c.shared.privateTokenStore.RemoveOld(epoch) + // bootstrap node after every reconnection because the duration of // a connection downtime is unpredictable and bootstrap TX is a // way to make a heartbeat so nothing is wrong in making sure the From 25522cefaef571a0e890d0c0982bcb74ae9997ae Mon Sep 17 00:00:00 2001 
From: Pavel Karpy Date: Thu, 13 Jul 2023 00:30:29 +0300 Subject: [PATCH 6/6] node: Add INFO logs for morph switches Signed-off-by: Pavel Karpy --- cmd/neofs-node/main.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/neofs-node/main.go b/cmd/neofs-node/main.go index 4ecedf29df..907e85d63e 100644 --- a/cmd/neofs-node/main.go +++ b/cmd/neofs-node/main.go @@ -154,6 +154,8 @@ func (c *cfg) onShutdown(f func()) { } func (c *cfg) restartMorph() error { + c.log.Info("restarting internal services because of RPC connection loss...") + c.shared.resetCaches() epoch, ni, err := getNetworkState(c) @@ -180,5 +182,7 @@ func (c *cfg) restartMorph() error { c.log.Warn("failed to re-bootstrap", zap.Error(err)) } + c.log.Info("internal services have been restarted after RPC connection loss") + return nil }