Skip to content

Commit

Permalink
node/engine: do not skip objects if shards are busy
Browse files Browse the repository at this point in the history
If every shard's pool is overloaded with routines, choose the best one and try
to PUT an object to it 30 seconds. Closes #2871.

Signed-off-by: Pavel Karpy <[email protected]>
  • Loading branch information
carpawell committed Oct 16, 2024
1 parent 34d187a commit 13b546f
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 17 deletions.
6 changes: 3 additions & 3 deletions pkg/local_object_storage/engine/evacuate.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ mainLoop:
if _, ok := shardMap[shards[j].ID().String()]; ok {
continue
}
putDone, exists := e.putToShard(shards[j].hashedShard, j, shards[j].pool, addr, PutPrm{obj: getRes.Object()})
if putDone || exists {
if putDone {
exists, err := e.putToShard(shards[j].hashedShard, j, shards[j].pool, addr, PutPrm{obj: getRes.Object()})
if err == nil {
if !exists {
e.log.Debug("object is moved to another shard",
zap.String("from", sidList[n]),
zap.Stringer("to", shards[j].ID()),
Expand Down
76 changes: 62 additions & 14 deletions pkg/local_object_storage/engine/put.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package engine

import (
"errors"
"fmt"
"time"

"github.com/nspcc-dev/neofs-node/pkg/core/object"
"github.com/nspcc-dev/neofs-node/pkg/local_object_storage/blobstor"
Expand All @@ -10,6 +12,7 @@ import (
"github.com/nspcc-dev/neofs-node/pkg/util"
objectSDK "github.com/nspcc-dev/neofs-sdk-go/object"
oid "github.com/nspcc-dev/neofs-sdk-go/object/id"
"github.com/panjf2000/ants/v2"
"go.uber.org/zap"
)

Expand Down Expand Up @@ -76,38 +79,77 @@ func (e *StorageEngine) put(prm PutPrm) (PutRes, error) {
}

finished := false
var bestShard hashedShard
var bestPool util.WorkerPool

e.iterateOverSortedShards(addr, func(ind int, sh hashedShard) (stop bool) {
e.mtx.RLock()
pool, ok := e.shardPools[sh.ID().String()]
if ok && bestPool == nil {
bestShard = sh
bestPool = pool
}
e.mtx.RUnlock()
if !ok {
// Shard was concurrently removed, skip.
return false
}

putDone, exists := e.putToShard(sh, ind, pool, addr, prm)
finished = putDone || exists
exists, err := e.putToShard(sh, ind, pool, addr, prm)
finished = err == nil || exists
return finished
})

if !finished {
err = errPutShard
err = e.putToShardWithDeadLine(bestShard, 0, bestPool, addr, prm)
if err != nil {
e.log.Warn("last stand to put object to the best shard",
zap.Stringer("addr", addr),
zap.Stringer("shard", bestShard.ID()),
zap.Error(err))

return PutRes{}, errPutShard
}
}

return PutRes{}, err
return PutRes{}, nil
}

func (e *StorageEngine) putToShardWithDeadLine(sh hashedShard, ind int, pool util.WorkerPool, addr oid.Address, prm PutPrm) error {
const deadline = 30 * time.Second
timer := time.NewTimer(deadline)
defer timer.Stop()

const putCooldown = 100 * time.Millisecond
ticker := time.NewTicker(putCooldown)
defer ticker.Stop()

for {
select {
case <-timer.C:
return fmt.Errorf("could not put object within %s", deadline)
case <-ticker.C:
_, err := e.putToShard(sh, ind, pool, addr, prm)
if errors.Is(err, ants.ErrPoolOverload) {
ticker.Reset(putCooldown)
continue
}

return err
}
}
}

// putToShard puts object to sh.
// First return value is true iff put has been successfully done.
// Second return value is true iff object already exists.
func (e *StorageEngine) putToShard(sh hashedShard, ind int, pool util.WorkerPool, addr oid.Address, prm PutPrm) (bool, bool) {
var putSuccess, alreadyExists bool
// Return value is true iff object already exists.
func (e *StorageEngine) putToShard(sh hashedShard, ind int, pool util.WorkerPool, addr oid.Address, prm PutPrm) (bool, error) {
var alreadyExists bool
var errGlobal error
id := sh.ID()

exitCh := make(chan struct{})

if err := pool.Submit(func() {
err := pool.Submit(func() {
defer close(exitCh)

var existPrm shard.ExistsPrm
Expand All @@ -124,8 +166,11 @@ func (e *StorageEngine) putToShard(sh hashedShard, ind int, pool util.WorkerPool
// object is already found but
// expired => do nothing with it
alreadyExists = true
return
}

errGlobal = err

return // this is not ErrAlreadyRemoved error so we can go to the next shard
}

Expand Down Expand Up @@ -159,6 +204,8 @@ func (e *StorageEngine) putToShard(sh hashedShard, ind int, pool util.WorkerPool

_, err = sh.Put(putPrm)
if err != nil {
errGlobal = err

if errors.Is(err, shard.ErrReadOnlyMode) || errors.Is(err, blobstor.ErrNoPlaceFound) ||
errors.Is(err, common.ErrReadOnly) || errors.Is(err, common.ErrNoSpace) {
e.log.Warn("could not put object to shard",
Expand All @@ -167,19 +214,20 @@ func (e *StorageEngine) putToShard(sh hashedShard, ind int, pool util.WorkerPool
return
}

e.reportShardError(sh, "could not put object to shard", err)
e.reportShardError(sh, "could not put object to shard", errGlobal)
return
}

putSuccess = true
}); err != nil {
})
if err != nil {
e.log.Warn("object put: pool task submitting", zap.Stringer("shard", id), zap.Error(err))
close(exitCh)

return false, err
}

<-exitCh

return putSuccess, alreadyExists
return alreadyExists, errGlobal
}

// Put writes provided object to local storage.
Expand Down

0 comments on commit 13b546f

Please sign in to comment.