Skip to content

Commit

Permalink
fix a little bug in shuffle (#19522)
Browse files Browse the repository at this point in the history
If the data distribution is not uniform, shuffle ranges should be used to avoid shuffle imbalance.

Approved by: @ouyuanning
  • Loading branch information
badboynt1 authored Oct 23, 2024
1 parent 8fa7fc7 commit 0817ad0
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions pkg/sql/plan/shuffle.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"math/bits"
"unsafe"

"github.com/matrixorigin/matrixone/pkg/logutil"

"github.com/matrixorigin/matrixone/pkg/catalog"

"github.com/matrixorigin/matrixone/pkg/container/hashtable"
Expand Down Expand Up @@ -292,7 +294,7 @@ func determinShuffleType(col *plan.ColRef, n *plan.Node, builder *QueryBuilder)
n.Stats.HashmapStats.ShuffleType = plan.ShuffleType_Range
n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[colName])
n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[colName])
n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[colName])
n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[colName], colName)
n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[colName])
}

Expand Down Expand Up @@ -532,7 +534,7 @@ func determinShuffleForScan(n *plan.Node, builder *QueryBuilder) {
n.Stats.HashmapStats.ShuffleColIdx = int32(n.TableDef.Cols[firstSortColID].Seqnum)
n.Stats.HashmapStats.ShuffleColMin = int64(s.MinValMap[firstSortColName])
n.Stats.HashmapStats.ShuffleColMax = int64(s.MaxValMap[firstSortColName])
n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[firstSortColName])
n.Stats.HashmapStats.Ranges = shouldUseShuffleRanges(s.ShuffleRangeMap[firstSortColName], firstSortColName)
n.Stats.HashmapStats.Nullcnt = int64(s.NullCntMap[firstSortColName])
}
}
Expand Down Expand Up @@ -597,9 +599,13 @@ func shouldUseHashShuffle(s *pb.ShuffleRange) bool {
return false
}

func shouldUseShuffleRanges(s *pb.ShuffleRange) []float64 {
if s == nil || math.IsNaN(s.Uniform) || s.Uniform < uniformThreshold {
func shouldUseShuffleRanges(s *pb.ShuffleRange, colname string) []float64 {
if s == nil || math.IsNaN(s.Uniform) || s.Result == nil {
return nil
}
return s.Result
if s.Uniform < uniformThreshold {
logutil.Infof("col %v data distribution is not uniformed, use shuffle range to avoid shuffle imbalance", colname)
return s.Result
}
return nil
}

0 comments on commit 0817ad0

Please sign in to comment.