From 04809d7cbf8189d17e60708986df7b414ffb7bd8 Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Mon, 15 Jul 2024 15:47:14 +0530 Subject: [PATCH 01/10] Add Batch Consumer --- xkafka/batch.go | 106 ++++++++++++++ xkafka/batch_consumer.go | 290 +++++++++++++++++++++++++++++++++++++ xkafka/consumer.go | 2 +- xkafka/consumer_options.go | 18 +++ xkafka/generate.go | 1 + xkafka/handler.go | 42 +++++- xkafka/producer.go | 2 +- 7 files changed, 451 insertions(+), 10 deletions(-) create mode 100644 xkafka/batch.go create mode 100644 xkafka/batch_consumer.go diff --git a/xkafka/batch.go b/xkafka/batch.go new file mode 100644 index 0000000..d292499 --- /dev/null +++ b/xkafka/batch.go @@ -0,0 +1,106 @@ +package xkafka + +import ( + "sync" + + "github.com/confluentinc/confluent-kafka-go/v2/kafka" + "github.com/rs/xid" +) + +// Batch is a group of messages that are processed together. +type Batch struct { + ID string + Messages []*Message + Status Status + + err error + lock sync.Mutex +} + +// NewBatch creates a new Batch. +func NewBatch() *Batch { + return &Batch{ + ID: xid.New().String(), + Messages: make([]*Message, 0), + } +} + +// AckSuccess marks the batch as successfully processed. +func (b *Batch) AckSuccess() { + b.lock.Lock() + defer b.lock.Unlock() + + b.Status = Success +} + +// AckFail marks the batch as failed to process. +func (b *Batch) AckFail(err error) { + b.lock.Lock() + defer b.lock.Unlock() + + b.Status = Fail + b.err = err +} + +// AckSkip marks the batch as skipped. +func (b *Batch) AckSkip() { + b.lock.Lock() + defer b.lock.Unlock() + + b.Status = Skip +} + +// Err returns the error that caused the batch to fail. +func (b *Batch) Err() error { + b.lock.Lock() + defer b.lock.Unlock() + + return b.err +} + +// MaxOffset returns the maximum offset among the +// messages in the batch. +func (b *Batch) MaxOffset() int64 { + b.lock.Lock() + defer b.lock.Unlock() + + var max int64 + for _, m := range b.Messages { + if m.Offset > max { + max = m.Offset + } + } + + return max +} + +// GroupMaxOffset returns the maximum offset for each +// topic-partition in the batch. +func (b *Batch) GroupMaxOffset() []kafka.TopicPartition { + b.lock.Lock() + defer b.lock.Unlock() + + offsets := make(map[string]map[int32]int64) + for _, m := range b.Messages { + if _, ok := offsets[m.Topic]; !ok { + offsets[m.Topic] = make(map[int32]int64) + } + + if m.Offset > offsets[m.Topic][m.Partition] { + offsets[m.Topic][m.Partition] = m.Offset + } + } + + var tps []kafka.TopicPartition + for topic, partitions := range offsets { + for partition, offset := range partitions { + tps = append(tps, kafka.TopicPartition{ + Topic: &topic, + Partition: partition, + Offset: kafka.Offset(offset + 1), + }) + } + } + + return tps +} diff --git a/xkafka/batch_consumer.go b/xkafka/batch_consumer.go new file mode 100644 index 0000000..815c7d9 --- /dev/null +++ b/xkafka/batch_consumer.go @@ -0,0 +1,290 @@ +package xkafka + +import ( + "context" + "errors" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/confluentinc/confluent-kafka-go/v2/kafka" + "github.com/sourcegraph/conc/pool" + "github.com/sourcegraph/conc/stream" +) + +// BatchConsumer manages consumption & processing of messages +// from kafka topics in batches. +type BatchConsumer struct { + name string + kafka consumerClient + handler BatchHandler + middlewares []BatchMiddlewarer + config *consumerConfig + batch *BatchManager + cancelCtx atomic.Pointer[context.CancelFunc] +} + +// NewBatchConsumer creates a new BatchConsumer instance. 
+func NewBatchConsumer(name string, handler BatchHandler, opts ...ConsumerOption) (*BatchConsumer, error) { + cfg, err := newConsumerConfig(opts...) + if err != nil { + return nil, err + } + + // override kafka configs + _ = cfg.configMap.SetKey("enable.auto.offset.store", false) + _ = cfg.configMap.SetKey("bootstrap.servers", strings.Join(cfg.brokers, ",")) + _ = cfg.configMap.SetKey("group.id", name) + + if cfg.manualCommit { + _ = cfg.configMap.SetKey("enable.auto.commit", false) + } + + consumer, err := cfg.consumerFn(&cfg.configMap) + if err != nil { + return nil, err + } + + return &BatchConsumer{ + name: name, + config: cfg, + kafka: consumer, + handler: handler, + batch: NewBatchManager(cfg.batchSize, cfg.batchTimeout), + }, nil +} + +// GetMetadata returns the metadata for the consumer. +func (c *BatchConsumer) GetMetadata() (*Metadata, error) { + return c.kafka.GetMetadata(nil, false, int(c.config.metadataTimeout.Milliseconds())) +} + +// Use appends a BatchMiddleware to the chain. +func (c *BatchConsumer) Use(mws ...BatchMiddlewarer) { + c.middlewares = append(c.middlewares, mws...) +} + +// Run starts the consumer and blocks until context is cancelled. +func (c *BatchConsumer) Run(ctx context.Context) error { + if err := c.start(ctx); err != nil { + return err + } + + return nil +} + +func (c *BatchConsumer) start(ctx context.Context) error { + c.handler = c.concatMiddlewares(c.handler) + + pool := pool.New(). + WithContext(ctx). + WithMaxGoroutines(2). + WithCancelOnError() + + pool.Go(func(ctx context.Context) error { + if c.config.concurrency > 1 { + return c.processAsync(ctx) + } + + return c.process(ctx) + }) + + pool.Go(func(ctx context.Context) error { + return c.consume(ctx) + }) + + return pool.Wait() +} + +func (c *BatchConsumer) process(ctx context.Context) error { + for { + select { + case <-ctx.Done(): + return nil + case batch := <-c.batch.Receive(): + err := c.handler.HandleBatch(ctx, batch) + if ferr := c.config.errorHandler(err); ferr != nil { + return ferr + } + + err = c.saveOffset(batch) + if err != nil { + return err + } + } + } +} + +func (c *BatchConsumer) processAsync(ctx context.Context) error { + st := stream.New().WithMaxGoroutines(c.config.concurrency) + ctx, cancel := context.WithCancelCause(ctx) + + for { + select { + case <-ctx.Done(): + st.Wait() + + return context.Cause(ctx) + case batch := <-c.batch.Receive(): + st.Go(func() stream.Callback { + err := c.handler.HandleBatch(ctx, batch) + if ferr := c.config.errorHandler(err); ferr != nil { + cancel(ferr) + + return func() {} + } + + return func() { + if err := c.saveOffset(batch); err != nil { + cancel(err) + } + } + }) + } + } +} + +func (c *BatchConsumer) consume(ctx context.Context) (err error) { + if err := c.subscribe(); err != nil { + return err + } + + defer func() { + if uerr := c.unsubscribe(); uerr != nil { + err = errors.Join(err, uerr) + } + }() + + for { + select { + case <-ctx.Done(): + return err + default: + km, err := c.kafka.ReadMessage(c.config.pollTimeout) + if err != nil { + var kerr kafka.Error + if errors.As(err, &kerr) && kerr.Code() == kafka.ErrTimedOut { + continue + } + + if ferr := c.config.errorHandler(err); ferr != nil { + err = ferr + + return err + } + + continue + } + + msg := newMessage(c.name, km) + + c.batch.Add(msg) + } + } +} + +func (c *BatchConsumer) subscribe() error { + return c.kafka.SubscribeTopics(c.config.topics, nil) +} + +func (c *BatchConsumer) unsubscribe() error { + return c.kafka.Unsubscribe() +} + +func (c *BatchConsumer) 
concatMiddlewares(handler BatchHandler) BatchHandler { + for i := len(c.middlewares) - 1; i >= 0; i-- { + handler = c.middlewares[i].BatchMiddleware(handler) + } + + return handler +} + +func (c *BatchConsumer) saveOffset(batch *Batch) error { + if batch.Status == Fail { + return nil + } + + offsets := batch.GroupMaxOffset() + + _, err := c.kafka.StoreOffsets(offsets) + if err != nil { + return err + } + + if c.config.manualCommit { + if _, err := c.kafka.Commit(); err != nil { + return err + } + } + + return nil +} + +// BatchManager manages aggregation and processing of Message batches. +type BatchManager struct { + size int + timeout time.Duration + batch *Batch + mutex *sync.RWMutex + flushChan chan *Batch +} + +// NewBatchManager creates a new BatchManager. +func NewBatchManager(size int, timeout time.Duration) *BatchManager { + b := &BatchManager{ + size: size, + timeout: timeout, + mutex: &sync.RWMutex{}, + batch: &Batch{ + Messages: make([]*Message, 0, size), + }, + flushChan: make(chan *Batch), + } + + go b.runFlushByTime() + + return b +} + +// Add adds to batch and flushes when MaxSize is reached. +func (b *BatchManager) Add(m *Message) { + b.mutex.Lock() + b.batch.Messages = append(b.batch.Messages, m) + + if len(b.batch.Messages) >= b.size { + b.flush() + } + + b.mutex.Unlock() +} + +// Receive returns a channel to read batched Messages. +func (b *BatchManager) Receive() <-chan *Batch { + return b.flushChan +} + +func (b *BatchManager) runFlushByTime() { + t := time.NewTicker(b.timeout) + + for range t.C { + b.mutex.Lock() + b.flush() + b.mutex.Unlock() + } +} + +// flush sends the batch to the flush channel and resets the batch. +// DESIGN: flush does NOT acquire a mutex lock. Locks should be managed by caller. +// nolint:gosimple +func (b *BatchManager) flush() { + if len(b.batch.Messages) == 0 { + return + } + + b.flushChan <- b.batch + + b.batch = &Batch{ + Messages: make([]*Message, 0, b.size), + } +} diff --git a/xkafka/consumer.go b/xkafka/consumer.go index 2ba0c9d..1506751 100644 --- a/xkafka/consumer.go +++ b/xkafka/consumer.go @@ -17,7 +17,7 @@ type Consumer struct { name string kafka consumerClient handler Handler - middlewares []middleware + middlewares []Middlewarer config *consumerConfig cancelCtx atomic.Pointer[context.CancelFunc] } diff --git a/xkafka/consumer_options.go b/xkafka/consumer_options.go index b3fd49d..e3b3e97 100644 --- a/xkafka/consumer_options.go +++ b/xkafka/consumer_options.go @@ -21,6 +21,10 @@ type consumerConfig struct { pollTimeout time.Duration concurrency int manualCommit bool + + // batch options + batchSize int + batchTimeout time.Duration } func newConsumerConfig(opts ...ConsumerOption) (*consumerConfig, error) { @@ -100,3 +104,17 @@ type ManualCommit bool func (mc ManualCommit) setConsumerConfig(o *consumerConfig) { o.manualCommit = bool(mc) } + +// BatchSize defines the maximum number of messages in a batch. +type BatchSize int + +func (bs BatchSize) setConsumerConfig(o *consumerConfig) { + o.batchSize = int(bs) +} + +// BatchTimeout defines the maximum time to wait for a batch to be filled. +type BatchTimeout time.Duration + +func (bt BatchTimeout) setConsumerConfig(o *consumerConfig) { + o.batchTimeout = time.Duration(bt) +} diff --git a/xkafka/generate.go b/xkafka/generate.go index 3f87faf..e119be1 100644 --- a/xkafka/generate.go +++ b/xkafka/generate.go @@ -2,3 +2,4 @@ //go:generate mockery --name producerClient --structname MockProducerClient --filename producer_mock_test.go --outpkg xkafka --output . 
package xkafka + diff --git a/xkafka/handler.go b/xkafka/handler.go index 6f2bd2b..e13c6bf 100644 --- a/xkafka/handler.go +++ b/xkafka/handler.go @@ -4,12 +4,12 @@ import ( "context" ) -// Handler responds to a Message from a Kafka topic. +// Handler defines a message handler. type Handler interface { Handle(ctx context.Context, m *Message) error } -// HandlerFunc defines signature of a message handler function. +// HandlerFunc defines a function for handling messages. type HandlerFunc func(ctx context.Context, m *Message) error // Handle implements Handler interface on HandlerFunc. @@ -17,15 +17,41 @@ func (h HandlerFunc) Handle(ctx context.Context, m *Message) error { return h(ctx, m) } -// MiddlewareFunc functions are closures that intercept Messages. -type MiddlewareFunc func(Handler) Handler - -// middleware interface is anything which implements a MiddlewareFunc named Middleware. -type middleware interface { +// Middlewarer is an interface for message handler middleware. +type Middlewarer interface { Middleware(handler Handler) Handler } -// Middleware allows MiddlewareFunc to implement the middleware interface. +// MiddlewareFunc defines a function for middleware. +type MiddlewareFunc func(Handler) Handler + +// Middleware implements Middlewarer interface. func (mw MiddlewareFunc) Middleware(handler Handler) Handler { return mw(handler) } + +// BatchHandler defines a handler for a batch of messages. +type BatchHandler interface { + HandleBatch(ctx context.Context, b *Batch) error +} + +// BatchHandlerFunc defines a function for handling a batch. +type BatchHandlerFunc func(ctx context.Context, b *Batch) error + +// HandleBatch implements BatchHandler interface. +func (h BatchHandlerFunc) HandleBatch(ctx context.Context, b *Batch) error { + return h(ctx, b) +} + +// BatchMiddlewarer is an interface for batch message handler middleware. +type BatchMiddlewarer interface { + BatchMiddleware(handler BatchHandler) BatchHandler +} + +// BatchMiddlewareFunc defines a function for batch middleware. +type BatchMiddlewareFunc func(BatchHandler) BatchHandler + +// BatchMiddleware implements BatchMiddlewarer interface. 
+func (mw BatchMiddlewareFunc) BatchMiddleware(handler BatchHandler) BatchHandler { + return mw(handler) +} diff --git a/xkafka/producer.go b/xkafka/producer.go index 0b8c837..3b70a69 100644 --- a/xkafka/producer.go +++ b/xkafka/producer.go @@ -15,7 +15,7 @@ type Producer struct { config *producerConfig kafka producerClient events chan kafka.Event - middlewares []middleware + middlewares []Middlewarer wrappedPublish Handler wrappedAsyncPublish Handler } From 82ec3897a3d9a92396aa0bc8f9b586a1d671a541 Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Tue, 16 Jul 2024 14:45:36 +0530 Subject: [PATCH 02/10] Added basic tests --- xkafka/batch.go | 3 + xkafka/batch_consumer.go | 50 ++++++- xkafka/batch_consumer_test.go | 263 ++++++++++++++++++++++++++++++++++ xkafka/consumer_options.go | 2 + xkafka/generate.go | 1 - 5 files changed, 316 insertions(+), 3 deletions(-) create mode 100644 xkafka/batch_consumer_test.go diff --git a/xkafka/batch.go b/xkafka/batch.go index d292499..bfa54bc 100644 --- a/xkafka/batch.go +++ b/xkafka/batch.go @@ -92,7 +92,10 @@ func (b *Batch) GroupMaxOffset() []kafka.TopicPartition { } var tps []kafka.TopicPartition + for topic, partitions := range offsets { + topic := topic + for partition, offset := range partitions { tps = append(tps, kafka.TopicPartition{ Topic: &topic, diff --git a/xkafka/batch_consumer.go b/xkafka/batch_consumer.go index 815c7d9..4ad0613 100644 --- a/xkafka/batch_consumer.go +++ b/xkafka/batch_consumer.go @@ -67,13 +67,46 @@ func (c *BatchConsumer) Use(mws ...BatchMiddlewarer) { // Run starts the consumer and blocks until context is cancelled. func (c *BatchConsumer) Run(ctx context.Context) error { + if err := c.subscribe(); err != nil { + return err + } + if err := c.start(ctx); err != nil { return err } + return c.close() +} + +// Start subscribes to the configured topics and starts consuming messages. +// This method is non-blocking and returns immediately post subscribe. +// Instead, use Run if you want to block until the context is closed or an error occurs. +// +// Errors are handled by the ErrorHandler if set, otherwise they stop the consumer +// and are returned. +func (c *BatchConsumer) Start() error { + if err := c.subscribe(); err != nil { + return err + } + + ctx, cancel := context.WithCancel(context.Background()) + c.cancelCtx.Store(&cancel) + + go func() { _ = c.start(ctx) }() + return nil } +// Close closes the consumer. 
+func (c *BatchConsumer) Close() { + cancel := c.cancelCtx.Load() + if cancel != nil { + (*cancel)() + } + + _ = c.close() +} + func (c *BatchConsumer) start(ctx context.Context) error { c.handler = c.concatMiddlewares(c.handler) @@ -125,7 +158,12 @@ func (c *BatchConsumer) processAsync(ctx context.Context) error { case <-ctx.Done(): st.Wait() - return context.Cause(ctx) + err := context.Cause(ctx) + if errors.Is(err, context.Canceled) { + return nil + } + + return err case batch := <-c.batch.Receive(): st.Go(func() stream.Callback { err := c.handler.HandleBatch(ctx, batch) @@ -189,9 +227,17 @@ func (c *BatchConsumer) subscribe() error { } func (c *BatchConsumer) unsubscribe() error { + _, _ = c.kafka.Commit() + return c.kafka.Unsubscribe() } +func (c *BatchConsumer) close() error { + <-time.After(c.config.shutdownTimeout) + + return c.kafka.Close() +} + func (c *BatchConsumer) concatMiddlewares(handler BatchHandler) BatchHandler { for i := len(c.middlewares) - 1; i >= 0; i-- { handler = c.middlewares[i].BatchMiddleware(handler) @@ -201,7 +247,7 @@ func (c *BatchConsumer) concatMiddlewares(handler BatchHandler) BatchHandler { } func (c *BatchConsumer) saveOffset(batch *Batch) error { - if batch.Status == Fail { + if batch.Status != Success && batch.Status != Skip { return nil } diff --git a/xkafka/batch_consumer_test.go b/xkafka/batch_consumer_test.go new file mode 100644 index 0000000..fb11c59 --- /dev/null +++ b/xkafka/batch_consumer_test.go @@ -0,0 +1,263 @@ +package xkafka + +import ( + "context" + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/confluentinc/confluent-kafka-go/v2/kafka" + "github.com/stretchr/testify/assert" + mock "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestNewBatchConsumer(t *testing.T) { + t.Parallel() + + cfg := ConfigMap{ + "enable.auto.commit": true, + "auto.offset.reset": "earliest", + } + + consumer, err := NewBatchConsumer( + "test-batch-consumer", + noopBatchHandler(), + testTopics, + testBrokers, + cfg, + PollTimeout(testTimeout), + MetadataTimeout(testTimeout), + ShutdownTimeout(testTimeout), + Concurrency(2), + ErrorHandler(NoopErrorHandler), + BatchSize(10), + BatchTimeout(testTimeout), + ) + assert.NoError(t, err) + assert.NotNil(t, consumer) + + assert.Equal(t, "test-batch-consumer", consumer.name) + assert.EqualValues(t, testTopics, consumer.config.topics) + assert.EqualValues(t, testBrokers, consumer.config.brokers) + assert.EqualValues(t, testTimeout, consumer.config.pollTimeout) + assert.EqualValues(t, testTimeout, consumer.config.metadataTimeout) + assert.EqualValues(t, testTimeout, consumer.config.shutdownTimeout) + assert.EqualValues(t, 2, consumer.config.concurrency) + assert.NotNil(t, consumer.config.errorHandler) + assert.EqualValues(t, 10, consumer.config.batchSize) + assert.EqualValues(t, testTimeout, consumer.config.batchTimeout) + + expectedConfig := kafka.ConfigMap{ + "bootstrap.servers": "localhost:9092", + "group.id": "test-batch-consumer", + "auto.offset.reset": "earliest", + "enable.auto.commit": true, + "enable.auto.offset.store": false, + } + + assert.EqualValues(t, expectedConfig, consumer.config.configMap) +} + +func TestBatchConsumer_Lifecycle(t *testing.T) { + t.Parallel() + + t.Run("StartSubscribeError", func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) 
+ + expectError := errors.New("error in subscribe") + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(expectError) + + assert.Error(t, consumer.Start()) + + mockKafka.AssertExpectations(t) + }) + + t.Run("StartSuccessCloseError", func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("ReadMessage", testTimeout).Return(newFakeKafkaMessage(), nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("Close").Return(errors.New("error in close")) + + assert.NoError(t, consumer.Start()) + <-time.After(100 * time.Millisecond) + consumer.Close() + + mockKafka.AssertExpectations(t) + }) + + t.Run("StartCloseSuccess", func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("ReadMessage", testTimeout).Return(newFakeKafkaMessage(), nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("Close").Return(nil) + + assert.NoError(t, consumer.Start()) + <-time.After(100 * time.Millisecond) + consumer.Close() + + mockKafka.AssertExpectations(t) + }) +} + +func TestBatchConsumer_HandleBatch(t *testing.T) { + t.Parallel() + + opts := append(defaultOpts, BatchSize(10)) + consumer, mockKafka := newTestBatchConsumer(t, opts...) + + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + assert.NotNil(t, b) + assert.Len(t, b.Messages, 10) + + cancel() + return nil + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + consumer.handler = handler + err := consumer.Run(ctx) + assert.NoError(t, err) + + mockKafka.AssertExpectations(t) +} + +func TestBatchConsumer_Async(t *testing.T) { + t.Parallel() + + opts := append(defaultOpts, + Concurrency(2), + BatchSize(3), + ) + consumer, mockKafka := newTestBatchConsumer(t, opts...) + + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + count := atomic.Int32{} + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + b.AckSuccess() + + assert.NotNil(t, b) + assert.Len(t, b.Messages, 3) + + n := count.Add(1) + + if n == 2 { + cancel() + } + + return nil + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("StoreOffsets", mock.Anything).Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + consumer.handler = handler + err := consumer.Run(ctx) + assert.NoError(t, err) + + mockKafka.AssertExpectations(t) +} + +func TestBatchConsumer_MiddlewareExecutionOrder(t *testing.T) { + t.Parallel() + + consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) 
+ + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + cancel() + + return nil + }) + + var preExec []string + var postExec []string + + middlewares := []BatchMiddlewarer{ + testBatchMiddleware("middleware1", &preExec, &postExec), + testBatchMiddleware("middleware2", &preExec, &postExec), + testBatchMiddleware("middleware3", &preExec, &postExec), + } + + consumer.Use(middlewares...) + consumer.handler = handler + + err := consumer.Run(ctx) + assert.NoError(t, err) + + // middleware execution order should be FIFO + // but we only test the first 3 values because the + // consumer keeps reading messages until the context + // is canceled + assert.Equal(t, []string{"middleware1", "middleware2", "middleware3"}, preExec[:3]) + assert.Equal(t, []string{"middleware3", "middleware2", "middleware1"}, postExec[:3]) + + mockKafka.AssertExpectations(t) +} + +func noopBatchHandler() BatchHandler { + return BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + return nil + }) +} + +func newTestBatchConsumer(t *testing.T, opts ...ConsumerOption) (*BatchConsumer, *MockConsumerClient) { + t.Helper() + + mockConsumer := &MockConsumerClient{} + + opts = append(opts, mockConsumerFunc(mockConsumer)) + + consumer, err := NewBatchConsumer( + "test-batch-consumer", + noopBatchHandler(), + opts..., + ) + require.NoError(t, err) + require.NotNil(t, consumer) + + return consumer, mockConsumer +} + +func testBatchMiddleware(name string, preExec, postExec *[]string) BatchMiddlewarer { + return BatchMiddlewareFunc(func(next BatchHandler) BatchHandler { + return BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + *preExec = append(*preExec, name) + err := next.HandleBatch(ctx, b) + *postExec = append(*postExec, name) + + return err + }) + }) +} diff --git a/xkafka/consumer_options.go b/xkafka/consumer_options.go index e3b3e97..2bfba0d 100644 --- a/xkafka/consumer_options.go +++ b/xkafka/consumer_options.go @@ -37,6 +37,8 @@ func newConsumerConfig(opts ...ConsumerOption) (*consumerConfig, error) { pollTimeout: 10 * time.Second, shutdownTimeout: 1 * time.Second, concurrency: 1, + batchSize: 1000, + batchTimeout: 5 * time.Second, } for _, opt := range opts { diff --git a/xkafka/generate.go b/xkafka/generate.go index e119be1..3f87faf 100644 --- a/xkafka/generate.go +++ b/xkafka/generate.go @@ -2,4 +2,3 @@ //go:generate mockery --name producerClient --structname MockProducerClient --filename producer_mock_test.go --outpkg xkafka --output . 
package xkafka - From 62e92ee1ff6d4d94d239c8abac499572bdf7b765 Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Fri, 19 Jul 2024 09:25:35 +0530 Subject: [PATCH 03/10] Add log middleware for batch --- examples/xkafka/async.go | 6 ++-- examples/xkafka/go.mod | 5 ++-- examples/xkafka/go.sum | 7 ++--- examples/xkafka/sequential.go | 11 +++++-- examples/xkafka/setup.go | 3 +- xkafka/batch_consumer_test.go | 30 +++++++++++++++++++ xkafka/go.mod | 1 + xkafka/go.sum | 2 ++ xkafka/middleware/go.mod | 1 + xkafka/middleware/go.sum | 2 ++ xkafka/middleware/retry/go.mod | 1 + xkafka/middleware/retry/go.sum | 2 ++ xkafka/middleware/slog/slog.go | 29 +++++++++++++++++++ xkafka/middleware/slog/slog_test.go | 45 ++++++++++++++++++++++++++++- xkafka/middleware/zerolog/go.mod | 1 + xkafka/middleware/zerolog/go.sum | 2 ++ xprom/xpromkafka/go.mod | 1 + xprom/xpromkafka/go.sum | 2 ++ 18 files changed, 135 insertions(+), 16 deletions(-) diff --git a/examples/xkafka/async.go b/examples/xkafka/async.go index 585715c..ca18e9d 100644 --- a/examples/xkafka/async.go +++ b/examples/xkafka/async.go @@ -4,9 +4,10 @@ import ( "context" "time" + "log/slog" + "github.com/rs/xid" "github.com/urfave/cli/v2" - "log/slog" "github.com/gojekfarm/xrun" "github.com/gojekfarm/xtools/xkafka" @@ -30,8 +31,7 @@ func runAsync(c *cli.Context) error { }, xkafka.Concurrency(2), xkafka.ErrorHandler(func(err error) error { - slog.Error(err.Error()) - return nil + return err }), } diff --git a/examples/xkafka/go.mod b/examples/xkafka/go.mod index 3f66897..56d59fb 100644 --- a/examples/xkafka/go.mod +++ b/examples/xkafka/go.mod @@ -9,16 +9,15 @@ replace ( ) require ( - github.com/confluentinc/confluent-kafka-go v1.9.2 + github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 github.com/gojekfarm/xrun v0.3.0 github.com/gojekfarm/xtools/xkafka v0.9.0 github.com/lmittmann/tint v1.0.3 - github.com/rs/xid v1.4.0 + github.com/rs/xid v1.5.0 github.com/urfave/cli/v2 v2.23.7 ) require ( - github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect diff --git a/examples/xkafka/go.sum b/examples/xkafka/go.sum index 6490c40..dd87742 100644 --- a/examples/xkafka/go.sum +++ b/examples/xkafka/go.sum @@ -17,8 +17,6 @@ github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XP github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/confluentinc/confluent-kafka-go v1.9.2 h1:gV/GxhMBUb03tFWkN+7kdhg+zf+QUM+wVkI9zwh770Q= -github.com/confluentinc/confluent-kafka-go v1.9.2/go.mod h1:ptXNqsuDfYbAE/LBW6pnwWZElUoWxHoV8E43DCrliyo= github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 h1:YmUjjRp1mSTqTxtHQYMQKBLa2hfgIZz9PSqoSRDkwf4= github.com/confluentinc/confluent-kafka-go/v2 v2.0.2/go.mod h1:qWGwym8EpAsIP5lZsTKhYTnYSGqkbxEfRB4A489Jo64= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= @@ -116,8 +114,8 @@ github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a/go.mod h1:4r5QyqhjI github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= 
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= -github.com/rs/xid v1.4.0 h1:qd7wPTDkN6KQx2VmMBLrpHkiyQwgFXRnkOLacUiaSNY= -github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= @@ -171,7 +169,6 @@ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/examples/xkafka/sequential.go b/examples/xkafka/sequential.go index ba599d8..3c51568 100644 --- a/examples/xkafka/sequential.go +++ b/examples/xkafka/sequential.go @@ -6,9 +6,10 @@ import ( "math/rand" "time" + "log/slog" + "github.com/rs/xid" "github.com/urfave/cli/v2" - "log/slog" "github.com/gojekfarm/xrun" "github.com/gojekfarm/xtools/xkafka" @@ -36,8 +37,7 @@ func runSequential(c *cli.Context) error { "auto.offset.reset": "earliest", }, xkafka.ErrorHandler(func(err error) error { - slog.Error(err.Error()) - return nil + return err }), } @@ -115,6 +115,11 @@ func publishMessages(messages []*xkafka.Message) error { producer, err := xkafka.NewProducer( "test-seq-producer", xkafka.Brokers(brokers), + xkafka.ErrorHandler(func(err error) error { + slog.Error(err.Error()) + + return nil + }), ) if err != nil { return err diff --git a/examples/xkafka/setup.go b/examples/xkafka/setup.go index 4c0750e..aa25935 100644 --- a/examples/xkafka/setup.go +++ b/examples/xkafka/setup.go @@ -3,8 +3,9 @@ package main import ( "context" - "github.com/confluentinc/confluent-kafka-go/kafka" "log/slog" + + "github.com/confluentinc/confluent-kafka-go/v2/kafka" ) func createTopic(name string, partitions int) error { diff --git a/xkafka/batch_consumer_test.go b/xkafka/batch_consumer_test.go index fb11c59..c831b9d 100644 --- a/xkafka/batch_consumer_test.go +++ b/xkafka/batch_consumer_test.go @@ -226,6 +226,36 @@ func TestBatchConsumer_MiddlewareExecutionOrder(t *testing.T) { mockKafka.AssertExpectations(t) } +func TestBatchConsumer_ManualCommit(t *testing.T) { + t.Parallel() + + consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) 
+ + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("StoreOffsets", mock.Anything).Return(nil, nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + b.AckSuccess() + + cancel() + + return nil + }) + + consumer.handler = handler + err := consumer.Run(ctx) + assert.NoError(t, err) + + mockKafka.AssertExpectations(t) +} + func noopBatchHandler() BatchHandler { return BatchHandlerFunc(func(ctx context.Context, b *Batch) error { return nil diff --git a/xkafka/go.mod b/xkafka/go.mod index f805dbc..40e9316 100644 --- a/xkafka/go.mod +++ b/xkafka/go.mod @@ -5,6 +5,7 @@ go 1.21 require ( github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 github.com/pkg/errors v0.9.1 + github.com/rs/xid v1.5.0 github.com/sourcegraph/conc v0.3.0 github.com/stretchr/testify v1.8.1 ) diff --git a/xkafka/go.sum b/xkafka/go.sum index e7ff3eb..a950fa1 100644 --- a/xkafka/go.sum +++ b/xkafka/go.sum @@ -108,6 +108,8 @@ github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTE github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= diff --git a/xkafka/middleware/go.mod b/xkafka/middleware/go.mod index 291f64f..3b40452 100644 --- a/xkafka/middleware/go.mod +++ b/xkafka/middleware/go.mod @@ -16,6 +16,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rs/xid v1.5.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.9.0 // indirect diff --git a/xkafka/middleware/go.sum b/xkafka/middleware/go.sum index e7ff3eb..a950fa1 100644 --- a/xkafka/middleware/go.sum +++ b/xkafka/middleware/go.sum @@ -108,6 +108,8 @@ github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTE github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= diff --git a/xkafka/middleware/retry/go.mod 
b/xkafka/middleware/retry/go.mod index 7589ba6..0176dcf 100644 --- a/xkafka/middleware/retry/go.mod +++ b/xkafka/middleware/retry/go.mod @@ -15,6 +15,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rs/xid v1.5.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.9.0 // indirect diff --git a/xkafka/middleware/retry/go.sum b/xkafka/middleware/retry/go.sum index 6e63574..7b8c152 100644 --- a/xkafka/middleware/retry/go.sum +++ b/xkafka/middleware/retry/go.sum @@ -110,6 +110,8 @@ github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTE github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= diff --git a/xkafka/middleware/slog/slog.go b/xkafka/middleware/slog/slog.go index 8e3bf92..f2722fa 100644 --- a/xkafka/middleware/slog/slog.go +++ b/xkafka/middleware/slog/slog.go @@ -80,3 +80,32 @@ func LoggingMiddleware(opts ...Option) xkafka.MiddlewareFunc { }) } } + +// BatchLoggingMiddleware is a middleware that logs batch messages using log/slog. +func BatchLoggingMiddleware(opts ...Option) xkafka.BatchMiddlewareFunc { + cfg := newLogOptions(opts...) + + return func(next xkafka.BatchHandler) xkafka.BatchHandler { + return xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error { + start := time.Now() + logger := cfg.logger.WithGroup("xkafka") + + err := next.HandleBatch(ctx, b) + + args := []slog.Attr{ + slog.Int("count", len(b.Messages)), + slog.Int64("max_offset", b.MaxOffset()), + slog.Duration("duration", time.Since(start)), + slog.String("batch_id", b.ID), + } + + if err != nil { + logger.LogAttrs(ctx, slog.LevelError, "[xkafka] batch processing failed", args...) + } else { + logger.LogAttrs(ctx, cfg.level, "[xkafka] batch processed", args...) 
+ } + + return err + }) + } +} diff --git a/xkafka/middleware/slog/slog_test.go b/xkafka/middleware/slog/slog_test.go index d1c4c2e..859568a 100644 --- a/xkafka/middleware/slog/slog_test.go +++ b/xkafka/middleware/slog/slog_test.go @@ -4,9 +4,10 @@ import ( "context" "testing" - "github.com/stretchr/testify/assert" "log/slog" + "github.com/stretchr/testify/assert" + "github.com/gojekfarm/xtools/xkafka" ) @@ -55,3 +56,45 @@ func TestLoggingMiddlewareWithError(t *testing.T) { err := handler.Handle(context.Background(), msg) assert.ErrorIs(t, err, assert.AnError) } + +func TestBatchLoggingMiddleware(t *testing.T) { + batch := xkafka.NewBatch() + msg := &xkafka.Message{ + Topic: "test-topic", + Partition: 0, + Offset: 0, + Key: []byte("test-key"), + } + + batch.Messages = append(batch.Messages, msg) + + t.Run("success", func(t *testing.T) { + loggingMiddleware := BatchLoggingMiddleware( + Level(slog.LevelInfo), + logger, + ) + handler := loggingMiddleware(xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error { + b.AckSuccess() + + return nil + })) + + err := handler.HandleBatch(context.Background(), batch) + assert.NoError(t, err) + }) + + t.Run("error", func(t *testing.T) { + loggingMiddleware := BatchLoggingMiddleware( + Level(slog.LevelInfo), + logger, + ) + handler := loggingMiddleware(xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error { + b.AckFail(assert.AnError) + + return assert.AnError + })) + + err := handler.HandleBatch(context.Background(), batch) + assert.ErrorIs(t, err, assert.AnError) + }) +} diff --git a/xkafka/middleware/zerolog/go.mod b/xkafka/middleware/zerolog/go.mod index c77afde..69254a7 100644 --- a/xkafka/middleware/zerolog/go.mod +++ b/xkafka/middleware/zerolog/go.mod @@ -16,6 +16,7 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.16 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/rs/xid v1.5.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.9.0 // indirect diff --git a/xkafka/middleware/zerolog/go.sum b/xkafka/middleware/zerolog/go.sum index dfe9893..91f9577 100644 --- a/xkafka/middleware/zerolog/go.sum +++ b/xkafka/middleware/zerolog/go.sum @@ -113,6 +113,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.29.0 h1:Zes4hju04hjbvkVkOhdl2HpZa+0PmVwigmo8XoORE5w= github.com/rs/zerolog v1.29.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= diff --git a/xprom/xpromkafka/go.mod b/xprom/xpromkafka/go.mod index 34b6e84..75d240c 100644 --- a/xprom/xpromkafka/go.mod +++ b/xprom/xpromkafka/go.mod @@ -26,6 +26,7 @@ require ( github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect + github.com/rs/xid v1.5.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.9.0 
// indirect diff --git a/xprom/xpromkafka/go.sum b/xprom/xpromkafka/go.sum index a2d46a1..ef72742 100644 --- a/xprom/xpromkafka/go.sum +++ b/xprom/xpromkafka/go.sum @@ -243,6 +243,8 @@ github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTE github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= From fd752326f9d69e19e9c3496bed8cf135cd9b2bfb Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Wed, 24 Jul 2024 16:02:02 +0530 Subject: [PATCH 04/10] Refactored xkafka Example --- examples/xkafka/README.md | 28 +++- examples/xkafka/async.go | 106 ------------- examples/xkafka/basic.go | 114 ++++++++++++++ examples/xkafka/batch.go | 109 ++++++++++++++ examples/xkafka/main.go | 43 +++++- examples/xkafka/producer.go | 34 +++++ examples/xkafka/sequential.go | 222 ---------------------------- examples/xkafka/setup.go | 31 +++- examples/xkafka/state.go | 87 ++++++++++- xkafka/middleware/slog/slog_test.go | 3 +- 10 files changed, 423 insertions(+), 354 deletions(-) delete mode 100644 examples/xkafka/async.go create mode 100644 examples/xkafka/basic.go create mode 100644 examples/xkafka/batch.go create mode 100644 examples/xkafka/producer.go delete mode 100644 examples/xkafka/sequential.go diff --git a/examples/xkafka/README.md b/examples/xkafka/README.md index 0f0cdc1..39ba2a8 100644 --- a/examples/xkafka/README.md +++ b/examples/xkafka/README.md @@ -1,4 +1,4 @@ -Examples of how to use the xkafka package to read and write messages to a Kafka topic using `xkafka.Consumer` and `xkafka.Producer`. +Examples of how to use the xkafka package to publish and consume messages from a Kafka broker. Simulates different Kafka consumer use cases. ## Running Kafka @@ -10,18 +10,34 @@ $ docker-compose up -d ## Scenarios -### Sequential Consumer +### Basic Consumer -This is the default behavior of the consumer. The consumer will process messages sequentially and commit the offset based on the `auto.commit.interval.ms` configuration. +The basic consumer reads messages from a Kafka topic and prints them to the console. It simulates a process crash by restarting the consumer after a random number of messages have been consumed. ```bash -go run *.go sequential +$ go run *.go basic --partitions=2 --consumers=2 --messages=10 ``` ### Async Consumer -Async mode is enabled by setting `xkafka.Concurrency` to a value greater than 1. The consumer will process messages using a pool of Go routines. +The async consumer reads messages concurrently using a configurable pool of goroutines. ```bash -go run *.go async +$ go run *.go basic --partitions=2 --consumers=2 --messages=10 --concurrency=4 +``` + +### Batch Consumer + +The batch consumer reads messages in batches of a configurable size. 
+ +```bash +$ go run *.go batch --partitions=2 --consumers=2 --messages=1000 --batch-size=100 +``` + +### Async Batch Consumer + +The async batch consumer processes batches concurrently using a configurable pool of goroutines. + +```bash +$ go run *.go batch --partitions=2 --consumers=2 --messages=1000 --batch-size=100 --concurrency=4 ``` diff --git a/examples/xkafka/async.go b/examples/xkafka/async.go deleted file mode 100644 index ca18e9d..0000000 --- a/examples/xkafka/async.go +++ /dev/null @@ -1,106 +0,0 @@ -package main - -import ( - "context" - "time" - - "log/slog" - - "github.com/rs/xid" - "github.com/urfave/cli/v2" - - "github.com/gojekfarm/xrun" - "github.com/gojekfarm/xtools/xkafka" -) - -// nolint -func runAsync(c *cli.Context) error { - topic := "async-" + xid.New().String() - - if err := createTopic(topic, partitions); err != nil { - return err - } - - s.generated = generateMessages(topic, 10) - - opts := []xkafka.ConsumerOption{ - xkafka.Brokers(brokers), - xkafka.Topics{topic}, - xkafka.ConfigMap{ - "auto.offset.reset": "earliest", - }, - xkafka.Concurrency(2), - xkafka.ErrorHandler(func(err error) error { - return err - }), - } - - // start consumers first - s.consumers = make([]*xkafka.Consumer, partitions) - components := make([]xrun.Component, partitions) - - for i := 0; i < partitions; i++ { - consumer, err := xkafka.NewConsumer( - "test-async-consumer", - handleMessagesWithErrors(), - opts..., - ) - if err != nil { - panic(err) - } - - consumer.Use(loggingMiddleware()) - - s.consumers[i] = consumer - components[i] = consumer - } - - // publish messages to the topic - if err := publishMessages(s.generated); err != nil { - return err - } - - runConsumers(c.Context, s.consumers) - - slog.Info("[SEQUENTIAL] Consumers exited", "count", len(s.received)) - - // create new consumers with the same group id - // these consumers will start consuming from the last committed offset - s.consumers = make([]*xkafka.Consumer, partitions) - components = make([]xrun.Component, partitions) - - for i := 0; i < partitions; i++ { - consumer, err := xkafka.NewConsumer( - "test-async-consumer", - handleMessages(), - opts..., - ) - if err != nil { - panic(err) - } - - consumer.Use(loggingMiddleware()) - - s.consumers[i] = consumer - components[i] = consumer - } - - ctx, cancel := context.WithCancel(c.Context) - - go runConsumers(ctx, s.consumers) - - // wait for all messages to be processed - for { - if len(s.received) == len(s.generated) { - <-time.After(1 * time.Second) - - cancel() - - break - } - } - - slog.Info("[ASYNC] Messages received", "count", len(s.received)) - - return nil -} diff --git a/examples/xkafka/basic.go b/examples/xkafka/basic.go new file mode 100644 index 0000000..c4b516a --- /dev/null +++ b/examples/xkafka/basic.go @@ -0,0 +1,114 @@ +package main + +import ( + "context" + + "github.com/urfave/cli/v2" + "log/slog" + + "github.com/gojekfarm/xrun" + "github.com/gojekfarm/xtools/xkafka" + slogmw "github.com/gojekfarm/xtools/xkafka/middleware/slog" +) + +func runBasic(c *cli.Context) error { + partitions := c.Int("partitions") + pods := c.Int("consumers") + count := c.Int("messages") + concurrency := c.Int("concurrency") + + ctx, cancel := context.WithCancel(c.Context) + + // create topic + topic := createTopic(partitions) + + // generate messages + messages := generateMessages(topic, count) + + // publish messages + publishMessages(messages) + + tracker := NewTracker(messages, cancel) + + // consumer options + opts := []xkafka.ConsumerOption{ + xkafka.Brokers(brokers), + 
xkafka.Topics{topic}, + xkafka.ConfigMap{ + "auto.offset.reset": "earliest", + }, + xkafka.Concurrency(concurrency), + xkafka.ErrorHandler(func(err error) error { + // return error to stop consumer + return err + }), + } + + // create and run consumers + runConsumers(ctx, tracker, pods, opts...) + + tracker.Summary() + + return nil +} + +func basicHandler(tracker *Tracker) xkafka.HandlerFunc { + return func(ctx context.Context, msg *xkafka.Message) error { + err := tracker.SimulateWork() + if err != nil { + msg.AckFail(err) + + return err + } + + tracker.Ack(msg) + msg.AckSuccess() + + tracker.CancelIfDone() + + return nil + } +} + +func runConsumers( + ctx context.Context, + tracker *Tracker, + pods int, + opts ...xkafka.ConsumerOption, +) { + handler := basicHandler(tracker) + + for { + select { + case <-ctx.Done(): + return + default: + components := make([]xrun.Component, 0, pods) + + for i := 0; i < pods; i++ { + consumer := createConsumer(handler, opts...) + components = append(components, consumer) + } + + err := xrun.All(xrun.NoTimeout, components...).Run(ctx) + if err != nil { + slog.Error("Error running consumers", "error", err) + } + } + } +} + +func createConsumer(handler xkafka.HandlerFunc, opts ...xkafka.ConsumerOption) *xkafka.Consumer { + consumer, err := xkafka.NewConsumer( + "test-basic-consumer", + handler, + opts..., + ) + if err != nil { + panic(err) + } + + consumer.Use(slogmw.LoggingMiddleware()) + + return consumer +} diff --git a/examples/xkafka/batch.go b/examples/xkafka/batch.go new file mode 100644 index 0000000..c6f1880 --- /dev/null +++ b/examples/xkafka/batch.go @@ -0,0 +1,109 @@ +package main + +import ( + "context" + + "github.com/urfave/cli/v2" + "log/slog" + + "github.com/gojekfarm/xrun" + "github.com/gojekfarm/xtools/xkafka" + slogmw "github.com/gojekfarm/xtools/xkafka/middleware/slog" +) + +func runBatch(c *cli.Context) error { + partitions := c.Int("partitions") + pods := c.Int("consumers") + count := c.Int("messages") + concurrency := c.Int("concurrency") + size := c.Int("batch-size") + + ctx, cancel := context.WithCancel(c.Context) + + // create topic + topic := createTopic(partitions) + + // generate messages + messages := generateMessages(topic, count) + + // publish messages + publishMessages(messages) + + tracker := NewTracker(messages, cancel) + + // consumer options + opts := []xkafka.ConsumerOption{ + xkafka.Brokers(brokers), + xkafka.Topics{topic}, + xkafka.ConfigMap{ + "auto.offset.reset": "earliest", + }, + xkafka.Concurrency(concurrency), + xkafka.BatchSize(size), + xkafka.ErrorHandler(func(err error) error { + // return error to stop consumer + return err + }), + } + + // create and run consumers + runBatchConsumers(ctx, tracker, pods, opts...) 
+ + tracker.Summary() + + return nil +} + +func batchHandler(tracker *Tracker) xkafka.BatchHandlerFunc { + return func(ctx context.Context, batch *xkafka.Batch) error { + err := tracker.SimulateWork() + if err != nil { + batch.AckFail(err) + + return err + } + + for _, msg := range batch.Messages { + tracker.Ack(msg) + } + + batch.AckSuccess() + + tracker.CancelIfDone() + + return nil + } +} + +func runBatchConsumers(ctx context.Context, tracker *Tracker, pods int, opts ...xkafka.ConsumerOption) { + handler := batchHandler(tracker) + + for { + select { + case <-ctx.Done(): + return + default: + components := make([]xrun.Component, 0, pods) + + for i := 0; i < pods; i++ { + bc, err := xkafka.NewBatchConsumer( + "test-batch-consumer", + handler, + opts..., + ) + if err != nil { + panic(err) + } + + bc.Use(slogmw.BatchLoggingMiddleware()) + + components = append(components, bc) + } + + err := xrun.All(xrun.NoTimeout, components...).Run(ctx) + if err != nil { + slog.Error("Error running consumers", "error", err) + } + } + } +} diff --git a/examples/xkafka/main.go b/examples/xkafka/main.go index 4ff1e8c..e09728c 100644 --- a/examples/xkafka/main.go +++ b/examples/xkafka/main.go @@ -22,16 +22,47 @@ func main() { app.Name = "xkafka" + flags := []cli.Flag{ + &cli.IntFlag{ + Name: "partitions", + Aliases: []string{"p"}, + Value: 4, + }, + &cli.IntFlag{ + Name: "consumers", + Aliases: []string{"c"}, + Value: 2, + }, + &cli.IntFlag{ + Name: "messages", + Aliases: []string{"m"}, + Value: 10, + }, + &cli.IntFlag{ + Name: "concurrency", + Aliases: []string{"cc"}, + Value: 1, + }, + } + app.Commands = []*cli.Command{ { - Name: "sequential", - Flags: []cli.Flag{&cli.BoolFlag{Name: "manual"}}, - Action: runSequential, + Name: "basic", + Usage: "Run basic consumer example", + Flags: flags, + Action: runBasic, }, { - Name: "async", - Flags: []cli.Flag{&cli.BoolFlag{Name: "manual"}}, - Action: runAsync, + Name: "batch", + Usage: "Run batch consumer example", + Flags: append(flags, + &cli.IntFlag{ + Name: "batch-size", + Aliases: []string{"bs"}, + Value: 10, + }, + ), + Action: runBatch, }, } diff --git a/examples/xkafka/producer.go b/examples/xkafka/producer.go new file mode 100644 index 0000000..6e98089 --- /dev/null +++ b/examples/xkafka/producer.go @@ -0,0 +1,34 @@ +package main + +import ( + "context" + + "log/slog" + + "github.com/gojekfarm/xtools/xkafka" +) + +func publishMessages(messages []*xkafka.Message) { + producer, err := xkafka.NewProducer( + "test-seq-producer", + xkafka.Brokers(brokers), + xkafka.ErrorHandler(func(err error) error { + slog.Error(err.Error()) + + return err + }), + ) + if err != nil { + panic(err) + } + + defer producer.Close() + + for _, msg := range messages { + if err := producer.Publish(context.Background(), msg); err != nil { + panic(err) + } + } + + slog.Info("[PRODUCER] published messages", "count", len(messages)) +} diff --git a/examples/xkafka/sequential.go b/examples/xkafka/sequential.go deleted file mode 100644 index 3c51568..0000000 --- a/examples/xkafka/sequential.go +++ /dev/null @@ -1,222 +0,0 @@ -package main - -import ( - "context" - "fmt" - "math/rand" - "time" - - "log/slog" - - "github.com/rs/xid" - "github.com/urfave/cli/v2" - - "github.com/gojekfarm/xrun" - "github.com/gojekfarm/xtools/xkafka" -) - -var ( - brokers = []string{"localhost:9092"} - partitions = 2 -) - -// nolint -func runSequential(c *cli.Context) error { - topic := "seq-" + xid.New().String() - - if err := createTopic(topic, partitions); err != nil { - return err - } - - s.generated = 
generateMessages(topic, 10) - - opts := []xkafka.ConsumerOption{ - xkafka.Brokers(brokers), - xkafka.Topics{topic}, - xkafka.ConfigMap{ - "auto.offset.reset": "earliest", - }, - xkafka.ErrorHandler(func(err error) error { - return err - }), - } - - // start consumers first - s.consumers = make([]*xkafka.Consumer, partitions) - components := make([]xrun.Component, partitions) - - for i := 0; i < partitions; i++ { - consumer, err := xkafka.NewConsumer( - "test-seq-consumer", - handleMessagesWithErrors(), - opts..., - ) - if err != nil { - panic(err) - } - - consumer.Use(loggingMiddleware()) - - s.consumers[i] = consumer - components[i] = consumer - } - - // publish messages to the topic - if err := publishMessages(s.generated); err != nil { - return err - } - - runConsumers(c.Context, s.consumers) - - slog.Info("[SEQUENTIAL] Consumers exited", "count", len(s.received)) - - // create new consumers with the same group id - // these consumers will start consuming from the last committed offset - s.consumers = make([]*xkafka.Consumer, partitions) - components = make([]xrun.Component, partitions) - - for i := 0; i < partitions; i++ { - consumer, err := xkafka.NewConsumer( - "test-seq-consumer", - handleMessages(), - opts..., - ) - if err != nil { - panic(err) - } - - consumer.Use(loggingMiddleware()) - - s.consumers[i] = consumer - components[i] = consumer - } - - ctx, cancel := context.WithCancel(c.Context) - - go runConsumers(ctx, s.consumers) - - // wait for all messages to be processed - for { - if len(s.received) == len(s.generated) { - <-time.After(1 * time.Second) - - cancel() - - break - } - } - - slog.Info("[SEQUENTIAL] Messages processed", "count", len(s.received)) - - return nil -} - -func publishMessages(messages []*xkafka.Message) error { - producer, err := xkafka.NewProducer( - "test-seq-producer", - xkafka.Brokers(brokers), - xkafka.ErrorHandler(func(err error) error { - slog.Error(err.Error()) - - return nil - }), - ) - if err != nil { - return err - } - - defer producer.Close() - - for _, msg := range messages { - if err := producer.Publish(context.Background(), msg); err != nil { - return err - } - - slog.Info("[PRODUCER] published message", "key", string(msg.Key)) - } - - return nil -} - -func generateMessages(topic string, count int) []*xkafka.Message { - messages := make([]*xkafka.Message, count) - - for i := 0; i < count; i++ { - messages[i] = &xkafka.Message{ - Topic: topic, - Key: []byte(fmt.Sprintf("key-%d", i)), - Value: xid.New().Bytes(), - } - } - - return messages -} - -func loggingMiddleware() xkafka.MiddlewareFunc { - return func(next xkafka.Handler) xkafka.Handler { - return xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { - start := time.Now() - - err := next.Handle(ctx, msg) - - slog.Info("[MESSAGE] Completed", - "key", string(msg.Key), - "offset", msg.Offset, "partition", msg.Partition, - "duration", time.Since(start), - "status", msg.Status, - ) - - return err - }) - } -} - -func simulateWork() { - <-time.After(time.Duration(rand.Int63n(200)) * time.Millisecond) -} - -func runConsumers(ctx context.Context, consumers []*xkafka.Consumer) error { - components := make([]xrun.Component, len(consumers)) - - for i, consumer := range consumers { - components[i] = consumer - } - - return xrun.All(xrun.NoTimeout, components...).Run(ctx) -} - -func handleMessages() xkafka.HandlerFunc { - return func(ctx context.Context, msg *xkafka.Message) error { - s.mu.Lock() - s.received[string(msg.Key)] = msg - s.mu.Unlock() - - simulateWork() - - 
msg.AckSuccess() - - return nil - } -} - -func handleMessagesWithErrors() xkafka.HandlerFunc { - return func(ctx context.Context, msg *xkafka.Message) error { - s.mu.Lock() - s.received[string(msg.Key)] = msg - s.mu.Unlock() - - simulateWork() - - if msg.Offset > 1 { - err := fmt.Errorf("simulated error for key %s", string(msg.Key)) - - msg.AckFail(err) - - return err - } - - msg.AckSuccess() - - return nil - } -} diff --git a/examples/xkafka/setup.go b/examples/xkafka/setup.go index aa25935..26d5fed 100644 --- a/examples/xkafka/setup.go +++ b/examples/xkafka/setup.go @@ -2,18 +2,25 @@ package main import ( "context" + "fmt" + "github.com/confluentinc/confluent-kafka-go/v2/kafka" + "github.com/rs/xid" "log/slog" - "github.com/confluentinc/confluent-kafka-go/v2/kafka" + "github.com/gojekfarm/xtools/xkafka" ) -func createTopic(name string, partitions int) error { +var brokers = []string{"localhost:9092"} + +func createTopic(partitions int) string { + name := "xkafka-" + xid.New().String() + admin, err := kafka.NewAdminClient(&kafka.ConfigMap{ "bootstrap.servers": "localhost:9092", }) if err != nil { - return err + panic(err) } res, err := admin.CreateTopics( @@ -25,10 +32,24 @@ func createTopic(name string, partitions int) error { }}, ) if err != nil { - return err + panic(err) } slog.Info("[ADMIN] created topic", "name", name, "partitions", partitions, "result", res) - return nil + return name +} + +func generateMessages(topic string, count int) []*xkafka.Message { + messages := make([]*xkafka.Message, count) + + for i := 0; i < count; i++ { + messages[i] = &xkafka.Message{ + Topic: topic, + Key: []byte(fmt.Sprintf("key-%d", i)), + Value: []byte(fmt.Sprintf("value-%d : %s", i, xid.New().String())), + } + } + + return messages } diff --git a/examples/xkafka/state.go b/examples/xkafka/state.go index 7cdda7a..8196bc1 100644 --- a/examples/xkafka/state.go +++ b/examples/xkafka/state.go @@ -1,18 +1,91 @@ package main import ( + "errors" "sync" + "time" + + "golang.org/x/exp/rand" + "log/slog" "github.com/gojekfarm/xtools/xkafka" ) -type state struct { - generated []*xkafka.Message - received map[string]*xkafka.Message - mu sync.Mutex - consumers []*xkafka.Consumer +type Tracker struct { + expect map[string]*xkafka.Message + mu sync.Mutex + received map[string]*xkafka.Message + order []string + cancel func() + simulateError bool +} + +func NewTracker(messages []*xkafka.Message, cancel func()) *Tracker { + t := &Tracker{ + expect: make(map[string]*xkafka.Message), + received: make(map[string]*xkafka.Message), + order: make([]string, 0), + cancel: cancel, + } + + for _, m := range messages { + t.expect[string(m.Key)] = m + } + + return t +} + +func (t *Tracker) Ack(msg *xkafka.Message) { + t.mu.Lock() + defer t.mu.Unlock() + + t.received[string(msg.Key)] = msg + t.order = append(t.order, string(msg.Key)) +} + +func (t *Tracker) SimulateWork() error { + <-time.After(time.Duration(rand.Int63n(200)) * time.Millisecond) + + t.mu.Lock() + defer t.mu.Unlock() + + after := len(t.expect) / 3 + + // simulate error after 1/3 of messages are received + if len(t.received) >= after && !t.simulateError { + t.simulateError = true + + return errors.New("simulated error") + } + + return nil } -var s = &state{ - received: make(map[string]*xkafka.Message), +func (t *Tracker) CancelIfDone() { + t.mu.Lock() + defer t.mu.Unlock() + + if len(t.received) == len(t.expect) { + t.cancel() + } +} + +func (t *Tracker) Summary() { + t.mu.Lock() + defer t.mu.Unlock() + + slog.Info("[TRACKER] summary", + "received", 
len(t.received), + "expected", len(t.expect), + ) + + for _, key := range t.order { + msg := t.received[key] + slog.Info("[TRACKER] message", + "key", string(msg.Key), + "offset", msg.Offset, + "partition", msg.Partition, + "topic", msg.Topic, + ) + } } diff --git a/xkafka/middleware/slog/slog_test.go b/xkafka/middleware/slog/slog_test.go index 859568a..5f97275 100644 --- a/xkafka/middleware/slog/slog_test.go +++ b/xkafka/middleware/slog/slog_test.go @@ -4,9 +4,8 @@ import ( "context" "testing" - "log/slog" - "github.com/stretchr/testify/assert" + "log/slog" "github.com/gojekfarm/xtools/xkafka" ) From 07e8882627d1cf42cee72dc7917a0f3072bd7dd2 Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Thu, 14 Nov 2024 10:58:37 +0530 Subject: [PATCH 05/10] Iimprove batch consumer error handling --- Makefile | 10 +++ examples/xkafka/README.md | 8 +-- examples/xkafka/batch.go | 6 +- examples/xkafka/state.go | 2 + xkafka/batch.go | 4 +- xkafka/batch_consumer.go | 111 +++++++++++++++++++++++----------- xkafka/batch_consumer_test.go | 52 ++++++++++++++++ 7 files changed, 153 insertions(+), 40 deletions(-) diff --git a/Makefile b/Makefile index 7769b04..c4adcb8 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,12 @@ test-cov: gocov test-xml: test-cov gocov-xml @jq -n '{ Packages: [ inputs.Packages ] | add }' $(shell find . -type f -name 'coverage.json' | sort) | $(GOCOVXML) > coverage.xml +.PHONY: test-html + +test-html: test-cov gocov-html + @jq -n '{ Packages: [ inputs.Packages ] | add }' $(shell find . -type f -name 'coverage.json' | sort) | $(GOCOVHTML) -t kit -r > coverage.html + @open coverage.html + .PHONY: check check: fmt vet imports lint @git diff --quiet || test $$(git diff --name-only | grep -v -e 'go.mod$$' -e 'go.sum$$' | wc -l) -eq 0 || ( echo "The following changes (result of code generators and code checks) have been detected:" && git --no-pager diff && false ) # fail if Git working tree is dirty @@ -84,6 +90,10 @@ GOCOVXML = $(BIN_DIR)/gocov-xml gocov-xml: $(call go-get-tool,$(GOCOVXML),github.com/AlekSi/gocov-xml@v1.0.0) +GOCOVHTML = $(BIN_DIR)/gocov-html +gocov-html: + $(call go-get-tool,$(GOCOVHTML),github.com/matm/gocov-html/cmd/gocov-html@v1.4.0) + MOCKERY = $(BIN_DIR)/mockery mockery: $(call go-get-tool,$(MOCKERY),github.com/vektra/mockery/v2@v2.43.0) diff --git a/examples/xkafka/README.md b/examples/xkafka/README.md index 39ba2a8..59e34d5 100644 --- a/examples/xkafka/README.md +++ b/examples/xkafka/README.md @@ -15,7 +15,7 @@ $ docker-compose up -d The basic consumer reads messages from a Kafka topic and prints them to the console. It simulates a process crash by restarting the consumer after a random number of messages have been consumed. ```bash -$ go run *.go basic --partitions=2 --consumers=2 --messages=10 +go run *.go basic --partitions=2 --consumers=2 --messages=10 ``` ### Async Consumer @@ -23,7 +23,7 @@ $ go run *.go basic --partitions=2 --consumers=2 --messages=10 The async consumer reads messages concurrently using a configurable pool of goroutines. ```bash -$ go run *.go basic --partitions=2 --consumers=2 --messages=10 --concurrency=4 +go run *.go basic --partitions=2 --consumers=2 --messages=10 --concurrency=4 ``` ### Batch Consumer @@ -31,7 +31,7 @@ $ go run *.go basic --partitions=2 --consumers=2 --messages=10 --concurrency=4 The batch consumer reads messages in batches of a configurable size. 
```bash -$ go run *.go batch --partitions=2 --consumers=2 --messages=1000 --batch-size=100 +go run *.go batch --partitions=2 --consumers=2 --messages=20 --batch-size=3 ``` ### Async Batch Consumer @@ -39,5 +39,5 @@ $ go run *.go batch --partitions=2 --consumers=2 --messages=1000 --batch-size=10 The async batch consumer processes batches concurrently using a configurable pool of goroutines. ```bash -$ go run *.go batch --partitions=2 --consumers=2 --messages=1000 --batch-size=100 --concurrency=4 +go run *.go batch --partitions=2 --consumers=2 --messages=20 --batch-size=3 --concurrency=4 ``` diff --git a/examples/xkafka/batch.go b/examples/xkafka/batch.go index c6f1880..8871a20 100644 --- a/examples/xkafka/batch.go +++ b/examples/xkafka/batch.go @@ -2,10 +2,12 @@ package main import ( "context" + "time" - "github.com/urfave/cli/v2" "log/slog" + "github.com/urfave/cli/v2" + "github.com/gojekfarm/xrun" "github.com/gojekfarm/xtools/xkafka" slogmw "github.com/gojekfarm/xtools/xkafka/middleware/slog" @@ -38,8 +40,10 @@ func runBatch(c *cli.Context) error { xkafka.ConfigMap{ "auto.offset.reset": "earliest", }, + xkafka.PollTimeout(10 * time.Second), xkafka.Concurrency(concurrency), xkafka.BatchSize(size), + xkafka.BatchTimeout(10 * time.Second), xkafka.ErrorHandler(func(err error) error { // return error to stop consumer return err diff --git a/examples/xkafka/state.go b/examples/xkafka/state.go index 8196bc1..89a5f51 100644 --- a/examples/xkafka/state.go +++ b/examples/xkafka/state.go @@ -66,6 +66,8 @@ func (t *Tracker) CancelIfDone() { defer t.mu.Unlock() if len(t.received) == len(t.expect) { + slog.Info("[TRACKER] all messages received, cancelling context") + t.cancel() } } diff --git a/xkafka/batch.go b/xkafka/batch.go index bfa54bc..3765351 100644 --- a/xkafka/batch.go +++ b/xkafka/batch.go @@ -34,12 +34,14 @@ func (b *Batch) AckSuccess() { } // AckFail marks the batch as failed to process. -func (b *Batch) AckFail(err error) { +func (b *Batch) AckFail(err error) error { b.lock.Lock() defer b.lock.Unlock() b.Status = Fail b.err = err + + return err } // AckSkip marks the batch as skipped. diff --git a/xkafka/batch_consumer.go b/xkafka/batch_consumer.go index 4ad0613..5ee4584 100644 --- a/xkafka/batch_consumer.go +++ b/xkafka/batch_consumer.go @@ -9,7 +9,6 @@ import ( "time" "github.com/confluentinc/confluent-kafka-go/v2/kafka" - "github.com/sourcegraph/conc/pool" "github.com/sourcegraph/conc/stream" ) @@ -66,16 +65,19 @@ func (c *BatchConsumer) Use(mws ...BatchMiddlewarer) { } // Run starts the consumer and blocks until context is cancelled. -func (c *BatchConsumer) Run(ctx context.Context) error { +func (c *BatchConsumer) Run(ctx context.Context) (err error) { + defer func() { + cerr := c.close() + err = errors.Join(err, cerr) + }() + if err := c.subscribe(); err != nil { return err } - if err := c.start(ctx); err != nil { - return err - } + err = c.start(ctx) - return c.close() + return err } // Start subscribes to the configured topics and starts consuming messages. @@ -110,24 +112,54 @@ func (c *BatchConsumer) Close() { func (c *BatchConsumer) start(ctx context.Context) error { c.handler = c.concatMiddlewares(c.handler) - pool := pool.New(). - WithContext(ctx). - WithMaxGoroutines(2). 
- WithCancelOnError() + // Create a context that can be cancelled with cause + ctx, cancel := context.WithCancelCause(ctx) + defer func() { + cancel(nil) + c.batch.Stop() + }() + + errChan := make(chan error, 2) + var wg sync.WaitGroup + wg.Add(2) - pool.Go(func(ctx context.Context) error { + // Start process goroutine + go func() { + defer wg.Done() + var err error if c.config.concurrency > 1 { - return c.processAsync(ctx) + err = c.processAsync(ctx) + } else { + err = c.process(ctx) } + if err != nil { + cancel(err) + errChan <- err + } + }() - return c.process(ctx) - }) + // Start consume goroutine + go func() { + defer wg.Done() + err := c.consume(ctx) + if err != nil { + cancel(err) + errChan <- err + } + }() - pool.Go(func(ctx context.Context) error { - return c.consume(ctx) - }) + // Wait for completion and collect errors + go func() { + wg.Wait() + close(errChan) + }() - return pool.Wait() + // Return the first error that occurred + for err := range errChan { + return err + } + + return context.Cause(ctx) } func (c *BatchConsumer) process(ctx context.Context) error { @@ -157,7 +189,6 @@ func (c *BatchConsumer) processAsync(ctx context.Context) error { select { case <-ctx.Done(): st.Wait() - err := context.Cause(ctx) if errors.Is(err, context.Canceled) { return nil @@ -197,7 +228,7 @@ func (c *BatchConsumer) consume(ctx context.Context) (err error) { for { select { case <-ctx.Done(): - return err + return context.Cause(ctx) default: km, err := c.kafka.ReadMessage(c.config.pollTimeout) if err != nil { @@ -216,7 +247,6 @@ func (c *BatchConsumer) consume(ctx context.Context) (err error) { } msg := newMessage(c.name, km) - c.batch.Add(msg) } } @@ -274,18 +304,18 @@ type BatchManager struct { batch *Batch mutex *sync.RWMutex flushChan chan *Batch + done chan struct{} } // NewBatchManager creates a new BatchManager. 
func NewBatchManager(size int, timeout time.Duration) *BatchManager { b := &BatchManager{ - size: size, - timeout: timeout, - mutex: &sync.RWMutex{}, - batch: &Batch{ - Messages: make([]*Message, 0, size), - }, + size: size, + timeout: timeout, + mutex: &sync.RWMutex{}, + batch: NewBatch(), flushChan: make(chan *Batch), + done: make(chan struct{}), } go b.runFlushByTime() @@ -312,11 +342,21 @@ func (b *BatchManager) Receive() <-chan *Batch { func (b *BatchManager) runFlushByTime() { t := time.NewTicker(b.timeout) + defer t.Stop() - for range t.C { - b.mutex.Lock() - b.flush() - b.mutex.Unlock() + for { + select { + case <-b.done: + b.mutex.Lock() + b.flush() + close(b.flushChan) + b.mutex.Unlock() + return + case <-t.C: + b.mutex.Lock() + b.flush() + b.mutex.Unlock() + } } } @@ -330,7 +370,10 @@ func (b *BatchManager) flush() { b.flushChan <- b.batch - b.batch = &Batch{ - Messages: make([]*Message, 0, b.size), - } + b.batch = NewBatch() +} + +// Stop signals the batch manager to stop and clean up +func (b *BatchManager) Stop() { + close(b.done) } diff --git a/xkafka/batch_consumer_test.go b/xkafka/batch_consumer_test.go index c831b9d..226dd77 100644 --- a/xkafka/batch_consumer_test.go +++ b/xkafka/batch_consumer_test.go @@ -138,6 +138,58 @@ func TestBatchConsumer_HandleBatch(t *testing.T) { mockKafka.AssertExpectations(t) } +func TestBatchConsumer_HandleBatchError(t *testing.T) { + t.Parallel() + + testcases := []struct { + name string + options []ConsumerOption + }{ + { + name: "sequential", + options: []ConsumerOption{ + BatchSize(10), + BatchTimeout(testTimeout), + }, + }, + { + name: "async", + options: []ConsumerOption{ + Concurrency(2), + BatchSize(10), + BatchTimeout(testTimeout), + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, append(defaultOpts, tc.options...)...) 
+ + km := newFakeKafkaMessage() + ctx := context.Background() + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + err := assert.AnError + + return b.AckFail(err) + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + consumer.handler = handler + err := consumer.Run(ctx) + assert.ErrorIs(t, err, assert.AnError) + + mockKafka.AssertExpectations(t) + }) + } +} + func TestBatchConsumer_Async(t *testing.T) { t.Parallel() From 054ab92d9807fafc3b8120e31ac512b7af54b6cc Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Fri, 15 Nov 2024 09:16:35 +0530 Subject: [PATCH 06/10] Refactor logging to use zerolog across xkafka examples and middleware --- examples/xkafka/basic.go | 9 ++-- examples/xkafka/batch.go | 10 ++-- examples/xkafka/go.mod | 9 +++- examples/xkafka/go.sum | 19 ++++++- examples/xkafka/main.go | 17 +++--- examples/xkafka/producer.go | 9 ++-- examples/xkafka/setup.go | 8 ++- examples/xkafka/state.go | 23 +++----- xkafka/middleware/slog/slog_test.go | 46 +++++++--------- xkafka/middleware/zerolog/go.mod | 4 ++ xkafka/middleware/zerolog/go.sum | 7 +++ xkafka/middleware/zerolog/log.go | 33 ++++++++++++ xkafka/middleware/zerolog/log_test.go | 75 +++++++++++++++++++++++++++ 13 files changed, 195 insertions(+), 74 deletions(-) create mode 100644 xkafka/middleware/zerolog/log_test.go diff --git a/examples/xkafka/basic.go b/examples/xkafka/basic.go index c4b516a..8497f81 100644 --- a/examples/xkafka/basic.go +++ b/examples/xkafka/basic.go @@ -3,12 +3,12 @@ package main import ( "context" + "github.com/rs/zerolog" "github.com/urfave/cli/v2" - "log/slog" "github.com/gojekfarm/xrun" "github.com/gojekfarm/xtools/xkafka" - slogmw "github.com/gojekfarm/xtools/xkafka/middleware/slog" + logmw "github.com/gojekfarm/xtools/xkafka/middleware/zerolog" ) func runBasic(c *cli.Context) error { @@ -76,6 +76,7 @@ func runConsumers( pods int, opts ...xkafka.ConsumerOption, ) { + log := zerolog.Ctx(ctx) handler := basicHandler(tracker) for { @@ -92,7 +93,7 @@ func runConsumers( err := xrun.All(xrun.NoTimeout, components...).Run(ctx) if err != nil { - slog.Error("Error running consumers", "error", err) + log.Error().Err(err).Msg("Error running consumers") } } } @@ -108,7 +109,7 @@ func createConsumer(handler xkafka.HandlerFunc, opts ...xkafka.ConsumerOption) * panic(err) } - consumer.Use(slogmw.LoggingMiddleware()) + consumer.Use(logmw.LoggingMiddleware(zerolog.DebugLevel)) return consumer } diff --git a/examples/xkafka/batch.go b/examples/xkafka/batch.go index 8871a20..7667319 100644 --- a/examples/xkafka/batch.go +++ b/examples/xkafka/batch.go @@ -4,13 +4,12 @@ import ( "context" "time" - "log/slog" - + "github.com/rs/zerolog" "github.com/urfave/cli/v2" "github.com/gojekfarm/xrun" "github.com/gojekfarm/xtools/xkafka" - slogmw "github.com/gojekfarm/xtools/xkafka/middleware/slog" + logmw "github.com/gojekfarm/xtools/xkafka/middleware/zerolog" ) func runBatch(c *cli.Context) error { @@ -80,6 +79,7 @@ func batchHandler(tracker *Tracker) xkafka.BatchHandlerFunc { } func runBatchConsumers(ctx context.Context, tracker *Tracker, pods int, opts ...xkafka.ConsumerOption) { + log := zerolog.Ctx(ctx) handler := batchHandler(tracker) for { @@ -99,14 +99,14 @@ func runBatchConsumers(ctx context.Context, tracker *Tracker, pods int, opts ... 
panic(err) } - bc.Use(slogmw.BatchLoggingMiddleware()) + bc.Use(logmw.BatchLoggingMiddleware(zerolog.DebugLevel)) components = append(components, bc) } err := xrun.All(xrun.NoTimeout, components...).Run(ctx) if err != nil { - slog.Error("Error running consumers", "error", err) + log.Error().Err(err).Msg("Error running consumers") } } } diff --git a/examples/xkafka/go.mod b/examples/xkafka/go.mod index 56d59fb..3709ee8 100644 --- a/examples/xkafka/go.mod +++ b/examples/xkafka/go.mod @@ -5,26 +5,31 @@ go 1.21 replace ( github.com/gojekfarm/xtools => ../../ github.com/gojekfarm/xtools/xkafka => ../../xkafka - + github.com/gojekfarm/xtools/xkafka/middleware/zerolog => ../../xkafka/middleware/zerolog ) require ( github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 github.com/gojekfarm/xrun v0.3.0 github.com/gojekfarm/xtools/xkafka v0.9.0 - github.com/lmittmann/tint v1.0.3 + github.com/gojekfarm/xtools/xkafka/middleware/zerolog v0.0.0-00010101000000-000000000000 github.com/rs/xid v1.5.0 + github.com/rs/zerolog v1.29.0 github.com/urfave/cli/v2 v2.23.7 + golang.org/x/exp v0.0.0-20190121172915-509febef88a4 ) require ( github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.16 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.9.0 // indirect + golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 // indirect ) diff --git a/examples/xkafka/go.sum b/examples/xkafka/go.sum index dd87742..340978d 100644 --- a/examples/xkafka/go.sum +++ b/examples/xkafka/go.sum @@ -19,6 +19,7 @@ github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 h1:YmUjjRp1mSTqTxtHQYMQKBLa2hfgIZz9PSqoSRDkwf4= github.com/confluentinc/confluent-kafka-go/v2 v2.0.2/go.mod h1:qWGwym8EpAsIP5lZsTKhYTnYSGqkbxEfRB4A489Jo64= +github.com/coreos/go-systemd/v22 v22.3.3-0.20220203105225-a9a7ef127534/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -37,6 +38,7 @@ github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03D github.com/frankban/quicktest v1.10.0/go.mod h1:ui7WezCLWMWxVWr1GETZY3smRy0G4KWq9vcPtJmFl7Y= github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gojekfarm/xrun v0.3.0 h1:jwWKhok3orT7IMxjWWfwFYzaI7LLjmOI0JG0wjNHjQg= github.com/gojekfarm/xrun v0.3.0/go.mod h1:fmcikYfGnzwSKnnpHPIonP05ZU6WkBLgsee2QqgXCmw= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -97,8 +99,12 @@ github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MEl 
github.com/linkedin/goavro/v2 v2.10.0/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/linkedin/goavro/v2 v2.10.1/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/linkedin/goavro/v2 v2.11.1/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= -github.com/lmittmann/tint v1.0.3 h1:W5PHeA2D8bBJVvabNfQD/XW9HPLZK1XoPZH0cq8NouQ= -github.com/lmittmann/tint v1.0.3/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= @@ -114,8 +120,11 @@ github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a/go.mod h1:4r5QyqhjI github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= +github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/rs/zerolog v1.29.0 h1:Zes4hju04hjbvkVkOhdl2HpZa+0PmVwigmo8XoORE5w= +github.com/rs/zerolog v1.29.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0= @@ -144,6 +153,7 @@ go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTV golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4 h1:c2HOrn5iMezYjSlGPncknSEr/8x5LELb/ilJbXi9DEA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= @@ -178,7 +188,12 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 h1:v6hYoSR9T5oet+pMXwUWkbiVqx/63mlHjefrHmxwfeY= +golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= diff --git a/examples/xkafka/main.go b/examples/xkafka/main.go index e09728c..e810032 100644 --- a/examples/xkafka/main.go +++ b/examples/xkafka/main.go @@ -1,22 +1,19 @@ package main import ( - "log" "os" "time" - "github.com/lmittmann/tint" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" "github.com/urfave/cli/v2" - "log/slog" ) func main() { - slog.SetDefault(slog.New( - tint.NewHandler(os.Stderr, &tint.Options{ - Level: slog.LevelDebug, - TimeFormat: time.Kitchen, - }), - )) + // Configure zerolog + zerolog.TimeFieldFormat = time.Kitchen + console := zerolog.ConsoleWriter{Out: os.Stderr} + log.Logger = log.Output(console) app := cli.NewApp() @@ -67,6 +64,6 @@ func main() { } if err := app.Run(os.Args); err != nil { - log.Fatal(err) + log.Fatal().Err(err) } } diff --git a/examples/xkafka/producer.go b/examples/xkafka/producer.go index 6e98089..4946b87 100644 --- a/examples/xkafka/producer.go +++ b/examples/xkafka/producer.go @@ -3,7 +3,7 @@ package main import ( "context" - "log/slog" + "github.com/rs/zerolog/log" "github.com/gojekfarm/xtools/xkafka" ) @@ -13,8 +13,7 @@ func publishMessages(messages []*xkafka.Message) { "test-seq-producer", xkafka.Brokers(brokers), xkafka.ErrorHandler(func(err error) error { - slog.Error(err.Error()) - + log.Error().Err(err).Msg("") return err }), ) @@ -30,5 +29,7 @@ func publishMessages(messages []*xkafka.Message) { } } - slog.Info("[PRODUCER] published messages", "count", len(messages)) + log.Info(). + Int("count", len(messages)). + Msg("[PRODUCER] published messages") } diff --git a/examples/xkafka/setup.go b/examples/xkafka/setup.go index 26d5fed..a4c9a7d 100644 --- a/examples/xkafka/setup.go +++ b/examples/xkafka/setup.go @@ -6,7 +6,7 @@ import ( "github.com/confluentinc/confluent-kafka-go/v2/kafka" "github.com/rs/xid" - "log/slog" + "github.com/rs/zerolog/log" "github.com/gojekfarm/xtools/xkafka" ) @@ -35,7 +35,11 @@ func createTopic(partitions int) string { panic(err) } - slog.Info("[ADMIN] created topic", "name", name, "partitions", partitions, "result", res) + log.Info(). + Str("name", name). + Int("partitions", partitions). + Interface("result", res). 
+ Msg("[ADMIN] created topic") return name } diff --git a/examples/xkafka/state.go b/examples/xkafka/state.go index 89a5f51..c3d0d87 100644 --- a/examples/xkafka/state.go +++ b/examples/xkafka/state.go @@ -5,8 +5,8 @@ import ( "sync" "time" + "github.com/rs/zerolog/log" "golang.org/x/exp/rand" - "log/slog" "github.com/gojekfarm/xtools/xkafka" ) @@ -66,8 +66,7 @@ func (t *Tracker) CancelIfDone() { defer t.mu.Unlock() if len(t.received) == len(t.expect) { - slog.Info("[TRACKER] all messages received, cancelling context") - + log.Info().Msg("[TRACKER] all messages received, cancelling context") t.cancel() } } @@ -76,18 +75,8 @@ func (t *Tracker) Summary() { t.mu.Lock() defer t.mu.Unlock() - slog.Info("[TRACKER] summary", - "received", len(t.received), - "expected", len(t.expect), - ) - - for _, key := range t.order { - msg := t.received[key] - slog.Info("[TRACKER] message", - "key", string(msg.Key), - "offset", msg.Offset, - "partition", msg.Partition, - "topic", msg.Topic, - ) - } + log.Info(). + Int("received", len(t.received)). + Int("expected", len(t.expect)). + Msg("[TRACKER] summary") } diff --git a/xkafka/middleware/slog/slog_test.go b/xkafka/middleware/slog/slog_test.go index 5f97275..889bf22 100644 --- a/xkafka/middleware/slog/slog_test.go +++ b/xkafka/middleware/slog/slog_test.go @@ -4,9 +4,10 @@ import ( "context" "testing" - "github.com/stretchr/testify/assert" "log/slog" + "github.com/stretchr/testify/assert" + "github.com/gojekfarm/xtools/xkafka" ) @@ -24,36 +25,25 @@ func TestLoggingMiddleware(t *testing.T) { Level(slog.LevelInfo), logger, ) - handler := loggingMiddleware(xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { - msg.AckSuccess() - - return nil - })) - - err := handler.Handle(context.Background(), msg) - assert.NoError(t, err) -} - -func TestLoggingMiddlewareWithError(t *testing.T) { - msg := &xkafka.Message{ - Topic: "test-topic", - Partition: 0, - Offset: 0, - Key: []byte("test-key"), - } + t.Run("success", func(t *testing.T) { + handler := loggingMiddleware(xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { + msg.AckSuccess() + return nil + })) - loggingMiddleware := LoggingMiddleware( - Level(slog.LevelInfo), - logger, - ) - handler := loggingMiddleware(xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { - msg.AckFail(assert.AnError) + err := handler.Handle(context.Background(), msg) + assert.NoError(t, err) + }) - return assert.AnError - })) + t.Run("error", func(t *testing.T) { + handler := loggingMiddleware(xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { + msg.AckFail(assert.AnError) + return assert.AnError + })) - err := handler.Handle(context.Background(), msg) - assert.ErrorIs(t, err, assert.AnError) + err := handler.Handle(context.Background(), msg) + assert.ErrorIs(t, err, assert.AnError) + }) } func TestBatchLoggingMiddleware(t *testing.T) { diff --git a/xkafka/middleware/zerolog/go.mod b/xkafka/middleware/zerolog/go.mod index 69254a7..e0aa8aa 100644 --- a/xkafka/middleware/zerolog/go.mod +++ b/xkafka/middleware/zerolog/go.mod @@ -9,16 +9,20 @@ replace github.com/gojekfarm/xtools/xkafka => ../../ require ( github.com/gojekfarm/xtools/xkafka v0.9.0 github.com/rs/zerolog v1.29.0 + github.com/stretchr/testify v1.8.1 ) require ( github.com/confluentinc/confluent-kafka-go/v2 v2.0.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.16 // indirect github.com/pkg/errors v0.9.1 // 
indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rs/xid v1.5.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect go.uber.org/atomic v1.10.0 // indirect go.uber.org/multierr v1.9.0 // indirect golang.org/x/sys v0.0.0-20220829200755-d48e67d00261 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/xkafka/middleware/zerolog/go.sum b/xkafka/middleware/zerolog/go.sum index 91f9577..9d7e353 100644 --- a/xkafka/middleware/zerolog/go.sum +++ b/xkafka/middleware/zerolog/go.sum @@ -83,9 +83,11 @@ github.com/juju/qthttptest v0.1.1/go.mod h1:aTlAv8TYaflIiTDIQYzxnl1QdPjAg8Q8qJME github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/linkedin/goavro v2.1.0+incompatible/go.mod h1:bBCwI2eGYpUI/4820s67MElg9tdeLbINjLjiM2xZFYM= github.com/linkedin/goavro/v2 v2.10.0/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= @@ -112,6 +114,8 @@ github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a/go.mod h1:4r5QyqhjI github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= @@ -121,6 +125,7 @@ github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= @@ -128,6 +133,7 @@ github.com/stretchr/testify v1.3.1-0.20190311161405-34c6fa2dc709/go.mod h1:M5WIy github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 
h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -226,6 +232,7 @@ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183/go.mod h1:FvqrFXt+jCsyQibeRv4xxEJBL5iG2DDW5aeJwzDiq4A= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v1 v1.0.0/go.mod h1:CxwszS/Xz1C49Ucd2i6Zil5UToP1EmyrFhKaMVbg1mk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= diff --git a/xkafka/middleware/zerolog/log.go b/xkafka/middleware/zerolog/log.go index f991d7d..b9a97a9 100644 --- a/xkafka/middleware/zerolog/log.go +++ b/xkafka/middleware/zerolog/log.go @@ -3,6 +3,7 @@ package zerolog import ( "context" + "time" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -43,3 +44,35 @@ func LoggingMiddleware(lvl zerolog.Level) xkafka.MiddlewareFunc { }) } } + +// BatchLoggingMiddleware is a middleware that logs batch processing using zerolog. +// Also adds a structured logger to the context. +func BatchLoggingMiddleware(lvl zerolog.Level) xkafka.BatchMiddlewareFunc { + return func(next xkafka.BatchHandler) xkafka.BatchHandler { + return xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error { + start := time.Now() + fields := log.With(). + Int("count", len(b.Messages)). + Int64("max_offset", b.MaxOffset()). + Str("batch_id", b.ID) + + l := fields.Logger() + + ctx = l.WithContext(ctx) + + err := next.HandleBatch(ctx, b) + if err != nil { + l.WithLevel(zerolog.ErrorLevel). + Dur("duration", time.Since(start)). + Err(err). + Msg("[xkafka] batch processing failed") + } else { + l.WithLevel(lvl). + Dur("duration", time.Since(start)). 
+ Msg("[xkafka] batch processed") + } + + return err + }) + } +} diff --git a/xkafka/middleware/zerolog/log_test.go b/xkafka/middleware/zerolog/log_test.go new file mode 100644 index 0000000..f714203 --- /dev/null +++ b/xkafka/middleware/zerolog/log_test.go @@ -0,0 +1,75 @@ +package zerolog + +import ( + "context" + "testing" + + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + + "github.com/gojekfarm/xtools/xkafka" +) + +func TestLoggingMiddleware(t *testing.T) { + msg := &xkafka.Message{ + Topic: "test-topic", + Partition: 0, + Offset: 0, + Key: []byte("test-key"), + } + + loggingMiddleware := LoggingMiddleware(zerolog.InfoLevel) + t.Run("success", func(t *testing.T) { + handler := loggingMiddleware(xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { + msg.AckSuccess() + return nil + })) + + err := handler.Handle(context.Background(), msg) + assert.NoError(t, err) + }) + + t.Run("error", func(t *testing.T) { + handler := loggingMiddleware(xkafka.HandlerFunc(func(ctx context.Context, msg *xkafka.Message) error { + msg.AckFail(assert.AnError) + return assert.AnError + })) + + err := handler.Handle(context.Background(), msg) + assert.ErrorIs(t, err, assert.AnError) + }) +} + +func TestBatchLoggingMiddleware(t *testing.T) { + batch := xkafka.NewBatch() + msg := &xkafka.Message{ + Topic: "test-topic", + Partition: 0, + Offset: 0, + Key: []byte("test-key"), + } + + batch.Messages = append(batch.Messages, msg) + + loggingMiddleware := BatchLoggingMiddleware(zerolog.InfoLevel) + + t.Run("success", func(t *testing.T) { + handler := loggingMiddleware(xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error { + b.AckSuccess() + return nil + })) + + err := handler.HandleBatch(context.Background(), batch) + assert.NoError(t, err) + }) + + t.Run("error", func(t *testing.T) { + handler := loggingMiddleware(xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error { + b.AckFail(assert.AnError) + return assert.AnError + })) + + err := handler.HandleBatch(context.Background(), batch) + assert.ErrorIs(t, err, assert.AnError) + }) +} From 2ff19fdb5852981a6af5dead91f1a7c734f34ca4 Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Fri, 15 Nov 2024 09:20:51 +0530 Subject: [PATCH 07/10] Refactor batch consumer to simply creating and processing batches --- xkafka/batch_consumer.go | 365 ++++++++++------------------ xkafka/batch_consumer_test.go | 31 ++- xkafka/middleware/slog/slog_test.go | 3 +- 3 files changed, 149 insertions(+), 250 deletions(-) diff --git a/xkafka/batch_consumer.go b/xkafka/batch_consumer.go index 5ee4584..35c75c3 100644 --- a/xkafka/batch_consumer.go +++ b/xkafka/batch_consumer.go @@ -4,24 +4,20 @@ import ( "context" "errors" "strings" - "sync" - "sync/atomic" "time" "github.com/confluentinc/confluent-kafka-go/v2/kafka" "github.com/sourcegraph/conc/stream" ) -// BatchConsumer manages consumption & processing of messages -// from kafka topics in batches. +// BatchConsumer manages the consumption of messages from kafka topics +// and processes them in batches. type BatchConsumer struct { name string kafka consumerClient handler BatchHandler middlewares []BatchMiddlewarer config *consumerConfig - batch *BatchManager - cancelCtx atomic.Pointer[context.CancelFunc] } // NewBatchConsumer creates a new BatchConsumer instance. 
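
This refactor folds the old BatchManager (its flush goroutine and channel) into the consumer loop itself, so a caller only constructs the consumer and calls Run. Below is a minimal sketch of that wiring, using only options that already appear elsewhere in this series; the group, broker, and topic names are placeholders, not part of the patch:

```go
// Sketch only: driving the refactored BatchConsumer. All option names are
// taken from the examples in this series; the literal values are placeholders.
package main

import (
	"context"
	"log"
	"time"

	"github.com/gojekfarm/xtools/xkafka"
)

func main() {
	handler := xkafka.BatchHandlerFunc(func(ctx context.Context, b *xkafka.Batch) error {
		// Process b.Messages, then ack the batch so its offsets are stored.
		b.AckSuccess()
		return nil
	})

	bc, err := xkafka.NewBatchConsumer(
		"example-group",
		handler,
		xkafka.Brokers([]string{"localhost:9092"}),
		xkafka.Topics{"example-topic"},
		xkafka.BatchSize(100),
		xkafka.BatchTimeout(5*time.Second),
		xkafka.Concurrency(4), // >1 selects the async path
		xkafka.ErrorHandler(func(err error) error {
			return err // returning a non-nil error stops the consumer
		}),
	)
	if err != nil {
		log.Fatal(err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	// Run blocks until the context is cancelled or an error propagates
	// from the handler via the configured ErrorHandler.
	if err := bc.Run(ctx); err != nil {
		log.Fatal(err)
	}
}
```

With Concurrency(1) each full batch is handled inline in the read loop; higher values hand batches to a worker from the conc stream pool, which is the path the later hunks in this patch change.
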
@@ -50,185 +46,135 @@ func NewBatchConsumer(name string, handler BatchHandler, opts ...ConsumerOption) config: cfg, kafka: consumer, handler: handler, - batch: NewBatchManager(cfg.batchSize, cfg.batchTimeout), }, nil } -// GetMetadata returns the metadata for the consumer. -func (c *BatchConsumer) GetMetadata() (*Metadata, error) { - return c.kafka.GetMetadata(nil, false, int(c.config.metadataTimeout.Milliseconds())) +// Use appends a BatchMiddlewareFunc to the chain. +func (c *BatchConsumer) Use(mwf ...BatchMiddlewarer) { + c.middlewares = append(c.middlewares, mwf...) } -// Use appends a BatchMiddleware to the chain. -func (c *BatchConsumer) Use(mws ...BatchMiddlewarer) { - c.middlewares = append(c.middlewares, mws...) -} - -// Run starts the consumer and blocks until context is cancelled. +// Run starts running the BatchConsumer. The component will stop running +// when the context is closed. Run blocks until the context is closed or +// an error occurs. func (c *BatchConsumer) Run(ctx context.Context) (err error) { - defer func() { - cerr := c.close() - err = errors.Join(err, cerr) - }() - if err := c.subscribe(); err != nil { return err } - err = c.start(ctx) + defer func() { + if cerr := c.close(); cerr != nil { + err = errors.Join(err, cerr) + } + }() - return err + return c.start(ctx) } -// Start subscribes to the configured topics and starts consuming messages. -// This method is non-blocking and returns immediately post subscribe. -// Instead, use Run if you want to block until the context is closed or an error occurs. -// -// Errors are handled by the ErrorHandler if set, otherwise they stop the consumer -// and are returned. -func (c *BatchConsumer) Start() error { - if err := c.subscribe(); err != nil { - return err - } - - ctx, cancel := context.WithCancel(context.Background()) - c.cancelCtx.Store(&cancel) - - go func() { _ = c.start(ctx) }() - - return nil -} +func (c *BatchConsumer) start(ctx context.Context) error { + c.handler = c.concatMiddlewares(c.handler) -// Close closes the consumer. 
-func (c *BatchConsumer) Close() { - cancel := c.cancelCtx.Load() - if cancel != nil { - (*cancel)() + if c.config.concurrency > 1 { + return c.runAsync(ctx) } - _ = c.close() + return c.runSequential(ctx) } -func (c *BatchConsumer) start(ctx context.Context) error { - c.handler = c.concatMiddlewares(c.handler) - - // Create a context that can be cancelled with cause - ctx, cancel := context.WithCancelCause(ctx) +func (c *BatchConsumer) runSequential(ctx context.Context) (err error) { defer func() { - cancel(nil) - c.batch.Stop() - }() - - errChan := make(chan error, 2) - var wg sync.WaitGroup - wg.Add(2) - - // Start process goroutine - go func() { - defer wg.Done() - var err error - if c.config.concurrency > 1 { - err = c.processAsync(ctx) - } else { - err = c.process(ctx) - } - if err != nil { - cancel(err) - errChan <- err - } - }() - - // Start consume goroutine - go func() { - defer wg.Done() - err := c.consume(ctx) - if err != nil { - cancel(err) - errChan <- err + if uerr := c.unsubscribe(); uerr != nil { + err = errors.Join(err, uerr) } }() - // Wait for completion and collect errors - go func() { - wg.Wait() - close(errChan) - }() - - // Return the first error that occurred - for err := range errChan { - return err - } + batch := NewBatch() + timer := time.NewTimer(c.config.batchTimeout) + defer timer.Stop() - return context.Cause(ctx) -} - -func (c *BatchConsumer) process(ctx context.Context) error { for { select { case <-ctx.Done(): - return nil - case batch := <-c.batch.Receive(): - err := c.handler.HandleBatch(ctx, batch) - if ferr := c.config.errorHandler(err); ferr != nil { - return ferr + if len(batch.Messages) > 0 { + if err := c.processBatch(ctx, batch); err != nil { + return err + } + } + return err + + case <-timer.C: + if len(batch.Messages) > 0 { + if err := c.processBatch(ctx, batch); err != nil { + return err + } + batch = NewBatch() } + timer.Reset(c.config.batchTimeout) - err = c.saveOffset(batch) + default: + km, err := c.kafka.ReadMessage(c.config.pollTimeout) if err != nil { - return err + var kerr kafka.Error + if errors.As(err, &kerr) && kerr.Code() == kafka.ErrTimedOut { + continue + } + + if ferr := c.config.errorHandler(err); ferr != nil { + return ferr + } + continue + } + + msg := newMessage(c.name, km) + batch.Messages = append(batch.Messages, msg) + + if len(batch.Messages) >= c.config.batchSize { + if err := c.processBatch(ctx, batch); err != nil { + return err + } + batch = NewBatch() + timer.Reset(c.config.batchTimeout) } } } } -func (c *BatchConsumer) processAsync(ctx context.Context) error { +func (c *BatchConsumer) runAsync(ctx context.Context) error { st := stream.New().WithMaxGoroutines(c.config.concurrency) ctx, cancel := context.WithCancelCause(ctx) + batch := NewBatch() + timer := time.NewTimer(c.config.batchTimeout) + defer timer.Stop() + for { select { case <-ctx.Done(): st.Wait() - err := context.Cause(ctx) - if errors.Is(err, context.Canceled) { - return nil - } - return err - case batch := <-c.batch.Receive(): - st.Go(func() stream.Callback { - err := c.handler.HandleBatch(ctx, batch) - if ferr := c.config.errorHandler(err); ferr != nil { - cancel(ferr) + var err error - return func() {} - } + if len(batch.Messages) > 0 { + err = c.processBatch(ctx, batch) + } - return func() { - if err := c.saveOffset(batch); err != nil { - cancel(err) - } - } - }) - } - } -} + uerr := c.unsubscribe() + err = errors.Join(err, uerr) -func (c *BatchConsumer) consume(ctx context.Context) (err error) { - if err := c.subscribe(); err != nil { - return err 
- } + cerr := context.Cause(ctx) + if cerr != nil && !errors.Is(cerr, context.Canceled) { + err = errors.Join(err, cerr) + } - defer func() { - if uerr := c.unsubscribe(); uerr != nil { - err = errors.Join(err, uerr) - } - }() + return err + + case <-timer.C: + if len(batch.Messages) > 0 { + c.processBatchAsync(ctx, batch, st, cancel) + batch = NewBatch() + } + timer.Reset(c.config.batchTimeout) - for { - select { - case <-ctx.Done(): - return context.Cause(ctx) default: km, err := c.kafka.ReadMessage(c.config.pollTimeout) if err != nil { @@ -238,58 +184,63 @@ func (c *BatchConsumer) consume(ctx context.Context) (err error) { } if ferr := c.config.errorHandler(err); ferr != nil { - err = ferr - - return err + cancel(ferr) } continue } msg := newMessage(c.name, km) - c.batch.Add(msg) + batch.Messages = append(batch.Messages, msg) + + if len(batch.Messages) >= c.config.batchSize { + c.processBatchAsync(ctx, batch, st, cancel) + batch = NewBatch() + timer.Reset(c.config.batchTimeout) + } } } } -func (c *BatchConsumer) subscribe() error { - return c.kafka.SubscribeTopics(c.config.topics, nil) -} - -func (c *BatchConsumer) unsubscribe() error { - _, _ = c.kafka.Commit() - - return c.kafka.Unsubscribe() -} - -func (c *BatchConsumer) close() error { - <-time.After(c.config.shutdownTimeout) +func (c *BatchConsumer) processBatch(ctx context.Context, batch *Batch) error { + err := c.handler.HandleBatch(ctx, batch) + if ferr := c.config.errorHandler(err); ferr != nil { + return ferr + } - return c.kafka.Close() + return c.storeBatch(batch) } -func (c *BatchConsumer) concatMiddlewares(handler BatchHandler) BatchHandler { - for i := len(c.middlewares) - 1; i >= 0; i-- { - handler = c.middlewares[i].BatchMiddleware(handler) - } +func (c *BatchConsumer) processBatchAsync(ctx context.Context, batch *Batch, st *stream.Stream, cancel context.CancelCauseFunc) { + st.Go(func() stream.Callback { + err := c.handler.HandleBatch(ctx, batch) + if ferr := c.config.errorHandler(err); ferr != nil { + cancel(ferr) + return func() {} + } - return handler + return func() { + if err := c.storeBatch(batch); err != nil { + cancel(err) + } + } + }) } -func (c *BatchConsumer) saveOffset(batch *Batch) error { +func (c *BatchConsumer) storeBatch(batch *Batch) error { if batch.Status != Success && batch.Status != Skip { return nil } - offsets := batch.GroupMaxOffset() - - _, err := c.kafka.StoreOffsets(offsets) + tps := batch.GroupMaxOffset() + _, err := c.kafka.StoreOffsets(tps) if err != nil { return err } if c.config.manualCommit { - if _, err := c.kafka.Commit(); err != nil { + _, err := c.kafka.Commit() + if err != nil { return err } } @@ -297,83 +248,23 @@ func (c *BatchConsumer) saveOffset(batch *Batch) error { return nil } -// BatchManager manages aggregation and processing of Message batches. -type BatchManager struct { - size int - timeout time.Duration - batch *Batch - mutex *sync.RWMutex - flushChan chan *Batch - done chan struct{} -} - -// NewBatchManager creates a new BatchManager. -func NewBatchManager(size int, timeout time.Duration) *BatchManager { - b := &BatchManager{ - size: size, - timeout: timeout, - mutex: &sync.RWMutex{}, - batch: NewBatch(), - flushChan: make(chan *Batch), - done: make(chan struct{}), - } - - go b.runFlushByTime() - - return b -} - -// Add adds to batch and flushes when MaxSize is reached. 
-func (b *BatchManager) Add(m *Message) { - b.mutex.Lock() - b.batch.Messages = append(b.batch.Messages, m) - - if len(b.batch.Messages) >= b.size { - b.flush() +func (c *BatchConsumer) concatMiddlewares(h BatchHandler) BatchHandler { + for i := len(c.middlewares) - 1; i >= 0; i-- { + h = c.middlewares[i].BatchMiddleware(h) } - - b.mutex.Unlock() + return h } -// Receive returns a channel to read batched Messages. -func (b *BatchManager) Receive() <-chan *Batch { - return b.flushChan -} - -func (b *BatchManager) runFlushByTime() { - t := time.NewTicker(b.timeout) - defer t.Stop() - - for { - select { - case <-b.done: - b.mutex.Lock() - b.flush() - close(b.flushChan) - b.mutex.Unlock() - return - case <-t.C: - b.mutex.Lock() - b.flush() - b.mutex.Unlock() - } - } +func (c *BatchConsumer) subscribe() error { + return c.kafka.SubscribeTopics(c.config.topics, nil) } -// flush sends the batch to the flush channel and resets the batch. -// DESIGN: flush does NOT acquire a mutex lock. Locks should be managed by caller. -// nolint:gosimple -func (b *BatchManager) flush() { - if len(b.batch.Messages) == 0 { - return - } - - b.flushChan <- b.batch - - b.batch = NewBatch() +func (c *BatchConsumer) unsubscribe() error { + _, _ = c.kafka.Commit() + return c.kafka.Unsubscribe() } -// Stop signals the batch manager to stop and clean up -func (b *BatchManager) Stop() { - close(b.done) +func (c *BatchConsumer) close() error { + <-time.After(c.config.shutdownTimeout) + return c.kafka.Close() } diff --git a/xkafka/batch_consumer_test.go b/xkafka/batch_consumer_test.go index 226dd77..fe683e4 100644 --- a/xkafka/batch_consumer_test.go +++ b/xkafka/batch_consumer_test.go @@ -63,19 +63,19 @@ func TestNewBatchConsumer(t *testing.T) { func TestBatchConsumer_Lifecycle(t *testing.T) { t.Parallel() - t.Run("StartSubscribeError", func(t *testing.T) { + t.Run("RunSubscribeError", func(t *testing.T) { consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) expectError := errors.New("error in subscribe") mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(expectError) - assert.Error(t, consumer.Start()) + assert.Error(t, consumer.Run(context.Background())) mockKafka.AssertExpectations(t) }) - t.Run("StartSuccessCloseError", func(t *testing.T) { + t.Run("RunCloseError", func(t *testing.T) { consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) @@ -84,14 +84,19 @@ func TestBatchConsumer_Lifecycle(t *testing.T) { mockKafka.On("Commit").Return(nil, nil) mockKafka.On("Close").Return(errors.New("error in close")) - assert.NoError(t, consumer.Start()) - <-time.After(100 * time.Millisecond) - consumer.Close() + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + <-time.After(100 * time.Millisecond) + cancel() + }() + + assert.Error(t, consumer.Run(ctx)) mockKafka.AssertExpectations(t) }) - t.Run("StartCloseSuccess", func(t *testing.T) { + t.Run("RunSuccess", func(t *testing.T) { consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) 
mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) @@ -100,9 +105,14 @@ func TestBatchConsumer_Lifecycle(t *testing.T) { mockKafka.On("Commit").Return(nil, nil) mockKafka.On("Close").Return(nil) - assert.NoError(t, consumer.Start()) - <-time.After(100 * time.Millisecond) - consumer.Close() + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + <-time.After(100 * time.Millisecond) + cancel() + }() + + assert.NoError(t, consumer.Run(ctx)) mockKafka.AssertExpectations(t) }) @@ -208,7 +218,6 @@ func TestBatchConsumer_Async(t *testing.T) { b.AckSuccess() assert.NotNil(t, b) - assert.Len(t, b.Messages, 3) n := count.Add(1) diff --git a/xkafka/middleware/slog/slog_test.go b/xkafka/middleware/slog/slog_test.go index 889bf22..eaa74bf 100644 --- a/xkafka/middleware/slog/slog_test.go +++ b/xkafka/middleware/slog/slog_test.go @@ -4,9 +4,8 @@ import ( "context" "testing" - "log/slog" - "github.com/stretchr/testify/assert" + "log/slog" "github.com/gojekfarm/xtools/xkafka" ) From 38bd50e0e2b13b01ae30c922ba5a1e9060fd204f Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Fri, 15 Nov 2024 09:56:11 +0530 Subject: [PATCH 08/10] Fix lint errors --- examples/xkafka/batch.go | 4 +--- examples/xkafka/producer.go | 1 + xkafka/batch_consumer.go | 41 ++++++++++++++++++++++++++----------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/examples/xkafka/batch.go b/examples/xkafka/batch.go index 7667319..fbc6012 100644 --- a/examples/xkafka/batch.go +++ b/examples/xkafka/batch.go @@ -61,9 +61,7 @@ func batchHandler(tracker *Tracker) xkafka.BatchHandlerFunc { return func(ctx context.Context, batch *xkafka.Batch) error { err := tracker.SimulateWork() if err != nil { - batch.AckFail(err) - - return err + return batch.AckFail(err) } for _, msg := range batch.Messages { diff --git a/examples/xkafka/producer.go b/examples/xkafka/producer.go index 4946b87..4356ba1 100644 --- a/examples/xkafka/producer.go +++ b/examples/xkafka/producer.go @@ -14,6 +14,7 @@ func publishMessages(messages []*xkafka.Message) { xkafka.Brokers(brokers), xkafka.ErrorHandler(func(err error) error { log.Error().Err(err).Msg("") + return err }), ) diff --git a/xkafka/batch_consumer.go b/xkafka/batch_consumer.go index 35c75c3..f47895e 100644 --- a/xkafka/batch_consumer.go +++ b/xkafka/batch_consumer.go @@ -90,25 +90,27 @@ func (c *BatchConsumer) runSequential(ctx context.Context) (err error) { batch := NewBatch() timer := time.NewTimer(c.config.batchTimeout) + defer timer.Stop() for { select { case <-ctx.Done(): - if len(batch.Messages) > 0 { - if err := c.processBatch(ctx, batch); err != nil { - return err - } + if err := c.processBatch(ctx, batch); err != nil { + return err } - return err + + return nil case <-timer.C: if len(batch.Messages) > 0 { if err := c.processBatch(ctx, batch); err != nil { return err } + batch = NewBatch() } + timer.Reset(c.config.batchTimeout) default: @@ -122,6 +124,7 @@ func (c *BatchConsumer) runSequential(ctx context.Context) (err error) { if ferr := c.config.errorHandler(err); ferr != nil { return ferr } + continue } @@ -132,7 +135,9 @@ func (c *BatchConsumer) runSequential(ctx context.Context) (err error) { if err := c.processBatch(ctx, batch); err != nil { return err } + batch = NewBatch() + timer.Reset(c.config.batchTimeout) } } @@ -145,6 +150,7 @@ func (c *BatchConsumer) runAsync(ctx context.Context) error { batch := NewBatch() timer := time.NewTimer(c.config.batchTimeout) + defer timer.Stop() for { @@ -152,12 +158,7 @@ func (c *BatchConsumer) 
runAsync(ctx context.Context) error { case <-ctx.Done(): st.Wait() - var err error - - if len(batch.Messages) > 0 { - err = c.processBatch(ctx, batch) - } - + err := c.processBatch(ctx, batch) uerr := c.unsubscribe() err = errors.Join(err, uerr) @@ -173,6 +174,7 @@ func (c *BatchConsumer) runAsync(ctx context.Context) error { c.processBatchAsync(ctx, batch, st, cancel) batch = NewBatch() } + timer.Reset(c.config.batchTimeout) default: @@ -196,6 +198,7 @@ func (c *BatchConsumer) runAsync(ctx context.Context) error { if len(batch.Messages) >= c.config.batchSize { c.processBatchAsync(ctx, batch, st, cancel) batch = NewBatch() + timer.Reset(c.config.batchTimeout) } } @@ -203,6 +206,10 @@ func (c *BatchConsumer) runAsync(ctx context.Context) error { } func (c *BatchConsumer) processBatch(ctx context.Context, batch *Batch) error { + if len(batch.Messages) == 0 { + return nil + } + err := c.handler.HandleBatch(ctx, batch) if ferr := c.config.errorHandler(err); ferr != nil { return ferr @@ -211,11 +218,17 @@ func (c *BatchConsumer) processBatch(ctx context.Context, batch *Batch) error { return c.storeBatch(batch) } -func (c *BatchConsumer) processBatchAsync(ctx context.Context, batch *Batch, st *stream.Stream, cancel context.CancelCauseFunc) { +func (c *BatchConsumer) processBatchAsync( + ctx context.Context, + batch *Batch, + st *stream.Stream, + cancel context.CancelCauseFunc, +) { st.Go(func() stream.Callback { err := c.handler.HandleBatch(ctx, batch) if ferr := c.config.errorHandler(err); ferr != nil { cancel(ferr) + return func() {} } @@ -233,6 +246,7 @@ func (c *BatchConsumer) storeBatch(batch *Batch) error { } tps := batch.GroupMaxOffset() + _, err := c.kafka.StoreOffsets(tps) if err != nil { return err @@ -252,6 +266,7 @@ func (c *BatchConsumer) concatMiddlewares(h BatchHandler) BatchHandler { for i := len(c.middlewares) - 1; i >= 0; i-- { h = c.middlewares[i].BatchMiddleware(h) } + return h } @@ -261,10 +276,12 @@ func (c *BatchConsumer) subscribe() error { func (c *BatchConsumer) unsubscribe() error { _, _ = c.kafka.Commit() + return c.kafka.Unsubscribe() } func (c *BatchConsumer) close() error { <-time.After(c.config.shutdownTimeout) + return c.kafka.Close() } From 0fd3b6dc892ce67a3d352b312743ee773d81e66e Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Mon, 18 Nov 2024 09:49:39 +0530 Subject: [PATCH 09/10] Fix async consumer to support graceful shutdown by introducing stopOffset flag --- examples/xkafka/basic.go | 2 +- examples/xkafka/batch.go | 14 ++++++++------ examples/xkafka/producer.go | 4 +++- examples/xkafka/state.go | 13 +++++++++---- xkafka/batch_consumer.go | 10 +++++++++- xkafka/consumer.go | 9 ++++++++- 6 files changed, 38 insertions(+), 14 deletions(-) diff --git a/examples/xkafka/basic.go b/examples/xkafka/basic.go index 8497f81..c5a963c 100644 --- a/examples/xkafka/basic.go +++ b/examples/xkafka/basic.go @@ -54,7 +54,7 @@ func runBasic(c *cli.Context) error { func basicHandler(tracker *Tracker) xkafka.HandlerFunc { return func(ctx context.Context, msg *xkafka.Message) error { - err := tracker.SimulateWork() + err := tracker.SimulateWork(msg) if err != nil { msg.AckFail(err) diff --git a/examples/xkafka/batch.go b/examples/xkafka/batch.go index fbc6012..dcb5c57 100644 --- a/examples/xkafka/batch.go +++ b/examples/xkafka/batch.go @@ -5,6 +5,7 @@ import ( "time" "github.com/rs/zerolog" + "github.com/rs/zerolog/log" "github.com/urfave/cli/v2" "github.com/gojekfarm/xrun" @@ -59,12 +60,12 @@ func runBatch(c *cli.Context) error { func batchHandler(tracker *Tracker) 
xkafka.BatchHandlerFunc { return func(ctx context.Context, batch *xkafka.Batch) error { - err := tracker.SimulateWork() - if err != nil { - return batch.AckFail(err) - } - for _, msg := range batch.Messages { + err := tracker.SimulateWork(msg) + if err != nil { + return batch.AckFail(err) + } + tracker.Ack(msg) } @@ -77,7 +78,6 @@ func batchHandler(tracker *Tracker) xkafka.BatchHandlerFunc { } func runBatchConsumers(ctx context.Context, tracker *Tracker, pods int, opts ...xkafka.ConsumerOption) { - log := zerolog.Ctx(ctx) handler := batchHandler(tracker) for { @@ -102,6 +102,8 @@ func runBatchConsumers(ctx context.Context, tracker *Tracker, pods int, opts ... components = append(components, bc) } + log.Info().Msg("Running consumers") + err := xrun.All(xrun.NoTimeout, components...).Run(ctx) if err != nil { log.Error().Err(err).Msg("Error running consumers") diff --git a/examples/xkafka/producer.go b/examples/xkafka/producer.go index 4356ba1..a1ef07e 100644 --- a/examples/xkafka/producer.go +++ b/examples/xkafka/producer.go @@ -24,8 +24,10 @@ func publishMessages(messages []*xkafka.Message) { defer producer.Close() + ctx := context.Background() + for _, msg := range messages { - if err := producer.Publish(context.Background(), msg); err != nil { + if err := producer.AsyncPublish(ctx, msg); err != nil { panic(err) } } diff --git a/examples/xkafka/state.go b/examples/xkafka/state.go index c3d0d87..cb6171c 100644 --- a/examples/xkafka/state.go +++ b/examples/xkafka/state.go @@ -1,7 +1,7 @@ package main import ( - "errors" + "fmt" "sync" "time" @@ -41,10 +41,12 @@ func (t *Tracker) Ack(msg *xkafka.Message) { t.received[string(msg.Key)] = msg t.order = append(t.order, string(msg.Key)) + + log.Info().Msgf("[TRACKER] %d/%d", len(t.received), len(t.expect)) } -func (t *Tracker) SimulateWork() error { - <-time.After(time.Duration(rand.Int63n(200)) * time.Millisecond) +func (t *Tracker) SimulateWork(msg *xkafka.Message) error { + <-time.After(time.Duration(rand.Int63n(50)) * time.Millisecond) t.mu.Lock() defer t.mu.Unlock() @@ -55,7 +57,10 @@ func (t *Tracker) SimulateWork() error { if len(t.received) >= after && !t.simulateError { t.simulateError = true - return errors.New("simulated error") + return fmt.Errorf( + "simulated error. partition %d, offset %d", + msg.Partition, msg.Offset, + ) } return nil diff --git a/xkafka/batch_consumer.go b/xkafka/batch_consumer.go index f47895e..e8faef7 100644 --- a/xkafka/batch_consumer.go +++ b/xkafka/batch_consumer.go @@ -4,6 +4,7 @@ import ( "context" "errors" "strings" + "sync/atomic" "time" "github.com/confluentinc/confluent-kafka-go/v2/kafka" @@ -18,6 +19,7 @@ type BatchConsumer struct { handler BatchHandler middlewares []BatchMiddlewarer config *consumerConfig + stopOffset atomic.Bool } // NewBatchConsumer creates a new BatchConsumer instance. 
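The hunks that follow wire the new flag into offset handling: when an async handler fails, the stream callback trips stopOffset and cancels the run, and storeBatch becomes a no-op from then on, so offsets past the failed batch are never stored and those messages are redelivered after a restart. Below is a minimal, self-contained sketch of that pattern; it is illustrative only, the offsetGuard type and its methods are invented for this example and are not part of the patch.

package main

import (
	"context"
	"errors"
	"fmt"
	"sync/atomic"
)

// offsetGuard mirrors the stopOffset idea: a one-way switch that, once
// tripped by a handler error, suppresses all later offset stores.
// (Hypothetical name, not from the patch.)
type offsetGuard struct {
	stop atomic.Bool
}

// onError is what the async callback does on a handler failure: cancel the
// run and make sure no later callback advances offsets past the failure.
func (g *offsetGuard) onError(cancel context.CancelCauseFunc, err error) {
	g.stop.Store(true)
	cancel(err)
}

// maybeStore wraps the real offset-store call; it is skipped once the
// guard has been tripped.
func (g *offsetGuard) maybeStore(store func() error) error {
	if g.stop.Load() {
		return nil // a batch already failed; keep offsets where they were
	}
	return store()
}

func main() {
	ctx, cancel := context.WithCancelCause(context.Background())
	g := &offsetGuard{}

	// First batch succeeds and its offsets are stored.
	_ = g.maybeStore(func() error { fmt.Println("stored batch 1"); return nil })

	// Second batch fails: trip the guard and cancel the run.
	g.onError(cancel, errors.New("handler failed"))

	// A third callback may still be in flight; its store is now a no-op.
	_ = g.maybeStore(func() error { fmt.Println("stored batch 3"); return nil })

	<-ctx.Done()
	fmt.Println("run stopped:", context.Cause(ctx))
}

The ordering matters: the flag is set before (or together with) cancellation, so any callback that is already queued behind the failing one observes the flag and skips its store, which is exactly what the stopOffset checks in the hunks below rely on.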
@@ -229,7 +231,9 @@ func (c *BatchConsumer) processBatchAsync( if ferr := c.config.errorHandler(err); ferr != nil { cancel(ferr) - return func() {} + return func() { + c.stopOffset.Store(true) + } } return func() { @@ -245,6 +249,10 @@ func (c *BatchConsumer) storeBatch(batch *Batch) error { return nil } + if c.stopOffset.Load() { + return nil + } + tps := batch.GroupMaxOffset() _, err := c.kafka.StoreOffsets(tps) diff --git a/xkafka/consumer.go b/xkafka/consumer.go index 1506751..7423a56 100644 --- a/xkafka/consumer.go +++ b/xkafka/consumer.go @@ -20,6 +20,7 @@ type Consumer struct { middlewares []Middlewarer config *consumerConfig cancelCtx atomic.Pointer[context.CancelFunc] + stopOffset atomic.Bool } // NewConsumer creates a new Consumer instance. @@ -207,7 +208,9 @@ func (c *Consumer) runAsync(ctx context.Context) error { if ferr := c.config.errorHandler(err); ferr != nil { cancel(ferr) - return func() {} + return func() { + c.stopOffset.Store(true) + } } return func() { @@ -225,6 +228,10 @@ func (c *Consumer) storeMessage(msg *Message) error { return nil } + if c.stopOffset.Load() { + return nil + } + // similar to StoreMessage in confluent-kafka-go/consumer.go // msg.Offset + 1 it ensures that the consumer starts with // next message when it restarts From 37603db703ed1d8864639d10d96e1625a75a78d2 Mon Sep 17 00:00:00 2001 From: Ravi Atluri Date: Mon, 18 Nov 2024 12:17:23 +0530 Subject: [PATCH 10/10] Add tests for async consumer error handling and batch processing --- xkafka/batch_consumer_test.go | 345 +++++++++++++++++++++++++++++++++- xkafka/batch_test.go | 120 ++++++++++++ xkafka/consumer_test.go | 50 +++++ 3 files changed, 514 insertions(+), 1 deletion(-) create mode 100644 xkafka/batch_test.go diff --git a/xkafka/batch_consumer_test.go b/xkafka/batch_consumer_test.go index fe683e4..11339b7 100644 --- a/xkafka/batch_consumer_test.go +++ b/xkafka/batch_consumer_test.go @@ -34,6 +34,7 @@ func TestNewBatchConsumer(t *testing.T) { ErrorHandler(NoopErrorHandler), BatchSize(10), BatchTimeout(testTimeout), + ManualCommit(true), ) assert.NoError(t, err) assert.NotNil(t, consumer) @@ -53,13 +54,57 @@ func TestNewBatchConsumer(t *testing.T) { "bootstrap.servers": "localhost:9092", "group.id": "test-batch-consumer", "auto.offset.reset": "earliest", - "enable.auto.commit": true, + "enable.auto.commit": false, "enable.auto.offset.store": false, } assert.EqualValues(t, expectedConfig, consumer.config.configMap) } +func TestNewBatchConsumer_ConfigValidation(t *testing.T) { + t.Parallel() + + testcases := []struct { + name string + options []ConsumerOption + expect error + }{ + { + name: "missing brokers", + options: []ConsumerOption{testTopics, errHandler}, + expect: ErrRequiredOption, + }, + { + name: "missing topics", + options: []ConsumerOption{testBrokers, errHandler}, + expect: ErrRequiredOption, + }, + { + name: "missing error handler", + options: []ConsumerOption{testBrokers, testTopics}, + expect: ErrRequiredOption, + }, + { + name: "consumer error", + options: []ConsumerOption{ + testTopics, testBrokers, errHandler, + consumerFunc(func(configMap *kafka.ConfigMap) (consumerClient, error) { + return nil, assert.AnError + }), + }, + expect: assert.AnError, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + _, err := NewBatchConsumer("test-batch-consumer", noopBatchHandler(), tc.options...) 
+ assert.Error(t, err) + assert.ErrorIs(t, err, tc.expect) + }) + } +} + func TestBatchConsumer_Lifecycle(t *testing.T) { t.Parallel() @@ -75,6 +120,28 @@ func TestBatchConsumer_Lifecycle(t *testing.T) { mockKafka.AssertExpectations(t) }) + t.Run("RunUnsubscribeError", func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("ReadMessage", testTimeout).Return(newFakeKafkaMessage(), nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("Close").Return(nil) + + mockKafka.On("Unsubscribe").Return(assert.AnError) + + ctx, cancel := context.WithCancel(context.Background()) + + go func() { + <-time.After(100 * time.Millisecond) + cancel() + }() + + assert.Error(t, consumer.Run(ctx)) + + mockKafka.AssertExpectations(t) + }) + t.Run("RunCloseError", func(t *testing.T) { consumer, mockKafka := newTestBatchConsumer(t, defaultOpts...) @@ -242,6 +309,116 @@ func TestBatchConsumer_Async(t *testing.T) { mockKafka.AssertExpectations(t) } +func TestBatchConsumer_StopOffsetOnError(t *testing.T) { + t.Parallel() + + opts := append(defaultOpts, + Concurrency(2), + BatchSize(3), + ) + consumer, mockKafka := newTestBatchConsumer(t, opts...) + + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + count := atomic.Int32{} + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + assert.NotNil(t, b) + + n := count.Add(1) + + if n > 2 { + err := assert.AnError + cancel() + + return b.AckFail(err) + } + + b.AckSuccess() + + return nil + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + mockKafka.On("StoreOffsets", mock.Anything). + Return(nil, nil). + Times(2) + + consumer.handler = handler + err := consumer.Run(ctx) + assert.ErrorIs(t, err, assert.AnError) + + mockKafka.AssertExpectations(t) +} + +func TestBatchConsumer_BatchTimeout(t *testing.T) { + t.Parallel() + + testcases := []struct { + name string + options []ConsumerOption + }{ + { + name: "sequential", + options: []ConsumerOption{ + BatchTimeout(10 * time.Millisecond), + BatchSize(100_000), + }, + }, + { + name: "async", + options: []ConsumerOption{ + Concurrency(2), + BatchTimeout(10 * time.Millisecond), + BatchSize(100_000), + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, append(defaultOpts, tc.options...)...) 
+ + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + b.AckSuccess() + + assert.NotNil(t, b) + assert.True(t, len(b.Messages) > 0) + + return nil + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("StoreOffsets", mock.Anything).Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + consumer.handler = handler + + go func() { + <-time.After(500 * time.Millisecond) + cancel() + }() + + err := consumer.Run(ctx) + assert.NoError(t, err) + + mockKafka.AssertExpectations(t) + }) + } +} + func TestBatchConsumer_MiddlewareExecutionOrder(t *testing.T) { t.Parallel() @@ -317,6 +494,172 @@ func TestBatchConsumer_ManualCommit(t *testing.T) { mockKafka.AssertExpectations(t) } +func TestBatchConsumer_ReadMessageTimeout(t *testing.T) { + t.Parallel() + + testcases := []struct { + name string + options []ConsumerOption + }{ + { + name: "sequential", + options: []ConsumerOption{ + BatchSize(2), + }, + }, + { + name: "async", + options: []ConsumerOption{ + Concurrency(2), + BatchSize(2), + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, append(defaultOpts, tc.options...)...) + + ctx, cancel := context.WithCancel(context.Background()) + expect := kafka.NewError(kafka.ErrTimedOut, "kafka: timed out", false) + km := newFakeKafkaMessage() + + counter := atomic.Int32{} + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + n := counter.Add(1) + + if n == 1 { + cancel() + } + + return nil + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil).Once() + mockKafka.On("ReadMessage", testTimeout).Return(nil, expect).Once() + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + consumer.handler = handler + + err := consumer.Run(ctx) + assert.NoError(t, err) + + mockKafka.AssertExpectations(t) + }) + } +} + +func TestBatchConsumer_KafkaError(t *testing.T) { + t.Parallel() + + testcases := []struct { + name string + options []ConsumerOption + }{ + { + name: "sequential", + options: []ConsumerOption{ + BatchSize(2), + }, + }, + { + name: "async", + options: []ConsumerOption{ + Concurrency(2), + BatchSize(2), + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, append(defaultOpts, tc.options...)...) + + ctx := context.Background() + expect := kafka.NewError(kafka.ErrUnknown, "kafka: unknown error", false) + km := newFakeKafkaMessage() + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("Close").Return(nil) + + mockKafka.On("ReadMessage", testTimeout). + Return(km, nil). + Times(3) + mockKafka.On("ReadMessage", testTimeout). + Return(nil, expect). 
+ Once() + + err := consumer.Run(ctx) + assert.Error(t, err) + assert.ErrorIs(t, err, expect) + + mockKafka.AssertExpectations(t) + }) + } +} + +func TestBatchConsumer_CommitError(t *testing.T) { + t.Parallel() + + testcases := []struct { + name string + options []ConsumerOption + }{ + { + name: "sequential", + options: []ConsumerOption{ + ManualCommit(true), + }, + }, + { + name: "async", + options: []ConsumerOption{ + ManualCommit(true), + Concurrency(2), + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + consumer, mockKafka := newTestBatchConsumer(t, append(defaultOpts, tc.options...)...) + + km := newFakeKafkaMessage() + ctx := context.Background() + expect := errors.New("error in commit") + + handler := BatchHandlerFunc(func(ctx context.Context, b *Batch) error { + b.AckSuccess() + + return nil + }) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("StoreOffsets", mock.Anything).Return(nil, nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Close").Return(nil) + + mockKafka.On("Commit").Return(nil, expect) + + consumer.handler = handler + + err := consumer.Run(ctx) + assert.Error(t, err) + assert.ErrorIs(t, err, expect) + + mockKafka.AssertExpectations(t) + }) + } +} + func noopBatchHandler() BatchHandler { return BatchHandlerFunc(func(ctx context.Context, b *Batch) error { return nil diff --git a/xkafka/batch_test.go b/xkafka/batch_test.go new file mode 100644 index 0000000..e00952b --- /dev/null +++ b/xkafka/batch_test.go @@ -0,0 +1,120 @@ +package xkafka + +import ( + "errors" + "testing" + + "github.com/confluentinc/confluent-kafka-go/v2/kafka" + "github.com/stretchr/testify/assert" +) + +func TestNewBatch(t *testing.T) { + batch := NewBatch() + + assert.NotEmpty(t, batch.ID) + assert.Empty(t, batch.Messages) + assert.Zero(t, batch.Status) +} + +func TestBatch_AckSuccess(t *testing.T) { + batch := NewBatch() + batch.AckSuccess() + + assert.Equal(t, Success, batch.Status) +} + +func TestBatch_AckFail(t *testing.T) { + batch := NewBatch() + testErr := errors.New("test error") + + err := batch.AckFail(testErr) + + assert.Equal(t, Fail, batch.Status) + assert.Equal(t, testErr, batch.Err()) + assert.Equal(t, testErr, err) +} + +func TestBatch_AckSkip(t *testing.T) { + batch := NewBatch() + batch.AckSkip() + + assert.Equal(t, Skip, batch.Status) +} + +func TestBatch_OffsetMethods(t *testing.T) { + tests := []struct { + name string + messages []*Message + wantMaxOffset int64 + wantGroupOffset []kafka.TopicPartition + }{ + { + name: "empty batch", + messages: []*Message{}, + wantMaxOffset: 0, + wantGroupOffset: []kafka.TopicPartition{}, + }, + { + name: "single topic-partition", + messages: []*Message{ + {Topic: "topic1", Partition: 0, Offset: 5}, + {Topic: "topic1", Partition: 0, Offset: 10}, + }, + wantMaxOffset: 10, + wantGroupOffset: []kafka.TopicPartition{ + { + Topic: strPtr("topic1"), + Partition: 0, + Offset: kafka.Offset(11), + }, + }, + }, + { + name: "multiple topic-partitions", + messages: []*Message{ + {Topic: "topic1", Partition: 0, Offset: 5}, + {Topic: "topic1", Partition: 1, Offset: 10}, + {Topic: "topic2", Partition: 0, Offset: 15}, + }, + wantMaxOffset: 15, + wantGroupOffset: []kafka.TopicPartition{ + { + Topic: strPtr("topic1"), + Partition: 0, + Offset: kafka.Offset(6), + }, + { + Topic: strPtr("topic1"), + Partition: 1, + Offset: kafka.Offset(11), + }, + { + Topic: strPtr("topic2"), + Partition: 0, + Offset: 
kafka.Offset(16), + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + batch := NewBatch() + batch.Messages = tt.messages + + t.Run("MaxOffset", func(t *testing.T) { + gotMaxOffset := batch.MaxOffset() + assert.Equal(t, tt.wantMaxOffset, gotMaxOffset) + }) + + t.Run("GroupMaxOffset", func(t *testing.T) { + gotGroupOffset := batch.GroupMaxOffset() + assert.ElementsMatch(t, tt.wantGroupOffset, gotGroupOffset) + }) + }) + } +} + +func strPtr(s string) *string { + return &s +} diff --git a/xkafka/consumer_test.go b/xkafka/consumer_test.go index fc68df7..0967fc1 100644 --- a/xkafka/consumer_test.go +++ b/xkafka/consumer_test.go @@ -574,6 +574,56 @@ func TestConsumerAsync(t *testing.T) { mockKafka.AssertExpectations(t) } +func TestConsumerAsync_StopOffsetOnError(t *testing.T) { + t.Parallel() + + consumer, mockKafka := newTestConsumer(t, + append(defaultOpts, Concurrency(2))...) + + km := newFakeKafkaMessage() + ctx, cancel := context.WithCancel(context.Background()) + + mockKafka.On("SubscribeTopics", []string(testTopics), mock.Anything).Return(nil) + mockKafka.On("Unsubscribe").Return(nil) + mockKafka.On("ReadMessage", testTimeout).Return(km, nil) + mockKafka.On("Commit").Return(nil, nil) + mockKafka.On("Close").Return(nil) + + mockKafka.On("StoreOffsets", mock.Anything). + Return(nil, nil). + Times(2) + + var recv []*Message + var mu sync.Mutex + + handler := HandlerFunc(func(ctx context.Context, msg *Message) error { + mu.Lock() + defer mu.Unlock() + + recv = append(recv, msg) + + if len(recv) > 2 { + err := assert.AnError + msg.AckFail(err) + + cancel() + + return err + } + + msg.AckSuccess() + + return nil + }) + + consumer.handler = handler + + err := consumer.Run(ctx) + assert.NoError(t, err) + + mockKafka.AssertExpectations(t) +} + func TestConsumerStoreOffsetsError(t *testing.T) { t.Parallel()