From a46d1bd872bdfd0f85fc0525f26c95370de1d943 Mon Sep 17 00:00:00 2001 From: Lucas Kent Date: Wed, 6 Nov 2024 18:44:44 +1100 Subject: [PATCH] Set a magic config which makes our kafka integration tests complete in half the time (#1800) --- shotover-proxy/benches/windsock/kafka/bench.rs | 9 +++++++++ .../docker-compose-short-idle-timeout.yaml | 7 +++++++ .../kafka/cluster-1-rack/docker-compose.yaml | 7 +++++++ .../kafka/cluster-2-racks/docker-compose.yaml | 7 +++++++ .../kafka/cluster-3-racks/docker-compose.yaml | 9 ++++++++- .../test-configs/kafka/cluster-mtls/docker-compose.yaml | 7 +++++++ .../kafka/cluster-sasl-plain/docker-compose.yaml | 7 +++++++ .../cluster-sasl-scram-over-mtls/docker-compose.yaml | 7 +++++++ .../kafka/cluster-sasl-scram/docker-compose.yaml | 7 +++++++ .../test-configs/kafka/cluster-tls/docker-compose.yaml | 7 +++++++ 10 files changed, 73 insertions(+), 1 deletion(-) diff --git a/shotover-proxy/benches/windsock/kafka/bench.rs b/shotover-proxy/benches/windsock/kafka/bench.rs index 920f1f3e6..0c64814c2 100644 --- a/shotover-proxy/benches/windsock/kafka/bench.rs +++ b/shotover-proxy/benches/windsock/kafka/bench.rs @@ -171,6 +171,15 @@ impl KafkaBench { "KAFKA_CFG_PROCESS_ROLES".to_owned(), "controller,broker".to_owned(), ), + // This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + // new consumer group by avoiding constant rebalances as each initial consumer joins. + // See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + + // However for this benchmark we already discard the initial results as a warmup stage, so better to just have the benchmark startup faster. + ( + "KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS".to_owned(), + "0".to_owned(), + ), ( "KAFKA_HEAP_OPTS".to_owned(), "-Xmx4096M -Xms4096M".to_owned(), diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose-short-idle-timeout.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose-short-idle-timeout.yaml index 7701306be..dd37eaeb9 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose-short-idle-timeout.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose-short-idle-timeout.yaml @@ -31,6 +31,13 @@ services: # connections.max.idle.ms is set to 20s for testing shotovers handling of idle connection timeouts KAFKA_CFG_CONNECTIONS_MAX_IDLE_MS: 20000 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose.yaml index bc3eac691..7c9bd5940 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-1-rack/docker-compose.yaml @@ -28,6 +28,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml index e1a5b226c..91eec63b2 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-2-racks/docker-compose.yaml @@ -29,6 +29,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-3-racks/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-3-racks/docker-compose.yaml index 12fc2886a..e19e9ee7a 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-3-racks/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-3-racks/docker-compose.yaml @@ -29,6 +29,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka @@ -53,4 +60,4 @@ services: KAFKA_CFG_ADVERTISED_LISTENERS: "BROKER://172.16.1.4:9092" KAFKA_CFG_NODE_ID: 2 KAFKA_CFG_BROKER_RACK: "rack3" - volumes: *volumes \ No newline at end of file + volumes: *volumes diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-mtls/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-mtls/docker-compose.yaml index dea55ff05..0a92c0d46 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-mtls/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-mtls/docker-compose.yaml @@ -31,6 +31,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl-plain/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl-plain/docker-compose.yaml index 128d42886..24fce30c1 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl-plain/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl-plain/docker-compose.yaml @@ -34,6 +34,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram-over-mtls/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram-over-mtls/docker-compose.yaml index 5bab7e723..3224801de 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram-over-mtls/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram-over-mtls/docker-compose.yaml @@ -42,6 +42,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram/docker-compose.yaml index 2881064e2..b2ffe5fb3 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-sasl-scram/docker-compose.yaml @@ -34,6 +34,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka diff --git a/shotover-proxy/tests/test-configs/kafka/cluster-tls/docker-compose.yaml b/shotover-proxy/tests/test-configs/kafka/cluster-tls/docker-compose.yaml index 7c2724660..fc283add6 100644 --- a/shotover-proxy/tests/test-configs/kafka/cluster-tls/docker-compose.yaml +++ b/shotover-proxy/tests/test-configs/kafka/cluster-tls/docker-compose.yaml @@ -29,6 +29,13 @@ services: KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3 KAFKA_CFG_TRANSACTION_STATE_LOG_MIN_ISR: 2 + + # This cfg is set to 3000 by default, which for a typical workload reduces the overhead of creating a + # new consumer group by avoiding constant rebalances as each initial consumer joins. + # See: https://cwiki.apache.org/confluence/display/KAFKA/KIP-134%3A+Delay+initial+consumer+group+rebalance + # + # However for an integration test workload we are constantly spinning up single consumer groups, so the default value makes the tests take twice as long to run. + KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: "0" volumes: &volumes - type: tmpfs target: /bitnami/kafka