From 45494ffcf98bde8577d900144ab1fa2997fb33ed Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 7 Nov 2017 20:12:06 +0100 Subject: [PATCH 01/12] Adds test that goes unready when there's not enough replicas of >0 topics --- ops/test/replicated-partitions.yml | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 ops/test/replicated-partitions.yml diff --git a/ops/test/replicated-partitions.yml b/ops/test/replicated-partitions.yml new file mode 100644 index 00000000..a2e46ba9 --- /dev/null +++ b/ops/test/replicated-partitions.yml @@ -0,0 +1,46 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: replicated-partitions + namespace: test-kafka +spec: + replicas: 1 + selector: + matchLabels: + test-type: readiness + test-target: under-replicated-partitions + template: + metadata: + labels: + test-type: readiness + test-target: under-replicated-partitions + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + ports: + - containerPort: 80 + command: + - tail + - -f + - /dev/null + readinessProbe: + exec: + command: + - /bin/bash + - -c + - > + [ + $( + ./bin/kafka-topics.sh + --zookeeper zookeeper.kafka:2181 + --describe + --under-replicated-partitions + | + wc -l + ) + -eq + 0 + ] + periodSeconds: 30 + timeoutSeconds: 29 From bd2def567b05f7fe50fe9b10be981e3c5442cb0c Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 7 Nov 2017 20:21:45 +0100 Subject: [PATCH 02/12] Adds job to simplify replica election --- ops/preferred-replica-election-job.yml | 19 +++++++++++++++++++ ops/test/replicated-partitions.yml | 6 ++---- 2 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 ops/preferred-replica-election-job.yml diff --git a/ops/preferred-replica-election-job.yml b/ops/preferred-replica-election-job.yml new file mode 100644 index 00000000..ac4f13ad --- /dev/null +++ b/ops/preferred-replica-election-job.yml @@ -0,0 +1,19 @@ 
+apiVersion: batch/v1 +kind: Job +metadata: + name: preferred-replica-election + namespace: kafka +spec: + template: + metadata: + name: preferred-replica-election + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + command: + - ./bin/kafka-preferred-replica-election.sh + - --zookeeper + - zookeeper:2181 + restartPolicy: Never + backoffLimit: 3 diff --git a/ops/test/replicated-partitions.yml b/ops/test/replicated-partitions.yml index a2e46ba9..50210696 100644 --- a/ops/test/replicated-partitions.yml +++ b/ops/test/replicated-partitions.yml @@ -18,10 +18,8 @@ spec: containers: - name: kafka image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d - ports: - - containerPort: 80 command: - - tail + - tail - -f - /dev/null readinessProbe: @@ -40,7 +38,7 @@ spec: wc -l ) -eq - 0 + 0 ] periodSeconds: 30 timeoutSeconds: 29 From e83aaa3dbd86802154dc9bdeb90e11ed2abf3216 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Tue, 7 Nov 2017 20:29:52 +0100 Subject: [PATCH 03/12] Fixes container log to show unreplicated partitions (if there are any) --- {ops => maintenance}/test/replicated-partitions.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) rename {ops => maintenance}/test/replicated-partitions.yml (80%) diff --git a/ops/test/replicated-partitions.yml b/maintenance/test/replicated-partitions.yml similarity index 80% rename from ops/test/replicated-partitions.yml rename to maintenance/test/replicated-partitions.yml index 50210696..a1f81586 100644 --- a/ops/test/replicated-partitions.yml +++ b/maintenance/test/replicated-partitions.yml @@ -19,15 +19,19 @@ spec: - name: kafka image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d command: - - tail - - -f - - /dev/null + - /bin/bash + - -ec + - > + touch /tmp/testlog; + tail -f /tmp/testlog readinessProbe: exec: command: - /bin/bash 
- -c - > + echo "### $(date -Ins -u) ###" >> /tmp/testlog + && [ $( ./bin/kafka-topics.sh @@ -35,6 +39,8 @@ spec: --describe --under-replicated-partitions | + tee -a /tmp/testlog + | wc -l ) -eq From 74284033cea7c33536af20a5877b55650a6a3ff0 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 10 Nov 2017 08:44:22 +0100 Subject: [PATCH 04/12] But note that this job won't actually trigger replication ... of under-replicated topics if a broker is down --- {ops => maintenance}/preferred-replica-election-job.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {ops => maintenance}/preferred-replica-election-job.yml (100%) diff --git a/ops/preferred-replica-election-job.yml b/maintenance/preferred-replica-election-job.yml similarity index 100% rename from ops/preferred-replica-election-job.yml rename to maintenance/preferred-replica-election-job.yml From d1377a55de27f2f7fbc2a0b445d50f3967d4b18a Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 10 Nov 2017 09:37:47 +0100 Subject: [PATCH 05/12] Adds a job that can actually fix under-replication ... as long as you have one up-to-date replica. The use case is broker outages that are longer than mere re-scheduling, such as zone outage in a multi-zone cluster. 
--- maintenance/reassign-paritions-job.yml | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 maintenance/reassign-paritions-job.yml diff --git a/maintenance/reassign-paritions-job.yml b/maintenance/reassign-paritions-job.yml new file mode 100644 index 00000000..e9e184e7 --- /dev/null +++ b/maintenance/reassign-paritions-job.yml @@ -0,0 +1,51 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: reassign-partitions + namespace: kafka +spec: + template: + metadata: + name: reassign-partitions + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + env: + - name: ZOOKEEPER + value: zookeeper.kafka:2181 + # the following must be edited per job + - name: TOPICS + value: test-produce-consume,test-kafkacat + - name: BROKERS + value: 0,2 + command: + - /bin/bash + - -ce + - > + echo '{"topics":[' > /tmp/reassign-topics.json; + echo -n ' {"topic":"' >> /tmp/reassign-topics.json; + echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json; + echo '"}' >> /tmp/reassign-topics.json; + echo ']}' >> /tmp/reassign-topics.json; + + echo "# reassign-topics.json"; + cat /tmp/reassign-topics.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --generate + --topics-to-move-json-file=/tmp/reassign-topics.json + --broker-list=$BROKERS > /tmp/generated.txt; + + tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json; + + echo "# proposed-reassignment.json"; + cat /tmp/proposed-reassignment.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --execute + --reassignment-json-file=/tmp/proposed-reassignment.json; + restartPolicy: Never + backoffLimit: 3 From e6a7aec8bd4db6cb75617e05dd451f77f9a0cd54 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 10 Nov 2017 09:50:44 +0100 Subject: [PATCH 06/12] The first two tasks under Partition Management in the Kafka book --- maintenance/README.md | 22 
++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 maintenance/README.md diff --git a/maintenance/README.md b/maintenance/README.md new file mode 100644 index 00000000..efe1d81f --- /dev/null +++ b/maintenance/README.md @@ -0,0 +1,22 @@ + +## Re-assign Leadership + +This is one of the cases where this repo begs to differ from traditional Kafka setups. +In Kubernetes the restart of a pod, and subsequent start on a different node, should be a non-event. + +> ”when a broker is stopped and restarted, it does not resume leadership of any partitions automatically” + +_-- Neha Narkhede, Gwen Shapira, and Todd Palino. ”Kafka: The Definitive Guide”_ + +Create the `preferred-replica-election-job.yml` resource, after deleting any previous one. + +## Change a Partition's Replicas + +> ”From time to time, it may be necessary to change the replica assignments for a partition. Some examples of when this might be needed are: +> * If a topic’s partitions are not balanced across the cluster, causing uneven load on brokers +> * If a broker is taken offline and the partition is under-replicated +> * If a new broker is added and needs to receive a share of the cluster load” + +_-- Neha Narkhede, Gwen Shapira, and Todd Palino. ”Kafka: The Definitive Guide”_ + +Use the `reassign-paritions-job.yml`, after editing `TOPICS` and `BROKERS`. 
From 470c1cecfc29f0e27f1c865341e77fed027b8bdb Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 14:11:38 +0100 Subject: [PATCH 07/12] Starts from the reassign-partitions job --- maintenance/increase-replication-factor.yml | 51 +++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 maintenance/increase-replication-factor.yml diff --git a/maintenance/increase-replication-factor.yml b/maintenance/increase-replication-factor.yml new file mode 100644 index 00000000..e9e184e7 --- /dev/null +++ b/maintenance/increase-replication-factor.yml @@ -0,0 +1,51 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: reassign-partitions + namespace: kafka +spec: + template: + metadata: + name: reassign-partitions + spec: + containers: + - name: kafka + image: solsson/kafka:1.0.0@sha256:17fdf1637426f45c93c65826670542e36b9f3394ede1cb61885c6a4befa8f72d + env: + - name: ZOOKEEPER + value: zookeeper.kafka:2181 + # the following must be edited per job + - name: TOPICS + value: test-produce-consume,test-kafkacat + - name: BROKERS + value: 0,2 + command: + - /bin/bash + - -ce + - > + echo '{"topics":[' > /tmp/reassign-topics.json; + echo -n ' {"topic":"' >> /tmp/reassign-topics.json; + echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json; + echo '"}' >> /tmp/reassign-topics.json; + echo ']}' >> /tmp/reassign-topics.json; + + echo "# reassign-topics.json"; + cat /tmp/reassign-topics.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --generate + --topics-to-move-json-file=/tmp/reassign-topics.json + --broker-list=$BROKERS > /tmp/generated.txt; + + tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json; + + echo "# proposed-reassignment.json"; + cat /tmp/proposed-reassignment.json; + + ./bin/kafka-reassign-partitions.sh + --zookeeper=$ZOOKEEPER + --execute + --reassignment-json-file=/tmp/proposed-reassignment.json; + restartPolicy: Never + backoffLimit: 3 From f0d06583d51b144e87f06eb425b763d1f202bc25 Mon 
Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 18:16:00 +0100 Subject: [PATCH 08/12] Adds the necessary json mod to require more replicas, which seems to be equivalent to setting replication.factor 3 at topic create. --- ...ml => replication-factor-increase-job.yml} | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) rename maintenance/{increase-replication-factor.yml => replication-factor-increase-job.yml} (67%) diff --git a/maintenance/increase-replication-factor.yml b/maintenance/replication-factor-increase-job.yml similarity index 67% rename from maintenance/increase-replication-factor.yml rename to maintenance/replication-factor-increase-job.yml index e9e184e7..19147d0d 100644 --- a/maintenance/increase-replication-factor.yml +++ b/maintenance/replication-factor-increase-job.yml @@ -1,12 +1,12 @@ apiVersion: batch/v1 kind: Job metadata: - name: reassign-partitions + name: replication-factor-increase namespace: kafka spec: template: metadata: - name: reassign-partitions + name: replication-factor-increase spec: containers: - name: kafka @@ -16,13 +16,18 @@ spec: value: zookeeper.kafka:2181 # the following must be edited per job - name: TOPICS - value: test-produce-consume,test-kafkacat + value: "" - name: BROKERS - value: 0,2 + value: 0,1,2 command: - /bin/bash - -ce - > + if [ -z "$TOPICS" ]; then + echo "Please set the TOPICS env (comma-separated) and re-create the job" + tail -f /dev/null + fi + echo '{"topics":[' > /tmp/reassign-topics.json; echo -n ' {"topic":"' >> /tmp/reassign-topics.json; echo -n $TOPICS | sed 's/,/"},\n {"topic":"/g' >> /tmp/reassign-topics.json; @@ -32,7 +37,7 @@ spec: echo "# reassign-topics.json"; cat /tmp/reassign-topics.json; - ./bin/kafka-reassign-partitions.sh + ./bin/kafka-reassign-partitions.sh --zookeeper=$ZOOKEEPER --generate --topics-to-move-json-file=/tmp/reassign-topics.json @@ -43,9 +48,16 @@ spec: echo "# proposed-reassignment.json"; cat /tmp/proposed-reassignment.json; + sed -i 
's/"replicas":\[.\]/"replicas":[0,1,2]/g' /tmp/proposed-reassignment.json; + sed -i 's/,"log_dirs":\["any"\]//g' /tmp/proposed-reassignment.json; + echo "# proposed-reassignment.json"; + cat /tmp/proposed-reassignment.json; + ./bin/kafka-reassign-partitions.sh --zookeeper=$ZOOKEEPER --execute --reassignment-json-file=/tmp/proposed-reassignment.json; + + echo "# Reassignment exited. Upon success you may want to run preferred-replica-election." restartPolicy: Never backoffLimit: 3 From 68260c8c337672bb8bef2df69762527262e8d48f Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 18:16:54 +0100 Subject: [PATCH 09/12] Got weird overlapping output --- maintenance/replication-factor-increase-job.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/maintenance/replication-factor-increase-job.yml b/maintenance/replication-factor-increase-job.yml index 19147d0d..0574ae5b 100644 --- a/maintenance/replication-factor-increase-job.yml +++ b/maintenance/replication-factor-increase-job.yml @@ -45,6 +45,7 @@ spec: tail -n 1 /tmp/generated.txt > /tmp/proposed-reassignment.json; + sleep 1; echo "# proposed-reassignment.json"; cat /tmp/proposed-reassignment.json; From 31913591989c4f824307578964b815803e475493 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 18:23:25 +0100 Subject: [PATCH 10/12] Clarifies output a bit more --- maintenance/replication-factor-increase-job.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maintenance/replication-factor-increase-job.yml b/maintenance/replication-factor-increase-job.yml index 0574ae5b..de359870 100644 --- a/maintenance/replication-factor-increase-job.yml +++ b/maintenance/replication-factor-increase-job.yml @@ -51,9 +51,10 @@ spec: sed -i 's/"replicas":\[.\]/"replicas":[0,1,2]/g' /tmp/proposed-reassignment.json; sed -i 's/,"log_dirs":\["any"\]//g' /tmp/proposed-reassignment.json; - echo "# proposed-reassignment.json"; + echo "# proposed-reassignment.json modified to affect replication 
factor"; cat /tmp/proposed-reassignment.json; + echo "# Triggering kafka-reassign-partitions.sh"; ./bin/kafka-reassign-partitions.sh --zookeeper=$ZOOKEEPER --execute From 4257128ef49dd19696944a6691ac52d4487bcf43 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 18:30:26 +0100 Subject: [PATCH 11/12] Explains why this operation is here --- maintenance/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/maintenance/README.md b/maintenance/README.md index efe1d81f..45684fd4 100644 --- a/maintenance/README.md +++ b/maintenance/README.md @@ -20,3 +20,13 @@ Create the `preferred-replica-election-job.yml` resource, after deleting any pre _-- Neha Narkhede, Gwen Shapira, and Todd Palino. ”Kafka: The Definitive Guide”_ Use the `reassign-paritions-job.yml`, after editing `TOPICS` and `BROKERS`. + +## Increase a topic's replication factor + +See https://github.com/Yolean/kubernetes-kafka/pull/140 + +Use the `replication-factor-increase-job.yml`, after editing `TOPICS` and `BROKERS`. + +The affected topics may end up without a preferred replica. See above to fix that, +or to affect only your selected topics use [Kafka Manager's](https://github.com/Yolean/kubernetes-kafka/pull/83) topic screen, +Generate Partition Assignments followed by Reassigned Partitions. From 43c896bc65225ebd1184967e82045b172dc35497 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Fri, 2 Feb 2018 18:33:13 +0100 Subject: [PATCH 12/12] Text fix --- maintenance/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maintenance/README.md b/maintenance/README.md index 45684fd4..5830d79a 100644 --- a/maintenance/README.md +++ b/maintenance/README.md @@ -28,5 +28,5 @@ See https://github.com/Yolean/kubernetes-kafka/pull/140 Use the `replication-factor-increase-job.yml`, after editing `TOPICS` and `BROKERS`. The affected topics may end up without a preferred replica. 
See above to fix that, -or to affect only your selected topics use [Kafka Manager's](https://github.com/Yolean/kubernetes-kafka/pull/83) topic screen, -Generate Partition Assignments followed by Reassigned Partitions. +or to affect only your selected topics use [Kafka Manager](https://github.com/Yolean/kubernetes-kafka/pull/83)'s topic screen, +Generate Partition Assignments followed by Reassign Partitions.