From 4351e7c1779190cc264a49947ebc346d6ce3729c Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sun, 25 Jun 2017 21:16:51 +0200 Subject: [PATCH 1/8] Uses a named storage class so you can select volume type specifically for zoo --- bootstrap/storageclass-kafka-zookeeper-gke.yml | 7 +++++++ zookeeper/50zoo.yml | 14 +++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 bootstrap/storageclass-kafka-zookeeper-gke.yml diff --git a/bootstrap/storageclass-kafka-zookeeper-gke.yml b/bootstrap/storageclass-kafka-zookeeper-gke.yml new file mode 100644 index 00000000..44891bac --- /dev/null +++ b/bootstrap/storageclass-kafka-zookeeper-gke.yml @@ -0,0 +1,7 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: kafka-zookeeper +provisioner: kubernetes.io/gce-pd +parameters: + type: pd-ssd diff --git a/zookeeper/50zoo.yml b/zookeeper/50zoo.yml index 27e34388..9251ce10 100644 --- a/zookeeper/50zoo.yml +++ b/zookeeper/50zoo.yml @@ -35,11 +35,19 @@ spec: volumeMounts: - name: config mountPath: /usr/local/kafka/config - - name: datadir + - name: data mountPath: /var/lib/zookeeper/data volumes: - name: config configMap: name: zookeeper-config - - name: datadir - emptyDir: {} + volumeClaimTemplates: + - metadata: + name: data + annotations: + volume.beta.kubernetes.io/storage-class: kafka-zookeeper + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi From 9479e819475f128e7fb1d82c5df0abae6c9cfa76 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sun, 25 Jun 2017 21:31:24 +0200 Subject: [PATCH 2/8] Verified the volume setup with Minikube --- bootstrap/storageclass-kafka-zookeeper-minikube.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 bootstrap/storageclass-kafka-zookeeper-minikube.yml diff --git a/bootstrap/storageclass-kafka-zookeeper-minikube.yml b/bootstrap/storageclass-kafka-zookeeper-minikube.yml new file mode 100644 index 00000000..ba89eb46 --- /dev/null +++ b/bootstrap/storageclass-kafka-zookeeper-minikube.yml @@ -0,0 +1,5 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: kafka-zookeeper +provisioner: k8s.io/minikube-hostpath From a8c8a39713cbe5ae6199f0733454ae05cfd4eb20 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sun, 25 Jun 2017 21:40:12 +0200 Subject: [PATCH 3/8] Updates the readme --- README.md | 15 ++++++++------- .../storageclass-zookeeper-gke.yml | 0 .../storageclass-zookeeper-minikube.yml | 0 3 files changed, 8 insertions(+), 7 deletions(-) rename bootstrap/storageclass-kafka-zookeeper-gke.yml => configure-gke/storageclass-zookeeper-gke.yml (100%) rename bootstrap/storageclass-kafka-zookeeper-minikube.yml => configure-minikube/storageclass-zookeeper-minikube.yml (100%) diff --git a/README.md b/README.md index 5b9dbad2..cb401d10 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,12 @@ To get consistent service DNS names `kafka-N.broker.kafka`(`.svc.cluster.local`) kubectl create -f 00namespace.yml ``` +## Prepare your cluster + +For Minikube run `kubectl create -f configure-minikube/`. + +There's a similar setup for gke, in `configure-gke` of course. You might want to tweak it before creating. + ## Set up volume claims You may add [storage class](http://kubernetes.io/docs/user-guide/persistent-volumes/#storageclasses) @@ -24,18 +30,13 @@ kubectl -n kafka get pvc ## Set up Zookeeper -The Kafka book (Definitive Guide, O'Reilly 2016) recommends that Kafka has its own Zookeeper cluster with at least 5 instances, -so we use the [official docker image](https://hub.docker.com/_/zookeeper/) -but with a [startup script change to guess node id from hostname](https://github.com/solsson/zookeeper-docker/commit/df9474f858ad548be8a365cb000a4dd2d2e3a217). +The Kafka book (Definitive Guide, O'Reilly 2016) recommends that Kafka has its own Zookeeper cluster with at least 5 instances. +We use the zookeeper build that comes with the Kafka distribution, and tweak the startup command to support StatefulSet. ``` kubectl create -f ./zookeeper/ ``` -Despite being a StatefulSet, there is no persistent volume by default. -If you lose your zookeeper cluster, kafka will be unaware that persisted topics exist. -The data is still there, but you need to re-create topics. - ## Start Kafka Assuming you have your PVCs `Bound`, or enabled automatic provisioning (see above), go ahead and: diff --git a/bootstrap/storageclass-kafka-zookeeper-gke.yml b/configure-gke/storageclass-zookeeper-gke.yml similarity index 100% rename from bootstrap/storageclass-kafka-zookeeper-gke.yml rename to configure-gke/storageclass-zookeeper-gke.yml diff --git a/bootstrap/storageclass-kafka-zookeeper-minikube.yml b/configure-minikube/storageclass-zookeeper-minikube.yml similarity index 100% rename from bootstrap/storageclass-kafka-zookeeper-minikube.yml rename to configure-minikube/storageclass-zookeeper-minikube.yml From 26173af8577d3c11904196a04ca0b158f339bd64 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Sun, 25 Jun 2017 21:47:43 +0200 Subject: [PATCH 4/8] Enables metrics export to Prometheus, but they look very uninteresting. The selected config is from the jmx_exporter examples. --- README.md | 2 ++ zookeeper/50zoo.yml | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/README.md b/README.md index cb401d10..8b469cd5 100644 --- a/README.md +++ b/README.md @@ -86,4 +86,6 @@ Is the metrics system up and running? ``` kubectl logs -c metrics kafka-0 kubectl exec -c broker kafka-0 -- /bin/sh -c 'apk add --no-cache curl && curl http://localhost:5556/metrics' +kubectl logs -c metrics zoo-0 +kubectl exec -c zookeeper zoo-0 -- /bin/sh -c 'apk add --no-cache curl && curl http://localhost:5556/metrics' ``` diff --git a/zookeeper/50zoo.yml b/zookeeper/50zoo.yml index 9251ce10..12d9c441 100644 --- a/zookeeper/50zoo.yml +++ b/zookeeper/50zoo.yml @@ -10,11 +10,27 @@ spec: metadata: labels: app: zookeeper + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" spec: terminationGracePeriodSeconds: 10 containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + command: + - "java" + - "-jar" + - "jmx_prometheus_httpserver.jar" + - "5556" + - example_configs/zookeeper.yaml + ports: + - containerPort: 5556 - name: zookeeper image: solsson/kafka:0.11.0.0-rc2@sha256:c1316e0131f4ec83bc645ca2141e4fda94e0d28f4fb5f836e15e37a5e054bdf1 + env: + - name: JMX_PORT + value: "5555" command: - sh - -c From 4fd1e5ebf4196ac27d6d49d2c1a6b7b57eaab8e3 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 26 Jun 2017 13:00:12 +0200 Subject: [PATCH 5/8] Makes persistence a fundamental attribute of the statefulset --- zookeeper/10zookeeper-config.yml | 6 +++--- zookeeper/{20zoo-service.yml => 20pzoo-service.yml} | 3 ++- zookeeper/{50zoo.yml => 50pzoo.yml} | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) rename zookeeper/{20zoo-service.yml => 20pzoo-service.yml} (83%) rename zookeeper/{50zoo.yml => 50pzoo.yml} (96%) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index e9402cbd..b718ce1d 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -11,9 +11,9 @@ data: clientPort=2181 initLimit=5 syncLimit=2 - server.1=zoo-0.zoo:2888:3888:participant - server.2=zoo-1.zoo:2888:3888:participant - server.3=zoo-2.zoo:2888:3888:participant + server.1=pzoo-0.zoo:2888:3888:participant + server.2=pzoo-1.zoo:2888:3888:participant + server.3=pzoo-2.zoo:2888:3888:participant server.4=zoo-3.zoo:2888:3888:participant server.5=zoo-4.zoo:2888:3888:participant diff --git a/zookeeper/20zoo-service.yml b/zookeeper/20pzoo-service.yml similarity index 83% rename from zookeeper/20zoo-service.yml rename to zookeeper/20pzoo-service.yml index d15dcc69..00c33e1c 100644 --- a/zookeeper/20zoo-service.yml +++ b/zookeeper/20pzoo-service.yml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Service metadata: - name: zoo + name: pzoo namespace: kafka spec: ports: @@ -12,3 +12,4 @@ spec: clusterIP: None selector: app: zookeeper + storage: persistent diff --git a/zookeeper/50zoo.yml b/zookeeper/50pzoo.yml similarity index 96% rename from zookeeper/50zoo.yml rename to zookeeper/50pzoo.yml index 12d9c441..925f4c50 100644 --- a/zookeeper/50zoo.yml +++ b/zookeeper/50pzoo.yml @@ -1,15 +1,16 @@ apiVersion: apps/v1beta1 kind: StatefulSet metadata: - name: zoo + name: pzoo namespace: kafka spec: - serviceName: "zoo" + serviceName: "pzoo" replicas: 5 template: metadata: labels: app: zookeeper + storage: persistent annotations: prometheus.io/scrape: "true" prometheus.io/port: "5556" From 225569f30ba7644816a3cb2c1a83b731cc3c0276 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 26 Jun 2017 13:03:03 +0200 Subject: [PATCH 6/8] Creates identical definitions for a non-persistent zoo statefulset --- zookeeper/21zoo-service.yml | 15 ++++++++ zookeeper/51zoo.yml | 70 +++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 zookeeper/21zoo-service.yml create mode 100644 zookeeper/51zoo.yml diff --git a/zookeeper/21zoo-service.yml b/zookeeper/21zoo-service.yml new file mode 100644 index 00000000..00c33e1c --- /dev/null +++ b/zookeeper/21zoo-service.yml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: pzoo + namespace: kafka +spec: + ports: + - port: 2888 + name: peer + - port: 3888 + name: leader-election + clusterIP: None + selector: + app: zookeeper + storage: persistent diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml new file mode 100644 index 00000000..925f4c50 --- /dev/null +++ b/zookeeper/51zoo.yml @@ -0,0 +1,70 @@ +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: pzoo + namespace: kafka +spec: + serviceName: "pzoo" + replicas: 5 + template: + metadata: + labels: + app: zookeeper + storage: persistent + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "5556" + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: metrics + image: solsson/kafka-prometheus-jmx-exporter@sha256:1f7c96c287a2dbec1d909cd8f96c0656310239b55a9a90d7fd12c81f384f1f7d + command: + - "java" + - "-jar" + - "jmx_prometheus_httpserver.jar" + - "5556" + - example_configs/zookeeper.yaml + ports: + - containerPort: 5556 + - name: zookeeper + image: solsson/kafka:0.11.0.0-rc2@sha256:c1316e0131f4ec83bc645ca2141e4fda94e0d28f4fb5f836e15e37a5e054bdf1 + env: + - name: JMX_PORT + value: "5555" + command: + - sh + - -c + - > + set -e; + export ZOOKEEPER_SERVER_ID=$((${HOSTNAME##*-} + 1)); + echo "${ZOOKEEPER_SERVER_ID:-1}" | tee /var/lib/zookeeper/data/myid; + sed -i "s/server\.$ZOOKEEPER_SERVER_ID\=[a-z0-9.-]*/server.$ZOOKEEPER_SERVER_ID=0.0.0.0/" config/zookeeper.properties; + cat config/zookeeper.properties; + ./bin/zookeeper-server-start.sh config/zookeeper.properties + ports: + - containerPort: 2181 + name: client + - containerPort: 2888 + name: peer + - containerPort: 3888 + name: leader-election + volumeMounts: + - name: config + mountPath: /usr/local/kafka/config + - name: data + mountPath: /var/lib/zookeeper/data + volumes: + - name: config + configMap: + name: zookeeper-config + volumeClaimTemplates: + - metadata: + name: data + annotations: + volume.beta.kubernetes.io/storage-class: kafka-zookeeper + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi From cb83353833645e543153cf6f9756ef3858f80442 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 26 Jun 2017 13:14:54 +0200 Subject: [PATCH 7/8] A cluster in three availability zones now get one persistent zk each, and two that can move automatically at node failures --- zookeeper/10zookeeper-config.yml | 10 +++++----- zookeeper/21zoo-service.yml | 4 ++-- zookeeper/50pzoo.yml | 2 +- zookeeper/51zoo.yml | 22 +++++++--------------- 4 files changed, 15 insertions(+), 23 deletions(-) diff --git a/zookeeper/10zookeeper-config.yml b/zookeeper/10zookeeper-config.yml index b718ce1d..58d8b6aa 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/10zookeeper-config.yml @@ -11,11 +11,11 @@ data: clientPort=2181 initLimit=5 syncLimit=2 - server.1=pzoo-0.zoo:2888:3888:participant - server.2=pzoo-1.zoo:2888:3888:participant - server.3=pzoo-2.zoo:2888:3888:participant - server.4=zoo-3.zoo:2888:3888:participant - server.5=zoo-4.zoo:2888:3888:participant + server.1=pzoo-0.pzoo:2888:3888:participant + server.2=pzoo-1.pzoo:2888:3888:participant + server.3=pzoo-2.pzoo:2888:3888:participant + server.4=zoo-0.zoo:2888:3888:participant + server.5=zoo-1.zoo:2888:3888:participant log4j.properties: |- log4j.rootLogger=INFO, stdout diff --git a/zookeeper/21zoo-service.yml b/zookeeper/21zoo-service.yml index 00c33e1c..93fb3219 100644 --- a/zookeeper/21zoo-service.yml +++ b/zookeeper/21zoo-service.yml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Service metadata: - name: pzoo + name: zoo namespace: kafka spec: ports: @@ -12,4 +12,4 @@ spec: clusterIP: None selector: app: zookeeper - storage: persistent + storage: ephemeral diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 925f4c50..993fd55d 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -5,7 +5,7 @@ metadata: namespace: kafka spec: serviceName: "pzoo" - replicas: 5 + replicas: 3 template: metadata: labels: diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml index 925f4c50..f041a9a9 100644 --- a/zookeeper/51zoo.yml +++ b/zookeeper/51zoo.yml @@ -1,16 +1,16 @@ apiVersion: apps/v1beta1 kind: StatefulSet metadata: - name: pzoo + name: zoo namespace: kafka spec: - serviceName: "pzoo" - replicas: 5 + serviceName: "zoo" + replicas: 2 template: metadata: labels: app: zookeeper - storage: persistent + storage: ephemeral annotations: prometheus.io/scrape: "true" prometheus.io/port: "5556" @@ -37,7 +37,7 @@ spec: - -c - > set -e; - export ZOOKEEPER_SERVER_ID=$((${HOSTNAME##*-} + 1)); + export ZOOKEEPER_SERVER_ID=$((${HOSTNAME##*-} + 4)); echo "${ZOOKEEPER_SERVER_ID:-1}" | tee /var/lib/zookeeper/data/myid; sed -i "s/server\.$ZOOKEEPER_SERVER_ID\=[a-z0-9.-]*/server.$ZOOKEEPER_SERVER_ID=0.0.0.0/" config/zookeeper.properties; cat config/zookeeper.properties; @@ -58,13 +58,5 @@ spec: - name: config configMap: name: zookeeper-config - volumeClaimTemplates: - - metadata: - name: data - annotations: - volume.beta.kubernetes.io/storage-class: kafka-zookeeper - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 1Gi + - name: data + emptyDir: {} From efb1019fd9881f316e0ee355cbafc26e3d146be2 Mon Sep 17 00:00:00 2001 From: Staffan Olsson Date: Mon, 26 Jun 2017 13:25:37 +0200 Subject: [PATCH 8/8] Forks can tweak storage classes, but here we want setup to be simple... and with the mix of PV and emptyDir there's no reason to make PVs faster than host disks. Use 10GB as it is the minimum for standard disks on GKE. --- README.md | 6 ------ configure-gke/storageclass-zookeeper-gke.yml | 7 ------- configure-minikube/storageclass-zookeeper-minikube.yml | 5 ----- zookeeper/50pzoo.yml | 4 +--- 4 files changed, 1 insertion(+), 21 deletions(-) delete mode 100644 configure-gke/storageclass-zookeeper-gke.yml delete mode 100644 configure-minikube/storageclass-zookeeper-minikube.yml diff --git a/README.md b/README.md index 8b469cd5..ef5db0c8 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,6 @@ To get consistent service DNS names `kafka-N.broker.kafka`(`.svc.cluster.local`) kubectl create -f 00namespace.yml ``` -## Prepare your cluster - -For Minikube run `kubectl create -f configure-minikube/`. - -There's a similar setup for gke, in `configure-gke` of course. You might want to tweak it before creating. - ## Set up volume claims You may add [storage class](http://kubernetes.io/docs/user-guide/persistent-volumes/#storageclasses) diff --git a/configure-gke/storageclass-zookeeper-gke.yml b/configure-gke/storageclass-zookeeper-gke.yml deleted file mode 100644 index 44891bac..00000000 --- a/configure-gke/storageclass-zookeeper-gke.yml +++ /dev/null @@ -1,7 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-zookeeper -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-ssd diff --git a/configure-minikube/storageclass-zookeeper-minikube.yml b/configure-minikube/storageclass-zookeeper-minikube.yml deleted file mode 100644 index ba89eb46..00000000 --- a/configure-minikube/storageclass-zookeeper-minikube.yml +++ /dev/null @@ -1,5 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-zookeeper -provisioner: k8s.io/minikube-hostpath diff --git a/zookeeper/50pzoo.yml b/zookeeper/50pzoo.yml index 993fd55d..25e2ceb4 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/50pzoo.yml @@ -61,10 +61,8 @@ spec: volumeClaimTemplates: - metadata: name: data - annotations: - volume.beta.kubernetes.io/storage-class: kafka-zookeeper spec: accessModes: [ "ReadWriteOnce" ] resources: requests: - storage: 1Gi + storage: 10Gi