From ef45af2a799f45b477181af9ded77725535b60fe Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Thu, 23 Mar 2017 14:26:05 -0700 Subject: [PATCH 1/7] Add initial version of hdfs-k8s helm chart --- charts/hdfs-k8s/Chart.yaml | 3 + charts/hdfs-k8s/README.md | 64 +++++++++++++++++ .../templates/datanode-daemonset.yaml | 71 +++++++++++++++++++ .../hdfs-k8s/templates/namenode-petset.yaml | 55 ++++++++++++++ charts/hdfs-k8s/values.yaml | 19 +++++ 5 files changed, 212 insertions(+) create mode 100644 charts/hdfs-k8s/Chart.yaml create mode 100644 charts/hdfs-k8s/README.md create mode 100644 charts/hdfs-k8s/templates/datanode-daemonset.yaml create mode 100644 charts/hdfs-k8s/templates/namenode-petset.yaml create mode 100644 charts/hdfs-k8s/values.yaml diff --git a/charts/hdfs-k8s/Chart.yaml b/charts/hdfs-k8s/Chart.yaml new file mode 100644 index 0000000..4bdeae0 --- /dev/null +++ b/charts/hdfs-k8s/Chart.yaml @@ -0,0 +1,3 @@ +name: hdfs-k8s +version: 0.1 +description: Hadoop Distributed File System (HDFS) hosted by Kubernetes. diff --git a/charts/hdfs-k8s/README.md b/charts/hdfs-k8s/README.md new file mode 100644 index 0000000..5fe56f6 --- /dev/null +++ b/charts/hdfs-k8s/README.md @@ -0,0 +1,64 @@ +HDFS `namenode` and `datanodes` running inside a kubernetes cluster. + +### Prerequisite + + This currently works only for kubernetes version 1.4, because the namenode + is using the legacy `PetSet`. We plan to switch to `StatefulSet` soon so it + can run on version 1.5 and beyond. + +### Usage + + 1. Attach a label to one of your k8s cluster host that will run the `namenode` + daemon. + + ``` + $ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0 + ``` + + 2. Find the IP of your `kube-dns` name server that resolves pod and service + host names in your k8s cluster. Default is 10.96.0.10. It will be supplied + below as the `clusterDnsIP` parameter. 
Try this command and find the IP + value in the output: + + ``` + $ kubectl get services --all-namespaces | grep kube-dns + kube-system kube-dns 10.96.0.10 53/UDP,53/TCP 117d + ``` + + 3. Optionally, find the domain name of your k8s cluster that become part of + pod and service host names. Default is `cluster.local`. See `values.yaml` + for additional parameters to change. You can add them below in `--set`, + as comma-separated entries. + + 4. Launch this helm chart, `hdfs-k8s`, while specifying the kube-dns name + server IP and other parameters. (You can add multiple of them below in + --set as comma-separated entries) + + ``` + $ helm install -n my-hdfs --namespace kube-system --set clusterDnsIP=10.96.0.10 hdfs-k8s + ``` + + 5. Confirm the daemons are launched. + + ``` + $ kubectl get pods --all-namespaces | grep hdfs + kube-system hdfs-datanode-ajdcz 1/1 Running 0 7m + kube-system hdfs-datanode-f1w24 1/1 Running 0 7m + ... + kube-system hdfs-namenode-0 1/1 Running 0 7m + ``` + +There will be only one `namenode` instance. i.e. High Availability (HA) is not +supported at the moment. The `namenode` instance is supposed to be pinned to +a cluster host using a node label, as shown in the usage above. `Namenode` +mount a local disk directory using k8s `hostPath` volume. + +`Datanode` daemons run on every cluster node. They also mount k8s `hostPath` +local disk volumes. + +Note these daemons run under the `kube-system` namespace. + +###Credits + +This chart is using public Hadoop docker images hosted by + [uhopper](https://hub.docker.com/u/uhopper/). 
diff --git a/charts/hdfs-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-k8s/templates/datanode-daemonset.yaml new file mode 100644 index 0000000..3f2ccf1 --- /dev/null +++ b/charts/hdfs-k8s/templates/datanode-daemonset.yaml @@ -0,0 +1,71 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: resolv-conf-datanode + namespace: kube-system +data: + resolv.conf: | + search kube-system.svc.{{ .Values.clusterDomain }} svc.{{ .Values.clusterDomain }} {{ .Values.clusterDomain }} + nameserver {{ .Values.clusterDnsIP }} + options ndots:5 +--- +# Deleting a daemonset may need some trick. See +# https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: hdfs-datanode + namespace: kube-system +spec: + template: + metadata: + labels: + name: hdfs-datanode + annotations: + scheduler.alpha.kubernetes.io/tolerations: | + [ + { + "key": "dedicated", + "operator": "Equal", + "value": "master", + "effect": "NoSchedule" + } + ] + spec: + hostNetwork: true + hostPID: true + containers: + - name: datanode + image: uhopper/hadoop-datanode:2.7.2 + env: + # This works only with /etc/resolv.conf mounted from the config map. + # K8s version 1.6 will fix this, per https://github.com/kubernetes/kubernetes/pull/29378. + - name: CORE_CONF_fs_defaultFS + value: hdfs://hdfs-namenode-0.hdfs-namenode.kube-system.svc.{{ .Values.clusterDomain }}:8020 + livenessProbe: + initialDelaySeconds: 30 + httpGet: + host: 127.0.0.1 + path: / + port: 50075 + securityContext: + privileged: true + volumeMounts: + - name: hdfs-data + mountPath: /hadoop/dfs/data + # Use subPath below to mount only a single file. 
+ # See https://github.com/dshulyak/kubernetes.github.io/commit/d58ba7b075bb4848349a2c920caaa08ff3773d70 + - name: resolv-conf-volume + mountPath: /etc/resolv.conf + subPath: resolv.conf + restartPolicy: Always + volumes: + - name: hdfs-data + hostPath: + path: {{ .Values.dataNodeHostPath }} + - configMap: + name: resolv-conf-datanode + items: + - key: resolv.conf + path: resolv.conf + name: resolv-conf-volume diff --git a/charts/hdfs-k8s/templates/namenode-petset.yaml b/charts/hdfs-k8s/templates/namenode-petset.yaml new file mode 100644 index 0000000..a8e547c --- /dev/null +++ b/charts/hdfs-k8s/templates/namenode-petset.yaml @@ -0,0 +1,55 @@ +# A headless service to create DNS records. +apiVersion: v1 +kind: Service +metadata: + name: hdfs-namenode + namespace: kube-system + labels: + app: hdfs-namenode +spec: + ports: + - port: 8020 + name: fs + clusterIP: None + selector: + app: hdfs-namenode +--- +apiVersion: apps/v1alpha1 +kind: PetSet +metadata: + name: hdfs-namenode + namespace: kube-system +spec: + serviceName: "hdfs-namenode" + # Create a size-1 petset. The namenode DNS name will be + # hdfs-namenode-0.hdfs-namenode.kube-system.svc.{{ .Values.clusterDomain }} + replicas: 1 + template: + metadata: + labels: + app: hdfs-namenode + annotations: + pod.alpha.kubernetes.io/initialized: "true" + spec: + terminationGracePeriodSeconds: 0 + containers: + - name: hdfs-namenode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: CLUSTER_NAME + value: hdfs-k8s + ports: + - containerPort: 8020 + name: fs + volumeMounts: + - name: hdfs-name + mountPath: /hadoop/dfs/name + # Pin the pod to a node. 
You can label your node like below: + # $ kubectl label nodes YOUR-NODE hdfs-namenode-selector=hdfs-namenode-0 + nodeSelector: + hdfs-namenode-selector: hdfs-namenode-0 + restartPolicy: Always + volumes: + - name: hdfs-name + hostPath: + path: {{ .Values.nameNodeHostPath }} diff --git a/charts/hdfs-k8s/values.yaml b/charts/hdfs-k8s/values.yaml new file mode 100644 index 0000000..307be1a --- /dev/null +++ b/charts/hdfs-k8s/values.yaml @@ -0,0 +1,19 @@ +# Default values for template variables. + +# Set this to the IP of your kube-dns name server that resolv POD host names. +# This is used by datanode daemons when they connect to namenode using +# the host name. +clusterDnsIP: 10.96.0.10 + +# Set this to the domain name of your cluster that become part of POD and service +# host names. +clusterDomain: cluster.local + +# Path of the local disk directory on a cluster node that will contain the namenode +# fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath +# volume. +nameNodeHostPath: /hdfs-name + +# Path of the local disk directory on cluster nodes that will contain the datanode +# blocks. This will be mounted to the namenode as a k8s HostPath volume. +dataNodeHostPath: /hdfs-data From e2160fb10ecb6610b586de636d05adb1bb28e600 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Thu, 23 Mar 2017 14:31:56 -0700 Subject: [PATCH 2/7] Add license --- charts/hdfs-k8s/Chart.yaml | 14 ++++++++++++++ charts/hdfs-k8s/templates/datanode-daemonset.yaml | 14 ++++++++++++++ charts/hdfs-k8s/templates/namenode-petset.yaml | 15 +++++++++++++++ charts/hdfs-k8s/values.yaml | 15 +++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/charts/hdfs-k8s/Chart.yaml b/charts/hdfs-k8s/Chart.yaml index 4bdeae0..847c0d1 100644 --- a/charts/hdfs-k8s/Chart.yaml +++ b/charts/hdfs-k8s/Chart.yaml @@ -1,3 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. name: hdfs-k8s version: 0.1 description: Hadoop Distributed File System (HDFS) hosted by Kubernetes. diff --git a/charts/hdfs-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-k8s/templates/datanode-daemonset.yaml index 3f2ccf1..34ef343 100644 --- a/charts/hdfs-k8s/templates/datanode-daemonset.yaml +++ b/charts/hdfs-k8s/templates/datanode-daemonset.yaml @@ -1,3 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
apiVersion: v1 kind: ConfigMap metadata: diff --git a/charts/hdfs-k8s/templates/namenode-petset.yaml b/charts/hdfs-k8s/templates/namenode-petset.yaml index a8e547c..3c2a7cd 100644 --- a/charts/hdfs-k8s/templates/namenode-petset.yaml +++ b/charts/hdfs-k8s/templates/namenode-petset.yaml @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # A headless service to create DNS records. apiVersion: v1 kind: Service diff --git a/charts/hdfs-k8s/values.yaml b/charts/hdfs-k8s/values.yaml index 307be1a..1438a72 100644 --- a/charts/hdfs-k8s/values.yaml +++ b/charts/hdfs-k8s/values.yaml @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Default values for template variables. # Set this to the IP of your kube-dns name server that resolv POD host names. From 3890bc8fa471cb6ea48fa641c0b175527d28f5f9 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Sat, 25 Mar 2017 13:21:20 -0700 Subject: [PATCH 3/7] Switch namenode to StatefulSet --- charts/hdfs-k8s/README.md | 5 ++--- .../{namenode-petset.yaml => namenode-statefulset.yaml} | 8 +++----- 2 files changed, 5 insertions(+), 8 deletions(-) rename charts/hdfs-k8s/templates/{namenode-petset.yaml => namenode-statefulset.yaml} (92%) diff --git a/charts/hdfs-k8s/README.md b/charts/hdfs-k8s/README.md index 5fe56f6..c63fab3 100644 --- a/charts/hdfs-k8s/README.md +++ b/charts/hdfs-k8s/README.md @@ -2,9 +2,8 @@ HDFS `namenode` and `datanodes` running inside a kubernetes cluster. ### Prerequisite - This currently works only for kubernetes version 1.4, because the namenode - is using the legacy `PetSet`. We plan to switch to `StatefulSet` soon so it - can run on version 1.5 and beyond. + Requires Kubernetes version 1.5 and beyond, because `namenode` is using + `StatefulSet`, which is available only in version 1.5 and later. 
### Usage diff --git a/charts/hdfs-k8s/templates/namenode-petset.yaml b/charts/hdfs-k8s/templates/namenode-statefulset.yaml similarity index 92% rename from charts/hdfs-k8s/templates/namenode-petset.yaml rename to charts/hdfs-k8s/templates/namenode-statefulset.yaml index 3c2a7cd..24780cf 100644 --- a/charts/hdfs-k8s/templates/namenode-petset.yaml +++ b/charts/hdfs-k8s/templates/namenode-statefulset.yaml @@ -29,22 +29,20 @@ spec: selector: app: hdfs-namenode --- -apiVersion: apps/v1alpha1 -kind: PetSet +apiVersion: apps/v1beta1 +kind: StatefulSet metadata: name: hdfs-namenode namespace: kube-system spec: serviceName: "hdfs-namenode" - # Create a size-1 petset. The namenode DNS name will be + # Create a size-1 set. The namenode DNS name will be # hdfs-namenode-0.hdfs-namenode.kube-system.svc.{{ .Values.clusterDomain }} replicas: 1 template: metadata: labels: app: hdfs-namenode - annotations: - pod.alpha.kubernetes.io/initialized: "true" spec: terminationGracePeriodSeconds: 0 containers: From 6a7374927b153b9eab61ce8f0c54dfc857395376 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Thu, 30 Mar 2017 14:45:45 -0700 Subject: [PATCH 4/7] Use two charts for sequencing. 
hostNetwork in namenode --- .../Chart.yaml | 2 +- charts/hdfs-datanode-k8s/README.md | 57 +++++++++++++++++ .../templates/datanode-daemonset.yaml | 0 .../values.yaml | 5 -- charts/hdfs-k8s/README.md | 63 ------------------- charts/hdfs-namenode-k8s/Chart.yaml | 17 +++++ charts/hdfs-namenode-k8s/README.md | 44 +++++++++++++ .../templates/namenode-statefulset.yaml | 6 +- charts/hdfs-namenode-k8s/values.yaml | 21 +++++++ 9 files changed, 145 insertions(+), 70 deletions(-) rename charts/{hdfs-k8s => hdfs-datanode-k8s}/Chart.yaml (97%) create mode 100644 charts/hdfs-datanode-k8s/README.md rename charts/{hdfs-k8s => hdfs-datanode-k8s}/templates/datanode-daemonset.yaml (100%) rename charts/{hdfs-k8s => hdfs-datanode-k8s}/values.yaml (86%) delete mode 100644 charts/hdfs-k8s/README.md create mode 100644 charts/hdfs-namenode-k8s/Chart.yaml create mode 100644 charts/hdfs-namenode-k8s/README.md rename charts/{hdfs-k8s => hdfs-namenode-k8s}/templates/namenode-statefulset.yaml (87%) create mode 100644 charts/hdfs-namenode-k8s/values.yaml diff --git a/charts/hdfs-k8s/Chart.yaml b/charts/hdfs-datanode-k8s/Chart.yaml similarity index 97% rename from charts/hdfs-k8s/Chart.yaml rename to charts/hdfs-datanode-k8s/Chart.yaml index 847c0d1..187f75c 100644 --- a/charts/hdfs-k8s/Chart.yaml +++ b/charts/hdfs-datanode-k8s/Chart.yaml @@ -12,6 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -name: hdfs-k8s +name: hdfs-datanode-k8s version: 0.1 description: Hadoop Distributed File System (HDFS) hosted by Kubernetes. diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md new file mode 100644 index 0000000..8c18920 --- /dev/null +++ b/charts/hdfs-datanode-k8s/README.md @@ -0,0 +1,57 @@ +HDFS `datanodes` running inside a kubernetes cluster. See the other chart for +`namenode`. 
+ +### Prerequisite + + Requires Kubernetes version 1.5 and beyond, because `namenode` is using + `StatefulSet`, which is available only in version 1.5 and later. + + Make sure `namenode` is fully launched using the other chart. `Datanodes` rely + on DNS to resolve the hostname of the namenode when they start up. + +### Usage + + 1. Find the IP of your `kube-dns` name server that resolves pod and service + host names in your k8s cluster. Default is 10.96.0.10. It will be supplied + below as the `clusterDnsIP` parameter. Try this command and find the IP + value in the output: + + ``` + $ kubectl get services --all-namespaces | grep kube-dns + kube-system kube-dns 10.96.0.10 53/UDP,53/TCP 117d + ``` + + 2. Optionally, find the domain name of your k8s cluster that become part of + pod and service host names. Default is `cluster.local`. See `values.yaml` + for additional parameters to change. You can add them below in `--set`, + as comma-separated entries. + + 3. Launch this helm chart, `hdfs-datanode-k8s`, while specifying + the kube-dns name server IP and other parameters. (You can add multiple + of them below in --set as comma-separated entries) + + ``` + $ helm install -n my-hdfs-datanode --namespace kube-system \ + --set clusterDnsIP=10.96.0.10 hdfs-datanode-k8s + ``` + + 5. Confirm the daemons are launched. + + ``` + $ kubectl get pods --all-namespaces | grep hdfs-datanode- + kube-system hdfs-datanode-ajdcz 1/1 Running 0 7m + kube-system hdfs-datanode-f1w24 1/1 Running 0 7m + ``` + +`Datanode` daemons run on every cluster node. They also mount k8s `hostPath` +local disk volumes. + +`Datanodes` are using `hostNetwork` to register to `namenode` using +physical IPs. + +Note they run under the `kube-system` namespace. + +### Credits + +This chart is using public Hadoop docker images hosted by + [uhopper](https://hub.docker.com/u/uhopper/).
diff --git a/charts/hdfs-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml similarity index 100% rename from charts/hdfs-k8s/templates/datanode-daemonset.yaml rename to charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml diff --git a/charts/hdfs-k8s/values.yaml b/charts/hdfs-datanode-k8s/values.yaml similarity index 86% rename from charts/hdfs-k8s/values.yaml rename to charts/hdfs-datanode-k8s/values.yaml index 1438a72..35e3f4c 100644 --- a/charts/hdfs-k8s/values.yaml +++ b/charts/hdfs-datanode-k8s/values.yaml @@ -24,11 +24,6 @@ clusterDnsIP: 10.96.0.10 # host names. clusterDomain: cluster.local -# Path of the local disk directory on a cluster node that will contain the namenode -# fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath -# volume. -nameNodeHostPath: /hdfs-name - # Path of the local disk directory on cluster nodes that will contain the datanode # blocks. This will be mounted to the namenode as a k8s HostPath volume. dataNodeHostPath: /hdfs-data diff --git a/charts/hdfs-k8s/README.md b/charts/hdfs-k8s/README.md deleted file mode 100644 index c63fab3..0000000 --- a/charts/hdfs-k8s/README.md +++ /dev/null @@ -1,63 +0,0 @@ -HDFS `namenode` and `datanodes` running inside a kubernetes cluster. - -### Prerequisite - - Requires Kubernetes version 1.5 and beyond, because `namenode` is using - `StatefulSet`, which is available only in version 1.5 and later. - -### Usage - - 1. Attach a label to one of your k8s cluster host that will run the `namenode` - daemon. - - ``` - $ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0 - ``` - - 2. Find the IP of your `kube-dns` name server that resolves pod and service - host names in your k8s cluster. Default is 10.96.0.10. It will be supplied - below as the `clusterDnsIP` parameter. 
Try this command and find the IP - value in the output: - - ``` - $ kubectl get services --all-namespaces | grep kube-dns - kube-system kube-dns 10.96.0.10 53/UDP,53/TCP 117d - ``` - - 3. Optionally, find the domain name of your k8s cluster that become part of - pod and service host names. Default is `cluster.local`. See `values.yaml` - for additional parameters to change. You can add them below in `--set`, - as comma-separated entries. - - 4. Launch this helm chart, `hdfs-k8s`, while specifying the kube-dns name - server IP and other parameters. (You can add multiple of them below in - --set as comma-separated entries) - - ``` - $ helm install -n my-hdfs --namespace kube-system --set clusterDnsIP=10.96.0.10 hdfs-k8s - ``` - - 5. Confirm the daemons are launched. - - ``` - $ kubectl get pods --all-namespaces | grep hdfs - kube-system hdfs-datanode-ajdcz 1/1 Running 0 7m - kube-system hdfs-datanode-f1w24 1/1 Running 0 7m - ... - kube-system hdfs-namenode-0 1/1 Running 0 7m - ``` - -There will be only one `namenode` instance. i.e. High Availability (HA) is not -supported at the moment. The `namenode` instance is supposed to be pinned to -a cluster host using a node label, as shown in the usage above. `Namenode` -mount a local disk directory using k8s `hostPath` volume. - -`Datanode` daemons run on every cluster node. They also mount k8s `hostPath` -local disk volumes. - -Note these daemons run under the `kube-system` namespace. - -###Credits - -This chart is using public Hadoop docker images hosted by - [uhopper](https://hub.docker.com/u/uhopper/). diff --git a/charts/hdfs-namenode-k8s/Chart.yaml b/charts/hdfs-namenode-k8s/Chart.yaml new file mode 100644 index 0000000..5935613 --- /dev/null +++ b/charts/hdfs-namenode-k8s/Chart.yaml @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: hdfs-namenode-k8s +version: 0.1 +description: Hadoop Distributed File System (HDFS) hosted by Kubernetes. diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md new file mode 100644 index 0000000..b7087e5 --- /dev/null +++ b/charts/hdfs-namenode-k8s/README.md @@ -0,0 +1,44 @@ +HDFS `namenode` running inside a kubernetes cluster. See the other chart for +`datanodes`. + +### Prerequisite + + Requires Kubernetes version 1.5 and beyond, because `namenode` is using + `StatefulSet`, which is available only in version 1.5 and later. + +### Usage + + 1. Attach a label to one of your k8s cluster host that will run the `namenode` + daemon. + + ``` + $ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0 + ``` + + 2. Launch this helm chart, `hdfs-namenode-k8s`. + + ``` + $ helm install -n my-hdfs-namenode --namespace kube-system hdfs-namenode-k8s + ``` + + 3. Confirm the daemon is launched. + + ``` + $ kubectl get pods --all-namespaces | grep hdfs-namenode + kube-system hdfs-namenode-0 1/1 Running 0 7m + ``` + +There will be only one `namenode` instance. i.e. High Availability (HA) is not +supported at the moment. The `namenode` instance is supposed to be pinned to +a cluster host using a node label, as shown in the usage above. `Namenode` +mounts a local disk directory using k8s `hostPath` volume.
+ +`namenode` is using `hostNetwork` so it can see physical IPs of datanodes +without an overlay network such as weave-net masking them. + +Note it runs under the `kube-system` namespace. + +### Credits + +This chart is using public Hadoop docker images hosted by + [uhopper](https://hub.docker.com/u/uhopper/). diff --git a/charts/hdfs-k8s/templates/namenode-statefulset.yaml b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml similarity index 87% rename from charts/hdfs-k8s/templates/namenode-statefulset.yaml rename to charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml index 24780cf..5f020e9 100644 --- a/charts/hdfs-k8s/templates/namenode-statefulset.yaml +++ b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -37,13 +37,17 @@ metadata: spec: serviceName: "hdfs-namenode" # Create a size-1 set. The namenode DNS name will be - # hdfs-namenode-0.hdfs-namenode.kube-system.svc.{{ .Values.clusterDomain }} + # hdfs-namenode-0.hdfs-namenode.kube-system.svc.YOUR-CLUSTER-DOMAIN replicas: 1 template: metadata: labels: app: hdfs-namenode spec: + # Use hostNetwork so datanodes connect to namenode without going through an overlay network + # like weave. Otherwise, namenode fails to see the physical IP addresses of datanodes. + hostNetwork: true + hostPID: true terminationGracePeriodSeconds: 0 containers: - name: hdfs-namenode diff --git a/charts/hdfs-namenode-k8s/values.yaml b/charts/hdfs-namenode-k8s/values.yaml new file mode 100644 index 0000000..c4634ce --- /dev/null +++ b/charts/hdfs-namenode-k8s/values.yaml @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for template variables. + +# Path of the local disk directory on a cluster node that will contain the namenode +# fsimage and edit logs. This will be mounted to the namenode as a k8s HostPath +# volume. +nameNodeHostPath: /hdfs-name From ba95027f6086d006f247ef61d939fc04a82987fa Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Thu, 30 Mar 2017 15:45:38 -0700 Subject: [PATCH 5/7] Add clarifying comments --- charts/hdfs-datanode-k8s/README.md | 10 ++++------ charts/hdfs-namenode-k8s/README.md | 4 +++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md index 8c18920..a4347d1 100644 --- a/charts/hdfs-datanode-k8s/README.md +++ b/charts/hdfs-datanode-k8s/README.md @@ -11,14 +11,12 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for ### Usage - 1. Find the IP of your `kube-dns` name server that resolves pod and service - host names in your k8s cluster. Default is 10.96.0.10. It will be supplied - below as the `clusterDnsIP` parameter. Try this command and find the IP - value in the output: + 1. Find the service IP of your `kube-dns` of your k8s cluster. + Default is 10.96.0.10. It will be supplied below as the `clusterDnsIP` + parameter. Try this command and find the IP value in the output: ``` - $ kubectl get services --all-namespaces | grep kube-dns - kube-system kube-dns 10.96.0.10 53/UDP,53/TCP 117d + $ kubectl get svc --all-namespaces | grep dns ``` 2. 
Optionally, find the domain name of your k8s cluster that become part of diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md index b7087e5..e6997dc 100644 --- a/charts/hdfs-namenode-k8s/README.md +++ b/charts/hdfs-namenode-k8s/README.md @@ -9,7 +9,9 @@ HDFS `namenode` running inside a kubernetes cluster. See the other chart for ### Usage 1. Attach a label to one of your k8s cluster host that will run the `namenode` - daemon. + daemon. (This is required as `namenode` currently mounts a local disk + `hostPath` volume. We will switch to persistent volume in the future, so + we can skip this step.) ``` $ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0 From 0ccf76d60ae9e0be9f5d5b0ff5b332533514d959 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Mon, 3 Apr 2017 09:32:11 -0700 Subject: [PATCH 6/7] Drop misleading master schedule annotation --- .../templates/datanode-daemonset.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml index 34ef343..2375129 100644 --- a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml +++ b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -35,16 +35,6 @@ spec: metadata: labels: name: hdfs-datanode - annotations: - scheduler.alpha.kubernetes.io/tolerations: | - [ - { - "key": "dedicated", - "operator": "Equal", - "value": "master", - "effect": "NoSchedule" - } - ] spec: hostNetwork: true hostPID: true From 4de020b30038d9254c7041807c97dee1161f4dd0 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Mon, 3 Apr 2017 11:57:25 -0700 Subject: [PATCH 7/7] Fix README.md to drop misleading kube-dns default IP value --- charts/hdfs-datanode-k8s/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md index a4347d1..2e6704d 100644 --- 
a/charts/hdfs-datanode-k8s/README.md +++ b/charts/hdfs-datanode-k8s/README.md @@ -12,11 +12,11 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for ### Usage 1. Find the service IP of your `kube-dns` of your k8s cluster. - Default is 10.96.0.10. It will be supplied below as the `clusterDnsIP` - parameter. Try this command and find the IP value in the output: + Try the following command and find the IP value in the output. + It will be supplied below as the `clusterDnsIP` parameter. ``` - $ kubectl get svc --all-namespaces | grep dns + $ kubectl get svc --all-namespaces | grep kube-dns ``` 2. Optionally, find the domain name of your k8s cluster that become part of @@ -30,7 +30,7 @@ HDFS `datanodes` running inside a kubernetes cluster. See the other chart for ``` $ helm install -n my-hdfs-datanode --namespace kube-system \ - --set clusterDnsIP=10.96.0.10 hdfs-datanode-k8s + --set clusterDnsIP=YOUR-KUBE-DNS-IP hdfs-datanode-k8s ``` 5. Confirm the daemons are launched.