diff --git a/charts/hdfs-datanode-k8s/Chart.yaml b/charts/hdfs-datanode-k8s/Chart.yaml new file mode 100644 index 0000000..187f75c --- /dev/null +++ b/charts/hdfs-datanode-k8s/Chart.yaml @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: hdfs-datanode-k8s +version: 0.1.0 +description: Hadoop Distributed File System (HDFS) hosted by Kubernetes. diff --git a/charts/hdfs-datanode-k8s/README.md b/charts/hdfs-datanode-k8s/README.md new file mode 100644 index 0000000..2e6704d --- /dev/null +++ b/charts/hdfs-datanode-k8s/README.md @@ -0,0 +1,55 @@ +HDFS `datanodes` running inside a kubernetes cluster. See the other chart for +`namenode`. + +### Prerequisite + + Requires Kubernetes version 1.5 and beyond, because `namenode` is using + `StatefulSet`, which is available only in version 1.5 and later. + + Make sure `namenode` is fully launched using the other chart. `Datanodes` rely + on DNS to resolve the hostname of the namenode when they start up. + +### Usage + + 1. Find the service IP of the `kube-dns` service in your k8s cluster. + Try the following command and find the IP value in the output. + It will be supplied below as the `clusterDnsIP` parameter. + + ``` + $ kubectl get svc --all-namespaces | grep kube-dns + ``` + + 2. 
Optionally, find the domain name of your k8s cluster that becomes part of + pod and service host names. Default is `cluster.local`. See `values.yaml` + for additional parameters to change. You can add them below in `--set`, + as comma-separated entries. + + 3. Launch this helm chart, `hdfs-datanode-k8s`, while specifying + the kube-dns name server IP and other parameters. (You can add multiple + of them below in --set as comma-separated entries) + + ``` + $ helm install -n my-hdfs-datanode --namespace kube-system \ + --set clusterDnsIP=YOUR-KUBE-DNS-IP hdfs-datanode-k8s + ``` + + 4. Confirm the daemons are launched. + + ``` + $ kubectl get pods --all-namespaces | grep hdfs-datanode- + kube-system hdfs-datanode-ajdcz 1/1 Running 0 7m + kube-system hdfs-datanode-f1w24 1/1 Running 0 7m + ``` + +`Datanode` daemons run on every cluster node. They also mount k8s `hostPath` +local disk volumes. + +`Datanodes` are using `hostNetwork` to register to `namenode` using +physical IPs. + +Note they run under the `kube-system` namespace. + +### Credits + +This chart is using public Hadoop docker images hosted by + [uhopper](https://hub.docker.com/u/uhopper/). diff --git a/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml new file mode 100644 index 0000000..2375129 --- /dev/null +++ b/charts/hdfs-datanode-k8s/templates/datanode-daemonset.yaml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: v1 +kind: ConfigMap +metadata: + name: resolv-conf-datanode + namespace: kube-system +data: + resolv.conf: | + search kube-system.svc.{{ .Values.clusterDomain }} svc.{{ .Values.clusterDomain }} {{ .Values.clusterDomain }} + nameserver {{ .Values.clusterDnsIP }} + options ndots:5 +--- +# Deleting a DaemonSet may require a workaround. See +# https://github.com/kubernetes/kubernetes/issues/33245#issuecomment-261250489 +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: hdfs-datanode + namespace: kube-system +spec: + template: + metadata: + labels: + name: hdfs-datanode + spec: + hostNetwork: true + hostPID: true + containers: + - name: datanode + image: uhopper/hadoop-datanode:2.7.2 + env: + # The namenode host name below resolves only with /etc/resolv.conf mounted from the config map. + # K8s version 1.6 is expected to fix this, per https://github.com/kubernetes/kubernetes/pull/29378. + - name: CORE_CONF_fs_defaultFS + value: hdfs://hdfs-namenode-0.hdfs-namenode.kube-system.svc.{{ .Values.clusterDomain }}:8020 + livenessProbe: + initialDelaySeconds: 30 + httpGet: + host: 127.0.0.1 + path: / + port: 50075 + securityContext: + privileged: true + volumeMounts: + - name: hdfs-data + mountPath: /hadoop/dfs/data + # Use subPath below to mount only a single file. 
+ # See https://github.com/dshulyak/kubernetes.github.io/commit/d58ba7b075bb4848349a2c920caaa08ff3773d70 + - name: resolv-conf-volume + mountPath: /etc/resolv.conf + subPath: resolv.conf + restartPolicy: Always + volumes: + - name: hdfs-data + hostPath: + path: {{ .Values.dataNodeHostPath | quote }} + - configMap: + name: resolv-conf-datanode + items: + - key: resolv.conf + path: resolv.conf + name: resolv-conf-volume diff --git a/charts/hdfs-datanode-k8s/values.yaml b/charts/hdfs-datanode-k8s/values.yaml new file mode 100644 index 0000000..35e3f4c --- /dev/null +++ b/charts/hdfs-datanode-k8s/values.yaml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for template variables. + +# Set this to the IP of your kube-dns name server that resolves POD host names. +# This is used by datanode daemons when they connect to namenode using +# the host name. +clusterDnsIP: 10.96.0.10 + +# Set this to the domain name of your cluster that becomes part of POD and service +# host names. +clusterDomain: cluster.local + +# Path of the local disk directory on cluster nodes that will contain the datanode +# blocks. This will be mounted to the datanodes as a k8s HostPath volume. 
+dataNodeHostPath: /hdfs-data diff --git a/charts/hdfs-namenode-k8s/Chart.yaml b/charts/hdfs-namenode-k8s/Chart.yaml new file mode 100644 index 0000000..5935613 --- /dev/null +++ b/charts/hdfs-namenode-k8s/Chart.yaml @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: hdfs-namenode-k8s +version: 0.1.0 +description: Hadoop Distributed File System (HDFS) hosted by Kubernetes. diff --git a/charts/hdfs-namenode-k8s/README.md b/charts/hdfs-namenode-k8s/README.md new file mode 100644 index 0000000..e6997dc --- /dev/null +++ b/charts/hdfs-namenode-k8s/README.md @@ -0,0 +1,46 @@ +HDFS `namenode` running inside a kubernetes cluster. See the other chart for +`datanodes`. + +### Prerequisite + + Requires Kubernetes version 1.5 and beyond, because `namenode` is using + `StatefulSet`, which is available only in version 1.5 and later. + +### Usage + + 1. Attach a label to one of your k8s cluster hosts that will run the `namenode` + daemon. (This is required as `namenode` currently mounts a local disk + `hostPath` volume. We will switch to persistent volume in the future, so + we can skip this step.) + + ``` + $ kubectl label nodes YOUR-HOST hdfs-namenode-selector=hdfs-namenode-0 + ``` + + 2. Launch this helm chart, `hdfs-namenode-k8s`. 
+ + ``` + $ helm install -n my-hdfs-namenode --namespace kube-system hdfs-namenode-k8s + ``` + + 3. Confirm the daemon is launched. + + ``` + $ kubectl get pods --all-namespaces | grep hdfs-namenode + kube-system hdfs-namenode-0 1/1 Running 0 7m + ``` + +There will be only one `namenode` instance, i.e. High Availability (HA) is not +supported at the moment. The `namenode` instance is supposed to be pinned to +a cluster host using a node label, as shown in the usage above. `Namenode` +mounts a local disk directory using k8s `hostPath` volume. + +`namenode` is using `hostNetwork` so it can see physical IPs of datanodes +without an overlay network such as weave-net masking them. + +Note it runs under the `kube-system` namespace. + +### Credits + +This chart is using public Hadoop docker images hosted by + [uhopper](https://hub.docker.com/u/uhopper/). diff --git a/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml new file mode 100644 index 0000000..5f020e9 --- /dev/null +++ b/charts/hdfs-namenode-k8s/templates/namenode-statefulset.yaml @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A headless service to create DNS records. 
+apiVersion: v1 +kind: Service +metadata: + name: hdfs-namenode + namespace: kube-system + labels: + app: hdfs-namenode +spec: + ports: + - port: 8020 + name: fs + clusterIP: None + selector: + app: hdfs-namenode +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: hdfs-namenode + namespace: kube-system +spec: + serviceName: "hdfs-namenode" + # Create a size-1 set. The namenode DNS name will be + # hdfs-namenode-0.hdfs-namenode.kube-system.svc.YOUR-CLUSTER-DOMAIN + replicas: 1 + template: + metadata: + labels: + app: hdfs-namenode + spec: + # Use hostNetwork so datanodes connect to namenode without going through an overlay network + # like weave. Otherwise, namenode fails to see physical IP address of datanodes. + hostNetwork: true + hostPID: true + terminationGracePeriodSeconds: 30 # Kept non-zero: Kubernetes documents a zero grace period as unsafe for StatefulSet pods. + containers: + - name: hdfs-namenode + image: uhopper/hadoop-namenode:2.7.2 + env: + - name: CLUSTER_NAME + value: hdfs-k8s + ports: + - containerPort: 8020 + name: fs + volumeMounts: + - name: hdfs-name + mountPath: /hadoop/dfs/name + # Pin the pod to a node. You can label your node like below: + # $ kubectl label nodes YOUR-NODE hdfs-namenode-selector=hdfs-namenode-0 + nodeSelector: + hdfs-namenode-selector: hdfs-namenode-0 + restartPolicy: Always + volumes: + - name: hdfs-name + hostPath: + path: {{ .Values.nameNodeHostPath | quote }} diff --git a/charts/hdfs-namenode-k8s/values.yaml b/charts/hdfs-namenode-k8s/values.yaml new file mode 100644 index 0000000..c4634ce --- /dev/null +++ b/charts/hdfs-namenode-k8s/values.yaml @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for template variables. + +# Path of the local disk directory on the labeled cluster node that will hold the +# namenode fsimage and edit logs. It is mounted into the namenode pod as a k8s +# hostPath volume. +nameNodeHostPath: /hdfs-name