Add PySpark with RasterFrames Support profile #54
```diff
@@ -14,3 +14,4 @@ package.json
 jupyterhub_cookie_secret
 jupyterhub-proxy.pid
 .kubeconfig
+.history
```
```diff
@@ -0,0 +1,33 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: autohttps
+  namespace: {{ .Release.Namespace }}
+  labels:
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    component: autohttps
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+rules:
+  - apiGroups: [""]
+    resources: ["secrets"]
+    verbs: ["get", "patch", "list", "create"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: autohttps
+  namespace: {{ .Release.Namespace }}
+  labels:
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    component: autohttps
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+subjects:
+  - kind: ServiceAccount
+    name: autohttps
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: autohttps
```
```diff
@@ -0,0 +1,39 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: hub
+  namespace: {{ .Release.Namespace }}
+  labels:
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    component: hub
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+rules:
+  - apiGroups: [""]
+    resources: ["pods", "persistentvolumeclaims", "secrets", "configmaps", "services", "namespaces", "serviceaccounts"]
+    verbs: ["get", "watch", "list", "create", "delete", "update"]
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]
+    verbs: ["get", "watch", "list", "create", "delete", "update"]
+  - apiGroups: [""]
+    resources: ["events"]
+    verbs: ["get", "watch", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: hub
+  namespace: {{ .Release.Namespace }}
+  labels:
+    chart: {{ .Chart.Name }}-{{ .Chart.Version }}
+    component: hub
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+subjects:
+  - kind: ServiceAccount
+    name: hub
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: hub
```
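These cluster-scoped permissions are what let the hub create per-user namespaces and per-namespace RBAC objects (see the `pre_spawn_hook` further down). As a quick sanity check, here is a minimal sketch using the kubernetes Python client, assuming in-cluster credentials with permission to read RBAC objects:

```python
# sketch: print the rules granted by the "hub" ClusterRole created above,
# to confirm it covers the verbs the spawner hooks rely on
from kubernetes import client, config

config.load_incluster_config()  # assumes this runs inside the cluster
rbac = client.RbacAuthorizationV1Api()
role = rbac.read_cluster_role("hub")
for rule in role.rules:
    print(rule.api_groups, rule.resources, rule.verbs)
```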
```diff
@@ -20,6 +20,24 @@ daskhub:
             values:
             - gpu

+    # Spark -----------------------------------------------------
+    - display_name: "PySpark"
+      default: "True"
+      description: '4 cores, 32 GiB of memory. <a href="https://github.com/pangeo-data/pangeo-docker-images" target="_blank">Pangeo Notebook</a> environment powered by <a href="https://rasterframes.io/">RasterFrames</a>, <a href="http://geotrellis.io/">GeoTrellis</a> and <a href="https://spark.apache.org/">Apache Spark</a>.'
+      kubespawner_override:
+        image: "${pyspark_image}"
+        cpu_guarantee: 3
+        cpu_limit: 4
+        mem_guarantee: "25G"
+        mem_limit: "32G"
+        default_url: "/lab/tree/PlanetaryComputerExamples/README.md"
+        node_affinity_required:
+        - matchExpressions:
+          - key: pc.microsoft.com/userkind
+            operator: NotIn
+            values:
+            - gpu
+
     # R --------------------------------------------------------------------
     - display_name: "R"
       description: '8 cores, 64 GiB of memory. R geospatial environment.'
```
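Note that the `pre_spawn_hook` added later in this PR compares `spawner.user_options['profile']` against the slug `pyspark`. No explicit `slug` is set here, so this relies on KubeSpawner deriving the slug from the display name; a rough sketch of that assumption:

```python
# assumption (not from this PR): with no explicit `slug`, KubeSpawner derives one
# from display_name, roughly by lowercasing and collapsing non-alphanumerics to '-'
import re

def profile_slug(display_name: str) -> str:
    return re.sub(r'[^a-z0-9]+', '-', display_name.lower()).strip('-')

assert profile_slug("PySpark") == "pyspark"  # the value the pre_spawn_hook checks
```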
```diff
@@ -108,3 +126,74 @@ daskhub:
             operator: NotIn
             values:
             - gpu
+
+  extraFiles:
+    spark_default_configuration:
+      # TODO(https://github.com/hashicorp/terraform-provider-helm/issues/628): use set-file
+      stringData: |
+        """
+        Default Spark configuration init for the Jupyter instance.
+        """
+        import socket
+        import os
+        notebook_ip = socket.gethostbyname(socket.gethostname())
+        namespace_user = os.environ.get('NAMESPACE_USER', '')
+        spark_config = {
+            'spark.master': 'k8s://https://kubernetes.default.svc.cluster.local',
+            'spark.app.name': 'STAC API with RF in K8S',
```
> Review comment: The Spark app name should probably be picked up from the notebook name, or use the default one (more Microsoft-y).
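A hypothetical variant (not part of this PR) that addresses the comment by naming the app after the JupyterHub user, via the `JUPYTERHUB_USER` variable the spawner sets in every single-user pod:

```python
# hypothetical: derive the Spark app name from the JupyterHub user instead of
# hard-coding it; JUPYTERHUB_USER is injected by JupyterHub into the notebook pod
import os

app_name = f"pc-pyspark-{os.environ.get('JUPYTERHUB_USER', 'notebook')}"
```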
```diff
+            'spark.ui.port': '4040',
+            'spark.driver.blockManager.port': '7777',
+            'spark.driver.port': '2222',
+            'spark.driver.host': notebook_ip,
+            'spark.driver.bindAddress': '0.0.0.0',
+            'spark.executor.instances': '2',
+            'spark.executor.memory': '4g',
+            'spark.driver.memory': '1g',
+            'spark.executor.cores': '3',
+            'spark.kubernetes.namespace': namespace_user,
+            'spark.kubernetes.container.image': 'quay.io/daunnc/spark-k8s-py-3.8.8-gdal32-msftpc:3.1.2',
```
> Review comment: Should be replaced by the MSFTPC containers.
```diff
+            'spark.kubernetes.executor.deleteOnTermination': 'true',
+            'spark.kubernetes.authenticate.driver.serviceAccountName': 'default',
+            'spark.kubernetes.authenticate.caCertFile': '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
+            'spark.kubernetes.authenticate.oauthTokenFile': '/var/run/secrets/kubernetes.io/serviceaccount/token',
+            'spark.kubernetes.executor.podTemplateFile': '/etc/spark/executor-template.yml',
+            'spark.kubernetes.node.selector.k8s.spark.org/dedicated': 'worker',
+            'spark.kubernetes.node.selector.pc.microsoft.com/workerkind': 'spark-cpu',
+            'spark.kubernetes.node.selector.kubernetes.azure.com/scalesetpriority': 'spot'
+        }
```
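For context, a minimal sketch (not part of the diff) of how a notebook session consumes this file: IPython runs every script under `profile_default/startup/`, so `spark_config` is already in scope when the user builds a session.

```python
# sketch: build a SparkSession from the spark_config dict defined by the
# startup script above; assumes pyspark is installed and the pod can reach
# the Kubernetes API named in spark.master
from pyspark.sql import SparkSession

builder = SparkSession.builder
for key, value in spark_config.items():  # spark_config comes from 00-spark-conf.py
    builder = builder.config(key, value)

spark = builder.getOrCreate()
print(spark.sparkContext.master)  # k8s://https://kubernetes.default.svc.cluster.local
```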
```diff
+    # By default Spark selects executor nodes only via a node selector, which requires pools with no taints.
+    # This pod template additionally lets Spark executors tolerate the Azure spot taint.
+    spark_executor_template:
+      stringData: |
+        #
+        # Licensed to the Apache Software Foundation (ASF) under one or more
+        # contributor license agreements.  See the NOTICE file distributed with
+        # this work for additional information regarding copyright ownership.
+        # The ASF licenses this file to You under the Apache License, Version 2.0
+        # (the "License"); you may not use this file except in compliance with
+        # the License.  You may obtain a copy of the License at
+        #
+        #    http://www.apache.org/licenses/LICENSE-2.0
+        #
+        # Unless required by applicable law or agreed to in writing, software
+        # distributed under the License is distributed on an "AS IS" BASIS,
+        # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        # See the License for the specific language governing permissions and
+        # limitations under the License.
+        #
+        apiVersion: v1
+        kind: Pod
+        metadata:
+          labels:
+            template-label-key: executor-template-label-value
+        spec:
+          containers:
+            - name: test-executor-container
+              image: will-be-overwritten
+          # extra toleration to support spot instances
+          tolerations:
+            - key: "kubernetes.azure.com/scalesetpriority"
+              operator: "Equal"
+              value: "spot"
+              effect: "NoSchedule"
```
```diff
@@ -85,10 +85,17 @@ daskhub:
         c.KubeSpawner.extra_labels = {}
       kubespawner: |
         c.KubeSpawner.start_timeout = 15 * 60  # 15 minutes
+        # pass the parent namespace through, needed for pre_spawn_hook to copy resources
+        c.KubeSpawner.environment['NAMESPACE_PARENT'] = c.KubeSpawner.namespace
```
> Review comment: TODO: try the template replacement instead of setting the parent namespace variable.
```diff
+        # the hub allocates notebooks in per-user namespaces
+        c.KubeSpawner.enable_user_namespaces = True
+        # the hub url must be reachable across namespaces
+        c.KubeSpawner.hub_connect_url = "http://hub.${namespace}.svc.cluster.local:8081"
+
       01-add-dask-gateway-values: |
         # The daskhub helm chart doesn't correctly handle hub.baseUrl.
         # DASK_GATEWAY__PUBLIC_ADDRESS set via terraform
-        c.KubeSpawner.environment["DASK_GATEWAY__ADDRESS"] = "http://proxy-http:8000/compute/services/dask-gateway/"
+        c.KubeSpawner.environment["DASK_GATEWAY__ADDRESS"] = "http://proxy-http.${namespace}.svc.cluster.local:8000/compute/services/dask-gateway/"
         c.KubeSpawner.environment["DASK_GATEWAY__PUBLIC_ADDRESS"] = "https://${jupyterhub_host}/compute/services/dask-gateway/"
       templates: |
         c.JupyterHub.template_paths.insert(0, "/etc/jupyterhub/templates")
```
```diff
@@ -97,8 +104,97 @@ daskhub:
         # Sets the following
         # 1. environment variable PC_SDK_SUBSCRIPTION_KEY
         # ---------------------------------------------------
+        from kubernetes.client import RbacAuthorizationV1Api
+        from kubernetes.client.rest import ApiException
+        from kubernetes.client.models import V1Role, V1PolicyRule, V1ObjectMeta, V1Subject, V1RoleRef, V1RoleBinding, V1ServiceAccount
+
+        async def ensure_service_account_role(spawner, name, namespace, role_name):
+            api = spawner.api
+            try:
+                api.create_namespaced_service_account(namespace, V1ServiceAccount(metadata=V1ObjectMeta(name=name)))
+            except ApiException as e:
+                # a 409 just means the service account already exists, which is fine
+                if e.status != 409:
+                    spawner.log.exception(f'Failed to create service account {name} in the {namespace} namespace')
+                    raise
+            try:
+                rules = [
+                    V1PolicyRule(
+                        [''],
+                        resources=['pods', 'services', 'configmaps'],
+                        verbs=['get', 'watch', 'list', 'create', 'delete', 'update']
+                    )
+                ]
+                role = V1Role(rules=rules)
+                role.metadata = V1ObjectMeta(namespace=namespace, name=role_name)
+
+                rbac = RbacAuthorizationV1Api()
+                rbac.create_namespaced_role(namespace, role)
+            except ApiException as e:
+                # a 409 just means the role already exists, which is fine
+                if e.status != 409:
+                    spawner.log.exception(f'Failed to create role {role_name} for service account {name} in the {namespace} namespace')
+                    raise
+            try:
+                subject = V1Subject(kind='ServiceAccount', name=name, namespace=namespace)
+                role_ref = V1RoleRef(api_group='rbac.authorization.k8s.io', kind='Role', name=role_name)
+                metadata = V1ObjectMeta(name=f'{role_name}-binding')
+                role_binding = V1RoleBinding(metadata=metadata, role_ref=role_ref, subjects=[subject])
+                rbac = RbacAuthorizationV1Api()
+                rbac.create_namespaced_role_binding(namespace=namespace, body=role_binding)
+            except ApiException as e:
+                # a 409 just means the binding already exists, which is fine
+                if e.status != 409:
+                    spawner.log.exception(f'Failed to create role binding for {role_name} and service account {name} in the {namespace} namespace')
+                    raise
+
+        async def pre_spawn_hook(spawner):
+            spawner.environment['NAMESPACE_USER'] = spawner.namespace
+            namespace_parent = spawner.environment['NAMESPACE_PARENT']
+
+            # create the user namespace before running the spawner
+            if spawner.enable_user_namespaces:
+                await spawner._ensure_namespace()
+                await ensure_service_account_role(spawner, 'default', spawner.namespace, 'default-role')
+
+            # copy secrets and configmaps into the new namespace
+            api = spawner.api
+            for s in api.list_namespaced_secret(namespace_parent).items:
+                s.metadata.namespace = spawner.namespace
+                s.metadata.resource_version = None
+                try:
+                    api.create_namespaced_secret(spawner.namespace, s)
+                except ApiException as e:
+                    if e.status == 409:
+                        # it already exists, patch it instead
+                        api.patch_namespaced_secret(s.metadata.name, spawner.namespace, s)
+                    else:
+                        spawner.log.exception(f'Failed to create secret {s.metadata.name} in namespace {spawner.namespace}')
+                        raise
+
+            for m in api.list_namespaced_config_map(namespace_parent).items:
+                m.metadata.namespace = spawner.namespace
+                m.metadata.resource_version = None
+                try:
+                    api.create_namespaced_config_map(spawner.namespace, m)
+                except ApiException as e:
+                    if e.status == 409:
+                        # it already exists, patch it instead
+                        api.patch_namespaced_config_map(m.metadata.name, spawner.namespace, m)
+                    else:
+                        spawner.log.exception(f'Failed to create configmap {m.metadata.name} in namespace {spawner.namespace}')
+                        raise
+
+            # unmount the spark default configuration (py env preload) if it is not needed; for details see
+            # https://github.com/jupyterhub/kubespawner/issues/501
+            # https://discourse.jupyter.org/t/tailoring-spawn-options-and-server-configuration-to-certain-users/8449
+            if spawner.user_options.get('profile', '') != 'pyspark':
+                spawner.volume_mounts = list(filter(lambda e: 'spark' not in e.get('subPath', ''), spawner.volume_mounts))
+            # expose the Spark UI (needed only for the pyspark profile)
+            else:
+                spawner.extra_container_config = {'ports': [
+                    {'containerPort': 8888, 'name': 'notebook-port', 'protocol': 'TCP'},
+                    {'containerPort': 4040, 'name': 'spark-ui', 'protocol': 'TCP'}
+                ]}
+
         username = spawner.user.name
         # `username` is an email address. We use that email address to look up the
         # user in the Django App
```
```diff
@@ -147,13 +243,31 @@ daskhub:

+        c.KubeSpawner.pre_spawn_hook = pre_spawn_hook
+
+      # this is the spawner post-stop hook, unrelated to the notebook lifecycle;
+      # we don't need it
+      post_stop_hook: |
```
> Review comment: We don't need it here; I will remove it unless we want to keep it here for fun. cc @TomAugspurger
```diff
+        from kubernetes.client.rest import ApiException
+        async def post_stop_hook(spawner):
+            try:
+                spawner.api.delete_namespace(spawner.namespace)
+            except ApiException as e:
+                # a 404 just means the namespace is already removed, which is fine
+                if e.status != 404:
+                    spawner.log.exception(f'Failed to delete namespace {spawner.namespace}')
+                    raise
+
+        # c.KubeSpawner.post_stop_hook = post_stop_hook

   proxy:
     https:
       enabled: true
       letsencrypt:
         contactEmail: "[email protected]"

   singleuser:
+    # if not set, the pod still falls back to the default ServiceAccount,
+    # but without its secrets mounted
+    serviceAccountName: default

     # These limits match the "large" profiles, so that a user requesting large will be successfully scheduled.
     # The user scheduler doesn't evict multiple placeholders.
     memory:
```
```diff
@@ -198,6 +312,12 @@ daskhub:
       - name: driven-data
         mountPath: /driven-data/

+    extraFiles:
+      spark_executor_template:
+        mountPath: /etc/spark/executor-template.yml
+      spark_default_configuration:
+        mountPath: /etc/spark-ipython/profile_default/startup/00-spark-conf.py
+
     extraEnv:
       DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE: '{JUPYTER_IMAGE_SPEC}'
       DASK_DISTRIBUTED__DASHBOARD__LINK: '/user/{JUPYTERHUB_USER}/proxy/{port}/status'
```
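The second mount path assumes the pyspark image runs IPython with `IPYTHONDIR=/etc/spark-ipython`, so the kernel executes `00-spark-conf.py` on startup. That wiring is an assumption about the image, and can be checked from a notebook cell:

```python
# sketch: verify the assumed IPython startup wiring inside a pyspark-profile pod
import os

print(os.environ.get("IPYTHONDIR"))  # expected (assumption): /etc/spark-ipython
print(os.path.exists("/etc/spark-ipython/profile_default/startup/00-spark-conf.py"))
```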
```diff
@@ -222,7 +342,7 @@ daskhub:
     auth:
       jupyterhub:
         apiToken: "{{ tf.jupyterhub_dask_gateway_token }}"
-        apiUrl: http://proxy-http:8000/compute/hub/api
+        apiUrl: http://proxy-http.${namespace}.svc.cluster.local:8000/compute/hub/api
     affinity:
       nodeAffinity:
         requiredDuringSchedulingIgnoredDuringExecution:
```
```diff
@@ -23,6 +23,7 @@ module "resources" {
   jupyterhub_singleuser_image_name = "pcccr.azurecr.io/public/planetary-computer/python"
   jupyterhub_singleuser_image_tag  = "2022.01.17.0"
   python_image                     = "pcccr.azurecr.io/public/planetary-computer/python:2022.01.17.0"
+  pyspark_image                    = "daunnc/planetary-computer-pyspark:2021.11.29.0-gdal3.4-3.1-rf"
```
> Review comment: TODO: make PRs against the MSFTPC repos with the containers.
```diff
   r_image                          = "pcccr.azurecr.io/public/planetary-computer/r:2022.01.17.0"
   gpu_pytorch_image                = "pcccr.azurecr.io/public/planetary-computer/gpu-pytorch:2022.01.17.0"
   gpu_tensorflow_image             = "pcccr.azurecr.io/public/planetary-computer/gpu-tensorflow:2022.01.17.0"
```
> Review comment: Note: the only difference between the PySpark and Python images is the set of underlying dependencies.