
Commit dc4f8d3

Author: Arthur Rand

[SPARK-612] Change user back to root, but test with nobody (apache#256)

* smoke test
* use nobody in tests
* update docs a little

1 parent 1a3509f · commit dc4f8d3

File tree: 4 files changed (+39, -21 lines)


docs/limitations.md

Lines changed: 23 additions & 9 deletions
@@ -8,17 +8,31 @@ featureMaturity:
 
 ---
 
-* Mesosphere does not provide support for Spark app development, such as writing a Python app to process data from Kafka or writing Scala code to process data from HDFS.
+* Mesosphere does not provide support for Spark app development, such as writing a Python app to process data from
+  Kafka or writing Scala code to process data from HDFS.
 
-* Spark jobs run in Docker containers. The first time you run a Spark job on a node, it might take longer than you expect because of the `docker pull`.
+* Spark jobs run in Docker containers. The first time you run a Spark job on a node, it might take longer than you
+  expect because of the `docker pull`.
 
-* DC/OS Apache Spark only supports running the Spark shell from within a DC/OS cluster. See the Spark Shell section for more information. For interactive analytics, we recommend Zeppelin, which supports visualizations and dynamic dependency management.
+* DC/OS Apache Spark only supports running the Spark shell from within a DC/OS cluster. See the Spark Shell section
+  for more information. For interactive analytics, we recommend Zeppelin, which supports visualizations and dynamic
+  dependency management.
 
-* With Spark SSL/TLS enabled,
-if you specify environment-based secrets with `spark.mesos.[driver|executor].secret.envkeys`,
-the keystore and truststore secrets will also show up as environment-based secrets,
-due to the way secrets are implemented. You can ignore these extra environment variables.
+* With Spark SSL/TLS enabled, if you specify environment-based secrets with
+  `spark.mesos.[driver|executor].secret.envkeys`, the keystore and truststore secrets will also show up as
+  environment-based secrets, due to the way secrets are implemented. You can ignore these extra environment variables.
 
-* When using Kerberos and HDFS, the Spark Driver generates delegation tokens and distributes them to it's Executors via RPC. Authentication of the Executors with the Driver is done with a [shared secret][https://spark.apache.org/docs/latest/security.html#spark-security]. Without authentication, it is possible for executor containers to register with the Driver and retrieve the delegation tokens. Currently, for Spark on Mesos this requires manually setting up the default configuration in Spark to use authentication and setting the secret. Mesosphere is actively working to make this an automated and secure process in future releases.
+* When using Kerberos and HDFS, the Spark Driver generates delegation tokens and distributes them to its Executors
+  via RPC. Authentication of the Executors with the Driver is done with a
+  [shared secret](https://spark.apache.org/docs/latest/security.html#spark-security). Without authentication, it is possible
+  for executor containers to register with the Driver and retrieve the delegation tokens. To secure delegation token
+  distribution, use the `--executor-auth-secret` option.
 
-* Spark runs all of its components in Docker containers. Since the Docker image contains a full Linux userspace with its own `/etc/users` file, it is possible for the default service user `nobody` to have a different UID inside the container than on the host system. Although user `nobody` has UID 65534 by convention on many systems, this is not always the case. As Mesos does not perform UID mapping between Linux user namespaces, specifying a service user of `nobody` in this case will cause access failures when the container user attempts to open or execute a filesystem resource owned by a user with a different UID, preventing the service from launching. If the hosts in your cluster have a UID for `nobody` other than 65534, you will need to specify a service user of root to run DC/OS Spark successfully.
+* Spark runs all of its components in Docker containers. Since the Docker image contains a full Linux userspace with
+  its own `/etc/users` file, it is possible for the user `nobody` to have a different UID inside the
+  container than on the host system. Although user `nobody` has UID 65534 by convention on many systems, this is not
+  always the case. As Mesos does not perform UID mapping between Linux user namespaces, specifying a service user of
+  `nobody` in this case will cause access failures when the container user attempts to open or execute a filesystem
+  resource owned by a user with a different UID, preventing the service from launching. If the hosts in your cluster
+  have a UID for `nobody` other than 65534, you will need to maintain the default user (`root`) to run DC/OS Spark
+  successfully.
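
The UID caveat in the last bullet is easy to verify before picking a service user. A minimal sketch (not part of this commit, standard library only) that checks which UID a host assigns to `nobody`:

    import pwd

    # The doc notes `nobody` is UID 65534 by convention but not by guarantee,
    # and Mesos does no UID mapping between host and container userspaces.
    uid = pwd.getpwnam("nobody").pw_uid
    if uid != 65534:
        print("Host maps `nobody` to UID {}; keep the default `root` "
              "service user for DC/OS Spark.".format(uid))
    else:
        print("`nobody` is UID 65534 here, matching the container convention.")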

tests/test_spark.py

Lines changed: 1 addition & 4 deletions
@@ -236,12 +236,9 @@ def make_credential_secret(envvar, secret_path):
 @pytest.mark.smoke
 def test_marathon_group():
     app_id = utils.FOLDERED_SPARK_APP_NAME
-    options = {"service": {"name": app_id}}
-    utils.require_spark(options=options, service_name=app_id)
+    utils.require_spark(service_name=app_id, marathon_group=app_id)
     test_sparkPi(app_name=app_id)
     LOGGER.info("Uninstalling app_id={}".format(app_id))
-    #shakedown.uninstall_package_and_wait(SPARK_PACKAGE_NAME, app_id)
-
 
 
 @pytest.mark.sanity

tests/utils.py

Lines changed: 14 additions & 7 deletions
@@ -62,10 +62,13 @@ def streaming_job_running(job_name):
     return len([x for x in f.dict()["tasks"] if x["state"] == "TASK_RUNNING"]) > 0
 
 
-def require_spark(options=None, service_name=None, use_hdfs=False, use_history=False):
+def require_spark(service_name=None, use_hdfs=False, use_history=False, marathon_group=None,
+                  strict_mode=is_strict(), user="nobody"):
     LOGGER.info("Ensuring Spark is installed.")
-
-    _require_package(SPARK_PACKAGE_NAME, service_name, _get_spark_options(options, use_hdfs, use_history))
+    _require_package(
+        SPARK_PACKAGE_NAME,
+        service_name,
+        _get_spark_options(use_hdfs, use_history, marathon_group, strict_mode, user))
     _wait_for_spark(service_name)
     _require_spark_cli()
 
@@ -132,9 +135,13 @@ def no_spark_jobs(service_name):
     return len(driver_ips) == 0
 
 
-def _get_spark_options(options, use_hdfs, use_history):
-    if options is None:
-        options = {}
+def _get_spark_options(use_hdfs, use_history, marathon_group, strict_mode, user):
+    options = {}
+    options["service"] = options.get("service", {})
+    options["service"]["user"] = user
+
+    if marathon_group is not None:
+        options["service"]["name"] = marathon_group
 
     if use_hdfs:
         options["hdfs"] = options.get("hdfs", {})
@@ -149,7 +156,7 @@ def _get_spark_options(options, use_hdfs, use_history):
         options["service"] = options.get("service", {})
         options["service"]["spark-history-server-url"] = history_url
 
-    if is_strict():
+    if strict_mode:
         options["service"] = options.get("service", {})
         options["service"]["service_account"] = SPARK_SERVICE_ACCOUNT
         options["service"]["service_account_secret"] = SPARK_SERVICE_ACCOUNT_SECRET

universe/config.json

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@
     "user": {
       "description": "Executors will run as this user.",
       "type": "string",
-      "default": "nobody"
+      "default": "root"
     },
     "docker-image": {
       "type": "string",
