diff --git a/test-horovod-tensorflow/test-horovod-tensorflow.ipynb b/test-horovod-tensorflow/test-horovod-tensorflow.ipynb index 485f9d4..375d538 100644 --- a/test-horovod-tensorflow/test-horovod-tensorflow.ipynb +++ b/test-horovod-tensorflow/test-horovod-tensorflow.ipynb @@ -44,8 +44,8 @@ }, "outputs": [], "source": [ - "# per ML-3824 need to install tensorflow and mlrun in the same command\n", - "!pip install plotly tensorflow==2.15.1 mlrun # TODO: remove 2.15.1 here and in functions requirements after ML-10271 fix" + "# Pin tensorflow to 2.18.1: the horovod build fails against tensorflow==2.19.1\n", + "!pip install plotly tensorflow==2.18.1 mlrun" ] }, { @@ -118,7 +118,7 @@ "def get_model() -> tf.keras.Model:\n", " # Build the model architecture:\n", " inputs = tf.keras.Input(shape=(28, 28))\n", - " x = tf.keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(inputs)\n", + " x = tf.keras.layers.Rescaling(1.0 / 255)(inputs)\n", " x = tf.keras.layers.Flatten()(x)\n", " x = tf.keras.layers.Dense(128, activation=\"relu\")(x)\n", " x = tf.keras.layers.Dense(128, activation=\"relu\")(x)\n", @@ -147,7 +147,7 @@ "\n", " # Compile the model:\n", " model.compile(\n", - " optimizer=tf.keras.optimizers.legacy.SGD(learning_rate=0.1, momentum=0.9),\n", + " optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9),\n", " loss=\"sparse_categorical_crossentropy\",\n", " metrics=[\"accuracy\"]\n", " )\n", @@ -221,7 +221,7 @@ "outputs": [], "source": [ "# Create the job function:\n", - "job_function = project.set_function(\"./func.py\", name=\"train_job\", kind=\"job\", image=\"mlrun/mlrun\", handler=\"train\", requirements=[\"tensorflow==2.15.1\"])\n", + "job_function = project.set_function(\"./func.py\", name=\"train_job\", kind=\"job\", image=\"mlrun/mlrun\", handler=\"train\", requirements=[\"tensorflow==2.18.1\"])\n", "job_function.apply(mlrun.auto_mount())\n", "job_function.deploy()" ] @@ -238,11 +238,15 @@ "outputs": [], "source": [ "# Create the open mpi function:\n", - "mpijob_function 
= project.set_function(\"./func.py\", name=\"train_mpijob\", kind=\"mpijob\", image=\"mlrun/mlrun\", handler=\"train\", requirements=[\"horovod[tensorflow]\"])\n", + "mpijob_function = project.set_function(\"./func.py\", name=\"train_mpijob\", kind=\"mpijob\", image=\"mlrun/mlrun\", handler=\"train\")\n", "mpijob_function.apply(mlrun.auto_mount())\n", "mpijob_function.spec.replicas = N_RANKS\n", - "mpijob_function.with_commands([\"pip install tensorflow==2.15.1\"])\n", - "mpijob_function.deploy(builder_env={\"HOROVOD_WITH_TENSORFLOW\": \"1\"})" + "mpijob_function.with_commands([\n", + " \"apt-get update && apt-get install -y build-essential g++ cmake llvm-dev libclang-dev\",\n", + " \"pip install tensorflow==2.18.1\",\n", + " \"HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITHOUT_XLA=1 HOROVOD_WITHOUT_MLX=1 pip install horovod[tensorflow]\",\n", + "])\n", + "mpijob_function.deploy()\n" ] }, { @@ -384,6 +388,22 @@ "# assert mpijob_time < job_time\n", "# assert np.isclose(job_accuracy, mpijob_accuracy, atol=0.1)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31ba0442-a98b-46fc-a680-9dfcdf1dfe9a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81d92305-268f-4ead-87df-5ab3a30f294b", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {