From fcbe1e2d9858d429b76cc551844a349abfaf8f23 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Wed, 16 Dec 2020 17:54:40 +0000 Subject: [PATCH] Disable JAX memory preallocation JAX's default GPU memory preallocation is causing other GPU library tests to fail because no GPU memory is left available, with errors like: ``` lightgbm.basic.LightGBMError: Memory Object Allocation Failure ``` BUG=173553533 --- test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test b/test index 08abd6a7..568df4c9 100755 --- a/test +++ b/test @@ -88,6 +88,11 @@ fi # This is causing issue when other libraries are trying to run tests using a GPU. # See: https://www.tensorflow.org/guide/gpu#allowing_gpu_memory_growth # +# Note about `XLA_PYTHON_CLIENT_PREALLOCATE`. By default, JAX preallocates 90% +# of the GPU memory, which causes issues when other libraries try to run +# tests using a GPU. +# See: https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html +# # Note about `--hostname localhost` (b/158137436) # hostname defaults to the container name which fails DNS name # resolution with --net=none (required to keep tests hermetic). See details in bug. @@ -98,6 +103,7 @@ docker run --rm -t --read-only --net=none \ -e KAGGLE_DATA_PROXY_URL=http://127.0.0.1:8000 \ -e KAGGLE_DATA_PROXY_PROJECT=test \ -e TF_FORCE_GPU_ALLOW_GROWTH=true \ + -e XLA_PYTHON_CLIENT_PREALLOCATE=false \ --hostname localhost \ --shm-size=2g \ -v $PWD:/input:ro -v /tmp/python-build/working:/working \