From 792a9ea0e067478e01825ef89fc16286a8fa2c9d Mon Sep 17 00:00:00 2001 From: Bryan Keane Date: Tue, 29 Jul 2025 17:24:45 +0100 Subject: [PATCH 01/33] feat(RHOAIENG-26480): Run RayJobs against existing RayClusters --- poetry.lock | 1364 +++++++++-------- pyproject.toml | 8 + src/codeflare_sdk/__init__.py | 1 + src/codeflare_sdk/ray/__init__.py | 4 + .../ray/cluster/build_ray_cluster.py | 2 + src/codeflare_sdk/ray/cluster/cluster.py | 8 +- src/codeflare_sdk/ray/cluster/test_cluster.py | 10 +- src/codeflare_sdk/ray/rayjobs/__init__.py | 1 + src/codeflare_sdk/ray/rayjobs/rayjob.py | 111 ++ src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 88 ++ .../appwrapper/unit-test-all-params.yaml | 2 + tests/test_cluster_yamls/kueue/aw_kueue.yaml | 2 + .../kueue/ray_cluster_kueue.yaml | 2 + .../ray/default-appwrapper.yaml | 2 + .../ray/default-ray-cluster.yaml | 2 + .../ray/unit-test-all-params.yaml | 2 + 16 files changed, 943 insertions(+), 666 deletions(-) create mode 100644 src/codeflare_sdk/ray/rayjobs/__init__.py create mode 100644 src/codeflare_sdk/ray/rayjobs/rayjob.py create mode 100644 src/codeflare_sdk/ray/rayjobs/test_rayjob.py diff --git a/poetry.lock b/poetry.lock index 49e45352..293df340 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -14,103 +14,103 @@ files = [ [[package]] name = "aiohttp" -version = "3.12.12" +version = "3.12.14" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "aiohttp-3.12.12-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6f25e9d274d6abbb15254f76f100c3984d6b9ad6e66263cc60a465dd5c7e48f5"}, - {file = "aiohttp-3.12.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b8ec3c1a1c13d24941b5b913607e57b9364e4c0ea69d5363181467492c4b2ba6"}, - {file = "aiohttp-3.12.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81ef2f9253c327c211cb7b06ea2edd90e637cf21c347b894d540466b8d304e08"}, - {file = "aiohttp-3.12.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28ded835c3663fd41c9ad44685811b11e34e6ac9a7516a30bfce13f6abba4496"}, - {file = "aiohttp-3.12.12-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a4b78ccf254fc10605b263996949a94ca3f50e4f9100e05137d6583e266b711e"}, - {file = "aiohttp-3.12.12-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f4a5af90d5232c41bb857568fe7d11ed84408653ec9da1ff999cc30258b9bd1"}, - {file = "aiohttp-3.12.12-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffa5205c2f53f1120e93fdf2eca41b0f6344db131bc421246ee82c1e1038a14a"}, - {file = "aiohttp-3.12.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68301660f0d7a3eddfb84f959f78a8f9db98c76a49b5235508fa16edaad0f7c"}, - {file = "aiohttp-3.12.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db874d3b0c92fdbb553751af9d2733b378c25cc83cd9dfba87f12fafd2dc9cd5"}, - {file = "aiohttp-3.12.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5e53cf9c201b45838a2d07b1f2d5f7fec9666db7979240002ce64f9b8a1e0cf2"}, - {file = 
"aiohttp-3.12.12-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:8687cc5f32b4e328c233acd387d09a1b477007896b2f03c1c823a0fd05f63883"}, - {file = "aiohttp-3.12.12-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ee537ad29de716a3d8dc46c609908de0c25ffeebf93cd94a03d64cdc07d66d0"}, - {file = "aiohttp-3.12.12-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:411f821be5af6af11dc5bed6c6c1dc6b6b25b91737d968ec2756f9baa75e5f9b"}, - {file = "aiohttp-3.12.12-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f90319d94cf5f9786773237f24bd235a7b5959089f1af8ec1154580a3434b503"}, - {file = "aiohttp-3.12.12-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73b148e606f34e9d513c451fd65efe1091772659ca5703338a396a99f60108ff"}, - {file = "aiohttp-3.12.12-cp310-cp310-win32.whl", hash = "sha256:d40e7bfd577fdc8a92b72f35dfbdd3ec90f1bc8a72a42037fefe34d4eca2d4a1"}, - {file = "aiohttp-3.12.12-cp310-cp310-win_amd64.whl", hash = "sha256:65c7804a2343893d6dea9fce69811aea0a9ac47f68312cf2e3ee1668cd9a387f"}, - {file = "aiohttp-3.12.12-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:38823fe0d8bc059b3eaedb263fe427d887c7032e72b4ef92c472953285f0e658"}, - {file = "aiohttp-3.12.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10237f2c34711215d04ed21da63852ce023608299554080a45c576215d9df81c"}, - {file = "aiohttp-3.12.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:563ec477c0dc6d56fc7f943a3475b5acdb399c7686c30f5a98ada24bb7562c7a"}, - {file = "aiohttp-3.12.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3d05c46a61aca7c47df74afff818bc06a251ab95d95ff80b53665edfe1e0bdf"}, - {file = "aiohttp-3.12.12-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:277c882916759b4a6b6dc7e2ceb124aad071b3c6456487808d9ab13e1b448d57"}, - {file = "aiohttp-3.12.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:216abf74b324b0f4e67041dd4fb2819613909a825904f8a51701fbcd40c09cd7"}, - {file = 
"aiohttp-3.12.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65d6cefad286459b68e7f867b9586a821fb7f121057b88f02f536ef570992329"}, - {file = "aiohttp-3.12.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:feaaaff61966b5f4b4eae0b79fc79427f49484e4cfa5ab7d138ecd933ab540a8"}, - {file = "aiohttp-3.12.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a05917780b7cad1755784b16cfaad806bc16029a93d15f063ca60185b7d9ba05"}, - {file = "aiohttp-3.12.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:082c5ec6d262c1b2ee01c63f4fb9152c17f11692bf16f0f100ad94a7a287d456"}, - {file = "aiohttp-3.12.12-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:b265a3a8b379b38696ac78bdef943bdc4f4a5d6bed1a3fb5c75c6bab1ecea422"}, - {file = "aiohttp-3.12.12-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2e0f2e208914ecbc4b2a3b7b4daa759d0c587d9a0b451bb0835ac47fae7fa735"}, - {file = "aiohttp-3.12.12-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9923b025845b72f64d167bca221113377c8ffabd0a351dc18fb839d401ee8e22"}, - {file = "aiohttp-3.12.12-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1ebb213445900527831fecc70e185bf142fdfe5f2a691075f22d63c65ee3c35a"}, - {file = "aiohttp-3.12.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6fc369fb273a8328077d37798b77c1e65676709af5c182cb74bd169ca9defe81"}, - {file = "aiohttp-3.12.12-cp311-cp311-win32.whl", hash = "sha256:58ecd10fda6a44c311cd3742cfd2aea8c4c600338e9f27cb37434d9f5ca9ddaa"}, - {file = "aiohttp-3.12.12-cp311-cp311-win_amd64.whl", hash = "sha256:b0066e88f30be00badffb5ef8f2281532b9a9020863d873ae15f7c147770b6ec"}, - {file = "aiohttp-3.12.12-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:98451ce9ce229d092f278a74a7c2a06b3aa72984673c87796126d7ccade893e9"}, - {file = "aiohttp-3.12.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:adbac7286d89245e1aff42e948503fdc6edf6d5d65c8e305a67c40f6a8fb95f4"}, - 
{file = "aiohttp-3.12.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0728882115bfa85cbd8d0f664c8ccc0cfd5bd3789dd837596785450ae52fac31"}, - {file = "aiohttp-3.12.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf3b9d9e767f9d0e09fb1a31516410fc741a62cc08754578c40abc497d09540"}, - {file = "aiohttp-3.12.12-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c944860e86b9f77a462321a440ccf6fa10f5719bb9d026f6b0b11307b1c96c7b"}, - {file = "aiohttp-3.12.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b1979e1f0c98c06fd0cd940988833b102fa3aa56751f6c40ffe85cabc51f6fd"}, - {file = "aiohttp-3.12.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:120b7dd084e96cfdad85acea2ce1e7708c70a26db913eabb8d7b417c728f5d84"}, - {file = "aiohttp-3.12.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e58f5ae79649ffa247081c2e8c85e31d29623cf2a3137dda985ae05c9478aae"}, - {file = "aiohttp-3.12.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aa5f049e3e2745b0141f13e5a64e7c48b1a1427ed18bbb7957b348f282fee56"}, - {file = "aiohttp-3.12.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7163cc9cf3722d90f1822f8a38b211e3ae2fc651c63bb55449f03dc1b3ff1d44"}, - {file = "aiohttp-3.12.12-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ef97c4d035b721de6607f3980fa3e4ef0ec3aca76474b5789b7fac286a8c4e23"}, - {file = "aiohttp-3.12.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1c14448d6a86acadc3f7b2f4cc385d1fb390acb6f37dce27f86fe629410d92e3"}, - {file = "aiohttp-3.12.12-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a1b6df6255cfc493454c79221183d64007dd5080bcda100db29b7ff181b8832c"}, - {file = "aiohttp-3.12.12-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:60fc7338dfb0626c2927bfbac4785de3ea2e2bbe3d328ba5f3ece123edda4977"}, - {file = 
"aiohttp-3.12.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d2afc72207ef4c9d4ca9fcd00689a6a37ef2d625600c3d757b5c2b80c9d0cf9a"}, - {file = "aiohttp-3.12.12-cp312-cp312-win32.whl", hash = "sha256:8098a48f93b2cbcdb5778e7c9a0e0375363e40ad692348e6e65c3b70d593b27c"}, - {file = "aiohttp-3.12.12-cp312-cp312-win_amd64.whl", hash = "sha256:d1c1879b2e0fc337d7a1b63fe950553c2b9e93c071cf95928aeea1902d441403"}, - {file = "aiohttp-3.12.12-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ea5d604318234427929d486954e3199aded65f41593ac57aa0241ab93dda3d15"}, - {file = "aiohttp-3.12.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e03ff38250b8b572dce6fcd7b6fb6ee398bb8a59e6aa199009c5322d721df4fc"}, - {file = "aiohttp-3.12.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:71125b1fc2b6a94bccc63bbece620906a4dead336d2051f8af9cbf04480bc5af"}, - {file = "aiohttp-3.12.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:784a66f9f853a22c6b8c2bd0ff157f9b879700f468d6d72cfa99167df08c5c9c"}, - {file = "aiohttp-3.12.12-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a5be0b58670b54301404bd1840e4902570a1c3be00358e2700919cb1ea73c438"}, - {file = "aiohttp-3.12.12-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8f13566fc7bf5a728275b434bc3bdea87a7ed3ad5f734102b02ca59d9b510f"}, - {file = "aiohttp-3.12.12-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d736e57d1901683bc9be648aa308cb73e646252c74b4c639c35dcd401ed385ea"}, - {file = "aiohttp-3.12.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2007eaa7aae9102f211c519d1ec196bd3cecb1944a095db19eeaf132b798738"}, - {file = "aiohttp-3.12.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a813e61583cab6d5cdbaa34bc28863acdb92f9f46e11de1b3b9251a1e8238f6"}, - {file = 
"aiohttp-3.12.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e408293aa910b0aea48b86a28eace41d497a85ba16c20f619f0c604597ef996c"}, - {file = "aiohttp-3.12.12-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:f3d31faf290f5a30acba46b388465b67c6dbe8655d183e9efe2f6a1d594e6d9d"}, - {file = "aiohttp-3.12.12-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0b84731697325b023902aa643bd1726d999f5bc7854bc28b17ff410a81151d4b"}, - {file = "aiohttp-3.12.12-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a324c6852b6e327811748446e56cc9bb6eaa58710557922183175816e82a4234"}, - {file = "aiohttp-3.12.12-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:22fd867fbd72612dcf670c90486dbcbaf702cb807fb0b42bc0b7a142a573574a"}, - {file = "aiohttp-3.12.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3e092f1a970223794a4bf620a26c0e4e4e8e36bccae9b0b5da35e6d8ee598a03"}, - {file = "aiohttp-3.12.12-cp313-cp313-win32.whl", hash = "sha256:7f5f5eb8717ef8ba15ab35fcde5a70ad28bbdc34157595d1cddd888a985f5aae"}, - {file = "aiohttp-3.12.12-cp313-cp313-win_amd64.whl", hash = "sha256:ace2499bdd03c329c054dc4b47361f2b19d5aa470f7db5c7e0e989336761b33c"}, - {file = "aiohttp-3.12.12-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0d0b1c27c05a7d39a50e946ec5f94c3af4ffadd33fa5f20705df42fb0a72ca14"}, - {file = "aiohttp-3.12.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e5928847e6f7b7434921fbabf73fa5609d1f2bf4c25d9d4522b1fcc3b51995cb"}, - {file = "aiohttp-3.12.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7678147c3c85a7ae61559b06411346272ed40a08f54bc05357079a63127c9718"}, - {file = "aiohttp-3.12.12-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f50057f36f2a1d8e750b273bb966bec9f69ee1e0a20725ae081610501f25d555"}, - {file = "aiohttp-3.12.12-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5e834f0f11ff5805d11f0f22b627c75eadfaf91377b457875e4e3affd0b924f"}, - {file = 
"aiohttp-3.12.12-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f94b2e2dea19d09745ef02ed483192260750f18731876a5c76f1c254b841443a"}, - {file = "aiohttp-3.12.12-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b434bfb49564dc1c318989a0ab1d3000d23e5cfd00d8295dc9d5a44324cdd42d"}, - {file = "aiohttp-3.12.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ed76bc80177ddb7c5c93e1a6440b115ed2c92a3063420ac55206fd0832a6459"}, - {file = "aiohttp-3.12.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1282a9acd378f2aed8dc79c01e702b1d5fd260ad083926a88ec7e987c4e0ade"}, - {file = "aiohttp-3.12.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:09a213c13fba321586edab1528b530799645b82bd64d79b779eb8d47ceea155a"}, - {file = "aiohttp-3.12.12-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:72eae16a9233561d315e72ae78ed9fc65ab3db0196e56cb2d329c755d694f137"}, - {file = "aiohttp-3.12.12-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f25990c507dbbeefd5a6a17df32a4ace634f7b20a38211d1b9609410c7f67a24"}, - {file = "aiohttp-3.12.12-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:3a2aa255417c8ccf1b39359cd0a3d63ae3b5ced83958dbebc4d9113327c0536a"}, - {file = "aiohttp-3.12.12-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:a4c53b89b3f838e9c25f943d1257efff10b348cb56895f408ddbcb0ec953a2ad"}, - {file = "aiohttp-3.12.12-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b5a49c2dcb32114455ad503e8354624d85ab311cbe032da03965882492a9cb98"}, - {file = "aiohttp-3.12.12-cp39-cp39-win32.whl", hash = "sha256:74fddc0ba8cea6b9c5bd732eb9d97853543586596b86391f8de5d4f6c2a0e068"}, - {file = "aiohttp-3.12.12-cp39-cp39-win_amd64.whl", hash = "sha256:ddf40ba4a1d0b4d232dc47d2b98ae7e937dcbc40bb5f2746bce0af490a64526f"}, - {file = "aiohttp-3.12.12.tar.gz", hash = "sha256:05875595d2483d96cb61fa9f64e75262d7ac6251a7e3c811d8e26f7d721760bd"}, + {file = 
"aiohttp-3.12.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:906d5075b5ba0dd1c66fcaaf60eb09926a9fef3ca92d912d2a0bbdbecf8b1248"}, + {file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c875bf6fc2fd1a572aba0e02ef4e7a63694778c5646cdbda346ee24e630d30fb"}, + {file = "aiohttp-3.12.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fbb284d15c6a45fab030740049d03c0ecd60edad9cd23b211d7e11d3be8d56fd"}, + {file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e360381e02e1a05d36b223ecab7bc4a6e7b5ab15760022dc92589ee1d4238c"}, + {file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aaf90137b5e5d84a53632ad95ebee5c9e3e7468f0aab92ba3f608adcb914fa95"}, + {file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e532a25e4a0a2685fa295a31acf65e027fbe2bea7a4b02cdfbbba8a064577663"}, + {file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eab9762c4d1b08ae04a6c77474e6136da722e34fdc0e6d6eab5ee93ac29f35d1"}, + {file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abe53c3812b2899889a7fca763cdfaeee725f5be68ea89905e4275476ffd7e61"}, + {file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5760909b7080aa2ec1d320baee90d03b21745573780a072b66ce633eb77a8656"}, + {file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:02fcd3f69051467bbaa7f84d7ec3267478c7df18d68b2e28279116e29d18d4f3"}, + {file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:4dcd1172cd6794884c33e504d3da3c35648b8be9bfa946942d353b939d5f1288"}, + {file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:224d0da41355b942b43ad08101b1b41ce633a654128ee07e36d75133443adcda"}, + {file = 
"aiohttp-3.12.14-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e387668724f4d734e865c1776d841ed75b300ee61059aca0b05bce67061dcacc"}, + {file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:dec9cde5b5a24171e0b0a4ca064b1414950904053fb77c707efd876a2da525d8"}, + {file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bbad68a2af4877cc103cd94af9160e45676fc6f0c14abb88e6e092b945c2c8e3"}, + {file = "aiohttp-3.12.14-cp310-cp310-win32.whl", hash = "sha256:ee580cb7c00bd857b3039ebca03c4448e84700dc1322f860cf7a500a6f62630c"}, + {file = "aiohttp-3.12.14-cp310-cp310-win_amd64.whl", hash = "sha256:cf4f05b8cea571e2ccc3ca744e35ead24992d90a72ca2cf7ab7a2efbac6716db"}, + {file = "aiohttp-3.12.14-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f4552ff7b18bcec18b60a90c6982049cdb9dac1dba48cf00b97934a06ce2e597"}, + {file = "aiohttp-3.12.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8283f42181ff6ccbcf25acaae4e8ab2ff7e92b3ca4a4ced73b2c12d8cd971393"}, + {file = "aiohttp-3.12.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:040afa180ea514495aaff7ad34ec3d27826eaa5d19812730fe9e529b04bb2179"}, + {file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b413c12f14c1149f0ffd890f4141a7471ba4b41234fe4fd4a0ff82b1dc299dbb"}, + {file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:1d6f607ce2e1a93315414e3d448b831238f1874b9968e1195b06efaa5c87e245"}, + {file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:565e70d03e924333004ed101599902bba09ebb14843c8ea39d657f037115201b"}, + {file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4699979560728b168d5ab63c668a093c9570af2c7a78ea24ca5212c6cdc2b641"}, + {file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ad5fdf6af93ec6c99bf800eba3af9a43d8bfd66dce920ac905c817ef4a712afe"}, + {file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ac76627c0b7ee0e80e871bde0d376a057916cb008a8f3ffc889570a838f5cc7"}, + {file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:798204af1180885651b77bf03adc903743a86a39c7392c472891649610844635"}, + {file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:4f1205f97de92c37dd71cf2d5bcfb65fdaed3c255d246172cce729a8d849b4da"}, + {file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:76ae6f1dd041f85065d9df77c6bc9c9703da9b5c018479d20262acc3df97d419"}, + {file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a194ace7bc43ce765338ca2dfb5661489317db216ea7ea700b0332878b392cab"}, + {file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:16260e8e03744a6fe3fcb05259eeab8e08342c4c33decf96a9dad9f1187275d0"}, + {file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8c779e5ebbf0e2e15334ea404fcce54009dc069210164a244d2eac8352a44b28"}, + {file = "aiohttp-3.12.14-cp311-cp311-win32.whl", hash = "sha256:a289f50bf1bd5be227376c067927f78079a7bdeccf8daa6a9e65c38bae14324b"}, + {file = "aiohttp-3.12.14-cp311-cp311-win_amd64.whl", hash = "sha256:0b8a69acaf06b17e9c54151a6c956339cf46db4ff72b3ac28516d0f7068f4ced"}, + {file = "aiohttp-3.12.14-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a0ecbb32fc3e69bc25efcda7d28d38e987d007096cbbeed04f14a6662d0eee22"}, + {file = "aiohttp-3.12.14-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0400f0ca9bb3e0b02f6466421f253797f6384e9845820c8b05e976398ac1d81a"}, + {file = "aiohttp-3.12.14-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a56809fed4c8a830b5cae18454b7464e1529dbf66f71c4772e3cfa9cbec0a1ff"}, + {file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:27f2e373276e4755691a963e5d11756d093e346119f0627c2d6518208483fb6d"}, + {file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ca39e433630e9a16281125ef57ece6817afd1d54c9f1bf32e901f38f16035869"}, + {file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c748b3f8b14c77720132b2510a7d9907a03c20ba80f469e58d5dfd90c079a1c"}, + {file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a568abe1b15ce69d4cc37e23020720423f0728e3cb1f9bcd3f53420ec3bfe7"}, + {file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9888e60c2c54eaf56704b17feb558c7ed6b7439bca1e07d4818ab878f2083660"}, + {file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3006a1dc579b9156de01e7916d38c63dc1ea0679b14627a37edf6151bc530088"}, + {file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa8ec5c15ab80e5501a26719eb48a55f3c567da45c6ea5bb78c52c036b2655c7"}, + {file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:39b94e50959aa07844c7fe2206b9f75d63cc3ad1c648aaa755aa257f6f2498a9"}, + {file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:04c11907492f416dad9885d503fbfc5dcb6768d90cad8639a771922d584609d3"}, + {file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:88167bd9ab69bb46cee91bd9761db6dfd45b6e76a0438c7e884c3f8160ff21eb"}, + {file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:791504763f25e8f9f251e4688195e8b455f8820274320204f7eafc467e609425"}, + {file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2785b112346e435dd3a1a67f67713a3fe692d288542f1347ad255683f066d8e0"}, + {file = "aiohttp-3.12.14-cp312-cp312-win32.whl", hash = 
"sha256:15f5f4792c9c999a31d8decf444e79fcfd98497bf98e94284bf390a7bb8c1729"}, + {file = "aiohttp-3.12.14-cp312-cp312-win_amd64.whl", hash = "sha256:3b66e1a182879f579b105a80d5c4bd448b91a57e8933564bf41665064796a338"}, + {file = "aiohttp-3.12.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3143a7893d94dc82bc409f7308bc10d60285a3cd831a68faf1aa0836c5c3c767"}, + {file = "aiohttp-3.12.14-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3d62ac3d506cef54b355bd34c2a7c230eb693880001dfcda0bf88b38f5d7af7e"}, + {file = "aiohttp-3.12.14-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:48e43e075c6a438937c4de48ec30fa8ad8e6dfef122a038847456bfe7b947b63"}, + {file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:077b4488411a9724cecc436cbc8c133e0d61e694995b8de51aaf351c7578949d"}, + {file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d8c35632575653f297dcbc9546305b2c1133391089ab925a6a3706dfa775ccab"}, + {file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b8ce87963f0035c6834b28f061df90cf525ff7c9b6283a8ac23acee6502afd4"}, + {file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a2cf66e32a2563bb0766eb24eae7e9a269ac0dc48db0aae90b575dc9583026"}, + {file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdea089caf6d5cde975084a884c72d901e36ef9c2fd972c9f51efbbc64e96fbd"}, + {file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7865f27db67d49e81d463da64a59365ebd6b826e0e4847aa111056dcb9dc88"}, + {file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0ab5b38a6a39781d77713ad930cb5e7feea6f253de656a5f9f281a8f5931b086"}, + {file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_armv7l.whl", hash = 
"sha256:9b3b15acee5c17e8848d90a4ebc27853f37077ba6aec4d8cb4dbbea56d156933"}, + {file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e4c972b0bdaac167c1e53e16a16101b17c6d0ed7eac178e653a07b9f7fad7151"}, + {file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7442488b0039257a3bdbc55f7209587911f143fca11df9869578db6c26feeeb8"}, + {file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f68d3067eecb64c5e9bab4a26aa11bd676f4c70eea9ef6536b0a4e490639add3"}, + {file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f88d3704c8b3d598a08ad17d06006cb1ca52a1182291f04979e305c8be6c9758"}, + {file = "aiohttp-3.12.14-cp313-cp313-win32.whl", hash = "sha256:a3c99ab19c7bf375c4ae3debd91ca5d394b98b6089a03231d4c580ef3c2ae4c5"}, + {file = "aiohttp-3.12.14-cp313-cp313-win_amd64.whl", hash = "sha256:3f8aad695e12edc9d571f878c62bedc91adf30c760c8632f09663e5f564f4baa"}, + {file = "aiohttp-3.12.14-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b8cc6b05e94d837bcd71c6531e2344e1ff0fb87abe4ad78a9261d67ef5d83eae"}, + {file = "aiohttp-3.12.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1dcb015ac6a3b8facd3677597edd5ff39d11d937456702f0bb2b762e390a21b"}, + {file = "aiohttp-3.12.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3779ed96105cd70ee5e85ca4f457adbce3d9ff33ec3d0ebcdf6c5727f26b21b3"}, + {file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:717a0680729b4ebd7569c1dcd718c46b09b360745fd8eb12317abc74b14d14d0"}, + {file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b5dd3a2ef7c7e968dbbac8f5574ebeac4d2b813b247e8cec28174a2ba3627170"}, + {file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4710f77598c0092239bc12c1fcc278a444e16c7032d91babf5abbf7166463f7b"}, + {file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:f3e9f75ae842a6c22a195d4a127263dbf87cbab729829e0bd7857fb1672400b2"}, + {file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f9c8d55d6802086edd188e3a7d85a77787e50d56ce3eb4757a3205fa4657922"}, + {file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79b29053ff3ad307880d94562cca80693c62062a098a5776ea8ef5ef4b28d140"}, + {file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23e1332fff36bebd3183db0c7a547a1da9d3b4091509f6d818e098855f2f27d3"}, + {file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:a564188ce831fd110ea76bcc97085dd6c625b427db3f1dbb14ca4baa1447dcbc"}, + {file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a7a1b4302f70bb3ec40ca86de82def532c97a80db49cac6a6700af0de41af5ee"}, + {file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:1b07ccef62950a2519f9bfc1e5b294de5dd84329f444ca0b329605ea787a3de5"}, + {file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:938bd3ca6259e7e48b38d84f753d548bd863e0c222ed6ee6ace3fd6752768a84"}, + {file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8bc784302b6b9f163b54c4e93d7a6f09563bd01ff2b841b29ed3ac126e5040bf"}, + {file = "aiohttp-3.12.14-cp39-cp39-win32.whl", hash = "sha256:a3416f95961dd7d5393ecff99e3f41dc990fb72eda86c11f2a60308ac6dcd7a0"}, + {file = "aiohttp-3.12.14-cp39-cp39-win_amd64.whl", hash = "sha256:196858b8820d7f60578f8b47e5669b3195c21d8ab261e39b1d705346458f445f"}, + {file = "aiohttp-3.12.14.tar.gz", hash = "sha256:6e06e120e34d93100de448fd941522e11dafa78ef1a893c179901b7d66aa29f2"}, ] [package.dependencies] aiohappyeyeballs = ">=2.5.0" -aiosignal = ">=1.1.2" +aiosignal = ">=1.4.0" attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" @@ -137,18 +137,19 @@ aiohttp = ">=3.9" [[package]] name = "aiosignal" -version = "1.3.2" +version = "1.4.0" 
description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, - {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, + {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, + {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, ] [package.dependencies] frozenlist = ">=1.1.0" +typing-extensions = {version = ">=4.2", markers = "python_version < \"3.13\""} [[package]] name = "alabaster" @@ -466,14 +467,14 @@ files = [ [[package]] name = "certifi" -version = "2025.4.26" +version = "2025.7.9" description = "Python package for providing Mozilla's CA Bundle." optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" groups = ["main", "docs", "test"] files = [ - {file = "certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3"}, - {file = "certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6"}, + {file = "certifi-2025.7.9-py3-none-any.whl", hash = "sha256:d842783a14f8fdd646895ac26f719a061408834473cfc10203f6a575beb15d39"}, + {file = "certifi-2025.7.9.tar.gz", hash = "sha256:c1d2ec05395148ee10cf672ffc28cd37ea0ab0d99f9cc74c43e588cbd111b079"}, ] [[package]] @@ -688,14 +689,14 @@ markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", [[package]] name = "colorful" -version = "0.5.6" +version = "0.5.7" description = "Terminal string styling done right, in Python." 
optional = false python-versions = "*" groups = ["main"] files = [ - {file = "colorful-0.5.6-py2.py3-none-any.whl", hash = "sha256:eab8c1c809f5025ad2b5238a50bd691e26850da8cac8f90d660ede6ea1af9f1e"}, - {file = "colorful-0.5.6.tar.gz", hash = "sha256:b56d5c01db1dac4898308ea889edcb113fbee3e6ec5df4bacffd61d5241b5b8d"}, + {file = "colorful-0.5.7-py2.py3-none-any.whl", hash = "sha256:495dd3a23151a9568cee8a90fc1174c902ad7ef06655f50b6bddf9e80008da69"}, + {file = "colorful-0.5.7.tar.gz", hash = "sha256:c5452179b56601c178b03d468a5326cc1fe37d9be81d24d0d6bdab36c4b93ad8"}, ] [package.dependencies] @@ -1155,14 +1156,14 @@ tqdm = ["tqdm"] [[package]] name = "google-api-core" -version = "2.25.0" +version = "2.25.1" description = "Google API client core library" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "google_api_core-2.25.0-py3-none-any.whl", hash = "sha256:1db79d1281dcf9f3d10023283299ba38f3dc9f639ec41085968fd23e5bcf512e"}, - {file = "google_api_core-2.25.0.tar.gz", hash = "sha256:9b548e688702f82a34ed8409fb8a6961166f0b7795032f0be8f48308dff4333a"}, + {file = "google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7"}, + {file = "google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8"}, ] [package.dependencies] @@ -1228,67 +1229,67 @@ grpc = ["grpcio (>=1.44.0,<2.0.0)"] [[package]] name = "grpcio" -version = "1.73.0" +version = "1.73.1" description = "HTTP/2-based RPC framework" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "grpcio-1.73.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:d050197eeed50f858ef6c51ab09514856f957dba7b1f7812698260fc9cc417f6"}, - {file = "grpcio-1.73.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:ebb8d5f4b0200916fb292a964a4d41210de92aba9007e33d8551d85800ea16cb"}, - {file = "grpcio-1.73.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = 
"sha256:c0811331b469e3f15dda5f90ab71bcd9681189a83944fd6dc908e2c9249041ef"}, - {file = "grpcio-1.73.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12787c791c3993d0ea1cc8bf90393647e9a586066b3b322949365d2772ba965b"}, - {file = "grpcio-1.73.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c17771e884fddf152f2a0df12478e8d02853e5b602a10a9a9f1f52fa02b1d32"}, - {file = "grpcio-1.73.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:275e23d4c428c26b51857bbd95fcb8e528783597207ec592571e4372b300a29f"}, - {file = "grpcio-1.73.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9ffc972b530bf73ef0f948f799482a1bf12d9b6f33406a8e6387c0ca2098a833"}, - {file = "grpcio-1.73.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d269df64aff092b2cec5e015d8ae09c7e90888b5c35c24fdca719a2c9f35"}, - {file = "grpcio-1.73.0-cp310-cp310-win32.whl", hash = "sha256:072d8154b8f74300ed362c01d54af8b93200c1a9077aeaea79828d48598514f1"}, - {file = "grpcio-1.73.0-cp310-cp310-win_amd64.whl", hash = "sha256:ce953d9d2100e1078a76a9dc2b7338d5415924dc59c69a15bf6e734db8a0f1ca"}, - {file = "grpcio-1.73.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:51036f641f171eebe5fa7aaca5abbd6150f0c338dab3a58f9111354240fe36ec"}, - {file = "grpcio-1.73.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d12bbb88381ea00bdd92c55aff3da3391fd85bc902c41275c8447b86f036ce0f"}, - {file = "grpcio-1.73.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:483c507c2328ed0e01bc1adb13d1eada05cc737ec301d8e5a8f4a90f387f1790"}, - {file = "grpcio-1.73.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c201a34aa960c962d0ce23fe5f423f97e9d4b518ad605eae6d0a82171809caaa"}, - {file = "grpcio-1.73.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:859f70c8e435e8e1fa060e04297c6818ffc81ca9ebd4940e180490958229a45a"}, - {file = "grpcio-1.73.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:e2459a27c6886e7e687e4e407778425f3c6a971fa17a16420227bda39574d64b"}, - {file = "grpcio-1.73.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e0084d4559ee3dbdcce9395e1bc90fdd0262529b32c417a39ecbc18da8074ac7"}, - {file = "grpcio-1.73.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef5fff73d5f724755693a464d444ee0a448c6cdfd3c1616a9223f736c622617d"}, - {file = "grpcio-1.73.0-cp311-cp311-win32.whl", hash = "sha256:965a16b71a8eeef91fc4df1dc40dc39c344887249174053814f8a8e18449c4c3"}, - {file = "grpcio-1.73.0-cp311-cp311-win_amd64.whl", hash = "sha256:b71a7b4483d1f753bbc11089ff0f6fa63b49c97a9cc20552cded3fcad466d23b"}, - {file = "grpcio-1.73.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:fb9d7c27089d9ba3746f18d2109eb530ef2a37452d2ff50f5a6696cd39167d3b"}, - {file = "grpcio-1.73.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:128ba2ebdac41e41554d492b82c34586a90ebd0766f8ebd72160c0e3a57b9155"}, - {file = "grpcio-1.73.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:068ecc415f79408d57a7f146f54cdf9f0acb4b301a52a9e563973dc981e82f3d"}, - {file = "grpcio-1.73.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ddc1cfb2240f84d35d559ade18f69dcd4257dbaa5ba0de1a565d903aaab2968"}, - {file = "grpcio-1.73.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e53007f70d9783f53b41b4cf38ed39a8e348011437e4c287eee7dd1d39d54b2f"}, - {file = "grpcio-1.73.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4dd8d8d092efede7d6f48d695ba2592046acd04ccf421436dd7ed52677a9ad29"}, - {file = "grpcio-1.73.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:70176093d0a95b44d24baa9c034bb67bfe2b6b5f7ebc2836f4093c97010e17fd"}, - {file = "grpcio-1.73.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:085ebe876373ca095e24ced95c8f440495ed0b574c491f7f4f714ff794bbcd10"}, - {file = "grpcio-1.73.0-cp312-cp312-win32.whl", hash = "sha256:cfc556c1d6aef02c727ec7d0016827a73bfe67193e47c546f7cadd3ee6bf1a60"}, - {file = 
"grpcio-1.73.0-cp312-cp312-win_amd64.whl", hash = "sha256:bbf45d59d090bf69f1e4e1594832aaf40aa84b31659af3c5e2c3f6a35202791a"}, - {file = "grpcio-1.73.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:da1d677018ef423202aca6d73a8d3b2cb245699eb7f50eb5f74cae15a8e1f724"}, - {file = "grpcio-1.73.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:36bf93f6a657f37c131d9dd2c391b867abf1426a86727c3575393e9e11dadb0d"}, - {file = "grpcio-1.73.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:d84000367508ade791d90c2bafbd905574b5ced8056397027a77a215d601ba15"}, - {file = "grpcio-1.73.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c98ba1d928a178ce33f3425ff823318040a2b7ef875d30a0073565e5ceb058d9"}, - {file = "grpcio-1.73.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a73c72922dfd30b396a5f25bb3a4590195ee45ecde7ee068acb0892d2900cf07"}, - {file = "grpcio-1.73.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:10e8edc035724aba0346a432060fd192b42bd03675d083c01553cab071a28da5"}, - {file = "grpcio-1.73.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f5cdc332b503c33b1643b12ea933582c7b081957c8bc2ea4cc4bc58054a09288"}, - {file = "grpcio-1.73.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:07ad7c57233c2109e4ac999cb9c2710c3b8e3f491a73b058b0ce431f31ed8145"}, - {file = "grpcio-1.73.0-cp313-cp313-win32.whl", hash = "sha256:0eb5df4f41ea10bda99a802b2a292d85be28958ede2a50f2beb8c7fc9a738419"}, - {file = "grpcio-1.73.0-cp313-cp313-win_amd64.whl", hash = "sha256:38cf518cc54cd0c47c9539cefa8888549fcc067db0b0c66a46535ca8032020c4"}, - {file = "grpcio-1.73.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:1284850607901cfe1475852d808e5a102133461ec9380bc3fc9ebc0686ee8e32"}, - {file = "grpcio-1.73.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:0e092a4b28eefb63eec00d09ef33291cd4c3a0875cde29aec4d11d74434d222c"}, - {file = "grpcio-1.73.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = 
"sha256:33577fe7febffe8ebad458744cfee8914e0c10b09f0ff073a6b149a84df8ab8f"}, - {file = "grpcio-1.73.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:60813d8a16420d01fa0da1fc7ebfaaa49a7e5051b0337cd48f4f950eb249a08e"}, - {file = "grpcio-1.73.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a9c957dc65e5d474378d7bcc557e9184576605d4b4539e8ead6e351d7ccce20"}, - {file = "grpcio-1.73.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3902b71407d021163ea93c70c8531551f71ae742db15b66826cf8825707d2908"}, - {file = "grpcio-1.73.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1dd7fa7276dcf061e2d5f9316604499eea06b1b23e34a9380572d74fe59915a8"}, - {file = "grpcio-1.73.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2d1510c4ea473110cb46a010555f2c1a279d1c256edb276e17fa571ba1e8927c"}, - {file = "grpcio-1.73.0-cp39-cp39-win32.whl", hash = "sha256:d0a1517b2005ba1235a1190b98509264bf72e231215dfeef8db9a5a92868789e"}, - {file = "grpcio-1.73.0-cp39-cp39-win_amd64.whl", hash = "sha256:6228f7eb6d9f785f38b589d49957fca5df3d5b5349e77d2d89b14e390165344c"}, - {file = "grpcio-1.73.0.tar.gz", hash = "sha256:3af4c30918a7f0d39de500d11255f8d9da4f30e94a2033e70fe2a720e184bd8e"}, + {file = "grpcio-1.73.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:2d70f4ddd0a823436c2624640570ed6097e40935c9194482475fe8e3d9754d55"}, + {file = "grpcio-1.73.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:3841a8a5a66830261ab6a3c2a3dc539ed84e4ab019165f77b3eeb9f0ba621f26"}, + {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:628c30f8e77e0258ab788750ec92059fc3d6628590fb4b7cea8c102503623ed7"}, + {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67a0468256c9db6d5ecb1fde4bf409d016f42cef649323f0a08a72f352d1358b"}, + {file = "grpcio-1.73.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b84d65bbdebd5926eb5c53b0b9ec3b3f83408a30e4c20c373c5337b4219ec5"}, + {file 
= "grpcio-1.73.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c54796ca22b8349cc594d18b01099e39f2b7ffb586ad83217655781a350ce4da"}, + {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:75fc8e543962ece2f7ecd32ada2d44c0c8570ae73ec92869f9af8b944863116d"}, + {file = "grpcio-1.73.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6a6037891cd2b1dd1406b388660522e1565ed340b1fea2955b0234bdd941a862"}, + {file = "grpcio-1.73.1-cp310-cp310-win32.whl", hash = "sha256:cce7265b9617168c2d08ae570fcc2af4eaf72e84f8c710ca657cc546115263af"}, + {file = "grpcio-1.73.1-cp310-cp310-win_amd64.whl", hash = "sha256:6a2b372e65fad38842050943f42ce8fee00c6f2e8ea4f7754ba7478d26a356ee"}, + {file = "grpcio-1.73.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:ba2cea9f7ae4bc21f42015f0ec98f69ae4179848ad744b210e7685112fa507a1"}, + {file = "grpcio-1.73.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:d74c3f4f37b79e746271aa6cdb3a1d7e4432aea38735542b23adcabaaee0c097"}, + {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5b9b1805a7d61c9e90541cbe8dfe0a593dfc8c5c3a43fe623701b6a01b01d710"}, + {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3215f69a0670a8cfa2ab53236d9e8026bfb7ead5d4baabe7d7dc11d30fda967"}, + {file = "grpcio-1.73.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5eccfd9577a5dc7d5612b2ba90cca4ad14c6d949216c68585fdec9848befb1"}, + {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dc7d7fd520614fce2e6455ba89791458020a39716951c7c07694f9dbae28e9c0"}, + {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:105492124828911f85127e4825d1c1234b032cb9d238567876b5515d01151379"}, + {file = "grpcio-1.73.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:610e19b04f452ba6f402ac9aa94eb3d21fbc94553368008af634812c4a85a99e"}, + {file = "grpcio-1.73.1-cp311-cp311-win32.whl", hash = 
"sha256:d60588ab6ba0ac753761ee0e5b30a29398306401bfbceffe7d68ebb21193f9d4"}, + {file = "grpcio-1.73.1-cp311-cp311-win_amd64.whl", hash = "sha256:6957025a4608bb0a5ff42abd75bfbb2ed99eda29d5992ef31d691ab54b753643"}, + {file = "grpcio-1.73.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:921b25618b084e75d424a9f8e6403bfeb7abef074bb6c3174701e0f2542debcf"}, + {file = "grpcio-1.73.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:277b426a0ed341e8447fbf6c1d6b68c952adddf585ea4685aa563de0f03df887"}, + {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:96c112333309493c10e118d92f04594f9055774757f5d101b39f8150f8c25582"}, + {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f48e862aed925ae987eb7084409a80985de75243389dc9d9c271dd711e589918"}, + {file = "grpcio-1.73.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83a6c2cce218e28f5040429835fa34a29319071079e3169f9543c3fbeff166d2"}, + {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:65b0458a10b100d815a8426b1442bd17001fdb77ea13665b2f7dc9e8587fdc6b"}, + {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0a9f3ea8dce9eae9d7cb36827200133a72b37a63896e0e61a9d5ec7d61a59ab1"}, + {file = "grpcio-1.73.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:de18769aea47f18e782bf6819a37c1c528914bfd5683b8782b9da356506190c8"}, + {file = "grpcio-1.73.1-cp312-cp312-win32.whl", hash = "sha256:24e06a5319e33041e322d32c62b1e728f18ab8c9dbc91729a3d9f9e3ed336642"}, + {file = "grpcio-1.73.1-cp312-cp312-win_amd64.whl", hash = "sha256:303c8135d8ab176f8038c14cc10d698ae1db9c480f2b2823f7a987aa2a4c5646"}, + {file = "grpcio-1.73.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b310824ab5092cf74750ebd8a8a8981c1810cb2b363210e70d06ef37ad80d4f9"}, + {file = "grpcio-1.73.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:8f5a6df3fba31a3485096ac85b2e34b9666ffb0590df0cd044f58694e6a1f6b5"}, + {file = 
"grpcio-1.73.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:052e28fe9c41357da42250a91926a3e2f74c046575c070b69659467ca5aa976b"}, + {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c0bf15f629b1497436596b1cbddddfa3234273490229ca29561209778ebe182"}, + {file = "grpcio-1.73.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab860d5bfa788c5a021fba264802e2593688cd965d1374d31d2b1a34cacd854"}, + {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:ad1d958c31cc91ab050bd8a91355480b8e0683e21176522bacea225ce51163f2"}, + {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f43ffb3bd415c57224c7427bfb9e6c46a0b6e998754bfa0d00f408e1873dcbb5"}, + {file = "grpcio-1.73.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:686231cdd03a8a8055f798b2b54b19428cdf18fa1549bee92249b43607c42668"}, + {file = "grpcio-1.73.1-cp313-cp313-win32.whl", hash = "sha256:89018866a096e2ce21e05eabed1567479713ebe57b1db7cbb0f1e3b896793ba4"}, + {file = "grpcio-1.73.1-cp313-cp313-win_amd64.whl", hash = "sha256:4a68f8c9966b94dff693670a5cf2b54888a48a5011c5d9ce2295a1a1465ee84f"}, + {file = "grpcio-1.73.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:b4adc97d2d7f5c660a5498bda978ebb866066ad10097265a5da0511323ae9f50"}, + {file = "grpcio-1.73.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:c45a28a0cfb6ddcc7dc50a29de44ecac53d115c3388b2782404218db51cb2df3"}, + {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:10af9f2ab98a39f5b6c1896c6fc2036744b5b41d12739d48bed4c3e15b6cf900"}, + {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45cf17dcce5ebdb7b4fe9e86cb338fa99d7d1bb71defc78228e1ddf8d0de8cbb"}, + {file = "grpcio-1.73.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c502c2e950fc7e8bf05c047e8a14522ef7babac59abbfde6dbf46b7a0d9c71e"}, + {file = 
"grpcio-1.73.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6abfc0f9153dc4924536f40336f88bd4fe7bd7494f028675e2e04291b8c2c62a"}, + {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ed451a0e39c8e51eb1612b78686839efd1a920666d1666c1adfdb4fd51680c0f"}, + {file = "grpcio-1.73.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:07f08705a5505c9b5b0cbcbabafb96462b5a15b7236bbf6bbcc6b0b91e1cbd7e"}, + {file = "grpcio-1.73.1-cp39-cp39-win32.whl", hash = "sha256:ad5c958cc3d98bb9d71714dc69f1c13aaf2f4b53e29d4cc3f1501ef2e4d129b2"}, + {file = "grpcio-1.73.1-cp39-cp39-win_amd64.whl", hash = "sha256:42f0660bce31b745eb9d23f094a332d31f210dcadd0fc8e5be7e4c62a87ce86b"}, + {file = "grpcio-1.73.1.tar.gz", hash = "sha256:7fce2cd1c0c1116cf3850564ebfc3264fba75d3c74a7414373f1238ea365ef87"}, ] [package.extras] -protobuf = ["grpcio-tools (>=1.73.0)"] +protobuf = ["grpcio-tools (>=1.73.1)"] [[package]] name = "h11" @@ -1448,14 +1449,14 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "9.3.0" +version = "9.4.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.11" groups = ["main", "test"] files = [ - {file = "ipython-9.3.0-py3-none-any.whl", hash = "sha256:1a0b6dd9221a1f5dddf725b57ac0cb6fddc7b5f470576231ae9162b9b3455a04"}, - {file = "ipython-9.3.0.tar.gz", hash = "sha256:79eb896f9f23f50ad16c3bc205f686f6e030ad246cc309c6279a242b14afe9d8"}, + {file = "ipython-9.4.0-py3-none-any.whl", hash = "sha256:25850f025a446d9b359e8d296ba175a36aedd32e83ca9b5060430fe16801f066"}, + {file = "ipython-9.4.0.tar.gz", hash = "sha256:c033c6d4e7914c3d9768aabe76bbe87ba1dc66a92a05db6bfa1125d81f2ee270"}, ] [package.dependencies] @@ -2033,190 +2034,191 @@ files = [ [[package]] name = "msgpack" -version = "1.1.0" +version = "1.1.1" description = "MessagePack serializer" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = 
"msgpack-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ad442d527a7e358a469faf43fda45aaf4ac3249c8310a82f0ccff9164e5dccd"}, - {file = "msgpack-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:74bed8f63f8f14d75eec75cf3d04ad581da6b914001b474a5d3cd3372c8cc27d"}, - {file = "msgpack-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:914571a2a5b4e7606997e169f64ce53a8b1e06f2cf2c3a7273aa106236d43dd5"}, - {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c921af52214dcbb75e6bdf6a661b23c3e6417f00c603dd2070bccb5c3ef499f5"}, - {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8ce0b22b890be5d252de90d0e0d119f363012027cf256185fc3d474c44b1b9e"}, - {file = "msgpack-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:73322a6cc57fcee3c0c57c4463d828e9428275fb85a27aa2aa1a92fdc42afd7b"}, - {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e1f3c3d21f7cf67bcf2da8e494d30a75e4cf60041d98b3f79875afb5b96f3a3f"}, - {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64fc9068d701233effd61b19efb1485587560b66fe57b3e50d29c5d78e7fef68"}, - {file = "msgpack-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:42f754515e0f683f9c79210a5d1cad631ec3d06cea5172214d2176a42e67e19b"}, - {file = "msgpack-1.1.0-cp310-cp310-win32.whl", hash = "sha256:3df7e6b05571b3814361e8464f9304c42d2196808e0119f55d0d3e62cd5ea044"}, - {file = "msgpack-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:685ec345eefc757a7c8af44a3032734a739f8c45d1b0ac45efc5d8977aa4720f"}, - {file = "msgpack-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d364a55082fb2a7416f6c63ae383fbd903adb5a6cf78c5b96cc6316dc1cedc7"}, - {file = "msgpack-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79ec007767b9b56860e0372085f8504db5d06bd6a327a335449508bbee9648fa"}, - {file = 
"msgpack-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6ad622bf7756d5a497d5b6836e7fc3752e2dd6f4c648e24b1803f6048596f701"}, - {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e59bca908d9ca0de3dc8684f21ebf9a690fe47b6be93236eb40b99af28b6ea6"}, - {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1da8f11a3dd397f0a32c76165cf0c4eb95b31013a94f6ecc0b280c05c91b59"}, - {file = "msgpack-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:452aff037287acb1d70a804ffd022b21fa2bb7c46bee884dbc864cc9024128a0"}, - {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8da4bf6d54ceed70e8861f833f83ce0814a2b72102e890cbdfe4b34764cdd66e"}, - {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:41c991beebf175faf352fb940bf2af9ad1fb77fd25f38d9142053914947cdbf6"}, - {file = "msgpack-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a52a1f3a5af7ba1c9ace055b659189f6c669cf3657095b50f9602af3a3ba0fe5"}, - {file = "msgpack-1.1.0-cp311-cp311-win32.whl", hash = "sha256:58638690ebd0a06427c5fe1a227bb6b8b9fdc2bd07701bec13c2335c82131a88"}, - {file = "msgpack-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd2906780f25c8ed5d7b323379f6138524ba793428db5d0e9d226d3fa6aa1788"}, - {file = "msgpack-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d46cf9e3705ea9485687aa4001a76e44748b609d260af21c4ceea7f2212a501d"}, - {file = "msgpack-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5dbad74103df937e1325cc4bfeaf57713be0b4f15e1c2da43ccdd836393e2ea2"}, - {file = "msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58dfc47f8b102da61e8949708b3eafc3504509a5728f8b4ddef84bd9e16ad420"}, - {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676e5be1b472909b2ee6356ff425ebedf5142427842aa06b4dfd5117d1ca8a2"}, - {file 
= "msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17fb65dd0bec285907f68b15734a993ad3fc94332b5bb21b0435846228de1f39"}, - {file = "msgpack-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a51abd48c6d8ac89e0cfd4fe177c61481aca2d5e7ba42044fd218cfd8ea9899f"}, - {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2137773500afa5494a61b1208619e3871f75f27b03bcfca7b3a7023284140247"}, - {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:398b713459fea610861c8a7b62a6fec1882759f308ae0795b5413ff6a160cf3c"}, - {file = "msgpack-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b"}, - {file = "msgpack-1.1.0-cp312-cp312-win32.whl", hash = "sha256:ad33e8400e4ec17ba782f7b9cf868977d867ed784a1f5f2ab46e7ba53b6e1e1b"}, - {file = "msgpack-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:115a7af8ee9e8cddc10f87636767857e7e3717b7a2e97379dc2054712693e90f"}, - {file = "msgpack-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf"}, - {file = "msgpack-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0f92a83b84e7c0749e3f12821949d79485971f087604178026085f60ce109330"}, - {file = "msgpack-1.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1964df7b81285d00a84da4e70cb1383f2e665e0f1f2a7027e683956d04b734"}, - {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59caf6a4ed0d164055ccff8fe31eddc0ebc07cf7326a2aaa0dbf7a4001cd823e"}, - {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0907e1a7119b337971a689153665764adc34e89175f9a34793307d9def08e6ca"}, - {file = "msgpack-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:65553c9b6da8166e819a6aa90ad15288599b340f91d18f60b2061f402b9a4915"}, - {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7a946a8992941fea80ed4beae6bff74ffd7ee129a90b4dd5cf9c476a30e9708d"}, - {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4b51405e36e075193bc051315dbf29168d6141ae2500ba8cd80a522964e31434"}, - {file = "msgpack-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4c01941fd2ff87c2a934ee6055bda4ed353a7846b8d4f341c428109e9fcde8c"}, - {file = "msgpack-1.1.0-cp313-cp313-win32.whl", hash = "sha256:7c9a35ce2c2573bada929e0b7b3576de647b0defbd25f5139dcdaba0ae35a4cc"}, - {file = "msgpack-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f"}, - {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c40ffa9a15d74e05ba1fe2681ea33b9caffd886675412612d93ab17b58ea2fec"}, - {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1ba6136e650898082d9d5a5217d5906d1e138024f836ff48691784bbe1adf96"}, - {file = "msgpack-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0856a2b7e8dcb874be44fea031d22e5b3a19121be92a1e098f46068a11b0870"}, - {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:471e27a5787a2e3f974ba023f9e265a8c7cfd373632247deb225617e3100a3c7"}, - {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:646afc8102935a388ffc3914b336d22d1c2d6209c773f3eb5dd4d6d3b6f8c1cb"}, - {file = "msgpack-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:13599f8829cfbe0158f6456374e9eea9f44eee08076291771d8ae93eda56607f"}, - {file = "msgpack-1.1.0-cp38-cp38-win32.whl", hash = "sha256:8a84efb768fb968381e525eeeb3d92857e4985aacc39f3c47ffd00eb4509315b"}, - {file = "msgpack-1.1.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:879a7b7b0ad82481c52d3c7eb99bf6f0645dbdec5134a4bddbd16f3506947feb"}, - {file = "msgpack-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:53258eeb7a80fc46f62fd59c876957a2d0e15e6449a9e71842b6d24419d88ca1"}, - {file = "msgpack-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e7b853bbc44fb03fbdba34feb4bd414322180135e2cb5164f20ce1c9795ee48"}, - {file = "msgpack-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3e9b4936df53b970513eac1758f3882c88658a220b58dcc1e39606dccaaf01c"}, - {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46c34e99110762a76e3911fc923222472c9d681f1094096ac4102c18319e6468"}, - {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a706d1e74dd3dea05cb54580d9bd8b2880e9264856ce5068027eed09680aa74"}, - {file = "msgpack-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:534480ee5690ab3cbed89d4c8971a5c631b69a8c0883ecfea96c19118510c846"}, - {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8cf9e8c3a2153934a23ac160cc4cba0ec035f6867c8013cc6077a79823370346"}, - {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3180065ec2abbe13a4ad37688b61b99d7f9e012a535b930e0e683ad6bc30155b"}, - {file = "msgpack-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c5a91481a3cc573ac8c0d9aace09345d989dc4a0202b7fcb312c88c26d4e71a8"}, - {file = "msgpack-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f80bc7d47f76089633763f952e67f8214cb7b3ee6bfa489b3cb6a84cfac114cd"}, - {file = "msgpack-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:4d1b7ff2d6146e16e8bd665ac726a89c74163ef8cd39fa8c1087d4e52d3a2325"}, - {file = "msgpack-1.1.0.tar.gz", hash = "sha256:dd432ccc2c72b914e4cb77afce64aab761c1137cc698be3984eee260bcb2896e"}, + {file = "msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed"}, + {file = "msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8"}, + {file = "msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2"}, + {file = "msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4"}, + {file = "msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0"}, + {file = "msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26"}, + {file = "msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75"}, + {file = "msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338"}, + {file = "msgpack-1.1.1-cp310-cp310-win32.whl", hash = "sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd"}, + {file = "msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8"}, + {file = "msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558"}, + {file = "msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d"}, + {file = "msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0"}, + {file = 
"msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f"}, + {file = "msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704"}, + {file = "msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2"}, + {file = "msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2"}, + {file = "msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752"}, + {file = "msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295"}, + {file = "msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458"}, + {file = "msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238"}, + {file = "msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157"}, + {file = "msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce"}, + {file = "msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a"}, + {file = "msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c"}, + {file = "msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b"}, + {file = "msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef"}, + {file = "msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a"}, + {file = "msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c"}, + {file = "msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4"}, + {file = "msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0"}, + {file = "msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9"}, + {file = "msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8"}, + {file = "msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a"}, + {file = "msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac"}, + {file = "msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b"}, + {file = "msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7"}, + {file = "msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5"}, + {file = "msgpack-1.1.1-cp313-cp313-win32.whl", hash = 
"sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323"}, + {file = "msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69"}, + {file = "msgpack-1.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bba1be28247e68994355e028dcd668316db30c1f758d3241a7b903ac78dcd285"}, + {file = "msgpack-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8f93dcddb243159c9e4109c9750ba5b335ab8d48d9522c5308cd05d7e3ce600"}, + {file = "msgpack-1.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fbbc0b906a24038c9958a1ba7ae0918ad35b06cb449d398b76a7d08470b0ed9"}, + {file = "msgpack-1.1.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:61e35a55a546a1690d9d09effaa436c25ae6130573b6ee9829c37ef0f18d5e78"}, + {file = "msgpack-1.1.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:1abfc6e949b352dadf4bce0eb78023212ec5ac42f6abfd469ce91d783c149c2a"}, + {file = "msgpack-1.1.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:996f2609ddf0142daba4cefd767d6db26958aac8439ee41db9cc0db9f4c4c3a6"}, + {file = "msgpack-1.1.1-cp38-cp38-win32.whl", hash = "sha256:4d3237b224b930d58e9d83c81c0dba7aacc20fcc2f89c1e5423aa0529a4cd142"}, + {file = "msgpack-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:da8f41e602574ece93dbbda1fab24650d6bf2a24089f9e9dbb4f5730ec1e58ad"}, + {file = "msgpack-1.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5be6b6bc52fad84d010cb45433720327ce886009d862f46b26d4d154001994b"}, + {file = "msgpack-1.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a89cd8c087ea67e64844287ea52888239cbd2940884eafd2dcd25754fb72232"}, + {file = "msgpack-1.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d75f3807a9900a7d575d8d6674a3a47e9f227e8716256f35bc6f03fc597ffbf"}, + {file = "msgpack-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d182dac0221eb8faef2e6f44701812b467c02674a322c739355c39e94730cdbf"}, + {file = "msgpack-1.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b13fe0fb4aac1aa5320cd693b297fe6fdef0e7bea5518cbc2dd5299f873ae90"}, + {file = "msgpack-1.1.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:435807eeb1bc791ceb3247d13c79868deb22184e1fc4224808750f0d7d1affc1"}, + {file = "msgpack-1.1.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4835d17af722609a45e16037bb1d4d78b7bdf19d6c0128116d178956618c4e88"}, + {file = "msgpack-1.1.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a8ef6e342c137888ebbfb233e02b8fbd689bb5b5fcc59b34711ac47ebd504478"}, + {file = "msgpack-1.1.1-cp39-cp39-win32.whl", hash = "sha256:61abccf9de335d9efd149e2fff97ed5974f2481b3353772e8e2dd3402ba2bd57"}, + {file = "msgpack-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:40eae974c873b2992fd36424a5d9407f93e97656d999f43fca9d29f820899084"}, + {file = "msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd"}, ] [[package]] name = "multidict" -version = "6.4.4" +version = "6.6.3" description = "multidict implementation" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "multidict-6.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8adee3ac041145ffe4488ea73fa0a622b464cc25340d98be76924d0cda8545ff"}, - {file = "multidict-6.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b61e98c3e2a861035aaccd207da585bdcacef65fe01d7a0d07478efac005e028"}, - {file = "multidict-6.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:75493f28dbadecdbb59130e74fe935288813301a8554dc32f0c631b6bdcdf8b0"}, - {file = "multidict-6.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffc3c6a37e048b5395ee235e4a2a0d639c2349dffa32d9367a42fc20d399772"}, - {file = "multidict-6.4.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", 
hash = "sha256:87cb72263946b301570b0f63855569a24ee8758aaae2cd182aae7d95fbc92ca7"}, - {file = "multidict-6.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bbf7bd39822fd07e3609b6b4467af4c404dd2b88ee314837ad1830a7f4a8299"}, - {file = "multidict-6.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1f7cbd4f1f44ddf5fd86a8675b7679176eae770f2fc88115d6dddb6cefb59bc"}, - {file = "multidict-6.4.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb5ac9e5bfce0e6282e7f59ff7b7b9a74aa8e5c60d38186a4637f5aa764046ad"}, - {file = "multidict-6.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4efc31dfef8c4eeb95b6b17d799eedad88c4902daba39ce637e23a17ea078915"}, - {file = "multidict-6.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9fcad2945b1b91c29ef2b4050f590bfcb68d8ac8e0995a74e659aa57e8d78e01"}, - {file = "multidict-6.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:d877447e7368c7320832acb7159557e49b21ea10ffeb135c1077dbbc0816b598"}, - {file = "multidict-6.4.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:33a12ebac9f380714c298cbfd3e5b9c0c4e89c75fe612ae496512ee51028915f"}, - {file = "multidict-6.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:0f14ea68d29b43a9bf37953881b1e3eb75b2739e896ba4a6aa4ad4c5b9ffa145"}, - {file = "multidict-6.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0327ad2c747a6600e4797d115d3c38a220fdb28e54983abe8964fd17e95ae83c"}, - {file = "multidict-6.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d1a20707492db9719a05fc62ee215fd2c29b22b47c1b1ba347f9abc831e26683"}, - {file = "multidict-6.4.4-cp310-cp310-win32.whl", hash = "sha256:d83f18315b9fca5db2452d1881ef20f79593c4aa824095b62cb280019ef7aa3d"}, - {file = "multidict-6.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:9c17341ee04545fd962ae07330cb5a39977294c883485c8d74634669b1f7fe04"}, - {file = 
"multidict-6.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4f5f29794ac0e73d2a06ac03fd18870adc0135a9d384f4a306a951188ed02f95"}, - {file = "multidict-6.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c04157266344158ebd57b7120d9b0b35812285d26d0e78193e17ef57bfe2979a"}, - {file = "multidict-6.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb61ffd3ab8310d93427e460f565322c44ef12769f51f77277b4abad7b6f7223"}, - {file = "multidict-6.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e0ba18a9afd495f17c351d08ebbc4284e9c9f7971d715f196b79636a4d0de44"}, - {file = "multidict-6.4.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9faf1b1dcaadf9f900d23a0e6d6c8eadd6a95795a0e57fcca73acce0eb912065"}, - {file = "multidict-6.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a4d1cb1327c6082c4fce4e2a438483390964c02213bc6b8d782cf782c9b1471f"}, - {file = "multidict-6.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:941f1bec2f5dbd51feeb40aea654c2747f811ab01bdd3422a48a4e4576b7d76a"}, - {file = "multidict-6.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5f8a146184da7ea12910a4cec51ef85e44f6268467fb489c3caf0cd512f29c2"}, - {file = "multidict-6.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:232b7237e57ec3c09be97206bfb83a0aa1c5d7d377faa019c68a210fa35831f1"}, - {file = "multidict-6.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:55ae0721c1513e5e3210bca4fc98456b980b0c2c016679d3d723119b6b202c42"}, - {file = "multidict-6.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:51d662c072579f63137919d7bb8fc250655ce79f00c82ecf11cab678f335062e"}, - {file = "multidict-6.4.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0e05c39962baa0bb19a6b210e9b1422c35c093b651d64246b6c2e1a7e242d9fd"}, - {file = 
"multidict-6.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d5b1cc3ab8c31d9ebf0faa6e3540fb91257590da330ffe6d2393d4208e638925"}, - {file = "multidict-6.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:93ec84488a384cd7b8a29c2c7f467137d8a73f6fe38bb810ecf29d1ade011a7c"}, - {file = "multidict-6.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b308402608493638763abc95f9dc0030bbd6ac6aff784512e8ac3da73a88af08"}, - {file = "multidict-6.4.4-cp311-cp311-win32.whl", hash = "sha256:343892a27d1a04d6ae455ecece12904d242d299ada01633d94c4f431d68a8c49"}, - {file = "multidict-6.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:73484a94f55359780c0f458bbd3c39cb9cf9c182552177d2136e828269dee529"}, - {file = "multidict-6.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:dc388f75a1c00000824bf28b7633e40854f4127ede80512b44c3cfeeea1839a2"}, - {file = "multidict-6.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:98af87593a666f739d9dba5d0ae86e01b0e1a9cfcd2e30d2d361fbbbd1a9162d"}, - {file = "multidict-6.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aff4cafea2d120327d55eadd6b7f1136a8e5a0ecf6fb3b6863e8aca32cd8e50a"}, - {file = "multidict-6.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:169c4ba7858176b797fe551d6e99040c531c775d2d57b31bcf4de6d7a669847f"}, - {file = "multidict-6.4.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b9eb4c59c54421a32b3273d4239865cb14ead53a606db066d7130ac80cc8ec93"}, - {file = "multidict-6.4.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cf3bd54c56aa16fdb40028d545eaa8d051402b61533c21e84046e05513d5780"}, - {file = "multidict-6.4.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f682c42003c7264134bfe886376299db4cc0c6cd06a3295b41b347044bcb5482"}, - {file = "multidict-6.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a920f9cf2abdf6e493c519492d892c362007f113c94da4c239ae88429835bad1"}, - {file = "multidict-6.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:530d86827a2df6504526106b4c104ba19044594f8722d3e87714e847c74a0275"}, - {file = "multidict-6.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ecde56ea2439b96ed8a8d826b50c57364612ddac0438c39e473fafad7ae1c23b"}, - {file = "multidict-6.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:dc8c9736d8574b560634775ac0def6bdc1661fc63fa27ffdfc7264c565bcb4f2"}, - {file = "multidict-6.4.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7f3d3b3c34867579ea47cbd6c1f2ce23fbfd20a273b6f9e3177e256584f1eacc"}, - {file = "multidict-6.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:87a728af265e08f96b6318ebe3c0f68b9335131f461efab2fc64cc84a44aa6ed"}, - {file = "multidict-6.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9f193eeda1857f8e8d3079a4abd258f42ef4a4bc87388452ed1e1c4d2b0c8740"}, - {file = "multidict-6.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be06e73c06415199200e9a2324a11252a3d62030319919cde5e6950ffeccf72e"}, - {file = "multidict-6.4.4-cp312-cp312-win32.whl", hash = "sha256:622f26ea6a7e19b7c48dd9228071f571b2fbbd57a8cd71c061e848f281550e6b"}, - {file = "multidict-6.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:5e2bcda30d5009996ff439e02a9f2b5c3d64a20151d34898c000a6281faa3781"}, - {file = "multidict-6.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:82ffabefc8d84c2742ad19c37f02cde5ec2a1ee172d19944d380f920a340e4b9"}, - {file = "multidict-6.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6a2f58a66fe2c22615ad26156354005391e26a2f3721c3621504cd87c1ea87bf"}, - {file = "multidict-6.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5883d6ee0fd9d8a48e9174df47540b7545909841ac82354c7ae4cbe9952603bd"}, - {file = "multidict-6.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9abcf56a9511653fa1d052bfc55fbe53dbee8f34e68bd6a5a038731b0ca42d15"}, - {file = "multidict-6.4.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6ed5ae5605d4ad5a049fad2a28bb7193400700ce2f4ae484ab702d1e3749c3f9"}, - {file = "multidict-6.4.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbfcb60396f9bcfa63e017a180c3105b8c123a63e9d1428a36544e7d37ca9e20"}, - {file = "multidict-6.4.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0f1987787f5f1e2076b59692352ab29a955b09ccc433c1f6b8e8e18666f608b"}, - {file = "multidict-6.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0121ccce8c812047d8d43d691a1ad7641f72c4f730474878a5aeae1b8ead8c"}, - {file = "multidict-6.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83ec4967114295b8afd120a8eec579920c882831a3e4c3331d591a8e5bfbbc0f"}, - {file = "multidict-6.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:995f985e2e268deaf17867801b859a282e0448633f1310e3704b30616d269d69"}, - {file = "multidict-6.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d832c608f94b9f92a0ec8b7e949be7792a642b6e535fcf32f3e28fab69eeb046"}, - {file = "multidict-6.4.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d21c1212171cf7da703c5b0b7a0e85be23b720818aef502ad187d627316d5645"}, - {file = "multidict-6.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:cbebaa076aaecad3d4bb4c008ecc73b09274c952cf6a1b78ccfd689e51f5a5b0"}, - {file = "multidict-6.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c93a6fb06cc8e5d3628b2b5fda215a5db01e8f08fc15fadd65662d9b857acbe4"}, - {file = "multidict-6.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8cd8f81f1310182362fb0c7898145ea9c9b08a71081c5963b40ee3e3cac589b1"}, - {file = "multidict-6.4.4-cp313-cp313-win32.whl", hash = 
"sha256:3e9f1cd61a0ab857154205fb0b1f3d3ace88d27ebd1409ab7af5096e409614cd"}, - {file = "multidict-6.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:8ffb40b74400e4455785c2fa37eba434269149ec525fc8329858c862e4b35373"}, - {file = "multidict-6.4.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:6a602151dbf177be2450ef38966f4be3467d41a86c6a845070d12e17c858a156"}, - {file = "multidict-6.4.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d2b9712211b860d123815a80b859075d86a4d54787e247d7fbee9db6832cf1c"}, - {file = "multidict-6.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d2fa86af59f8fc1972e121ade052145f6da22758f6996a197d69bb52f8204e7e"}, - {file = "multidict-6.4.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50855d03e9e4d66eab6947ba688ffb714616f985838077bc4b490e769e48da51"}, - {file = "multidict-6.4.4-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5bce06b83be23225be1905dcdb6b789064fae92499fbc458f59a8c0e68718601"}, - {file = "multidict-6.4.4-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66ed0731f8e5dfd8369a883b6e564aca085fb9289aacabd9decd70568b9a30de"}, - {file = "multidict-6.4.4-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:329ae97fc2f56f44d91bc47fe0972b1f52d21c4b7a2ac97040da02577e2daca2"}, - {file = "multidict-6.4.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c27e5dcf520923d6474d98b96749e6805f7677e93aaaf62656005b8643f907ab"}, - {file = "multidict-6.4.4-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:058cc59b9e9b143cc56715e59e22941a5d868c322242278d28123a5d09cdf6b0"}, - {file = "multidict-6.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:69133376bc9a03f8c47343d33f91f74a99c339e8b58cea90433d8e24bb298031"}, - {file = "multidict-6.4.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = 
"sha256:d6b15c55721b1b115c5ba178c77104123745b1417527ad9641a4c5e2047450f0"}, - {file = "multidict-6.4.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:a887b77f51d3d41e6e1a63cf3bc7ddf24de5939d9ff69441387dfefa58ac2e26"}, - {file = "multidict-6.4.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:632a3bf8f1787f7ef7d3c2f68a7bde5be2f702906f8b5842ad6da9d974d0aab3"}, - {file = "multidict-6.4.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a145c550900deb7540973c5cdb183b0d24bed6b80bf7bddf33ed8f569082535e"}, - {file = "multidict-6.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cc5d83c6619ca5c9672cb78b39ed8542f1975a803dee2cda114ff73cbb076edd"}, - {file = "multidict-6.4.4-cp313-cp313t-win32.whl", hash = "sha256:3312f63261b9df49be9d57aaa6abf53a6ad96d93b24f9cc16cf979956355ce6e"}, - {file = "multidict-6.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:ba852168d814b2c73333073e1c7116d9395bea69575a01b0b3c89d2d5a87c8fb"}, - {file = "multidict-6.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:603f39bd1cf85705c6c1ba59644b480dfe495e6ee2b877908de93322705ad7cf"}, - {file = "multidict-6.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fc60f91c02e11dfbe3ff4e1219c085695c339af72d1641800fe6075b91850c8f"}, - {file = "multidict-6.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:496bcf01c76a70a31c3d746fd39383aad8d685ce6331e4c709e9af4ced5fa221"}, - {file = "multidict-6.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4219390fb5bf8e548e77b428bb36a21d9382960db5321b74d9d9987148074d6b"}, - {file = "multidict-6.4.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef4e9096ff86dfdcbd4a78253090ba13b1d183daa11b973e842465d94ae1772"}, - {file = "multidict-6.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49a29d7133b1fc214e818bbe025a77cc6025ed9a4f407d2850373ddde07fd04a"}, - {file = 
"multidict-6.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e32053d6d3a8b0dfe49fde05b496731a0e6099a4df92154641c00aa76786aef5"}, - {file = "multidict-6.4.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cc403092a49509e8ef2d2fd636a8ecefc4698cc57bbe894606b14579bc2a955"}, - {file = "multidict-6.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5363f9b2a7f3910e5c87d8b1855c478c05a2dc559ac57308117424dfaad6805c"}, - {file = "multidict-6.4.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2e543a40e4946cf70a88a3be87837a3ae0aebd9058ba49e91cacb0b2cd631e2b"}, - {file = "multidict-6.4.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:60d849912350da557fe7de20aa8cf394aada6980d0052cc829eeda4a0db1c1db"}, - {file = "multidict-6.4.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:19d08b4f22eae45bb018b9f06e2838c1e4b853c67628ef8ae126d99de0da6395"}, - {file = "multidict-6.4.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d693307856d1ef08041e8b6ff01d5b4618715007d288490ce2c7e29013c12b9a"}, - {file = "multidict-6.4.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:fad6daaed41021934917f4fb03ca2db8d8a4d79bf89b17ebe77228eb6710c003"}, - {file = "multidict-6.4.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c10d17371bff801af0daf8b073c30b6cf14215784dc08cd5c43ab5b7b8029bbc"}, - {file = "multidict-6.4.4-cp39-cp39-win32.whl", hash = "sha256:7e23f2f841fcb3ebd4724a40032d32e0892fbba4143e43d2a9e7695c5e50e6bd"}, - {file = "multidict-6.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:4d7b50b673ffb4ff4366e7ab43cf1f0aef4bd3608735c5fbdf0bdb6f690da411"}, - {file = "multidict-6.4.4-py3-none-any.whl", hash = "sha256:bd4557071b561a8b3b6075c3ce93cf9bfb6182cb241805c3d66ced3b75eff4ac"}, - {file = "multidict-6.4.4.tar.gz", hash = "sha256:69ee9e6ba214b5245031b76233dd95408a0fd57fdb019ddcc1ead4790932a8e8"}, + {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:a2be5b7b35271f7fff1397204ba6708365e3d773579fe2a30625e16c4b4ce817"}, + {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12f4581d2930840295c461764b9a65732ec01250b46c6b2c510d7ee68872b140"}, + {file = "multidict-6.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dd7793bab517e706c9ed9d7310b06c8672fd0aeee5781bfad612f56b8e0f7d14"}, + {file = "multidict-6.6.3-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:72d8815f2cd3cf3df0f83cac3f3ef801d908b2d90409ae28102e0553af85545a"}, + {file = "multidict-6.6.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:531e331a2ee53543ab32b16334e2deb26f4e6b9b28e41f8e0c87e99a6c8e2d69"}, + {file = "multidict-6.6.3-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:42ca5aa9329a63be8dc49040f63817d1ac980e02eeddba763a9ae5b4027b9c9c"}, + {file = "multidict-6.6.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:208b9b9757060b9faa6f11ab4bc52846e4f3c2fb8b14d5680c8aac80af3dc751"}, + {file = "multidict-6.6.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:acf6b97bd0884891af6a8b43d0f586ab2fcf8e717cbd47ab4bdddc09e20652d8"}, + {file = "multidict-6.6.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:68e9e12ed00e2089725669bdc88602b0b6f8d23c0c95e52b95f0bc69f7fe9b55"}, + {file = "multidict-6.6.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05db2f66c9addb10cfa226e1acb363450fab2ff8a6df73c622fefe2f5af6d4e7"}, + {file = "multidict-6.6.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:0db58da8eafb514db832a1b44f8fa7906fdd102f7d982025f816a93ba45e3dcb"}, + {file = "multidict-6.6.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:14117a41c8fdb3ee19c743b1c027da0736fdb79584d61a766da53d399b71176c"}, + {file = 
"multidict-6.6.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:877443eaaabcd0b74ff32ebeed6f6176c71850feb7d6a1d2db65945256ea535c"}, + {file = "multidict-6.6.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:70b72e749a4f6e7ed8fb334fa8d8496384840319512746a5f42fa0aec79f4d61"}, + {file = "multidict-6.6.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:43571f785b86afd02b3855c5ac8e86ec921b760298d6f82ff2a61daf5a35330b"}, + {file = "multidict-6.6.3-cp310-cp310-win32.whl", hash = "sha256:20c5a0c3c13a15fd5ea86c42311859f970070e4e24de5a550e99d7c271d76318"}, + {file = "multidict-6.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:ab0a34a007704c625e25a9116c6770b4d3617a071c8a7c30cd338dfbadfe6485"}, + {file = "multidict-6.6.3-cp310-cp310-win_arm64.whl", hash = "sha256:769841d70ca8bdd140a715746199fc6473414bd02efd678d75681d2d6a8986c5"}, + {file = "multidict-6.6.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:18f4eba0cbac3546b8ae31e0bbc55b02c801ae3cbaf80c247fcdd89b456ff58c"}, + {file = "multidict-6.6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef43b5dd842382329e4797c46f10748d8c2b6e0614f46b4afe4aee9ac33159df"}, + {file = "multidict-6.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf9bd1fd5eec01494e0f2e8e446a74a85d5e49afb63d75a9934e4a5423dba21d"}, + {file = "multidict-6.6.3-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:5bd8d6f793a787153956cd35e24f60485bf0651c238e207b9a54f7458b16d539"}, + {file = "multidict-6.6.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1bf99b4daf908c73856bd87ee0a2499c3c9a3d19bb04b9c6025e66af3fd07462"}, + {file = "multidict-6.6.3-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0b9e59946b49dafaf990fd9c17ceafa62976e8471a14952163d10a7a630413a9"}, + {file = "multidict-6.6.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash 
= "sha256:e2db616467070d0533832d204c54eea6836a5e628f2cb1e6dfd8cd6ba7277cb7"}, + {file = "multidict-6.6.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7394888236621f61dcdd25189b2768ae5cc280f041029a5bcf1122ac63df79f9"}, + {file = "multidict-6.6.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f114d8478733ca7388e7c7e0ab34b72547476b97009d643644ac33d4d3fe1821"}, + {file = "multidict-6.6.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cdf22e4db76d323bcdc733514bf732e9fb349707c98d341d40ebcc6e9318ef3d"}, + {file = "multidict-6.6.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e995a34c3d44ab511bfc11aa26869b9d66c2d8c799fa0e74b28a473a692532d6"}, + {file = "multidict-6.6.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:766a4a5996f54361d8d5a9050140aa5362fe48ce51c755a50c0bc3706460c430"}, + {file = "multidict-6.6.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:3893a0d7d28a7fe6ca7a1f760593bc13038d1d35daf52199d431b61d2660602b"}, + {file = "multidict-6.6.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:934796c81ea996e61914ba58064920d6cad5d99140ac3167901eb932150e2e56"}, + {file = "multidict-6.6.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9ed948328aec2072bc00f05d961ceadfd3e9bfc2966c1319aeaf7b7c21219183"}, + {file = "multidict-6.6.3-cp311-cp311-win32.whl", hash = "sha256:9f5b28c074c76afc3e4c610c488e3493976fe0e596dd3db6c8ddfbb0134dcac5"}, + {file = "multidict-6.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc7f6fbc61b1c16050a389c630da0b32fc6d4a3d191394ab78972bf5edc568c2"}, + {file = "multidict-6.6.3-cp311-cp311-win_arm64.whl", hash = "sha256:d4e47d8faffaae822fb5cba20937c048d4f734f43572e7079298a6c39fb172cb"}, + {file = "multidict-6.6.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:056bebbeda16b2e38642d75e9e5310c484b7c24e3841dc0fb943206a72ec89d6"}, + {file = "multidict-6.6.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:e5f481cccb3c5c5e5de5d00b5141dc589c1047e60d07e85bbd7dea3d4580d63f"}, + {file = "multidict-6.6.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:10bea2ee839a759ee368b5a6e47787f399b41e70cf0c20d90dfaf4158dfb4e55"}, + {file = "multidict-6.6.3-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:2334cfb0fa9549d6ce2c21af2bfbcd3ac4ec3646b1b1581c88e3e2b1779ec92b"}, + {file = "multidict-6.6.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8fee016722550a2276ca2cb5bb624480e0ed2bd49125b2b73b7010b9090e888"}, + {file = "multidict-6.6.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5511cb35f5c50a2db21047c875eb42f308c5583edf96bd8ebf7d770a9d68f6d"}, + {file = "multidict-6.6.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:712b348f7f449948e0a6c4564a21c7db965af900973a67db432d724619b3c680"}, + {file = "multidict-6.6.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e4e15d2138ee2694e038e33b7c3da70e6b0ad8868b9f8094a72e1414aeda9c1a"}, + {file = "multidict-6.6.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8df25594989aebff8a130f7899fa03cbfcc5d2b5f4a461cf2518236fe6f15961"}, + {file = "multidict-6.6.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:159ca68bfd284a8860f8d8112cf0521113bffd9c17568579e4d13d1f1dc76b65"}, + {file = "multidict-6.6.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e098c17856a8c9ade81b4810888c5ad1914099657226283cab3062c0540b0643"}, + {file = "multidict-6.6.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:67c92ed673049dec52d7ed39f8cf9ebbadf5032c774058b4406d18c8f8fe7063"}, + {file = "multidict-6.6.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:bd0578596e3a835ef451784053cfd327d607fc39ea1a14812139339a18a0dbc3"}, + {file = 
"multidict-6.6.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:346055630a2df2115cd23ae271910b4cae40f4e336773550dca4889b12916e75"}, + {file = "multidict-6.6.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:555ff55a359302b79de97e0468e9ee80637b0de1fce77721639f7cd9440b3a10"}, + {file = "multidict-6.6.3-cp312-cp312-win32.whl", hash = "sha256:73ab034fb8d58ff85c2bcbadc470efc3fafeea8affcf8722855fb94557f14cc5"}, + {file = "multidict-6.6.3-cp312-cp312-win_amd64.whl", hash = "sha256:04cbcce84f63b9af41bad04a54d4cc4e60e90c35b9e6ccb130be2d75b71f8c17"}, + {file = "multidict-6.6.3-cp312-cp312-win_arm64.whl", hash = "sha256:0f1130b896ecb52d2a1e615260f3ea2af55fa7dc3d7c3003ba0c3121a759b18b"}, + {file = "multidict-6.6.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:540d3c06d48507357a7d57721e5094b4f7093399a0106c211f33540fdc374d55"}, + {file = "multidict-6.6.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c19cea2a690f04247d43f366d03e4eb110a0dc4cd1bbeee4d445435428ed35b"}, + {file = "multidict-6.6.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7af039820cfd00effec86bda5d8debef711a3e86a1d3772e85bea0f243a4bd65"}, + {file = "multidict-6.6.3-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:500b84f51654fdc3944e936f2922114349bf8fdcac77c3092b03449f0e5bc2b3"}, + {file = "multidict-6.6.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3fc723ab8a5c5ed6c50418e9bfcd8e6dceba6c271cee6728a10a4ed8561520c"}, + {file = "multidict-6.6.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:94c47ea3ade005b5976789baaed66d4de4480d0a0bf31cef6edaa41c1e7b56a6"}, + {file = "multidict-6.6.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dbc7cf464cc6d67e83e136c9f55726da3a30176f020a36ead246eceed87f1cd8"}, + {file = 
"multidict-6.6.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:900eb9f9da25ada070f8ee4a23f884e0ee66fe4e1a38c3af644256a508ad81ca"}, + {file = "multidict-6.6.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c6df517cf177da5d47ab15407143a89cd1a23f8b335f3a28d57e8b0a3dbb884"}, + {file = "multidict-6.6.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4ef421045f13879e21c994b36e728d8e7d126c91a64b9185810ab51d474f27e7"}, + {file = "multidict-6.6.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:6c1e61bb4f80895c081790b6b09fa49e13566df8fbff817da3f85b3a8192e36b"}, + {file = "multidict-6.6.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e5e8523bb12d7623cd8300dbd91b9e439a46a028cd078ca695eb66ba31adee3c"}, + {file = "multidict-6.6.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ef58340cc896219e4e653dade08fea5c55c6df41bcc68122e3be3e9d873d9a7b"}, + {file = "multidict-6.6.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fc9dc435ec8699e7b602b94fe0cd4703e69273a01cbc34409af29e7820f777f1"}, + {file = "multidict-6.6.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9e864486ef4ab07db5e9cb997bad2b681514158d6954dd1958dfb163b83d53e6"}, + {file = "multidict-6.6.3-cp313-cp313-win32.whl", hash = "sha256:5633a82fba8e841bc5c5c06b16e21529573cd654f67fd833650a215520a6210e"}, + {file = "multidict-6.6.3-cp313-cp313-win_amd64.whl", hash = "sha256:e93089c1570a4ad54c3714a12c2cef549dc9d58e97bcded193d928649cab78e9"}, + {file = "multidict-6.6.3-cp313-cp313-win_arm64.whl", hash = "sha256:c60b401f192e79caec61f166da9c924e9f8bc65548d4246842df91651e83d600"}, + {file = "multidict-6.6.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:02fd8f32d403a6ff13864b0851f1f523d4c988051eea0471d4f1fd8010f11134"}, + {file = "multidict-6.6.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f3aa090106b1543f3f87b2041eef3c156c8da2aed90c63a2fbed62d875c49c37"}, + {file = 
"multidict-6.6.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e924fb978615a5e33ff644cc42e6aa241effcf4f3322c09d4f8cebde95aff5f8"}, + {file = "multidict-6.6.3-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:b9fe5a0e57c6dbd0e2ce81ca66272282c32cd11d31658ee9553849d91289e1c1"}, + {file = "multidict-6.6.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b24576f208793ebae00280c59927c3b7c2a3b1655e443a25f753c4611bc1c373"}, + {file = "multidict-6.6.3-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:135631cb6c58eac37d7ac0df380294fecdc026b28837fa07c02e459c7fb9c54e"}, + {file = "multidict-6.6.3-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:274d416b0df887aef98f19f21578653982cfb8a05b4e187d4a17103322eeaf8f"}, + {file = "multidict-6.6.3-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e252017a817fad7ce05cafbe5711ed40faeb580e63b16755a3a24e66fa1d87c0"}, + {file = "multidict-6.6.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e4cc8d848cd4fe1cdee28c13ea79ab0ed37fc2e89dd77bac86a2e7959a8c3bc"}, + {file = "multidict-6.6.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9e236a7094b9c4c1b7585f6b9cca34b9d833cf079f7e4c49e6a4a6ec9bfdc68f"}, + {file = "multidict-6.6.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:e0cb0ab69915c55627c933f0b555a943d98ba71b4d1c57bc0d0a66e2567c7471"}, + {file = "multidict-6.6.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:81ef2f64593aba09c5212a3d0f8c906a0d38d710a011f2f42759704d4557d3f2"}, + {file = "multidict-6.6.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:b9cbc60010de3562545fa198bfc6d3825df430ea96d2cc509c39bd71e2e7d648"}, + {file = "multidict-6.6.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = 
"sha256:70d974eaaa37211390cd02ef93b7e938de564bbffa866f0b08d07e5e65da783d"}, + {file = "multidict-6.6.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3713303e4a6663c6d01d648a68f2848701001f3390a030edaaf3fc949c90bf7c"}, + {file = "multidict-6.6.3-cp313-cp313t-win32.whl", hash = "sha256:639ecc9fe7cd73f2495f62c213e964843826f44505a3e5d82805aa85cac6f89e"}, + {file = "multidict-6.6.3-cp313-cp313t-win_amd64.whl", hash = "sha256:9f97e181f344a0ef3881b573d31de8542cc0dbc559ec68c8f8b5ce2c2e91646d"}, + {file = "multidict-6.6.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ce8b7693da41a3c4fde5871c738a81490cea5496c671d74374c8ab889e1834fb"}, + {file = "multidict-6.6.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c8161b5a7778d3137ea2ee7ae8a08cce0010de3b00ac671c5ebddeaa17cefd22"}, + {file = "multidict-6.6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1328201ee930f069961ae707d59c6627ac92e351ed5b92397cf534d1336ce557"}, + {file = "multidict-6.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b1db4d2093d6b235de76932febf9d50766cf49a5692277b2c28a501c9637f616"}, + {file = "multidict-6.6.3-cp39-cp39-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:53becb01dd8ebd19d1724bebe369cfa87e4e7f29abbbe5c14c98ce4c383e16cd"}, + {file = "multidict-6.6.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41bb9d1d4c303886e2d85bade86e59885112a7f4277af5ad47ab919a2251f306"}, + {file = "multidict-6.6.3-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:775b464d31dac90f23192af9c291dc9f423101857e33e9ebf0020a10bfcf4144"}, + {file = "multidict-6.6.3-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d04d01f0a913202205a598246cf77826fe3baa5a63e9f6ccf1ab0601cf56eca0"}, + {file = "multidict-6.6.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:d25594d3b38a2e6cabfdcafef339f754ca6e81fbbdb6650ad773ea9775af35ab"}, + {file = "multidict-6.6.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:35712f1748d409e0707b165bf49f9f17f9e28ae85470c41615778f8d4f7d9609"}, + {file = "multidict-6.6.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1c8082e5814b662de8589d6a06c17e77940d5539080cbab9fe6794b5241b76d9"}, + {file = "multidict-6.6.3-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:61af8a4b771f1d4d000b3168c12c3120ccf7284502a94aa58c68a81f5afac090"}, + {file = "multidict-6.6.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:448e4a9afccbf297577f2eaa586f07067441e7b63c8362a3540ba5a38dc0f14a"}, + {file = "multidict-6.6.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:233ad16999afc2bbd3e534ad8dbe685ef8ee49a37dbc2cdc9514e57b6d589ced"}, + {file = "multidict-6.6.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:bb933c891cd4da6bdcc9733d048e994e22e1883287ff7540c2a0f3b117605092"}, + {file = "multidict-6.6.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:37b09ca60998e87734699e88c2363abfd457ed18cfbf88e4009a4e83788e63ed"}, + {file = "multidict-6.6.3-cp39-cp39-win32.whl", hash = "sha256:f54cb79d26d0cd420637d184af38f0668558f3c4bbe22ab7ad830e67249f2e0b"}, + {file = "multidict-6.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:295adc9c0551e5d5214b45cf29ca23dbc28c2d197a9c30d51aed9e037cb7c578"}, + {file = "multidict-6.6.3-cp39-cp39-win_arm64.whl", hash = "sha256:15332783596f227db50fb261c2c251a58ac3873c457f3a550a95d5c0aa3c770d"}, + {file = "multidict-6.6.3-py3-none-any.whl", hash = "sha256:8db10f29c7541fc5da4defd8cd697e1ca429db743fa716325f236079b96f775a"}, + {file = "multidict-6.6.3.tar.gz", hash = "sha256:798a9eb12dab0a6c2e29c1de6f3468af5cb2da6053a20dfa3344907eed0937cc"}, ] [[package]] @@ -2333,75 +2335,75 @@ test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync" [[package]] name = "numpy" -version = "2.3.0" +version = "2.3.1" description = 
"Fundamental package for array computing in Python" optional = false python-versions = ">=3.11" groups = ["main"] files = [ - {file = "numpy-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c3c9fdde0fa18afa1099d6257eb82890ea4f3102847e692193b54e00312a9ae9"}, - {file = "numpy-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46d16f72c2192da7b83984aa5455baee640e33a9f1e61e656f29adf55e406c2b"}, - {file = "numpy-2.3.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a0be278be9307c4ab06b788f2a077f05e180aea817b3e41cebbd5aaf7bd85ed3"}, - {file = "numpy-2.3.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:99224862d1412d2562248d4710126355d3a8db7672170a39d6909ac47687a8a4"}, - {file = "numpy-2.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2393a914db64b0ead0ab80c962e42d09d5f385802006a6c87835acb1f58adb96"}, - {file = "numpy-2.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:7729c8008d55e80784bd113787ce876ca117185c579c0d626f59b87d433ea779"}, - {file = "numpy-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:06d4fb37a8d383b769281714897420c5cc3545c79dc427df57fc9b852ee0bf58"}, - {file = "numpy-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c39ec392b5db5088259c68250e342612db82dc80ce044cf16496cf14cf6bc6f8"}, - {file = "numpy-2.3.0-cp311-cp311-win32.whl", hash = "sha256:ee9d3ee70d62827bc91f3ea5eee33153212c41f639918550ac0475e3588da59f"}, - {file = "numpy-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:43c55b6a860b0eb44d42341438b03513cf3879cb3617afb749ad49307e164edd"}, - {file = "numpy-2.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:2e6a1409eee0cb0316cb64640a49a49ca44deb1a537e6b1121dc7c458a1299a8"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:389b85335838155a9076e9ad7f8fdba0827496ec2d2dc32ce69ce7898bde03ba"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9498f60cd6bb8238d8eaf468a3d5bb031d34cd12556af53510f05fcf581c1b7e"}, - {file = 
"numpy-2.3.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:622a65d40d8eb427d8e722fd410ac3ad4958002f109230bc714fa551044ebae2"}, - {file = "numpy-2.3.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b9446d9d8505aadadb686d51d838f2b6688c9e85636a0c3abaeb55ed54756459"}, - {file = "numpy-2.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:50080245365d75137a2bf46151e975de63146ae6d79f7e6bd5c0e85c9931d06a"}, - {file = "numpy-2.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c24bb4113c66936eeaa0dc1e47c74770453d34f46ee07ae4efd853a2ed1ad10a"}, - {file = "numpy-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4d8d294287fdf685281e671886c6dcdf0291a7c19db3e5cb4178d07ccf6ecc67"}, - {file = "numpy-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6295f81f093b7f5769d1728a6bd8bf7466de2adfa771ede944ce6711382b89dc"}, - {file = "numpy-2.3.0-cp312-cp312-win32.whl", hash = "sha256:e6648078bdd974ef5d15cecc31b0c410e2e24178a6e10bf511e0557eed0f2570"}, - {file = "numpy-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:0898c67a58cdaaf29994bc0e2c65230fd4de0ac40afaf1584ed0b02cd74c6fdd"}, - {file = "numpy-2.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:bd8df082b6c4695753ad6193018c05aac465d634834dca47a3ae06d4bb22d9ea"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5754ab5595bfa2c2387d241296e0381c21f44a4b90a776c3c1d39eede13a746a"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d11fa02f77752d8099573d64e5fe33de3229b6632036ec08f7080f46b6649959"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:aba48d17e87688a765ab1cd557882052f238e2f36545dfa8e29e6a91aef77afe"}, - {file = "numpy-2.3.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4dc58865623023b63b10d52f18abaac3729346a7a46a778381e0e3af4b7f3beb"}, - {file = "numpy-2.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:df470d376f54e052c76517393fa443758fefcdd634645bc9c1f84eafc67087f0"}, - {file = 
"numpy-2.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:87717eb24d4a8a64683b7a4e91ace04e2f5c7c77872f823f02a94feee186168f"}, - {file = "numpy-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d8fa264d56882b59dcb5ea4d6ab6f31d0c58a57b41aec605848b6eb2ef4a43e8"}, - {file = "numpy-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e651756066a0eaf900916497e20e02fe1ae544187cb0fe88de981671ee7f6270"}, - {file = "numpy-2.3.0-cp313-cp313-win32.whl", hash = "sha256:e43c3cce3b6ae5f94696669ff2a6eafd9a6b9332008bafa4117af70f4b88be6f"}, - {file = "numpy-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:81ae0bf2564cf475f94be4a27ef7bcf8af0c3e28da46770fc904da9abd5279b5"}, - {file = "numpy-2.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:c8738baa52505fa6e82778580b23f945e3578412554d937093eac9205e845e6e"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:39b27d8b38942a647f048b675f134dd5a567f95bfff481f9109ec308515c51d8"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0eba4a1ea88f9a6f30f56fdafdeb8da3774349eacddab9581a21234b8535d3d3"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:b0f1f11d0a1da54927436505a5a7670b154eac27f5672afc389661013dfe3d4f"}, - {file = "numpy-2.3.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:690d0a5b60a47e1f9dcec7b77750a4854c0d690e9058b7bef3106e3ae9117808"}, - {file = "numpy-2.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:8b51ead2b258284458e570942137155978583e407babc22e3d0ed7af33ce06f8"}, - {file = "numpy-2.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:aaf81c7b82c73bd9b45e79cfb9476cb9c29e937494bfe9092c26aece812818ad"}, - {file = "numpy-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f420033a20b4f6a2a11f585f93c843ac40686a7c3fa514060a97d9de93e5e72b"}, - {file = "numpy-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d344ca32ab482bcf8735d8f95091ad081f97120546f3d250240868430ce52555"}, - {file = 
"numpy-2.3.0-cp313-cp313t-win32.whl", hash = "sha256:48a2e8eaf76364c32a1feaa60d6925eaf32ed7a040183b807e02674305beef61"}, - {file = "numpy-2.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ba17f93a94e503551f154de210e4d50c5e3ee20f7e7a1b5f6ce3f22d419b93bb"}, - {file = "numpy-2.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f14e016d9409680959691c109be98c436c6249eaf7f118b424679793607b5944"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:80b46117c7359de8167cc00a2c7d823bdd505e8c7727ae0871025a86d668283b"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:5814a0f43e70c061f47abd5857d120179609ddc32a613138cbb6c4e9e2dbdda5"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ef6c1e88fd6b81ac6d215ed71dc8cd027e54d4bf1d2682d362449097156267a2"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33a5a12a45bb82d9997e2c0b12adae97507ad7c347546190a18ff14c28bbca12"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:54dfc8681c1906d239e95ab1508d0a533c4a9505e52ee2d71a5472b04437ef97"}, - {file = "numpy-2.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e017a8a251ff4d18d71f139e28bdc7c31edba7a507f72b1414ed902cbe48c74d"}, - {file = "numpy-2.3.0.tar.gz", hash = "sha256:581f87f9e9e9db2cba2141400e160e9dd644ee248788d6f90636eeb8fd9260a6"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ea9e48336a402551f52cd8f593343699003d2353daa4b72ce8d34f66b722070"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ccb7336eaf0e77c1635b232c141846493a588ec9ea777a7c24d7166bb8533ae"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0bb3a4a61e1d327e035275d2a993c96fa786e4913aa089843e6a2d9dd205c66a"}, + {file = "numpy-2.3.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:e344eb79dab01f1e838ebb67aab09965fb271d6da6b00adda26328ac27d4a66e"}, + {file = 
"numpy-2.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:467db865b392168ceb1ef1ffa6f5a86e62468c43e0cfb4ab6da667ede10e58db"}, + {file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:afed2ce4a84f6b0fc6c1ce734ff368cbf5a5e24e8954a338f3bdffa0718adffb"}, + {file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0025048b3c1557a20bc80d06fdeb8cc7fc193721484cca82b2cfa072fec71a93"}, + {file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a5ee121b60aa509679b682819c602579e1df14a5b07fe95671c8849aad8f2115"}, + {file = "numpy-2.3.1-cp311-cp311-win32.whl", hash = "sha256:a8b740f5579ae4585831b3cf0e3b0425c667274f82a484866d2adf9570539369"}, + {file = "numpy-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4580adadc53311b163444f877e0789f1c8861e2698f6b2a4ca852fda154f3ff"}, + {file = "numpy-2.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:ec0bdafa906f95adc9a0c6f26a4871fa753f25caaa0e032578a30457bff0af6a"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2959d8f268f3d8ee402b04a9ec4bb7604555aeacf78b360dc4ec27f1d508177d"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:762e0c0c6b56bdedfef9a8e1d4538556438288c4276901ea008ae44091954e29"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:867ef172a0976aaa1f1d1b63cf2090de8b636a7674607d514505fb7276ab08fc"}, + {file = "numpy-2.3.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:4e602e1b8682c2b833af89ba641ad4176053aaa50f5cacda1a27004352dde943"}, + {file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8e333040d069eba1652fb08962ec5b76af7f2c7bce1df7e1418c8055cf776f25"}, + {file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e7cbf5a5eafd8d230a3ce356d892512185230e4781a361229bd902ff403bc660"}, + {file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f1b8f26d1086835f442286c1d9b64bb3974b0b1e41bb105358fd07d20872952"}, + {file = 
"numpy-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ee8340cb48c9b7a5899d1149eece41ca535513a9698098edbade2a8e7a84da77"}, + {file = "numpy-2.3.1-cp312-cp312-win32.whl", hash = "sha256:e772dda20a6002ef7061713dc1e2585bc1b534e7909b2030b5a46dae8ff077ab"}, + {file = "numpy-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cfecc7822543abdea6de08758091da655ea2210b8ffa1faf116b940693d3df76"}, + {file = "numpy-2.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:7be91b2239af2658653c5bb6f1b8bccafaf08226a258caf78ce44710a0160d30"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25a1992b0a3fdcdaec9f552ef10d8103186f5397ab45e2d25f8ac51b1a6b97e8"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dea630156d39b02a63c18f508f85010230409db5b2927ba59c8ba4ab3e8272e"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bada6058dd886061f10ea15f230ccf7dfff40572e99fef440a4a857c8728c9c0"}, + {file = "numpy-2.3.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:a894f3816eb17b29e4783e5873f92faf55b710c2519e5c351767c51f79d8526d"}, + {file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:18703df6c4a4fee55fd3d6e5a253d01c5d33a295409b03fda0c86b3ca2ff41a1"}, + {file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5902660491bd7a48b2ec16c23ccb9124b8abfd9583c5fdfa123fe6b421e03de1"}, + {file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:36890eb9e9d2081137bd78d29050ba63b8dab95dff7912eadf1185e80074b2a0"}, + {file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a780033466159c2270531e2b8ac063704592a0bc62ec4a1b991c7c40705eb0e8"}, + {file = "numpy-2.3.1-cp313-cp313-win32.whl", hash = "sha256:39bff12c076812595c3a306f22bfe49919c5513aa1e0e70fac756a0be7c2a2b8"}, + {file = "numpy-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d5ee6eec45f08ce507a6570e06f2f879b374a552087a4179ea7838edbcbfa42"}, + {file = 
"numpy-2.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:0c4d9e0a8368db90f93bd192bfa771ace63137c3488d198ee21dfb8e7771916e"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b0b5397374f32ec0649dd98c652a1798192042e715df918c20672c62fb52d4b8"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c5bdf2015ccfcee8253fb8be695516ac4457c743473a43290fd36eba6a1777eb"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d70f20df7f08b90a2062c1f07737dd340adccf2068d0f1b9b3d56e2038979fee"}, + {file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:2fb86b7e58f9ac50e1e9dd1290154107e47d1eef23a0ae9145ded06ea606f992"}, + {file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:23ab05b2d241f76cb883ce8b9a93a680752fbfcbd51c50eff0b88b979e471d8c"}, + {file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ce2ce9e5de4703a673e705183f64fd5da5bf36e7beddcb63a25ee2286e71ca48"}, + {file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c4913079974eeb5c16ccfd2b1f09354b8fed7e0d6f2cab933104a09a6419b1ee"}, + {file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:010ce9b4f00d5c036053ca684c77441f2f2c934fd23bee058b4d6f196efd8280"}, + {file = "numpy-2.3.1-cp313-cp313t-win32.whl", hash = "sha256:6269b9edfe32912584ec496d91b00b6d34282ca1d07eb10e82dfc780907d6c2e"}, + {file = "numpy-2.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2a809637460e88a113e186e87f228d74ae2852a2e0c44de275263376f17b5bdc"}, + {file = "numpy-2.3.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eccb9a159db9aed60800187bc47a6d3451553f0e1b08b068d8b277ddfbb9b244"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ad506d4b09e684394c42c966ec1527f6ebc25da7f4da4b1b056606ffe446b8a3"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ebb8603d45bc86bbd5edb0d63e52c5fd9e7945d3a503b77e486bd88dde67a19b"}, + {file = 
"numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:15aa4c392ac396e2ad3d0a2680c0f0dee420f9fed14eef09bdb9450ee6dcb7b7"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c6e0bf9d1a2f50d2b65a7cf56db37c095af17b59f6c132396f7c6d5dd76484df"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:eabd7e8740d494ce2b4ea0ff05afa1b7b291e978c0ae075487c51e8bd93c0c68"}, + {file = "numpy-2.3.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e610832418a2bc09d974cc9fecebfa51e9532d6190223bc5ef6a7402ebf3b5cb"}, + {file = "numpy-2.3.1.tar.gz", hash = "sha256:1ec9ae20a4226da374362cca3c62cd753faf2f951440b0e3b98e93c235441d2b"}, ] [[package]] name = "oauthlib" -version = "3.2.2" +version = "3.3.1" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, - {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, + {file = "oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1"}, + {file = "oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9"}, ] [package.extras] @@ -2409,6 +2411,26 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "odh-kuberay-client" +version = "0.0.0.dev40" +description = "Python SDK for Kuberay client" +optional = false +python-versions = ">=3.11,<4.0" +groups = ["main"] +files = [ + {file = "odh_kuberay_client-0.0.0.dev40-py3-none-any.whl", hash = "sha256:547daaa07ff3687b75dc844473b0897822d3aa4803aed865037ddf41da22f593"}, + {file = 
"odh_kuberay_client-0.0.0.dev40.tar.gz", hash = "sha256:a4ec11aff244099256cbca0628d8dbb4c5fe48e09966a6b75b412895aebd4834"}, +] + +[package.dependencies] +kubernetes = ">=25.0.0" + +[package.source] +type = "legacy" +url = "https://test.pypi.org/simple" +reference = "testpypi" + [[package]] name = "opencensus" version = "0.11.4" @@ -2562,54 +2584,54 @@ files = [ [[package]] name = "pandas" -version = "2.3.0" +version = "2.3.1" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pandas-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:625466edd01d43b75b1883a64d859168e4556261a5035b32f9d743b67ef44634"}, - {file = "pandas-2.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6872d695c896f00df46b71648eea332279ef4077a409e2fe94220208b6bb675"}, - {file = "pandas-2.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4dd97c19bd06bc557ad787a15b6489d2614ddaab5d104a0310eb314c724b2d2"}, - {file = "pandas-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:034abd6f3db8b9880aaee98f4f5d4dbec7c4829938463ec046517220b2f8574e"}, - {file = "pandas-2.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23c2b2dc5213810208ca0b80b8666670eb4660bbfd9d45f58592cc4ddcfd62e1"}, - {file = "pandas-2.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:39ff73ec07be5e90330cc6ff5705c651ace83374189dcdcb46e6ff54b4a72cd6"}, - {file = "pandas-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:40cecc4ea5abd2921682b57532baea5588cc5f80f0231c624056b146887274d2"}, - {file = "pandas-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8adff9f138fc614347ff33812046787f7d43b3cef7c0f0171b3340cae333f6ca"}, - {file = "pandas-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e5f08eb9a445d07720776df6e641975665c9ea12c9d8a331e0f6890f2dcd76ef"}, - {file = 
"pandas-2.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa35c266c8cd1a67d75971a1912b185b492d257092bdd2709bbdebe574ed228d"}, - {file = "pandas-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a0cc77b0f089d2d2ffe3007db58f170dae9b9f54e569b299db871a3ab5bf46"}, - {file = "pandas-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c06f6f144ad0a1bf84699aeea7eff6068ca5c63ceb404798198af7eb86082e33"}, - {file = "pandas-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ed16339bc354a73e0a609df36d256672c7d296f3f767ac07257801aa064ff73c"}, - {file = "pandas-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:fa07e138b3f6c04addfeaf56cc7fdb96c3b68a3fe5e5401251f231fce40a0d7a"}, - {file = "pandas-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2eb4728a18dcd2908c7fccf74a982e241b467d178724545a48d0caf534b38ebf"}, - {file = "pandas-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9d8c3187be7479ea5c3d30c32a5d73d62a621166675063b2edd21bc47614027"}, - {file = "pandas-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ff730713d4c4f2f1c860e36c005c7cefc1c7c80c21c0688fd605aa43c9fcf09"}, - {file = "pandas-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba24af48643b12ffe49b27065d3babd52702d95ab70f50e1b34f71ca703e2c0d"}, - {file = "pandas-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:404d681c698e3c8a40a61d0cd9412cc7364ab9a9cc6e144ae2992e11a2e77a20"}, - {file = "pandas-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6021910b086b3ca756755e86ddc64e0ddafd5e58e076c72cb1585162e5ad259b"}, - {file = "pandas-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:094e271a15b579650ebf4c5155c05dcd2a14fd4fdd72cf4854b2f7ad31ea30be"}, - {file = "pandas-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983"}, - {file = 
"pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd"}, - {file = "pandas-2.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f"}, - {file = "pandas-2.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3"}, - {file = "pandas-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8"}, - {file = "pandas-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9"}, - {file = "pandas-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390"}, - {file = "pandas-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575"}, - {file = "pandas-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042"}, - {file = "pandas-2.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c"}, - {file = "pandas-2.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67"}, - {file = "pandas-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f"}, - {file = "pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249"}, - {file = "pandas-2.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9efc0acbbffb5236fbdf0409c04edce96bec4bdaa649d49985427bd1ec73e085"}, - {file = 
"pandas-2.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:75651c14fde635e680496148a8526b328e09fe0572d9ae9b638648c46a544ba3"}, - {file = "pandas-2.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5be867a0541a9fb47a4be0c5790a4bccd5b77b92f0a59eeec9375fafc2aa14"}, - {file = "pandas-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84141f722d45d0c2a89544dd29d35b3abfc13d2250ed7e68394eda7564bd6324"}, - {file = "pandas-2.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f95a2aef32614ed86216d3c450ab12a4e82084e8102e355707a1d96e33d51c34"}, - {file = "pandas-2.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e0f51973ba93a9f97185049326d75b942b9aeb472bec616a129806facb129ebb"}, - {file = "pandas-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:b198687ca9c8529662213538a9bb1e60fa0bf0f6af89292eb68fea28743fcd5a"}, - {file = "pandas-2.3.0.tar.gz", hash = "sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133"}, + {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, + {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, + {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f951fbb702dacd390561e0ea45cdd8ecfa7fb56935eb3dd78e306c19104b9b0"}, + {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd05b72ec02ebfb993569b4931b2e16fbb4d6ad6ce80224a3ee838387d83a191"}, + {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1b916a627919a247d865aed068eb65eb91a344b13f5b57ab9f610b7716c92de1"}, + {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fe67dc676818c186d5a3d5425250e40f179c2a89145df477dd82945eaea89e97"}, + {file = "pandas-2.3.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:2eb789ae0274672acbd3c575b0598d213345660120a257b47b5dafdc618aec83"}, + {file = "pandas-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2b0540963d83431f5ce8870ea02a7430adca100cec8a050f0811f8e31035541b"}, + {file = "pandas-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fe7317f578c6a153912bd2292f02e40c1d8f253e93c599e82620c7f69755c74f"}, + {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6723a27ad7b244c0c79d8e7007092d7c8f0f11305770e2f4cd778b3ad5f9f85"}, + {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3462c3735fe19f2638f2c3a40bd94ec2dc5ba13abbb032dd2fa1f540a075509d"}, + {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:98bcc8b5bf7afed22cc753a28bc4d9e26e078e777066bc53fac7904ddef9a678"}, + {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d544806b485ddf29e52d75b1f559142514e60ef58a832f74fb38e48d757b299"}, + {file = "pandas-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b3cd4273d3cb3707b6fffd217204c52ed92859533e31dc03b7c5008aa933aaab"}, + {file = "pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3"}, + {file = "pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232"}, + {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e"}, + {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4"}, + {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8"}, + {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679"}, + {file = "pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8"}, + {file = "pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22"}, + {file = "pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a"}, + {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928"}, + {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9"}, + {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12"}, + {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb"}, + {file = "pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956"}, + {file = "pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a"}, + {file = "pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9"}, + {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275"}, + {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab"}, + {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96"}, + {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444"}, + {file = "pandas-2.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4645f770f98d656f11c69e81aeb21c6fca076a44bed3dcbb9396a4311bc7f6d8"}, + {file = "pandas-2.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:342e59589cc454aaff7484d75b816a433350b3d7964d7847327edda4d532a2e3"}, + {file = "pandas-2.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d12f618d80379fde6af007f65f0c25bd3e40251dbd1636480dfffce2cf1e6da"}, + {file = "pandas-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd71c47a911da120d72ef173aeac0bf5241423f9bfea57320110a978457e069e"}, + {file = "pandas-2.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:09e3b1587f0f3b0913e21e8b32c3119174551deb4a4eba4a89bc7377947977e7"}, + {file = "pandas-2.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2323294c73ed50f612f67e2bf3ae45aea04dce5690778e08a09391897f35ff88"}, + {file = "pandas-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:b4b0de34dc8499c2db34000ef8baad684cfa4cbd836ecee05f323ebfba348c7d"}, + {file = "pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2"}, ] [package.dependencies] @@ -3287,14 +3309,14 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pygments" -version = "2.19.1" +version = "2.19.2" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.8" groups = ["main", "docs", "test"] files = [ - {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, - {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, ] [package.extras] @@ -3383,19 +3405,24 @@ pytest = ">=7.0.0" [[package]] name = "python-dateutil" -version = "2.9.0.post0" +version = "3.9.0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" groups = ["main", "test"] files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, + {file = "python-dateutil-3.9.0.tar.gz", hash = "sha256:e090c9a06b858a55d8b6a518fc54d079646eb7262b373ff98f8f13877a5327ec"}, + {file = "python_dateutil-3.9.0-py2.py3-none-any.whl", hash = "sha256:971787138d3cb47d927800e544872edc9e49f33ad1335adc139c409aa5e6a9a8"}, ] [package.dependencies] six = ">=1.5" +[package.source] +type = "legacy" +url = "https://test.pypi.org/simple" +reference = "testpypi" + [[package]] name = "python-json-logger" version = "3.3.0" @@ -3533,105 +3560,91 @@ files = [ [[package]] name = "pyzmq" -version = "26.4.0" +version = "27.0.0" description = "Python bindings for 0MQ" optional = false python-versions = ">=3.8" groups = ["test"] files = [ - {file = "pyzmq-26.4.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:0329bdf83e170ac133f44a233fc651f6ed66ef8e66693b5af7d54f45d1ef5918"}, - 
{file = "pyzmq-26.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:398a825d2dea96227cf6460ce0a174cf7657d6f6827807d4d1ae9d0f9ae64315"}, - {file = "pyzmq-26.4.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d52d62edc96787f5c1dfa6c6ccff9b581cfae5a70d94ec4c8da157656c73b5b"}, - {file = "pyzmq-26.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1410c3a3705db68d11eb2424d75894d41cff2f64d948ffe245dd97a9debfebf4"}, - {file = "pyzmq-26.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7dacb06a9c83b007cc01e8e5277f94c95c453c5851aac5e83efe93e72226353f"}, - {file = "pyzmq-26.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6bab961c8c9b3a4dc94d26e9b2cdf84de9918931d01d6ff38c721a83ab3c0ef5"}, - {file = "pyzmq-26.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7a5c09413b924d96af2aa8b57e76b9b0058284d60e2fc3730ce0f979031d162a"}, - {file = "pyzmq-26.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7d489ac234d38e57f458fdbd12a996bfe990ac028feaf6f3c1e81ff766513d3b"}, - {file = "pyzmq-26.4.0-cp310-cp310-win32.whl", hash = "sha256:dea1c8db78fb1b4b7dc9f8e213d0af3fc8ecd2c51a1d5a3ca1cde1bda034a980"}, - {file = "pyzmq-26.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:fa59e1f5a224b5e04dc6c101d7186058efa68288c2d714aa12d27603ae93318b"}, - {file = "pyzmq-26.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:a651fe2f447672f4a815e22e74630b6b1ec3a1ab670c95e5e5e28dcd4e69bbb5"}, - {file = "pyzmq-26.4.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:bfcf82644c9b45ddd7cd2a041f3ff8dce4a0904429b74d73a439e8cab1bd9e54"}, - {file = "pyzmq-26.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9bcae3979b2654d5289d3490742378b2f3ce804b0b5fd42036074e2bf35b030"}, - {file = "pyzmq-26.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ccdff8ac4246b6fb60dcf3982dfaeeff5dd04f36051fe0632748fc0aa0679c01"}, - {file = 
"pyzmq-26.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4550af385b442dc2d55ab7717837812799d3674cb12f9a3aa897611839c18e9e"}, - {file = "pyzmq-26.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f7ffe9db1187a253fca95191854b3fda24696f086e8789d1d449308a34b88"}, - {file = "pyzmq-26.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3709c9ff7ba61589b7372923fd82b99a81932b592a5c7f1a24147c91da9a68d6"}, - {file = "pyzmq-26.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f8f3c30fb2d26ae5ce36b59768ba60fb72507ea9efc72f8f69fa088450cff1df"}, - {file = "pyzmq-26.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:382a4a48c8080e273427fc692037e3f7d2851959ffe40864f2db32646eeb3cef"}, - {file = "pyzmq-26.4.0-cp311-cp311-win32.whl", hash = "sha256:d56aad0517d4c09e3b4f15adebba8f6372c5102c27742a5bdbfc74a7dceb8fca"}, - {file = "pyzmq-26.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:963977ac8baed7058c1e126014f3fe58b3773f45c78cce7af5c26c09b6823896"}, - {file = "pyzmq-26.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:c0c8e8cadc81e44cc5088fcd53b9b3b4ce9344815f6c4a03aec653509296fae3"}, - {file = "pyzmq-26.4.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:5227cb8da4b6f68acfd48d20c588197fd67745c278827d5238c707daf579227b"}, - {file = "pyzmq-26.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1c07a7fa7f7ba86554a2b1bef198c9fed570c08ee062fd2fd6a4dcacd45f905"}, - {file = "pyzmq-26.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae775fa83f52f52de73183f7ef5395186f7105d5ed65b1ae65ba27cb1260de2b"}, - {file = "pyzmq-26.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66c760d0226ebd52f1e6b644a9e839b5db1e107a23f2fcd46ec0569a4fdd4e63"}, - {file = "pyzmq-26.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ef8c6ecc1d520debc147173eaa3765d53f06cd8dbe7bd377064cdbc53ab456f5"}, - {file = 
"pyzmq-26.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3150ef4084e163dec29ae667b10d96aad309b668fac6810c9e8c27cf543d6e0b"}, - {file = "pyzmq-26.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4448c9e55bf8329fa1dcedd32f661bf611214fa70c8e02fee4347bc589d39a84"}, - {file = "pyzmq-26.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e07dde3647afb084d985310d067a3efa6efad0621ee10826f2cb2f9a31b89d2f"}, - {file = "pyzmq-26.4.0-cp312-cp312-win32.whl", hash = "sha256:ba034a32ecf9af72adfa5ee383ad0fd4f4e38cdb62b13624278ef768fe5b5b44"}, - {file = "pyzmq-26.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:056a97aab4064f526ecb32f4343917a4022a5d9efb6b9df990ff72e1879e40be"}, - {file = "pyzmq-26.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:2f23c750e485ce1eb639dbd576d27d168595908aa2d60b149e2d9e34c9df40e0"}, - {file = "pyzmq-26.4.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:c43fac689880f5174d6fc864857d1247fe5cfa22b09ed058a344ca92bf5301e3"}, - {file = "pyzmq-26.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:902aca7eba477657c5fb81c808318460328758e8367ecdd1964b6330c73cae43"}, - {file = "pyzmq-26.4.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5e48a830bfd152fe17fbdeaf99ac5271aa4122521bf0d275b6b24e52ef35eb6"}, - {file = "pyzmq-26.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31be2b6de98c824c06f5574331f805707c667dc8f60cb18580b7de078479891e"}, - {file = "pyzmq-26.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6332452034be001bbf3206ac59c0d2a7713de5f25bb38b06519fc6967b7cf771"}, - {file = "pyzmq-26.4.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:da8c0f5dd352136853e6a09b1b986ee5278dfddfebd30515e16eae425c872b30"}, - {file = "pyzmq-26.4.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f4ccc1a0a2c9806dda2a2dd118a3b7b681e448f3bb354056cad44a65169f6d86"}, - {file = "pyzmq-26.4.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:1c0b5fceadbab461578daf8d1dcc918ebe7ddd2952f748cf30c7cf2de5d51101"}, - {file = "pyzmq-26.4.0-cp313-cp313-win32.whl", hash = "sha256:28e2b0ff5ba4b3dd11062d905682bad33385cfa3cc03e81abd7f0822263e6637"}, - {file = "pyzmq-26.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:23ecc9d241004c10e8b4f49d12ac064cd7000e1643343944a10df98e57bc544b"}, - {file = "pyzmq-26.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:1edb0385c7f025045d6e0f759d4d3afe43c17a3d898914ec6582e6f464203c08"}, - {file = "pyzmq-26.4.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:93a29e882b2ba1db86ba5dd5e88e18e0ac6b627026c5cfbec9983422011b82d4"}, - {file = "pyzmq-26.4.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb45684f276f57110bb89e4300c00f1233ca631f08f5f42528a5c408a79efc4a"}, - {file = "pyzmq-26.4.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f72073e75260cb301aad4258ad6150fa7f57c719b3f498cb91e31df16784d89b"}, - {file = "pyzmq-26.4.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be37e24b13026cfedd233bcbbccd8c0bcd2fdd186216094d095f60076201538d"}, - {file = "pyzmq-26.4.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:237b283044934d26f1eeff4075f751b05d2f3ed42a257fc44386d00df6a270cf"}, - {file = "pyzmq-26.4.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:b30f862f6768b17040929a68432c8a8be77780317f45a353cb17e423127d250c"}, - {file = "pyzmq-26.4.0-cp313-cp313t-musllinux_1_1_i686.whl", hash = "sha256:c80fcd3504232f13617c6ab501124d373e4895424e65de8b72042333316f64a8"}, - {file = "pyzmq-26.4.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:26a2a7451606b87f67cdeca2c2789d86f605da08b4bd616b1a9981605ca3a364"}, - {file = "pyzmq-26.4.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:831cc53bf6068d46d942af52fa8b0b9d128fb39bcf1f80d468dc9a3ae1da5bfb"}, - {file = "pyzmq-26.4.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:51d18be6193c25bd229524cfac21e39887c8d5e0217b1857998dfbef57c070a4"}, - {file = "pyzmq-26.4.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:445c97854204119ae2232503585ebb4fa7517142f71092cb129e5ee547957a1f"}, - {file = "pyzmq-26.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:807b8f4ad3e6084412c0f3df0613269f552110fa6fb91743e3e306223dbf11a6"}, - {file = "pyzmq-26.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c01d109dd675ac47fa15c0a79d256878d898f90bc10589f808b62d021d2e653c"}, - {file = "pyzmq-26.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0a294026e28679a8dd64c922e59411cb586dad307661b4d8a5c49e7bbca37621"}, - {file = "pyzmq-26.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:22c8dd677274af8dfb1efd05006d6f68fb2f054b17066e308ae20cb3f61028cf"}, - {file = "pyzmq-26.4.0-cp38-cp38-win32.whl", hash = "sha256:14fc678b696bc42c14e2d7f86ac4e97889d5e6b94d366ebcb637a768d2ad01af"}, - {file = "pyzmq-26.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:d1ef0a536662bbbdc8525f7e2ef19e74123ec9c4578e0582ecd41aedc414a169"}, - {file = "pyzmq-26.4.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:a88643de8abd000ce99ca72056a1a2ae15881ee365ecb24dd1d9111e43d57842"}, - {file = "pyzmq-26.4.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0a744ce209ecb557406fb928f3c8c55ce79b16c3eeb682da38ef5059a9af0848"}, - {file = "pyzmq-26.4.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9434540f333332224ecb02ee6278b6c6f11ea1266b48526e73c903119b2f420f"}, - {file = "pyzmq-26.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6c6f0a23e55cd38d27d4c89add963294ea091ebcb104d7fdab0f093bc5abb1c"}, - {file = "pyzmq-26.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6145df55dc2309f6ef72d70576dcd5aabb0fd373311613fe85a5e547c722b780"}, - {file = "pyzmq-26.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:2ea81823840ef8c56e5d2f9918e4d571236294fea4d1842b302aebffb9e40997"}, - {file = "pyzmq-26.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc2abc385dc37835445abe206524fbc0c9e3fce87631dfaa90918a1ba8f425eb"}, - {file = "pyzmq-26.4.0-cp39-cp39-win32.whl", hash = "sha256:41a2508fe7bed4c76b4cf55aacfb8733926f59d440d9ae2b81ee8220633b4d12"}, - {file = "pyzmq-26.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:d4000e8255d6cbce38982e5622ebb90823f3409b7ffe8aeae4337ef7d6d2612a"}, - {file = "pyzmq-26.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:b4f6919d9c120488246bdc2a2f96662fa80d67b35bd6d66218f457e722b3ff64"}, - {file = "pyzmq-26.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:98d948288ce893a2edc5ec3c438fe8de2daa5bbbd6e2e865ec5f966e237084ba"}, - {file = "pyzmq-26.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9f34f5c9e0203ece706a1003f1492a56c06c0632d86cb77bcfe77b56aacf27b"}, - {file = "pyzmq-26.4.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80c9b48aef586ff8b698359ce22f9508937c799cc1d2c9c2f7c95996f2300c94"}, - {file = "pyzmq-26.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3f2a5b74009fd50b53b26f65daff23e9853e79aa86e0aa08a53a7628d92d44a"}, - {file = "pyzmq-26.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:61c5f93d7622d84cb3092d7f6398ffc77654c346545313a3737e266fc11a3beb"}, - {file = "pyzmq-26.4.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4478b14cb54a805088299c25a79f27eaf530564a7a4f72bf432a040042b554eb"}, - {file = "pyzmq-26.4.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a28ac29c60e4ba84b5f58605ace8ad495414a724fe7aceb7cf06cd0598d04e1"}, - {file = "pyzmq-26.4.0-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43b03c1ceea27c6520124f4fb2ba9c647409b9abdf9a62388117148a90419494"}, - {file = 
"pyzmq-26.4.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7731abd23a782851426d4e37deb2057bf9410848a4459b5ede4fe89342e687a9"}, - {file = "pyzmq-26.4.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a222ad02fbe80166b0526c038776e8042cd4e5f0dec1489a006a1df47e9040e0"}, - {file = "pyzmq-26.4.0-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:91c3ffaea475ec8bb1a32d77ebc441dcdd13cd3c4c284a6672b92a0f5ade1917"}, - {file = "pyzmq-26.4.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d9a78a52668bf5c9e7b0da36aa5760a9fc3680144e1445d68e98df78a25082ed"}, - {file = "pyzmq-26.4.0-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b70cab356ff8c860118b89dc86cd910c73ce2127eb986dada4fbac399ef644cf"}, - {file = "pyzmq-26.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acae207d4387780838192326b32d373bb286da0b299e733860e96f80728eb0af"}, - {file = "pyzmq-26.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f928eafd15794aa4be75463d537348b35503c1e014c5b663f206504ec1a90fe4"}, - {file = "pyzmq-26.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:552b0d2e39987733e1e9e948a0ced6ff75e0ea39ab1a1db2fc36eb60fd8760db"}, - {file = "pyzmq-26.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd670a8aa843f2ee637039bbd412e0d7294a5e588e1ecc9ad98b0cdc050259a4"}, - {file = "pyzmq-26.4.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d367b7b775a0e1e54a59a2ba3ed4d5e0a31566af97cc9154e34262777dab95ed"}, - {file = "pyzmq-26.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112af16c406e4a93df2caef49f884f4c2bb2b558b0b5577ef0b2465d15c1abc"}, - {file = "pyzmq-26.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76c298683f82669cab0b6da59071f55238c039738297c69f187a542c6d40099"}, - {file = "pyzmq-26.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:49b6ca2e625b46f499fb081aaf7819a177f41eeb555acb05758aa97f4f95d147"}, - {file = "pyzmq-26.4.0.tar.gz", hash = "sha256:4bd13f85f80962f91a651a7356fe0472791a5f7a92f227822b5acf44795c626d"}, + {file = "pyzmq-27.0.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:b973ee650e8f442ce482c1d99ca7ab537c69098d53a3d046676a484fd710c87a"}, + {file = "pyzmq-27.0.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:661942bc7cd0223d569d808f2e5696d9cc120acc73bf3e88a1f1be7ab648a7e4"}, + {file = "pyzmq-27.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50360fb2a056ffd16e5f4177eee67f1dd1017332ea53fb095fe7b5bf29c70246"}, + {file = "pyzmq-27.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf209a6dc4b420ed32a7093642843cbf8703ed0a7d86c16c0b98af46762ebefb"}, + {file = "pyzmq-27.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c2dace4a7041cca2fba5357a2d7c97c5effdf52f63a1ef252cfa496875a3762d"}, + {file = "pyzmq-27.0.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:63af72b2955fc77caf0a77444baa2431fcabb4370219da38e1a9f8d12aaebe28"}, + {file = "pyzmq-27.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e8c4adce8e37e75c4215297d7745551b8dcfa5f728f23ce09bf4e678a9399413"}, + {file = "pyzmq-27.0.0-cp310-cp310-win32.whl", hash = "sha256:5d5ef4718ecab24f785794e0e7536436698b459bfbc19a1650ef55280119d93b"}, + {file = "pyzmq-27.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:e40609380480b3d12c30f841323f42451c755b8fece84235236f5fe5ffca8c1c"}, + {file = "pyzmq-27.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6b0397b0be277b46762956f576e04dc06ced265759e8c2ff41a0ee1aa0064198"}, + {file = "pyzmq-27.0.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:21457825249b2a53834fa969c69713f8b5a79583689387a5e7aed880963ac564"}, + {file = "pyzmq-27.0.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:1958947983fef513e6e98eff9cb487b60bf14f588dc0e6bf35fa13751d2c8251"}, + 
{file = "pyzmq-27.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0dc628b5493f9a8cd9844b8bee9732ef587ab00002157c9329e4fc0ef4d3afa"}, + {file = "pyzmq-27.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7bbe9e1ed2c8d3da736a15694d87c12493e54cc9dc9790796f0321794bbc91f"}, + {file = "pyzmq-27.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dc1091f59143b471d19eb64f54bae4f54bcf2a466ffb66fe45d94d8d734eb495"}, + {file = "pyzmq-27.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7011ade88c8e535cf140f8d1a59428676fbbce7c6e54fefce58bf117aefb6667"}, + {file = "pyzmq-27.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2c386339d7e3f064213aede5d03d054b237937fbca6dd2197ac8cf3b25a6b14e"}, + {file = "pyzmq-27.0.0-cp311-cp311-win32.whl", hash = "sha256:0546a720c1f407b2172cb04b6b094a78773491497e3644863cf5c96c42df8cff"}, + {file = "pyzmq-27.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:15f39d50bd6c9091c67315ceb878a4f531957b121d2a05ebd077eb35ddc5efed"}, + {file = "pyzmq-27.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c5817641eebb391a2268c27fecd4162448e03538387093cdbd8bf3510c316b38"}, + {file = "pyzmq-27.0.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:cbabc59dcfaac66655c040dfcb8118f133fb5dde185e5fc152628354c1598e52"}, + {file = "pyzmq-27.0.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:cb0ac5179cba4b2f94f1aa208fbb77b62c4c9bf24dd446278b8b602cf85fcda3"}, + {file = "pyzmq-27.0.0-cp312-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53a48f0228eab6cbf69fde3aa3c03cbe04e50e623ef92ae395fce47ef8a76152"}, + {file = "pyzmq-27.0.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:111db5f395e09f7e775f759d598f43cb815fc58e0147623c4816486e1a39dc22"}, + {file = "pyzmq-27.0.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c8878011653dcdc27cc2c57e04ff96f0471e797f5c19ac3d7813a245bcb24371"}, + {file = 
"pyzmq-27.0.0-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:c0ed2c1f335ba55b5fdc964622254917d6b782311c50e138863eda409fbb3b6d"}, + {file = "pyzmq-27.0.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e918d70862d4cfd4b1c187310015646a14e1f5917922ab45b29f28f345eeb6be"}, + {file = "pyzmq-27.0.0-cp312-abi3-win32.whl", hash = "sha256:88b4e43cab04c3c0f0d55df3b1eef62df2b629a1a369b5289a58f6fa8b07c4f4"}, + {file = "pyzmq-27.0.0-cp312-abi3-win_amd64.whl", hash = "sha256:dce4199bf5f648a902ce37e7b3afa286f305cd2ef7a8b6ec907470ccb6c8b371"}, + {file = "pyzmq-27.0.0-cp312-abi3-win_arm64.whl", hash = "sha256:56e46bbb85d52c1072b3f809cc1ce77251d560bc036d3a312b96db1afe76db2e"}, + {file = "pyzmq-27.0.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:c36ad534c0c29b4afa088dc53543c525b23c0797e01b69fef59b1a9c0e38b688"}, + {file = "pyzmq-27.0.0-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:67855c14173aec36395d7777aaba3cc527b393821f30143fd20b98e1ff31fd38"}, + {file = "pyzmq-27.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8617c7d43cd8ccdb62aebe984bfed77ca8f036e6c3e46dd3dddda64b10f0ab7a"}, + {file = "pyzmq-27.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67bfbcbd0a04c575e8103a6061d03e393d9f80ffdb9beb3189261e9e9bc5d5e9"}, + {file = "pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5cd11d46d7b7e5958121b3eaf4cd8638eff3a720ec527692132f05a57f14341d"}, + {file = "pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:b801c2e40c5aa6072c2f4876de8dccd100af6d9918d4d0d7aa54a1d982fd4f44"}, + {file = "pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:20d5cb29e8c5f76a127c75b6e7a77e846bc4b655c373baa098c26a61b7ecd0ef"}, + {file = "pyzmq-27.0.0-cp313-cp313t-win32.whl", hash = "sha256:a20528da85c7ac7a19b7384e8c3f8fa707841fd85afc4ed56eda59d93e3d98ad"}, + {file = "pyzmq-27.0.0-cp313-cp313t-win_amd64.whl", hash = 
"sha256:d8229f2efece6a660ee211d74d91dbc2a76b95544d46c74c615e491900dc107f"}, + {file = "pyzmq-27.0.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:f4162dbbd9c5c84fb930a36f290b08c93e35fce020d768a16fc8891a2f72bab8"}, + {file = "pyzmq-27.0.0-cp38-cp38-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4e7d0a8d460fba526cc047333bdcbf172a159b8bd6be8c3eb63a416ff9ba1477"}, + {file = "pyzmq-27.0.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:29f44e3c26b9783816ba9ce274110435d8f5b19bbd82f7a6c7612bb1452a3597"}, + {file = "pyzmq-27.0.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e435540fa1da54667f0026cf1e8407fe6d8a11f1010b7f06b0b17214ebfcf5e"}, + {file = "pyzmq-27.0.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:51f5726de3532b8222e569990c8aa34664faa97038304644679a51d906e60c6e"}, + {file = "pyzmq-27.0.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:42c7555123679637c99205b1aa9e8f7d90fe29d4c243c719e347d4852545216c"}, + {file = "pyzmq-27.0.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a979b7cf9e33d86c4949df527a3018767e5f53bc3b02adf14d4d8db1db63ccc0"}, + {file = "pyzmq-27.0.0-cp38-cp38-win32.whl", hash = "sha256:26b72c5ae20bf59061c3570db835edb81d1e0706ff141747055591c4b41193f8"}, + {file = "pyzmq-27.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:55a0155b148fe0428285a30922f7213539aa84329a5ad828bca4bbbc665c70a4"}, + {file = "pyzmq-27.0.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:100f6e5052ba42b2533011d34a018a5ace34f8cac67cb03cfa37c8bdae0ca617"}, + {file = "pyzmq-27.0.0-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:bf6c6b061efd00404b9750e2cfbd9507492c8d4b3721ded76cb03786131be2ed"}, + {file = "pyzmq-27.0.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ee05728c0b0b2484a9fc20466fa776fffb65d95f7317a3419985b8c908563861"}, + {file = "pyzmq-27.0.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:7cdf07fe0a557b131366f80727ec8ccc4b70d89f1e3f920d94a594d598d754f0"}, + {file = "pyzmq-27.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:90252fa2ff3a104219db1f5ced7032a7b5fc82d7c8d2fec2b9a3e6fd4e25576b"}, + {file = "pyzmq-27.0.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ea6d441c513bf18c578c73c323acf7b4184507fc244762193aa3a871333c9045"}, + {file = "pyzmq-27.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ae2b34bcfaae20c064948a4113bf8709eee89fd08317eb293ae4ebd69b4d9740"}, + {file = "pyzmq-27.0.0-cp39-cp39-win32.whl", hash = "sha256:5b10bd6f008937705cf6e7bf8b6ece5ca055991e3eb130bca8023e20b86aa9a3"}, + {file = "pyzmq-27.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:00387d12a8af4b24883895f7e6b9495dc20a66027b696536edac35cb988c38f3"}, + {file = "pyzmq-27.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:4c19d39c04c29a6619adfeb19e3735c421b3bfee082f320662f52e59c47202ba"}, + {file = "pyzmq-27.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:656c1866505a5735d0660b7da6d7147174bbf59d4975fc2b7f09f43c9bc25745"}, + {file = "pyzmq-27.0.0-pp310-pypy310_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:74175b9e12779382432dd1d1f5960ebe7465d36649b98a06c6b26be24d173fab"}, + {file = "pyzmq-27.0.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8c6de908465697a8708e4d6843a1e884f567962fc61eb1706856545141d0cbb"}, + {file = "pyzmq-27.0.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c644aaacc01d0df5c7072826df45e67301f191c55f68d7b2916d83a9ddc1b551"}, + {file = "pyzmq-27.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:10f70c1d9a446a85013a36871a296007f6fe4232b530aa254baf9da3f8328bc0"}, + {file = "pyzmq-27.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd1dc59763effd1576f8368047c9c31468fce0af89d76b5067641137506792ae"}, + {file = "pyzmq-27.0.0-pp311-pypy311_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = 
"sha256:60e8cc82d968174650c1860d7b716366caab9973787a1c060cf8043130f7d0f7"}, + {file = "pyzmq-27.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:14fe7aaac86e4e93ea779a821967360c781d7ac5115b3f1a171ced77065a0174"}, + {file = "pyzmq-27.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6ad0562d4e6abb785be3e4dd68599c41be821b521da38c402bc9ab2a8e7ebc7e"}, + {file = "pyzmq-27.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:9df43a2459cd3a3563404c1456b2c4c69564daa7dbaf15724c09821a3329ce46"}, + {file = "pyzmq-27.0.0-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c86ea8fe85e2eb0ffa00b53192c401477d5252f6dd1db2e2ed21c1c30d17e5e"}, + {file = "pyzmq-27.0.0-pp38-pypy38_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:c45fee3968834cd291a13da5fac128b696c9592a9493a0f7ce0b47fa03cc574d"}, + {file = "pyzmq-27.0.0-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cae73bb6898c4e045fbed5024cb587e4110fddb66f6163bcab5f81f9d4b9c496"}, + {file = "pyzmq-27.0.0-pp38-pypy38_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26d542258c7a1f35a9cff3d887687d3235006134b0ac1c62a6fe1ad3ac10440e"}, + {file = "pyzmq-27.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:04cd50ef3b28e35ced65740fb9956a5b3f77a6ff32fcd887e3210433f437dd0f"}, + {file = "pyzmq-27.0.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:39ddd3ba0a641f01d8f13a3cfd4c4924eb58e660d8afe87e9061d6e8ca6f7ac3"}, + {file = "pyzmq-27.0.0-pp39-pypy39_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:8ca7e6a0388dd9e1180b14728051068f4efe83e0d2de058b5ff92c63f399a73f"}, + {file = "pyzmq-27.0.0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2524c40891be6a3106885a3935d58452dd83eb7a5742a33cc780a1ad4c49dec0"}, + {file = "pyzmq-27.0.0-pp39-pypy39_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:6a56e3e5bd2d62a01744fd2f1ce21d760c7c65f030e9522738d75932a14ab62a"}, + {file = "pyzmq-27.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:096af9e133fec3a72108ddefba1e42985cb3639e9de52cfd336b6fc23aa083e9"}, + {file = "pyzmq-27.0.0.tar.gz", hash = "sha256:b1f08eeb9ce1510e6939b6e5dcd46a17765e2333daae78ecf4606808442e52cf"}, ] [package.dependencies] @@ -3828,129 +3841,156 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "rpds-py" -version = "0.25.1" +version = "0.26.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" groups = ["main", "test"] files = [ - {file = "rpds_py-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f4ad628b5174d5315761b67f212774a32f5bad5e61396d38108bd801c0a8f5d9"}, - {file = "rpds_py-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c742af695f7525e559c16f1562cf2323db0e3f0fbdcabdf6865b095256b2d40"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:605ffe7769e24b1800b4d024d24034405d9404f0bc2f55b6db3362cd34145a6f"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc6f3ddef93243538be76f8e47045b4aad7a66a212cd3a0f23e34469473d36b"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f70316f760174ca04492b5ab01be631a8ae30cadab1d1081035136ba12738cfa"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1dafef8df605fdb46edcc0bf1573dea0d6d7b01ba87f85cd04dc855b2b4479e"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0701942049095741a8aeb298a31b203e735d1c61f4423511d2b1a41dcd8a16da"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e87798852ae0b37c88babb7f7bbbb3e3fecc562a1c340195b44c7e24d403e380"}, - {file = 
"rpds_py-0.25.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3bcce0edc1488906c2d4c75c94c70a0417e83920dd4c88fec1078c94843a6ce9"}, - {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e2f6a2347d3440ae789505693a02836383426249d5293541cd712e07e7aecf54"}, - {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4fd52d3455a0aa997734f3835cbc4c9f32571345143960e7d7ebfe7b5fbfa3b2"}, - {file = "rpds_py-0.25.1-cp310-cp310-win32.whl", hash = "sha256:3f0b1798cae2bbbc9b9db44ee068c556d4737911ad53a4e5093d09d04b3bbc24"}, - {file = "rpds_py-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:3ebd879ab996537fc510a2be58c59915b5dd63bccb06d1ef514fee787e05984a"}, - {file = "rpds_py-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5f048bbf18b1f9120685c6d6bb70cc1a52c8cc11bdd04e643d28d3be0baf666d"}, - {file = "rpds_py-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fbb0dbba559959fcb5d0735a0f87cdbca9e95dac87982e9b95c0f8f7ad10255"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4ca54b9cf9d80b4016a67a0193ebe0bcf29f6b0a96f09db942087e294d3d4c2"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ee3e26eb83d39b886d2cb6e06ea701bba82ef30a0de044d34626ede51ec98b0"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89706d0683c73a26f76a5315d893c051324d771196ae8b13e6ffa1ffaf5e574f"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2013ee878c76269c7b557a9a9c042335d732e89d482606990b70a839635feb7"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45e484db65e5380804afbec784522de84fa95e6bb92ef1bd3325d33d13efaebd"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:48d64155d02127c249695abb87d39f0faf410733428d499867606be138161d65"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:048893e902132fd6548a2e661fb38bf4896a89eea95ac5816cf443524a85556f"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0317177b1e8691ab5879f4f33f4b6dc55ad3b344399e23df2e499de7b10a548d"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bffcf57826d77a4151962bf1701374e0fc87f536e56ec46f1abdd6a903354042"}, - {file = "rpds_py-0.25.1-cp311-cp311-win32.whl", hash = "sha256:cda776f1967cb304816173b30994faaf2fd5bcb37e73118a47964a02c348e1bc"}, - {file = "rpds_py-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:dc3c1ff0abc91444cd20ec643d0f805df9a3661fcacf9c95000329f3ddf268a4"}, - {file = "rpds_py-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:5a3ddb74b0985c4387719fc536faced33cadf2172769540c62e2a94b7b9be1c4"}, - {file = "rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c"}, - {file = "rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb"}, - {file = "rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe"}, - {file = "rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192"}, - {file = "rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728"}, - {file = "rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559"}, - {file = "rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd"}, - {file = "rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31"}, - {file = "rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500"}, - {file = "rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5"}, - {file = "rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129"}, - {file = "rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66"}, - {file = "rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523"}, - {file = "rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763"}, - {file = "rpds_py-0.25.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ce4c8e485a3c59593f1a6f683cf0ea5ab1c1dc94d11eea5619e4fb5228b40fbd"}, - {file = "rpds_py-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8222acdb51a22929c3b2ddb236b69c59c72af4019d2cba961e2f9add9b6e634"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4593c4eae9b27d22df41cde518b4b9e4464d139e4322e2127daa9b5b981b76be"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd035756830c712b64725a76327ce80e82ed12ebab361d3a1cdc0f51ea21acb0"}, - {file = 
"rpds_py-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:114a07e85f32b125404f28f2ed0ba431685151c037a26032b213c882f26eb908"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dec21e02e6cc932538b5203d3a8bd6aa1480c98c4914cb88eea064ecdbc6396a"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09eab132f41bf792c7a0ea1578e55df3f3e7f61888e340779b06050a9a3f16e9"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c98f126c4fc697b84c423e387337d5b07e4a61e9feac494362a59fd7a2d9ed80"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0e6a327af8ebf6baba1c10fadd04964c1965d375d318f4435d5f3f9651550f4a"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bc120d1132cff853ff617754196d0ac0ae63befe7c8498bd67731ba368abe451"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:140f61d9bed7839446bdd44852e30195c8e520f81329b4201ceead4d64eb3a9f"}, - {file = "rpds_py-0.25.1-cp39-cp39-win32.whl", hash = "sha256:9c006f3aadeda131b438c3092124bd196b66312f0caa5823ef09585a669cf449"}, - {file = "rpds_py-0.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:a61d0b2c7c9a0ae45732a77844917b427ff16ad5464b4d4f5e4adb955f582890"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b24bf3cd93d5b6ecfbedec73b15f143596c88ee249fa98cefa9a9dc9d92c6f28"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:0eb90e94f43e5085623932b68840b6f379f26db7b5c2e6bcef3179bd83c9330f"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d50e4864498a9ab639d6d8854b25e80642bd362ff104312d9770b05d66e5fb13"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:7c9409b47ba0650544b0bb3c188243b83654dfe55dcc173a86832314e1a6a35d"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:796ad874c89127c91970652a4ee8b00d56368b7e00d3477f4415fe78164c8000"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85608eb70a659bf4c1142b2781083d4b7c0c4e2c90eff11856a9754e965b2540"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4feb9211d15d9160bc85fa72fed46432cdc143eb9cf6d5ca377335a921ac37b"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ccfa689b9246c48947d31dd9d8b16d89a0ecc8e0e26ea5253068efb6c542b76e"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3c5b317ecbd8226887994852e85de562f7177add602514d4ac40f87de3ae45a8"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:454601988aab2c6e8fd49e7634c65476b2b919647626208e376afcd22019eeb8"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:1c0c434a53714358532d13539272db75a5ed9df75a4a090a753ac7173ec14e11"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f73ce1512e04fbe2bc97836e89830d6b4314c171587a99688082d090f934d20a"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ee86d81551ec68a5c25373c5643d343150cc54672b5e9a0cafc93c1870a53954"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89c24300cd4a8e4a51e55c31a8ff3918e6651b241ee8876a42cc2b2a078533ba"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:771c16060ff4e79584dc48902a91ba79fd93eade3aa3a12d6d2a4aadaf7d542b"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:785ffacd0ee61c3e60bdfde93baa6d7c10d86f15655bd706c89da08068dc5038"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a40046a529cc15cef88ac5ab589f83f739e2d332cb4d7399072242400ed68c9"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85fc223d9c76cabe5d0bff82214459189720dc135db45f9f66aa7cffbf9ff6c1"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0be9965f93c222fb9b4cc254235b3b2b215796c03ef5ee64f995b1b69af0762"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8378fa4a940f3fb509c081e06cb7f7f2adae8cf46ef258b0e0ed7519facd573e"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:33358883a4490287e67a2c391dfaea4d9359860281db3292b6886bf0be3d8692"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1d1fadd539298e70cac2f2cb36f5b8a65f742b9b9f1014dd4ea1f7785e2470bf"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a46c2fb2545e21181445515960006e85d22025bd2fe6db23e76daec6eb689fe"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:50f2c501a89c9a5f4e454b126193c5495b9fb441a75b298c60591d8a2eb92e1b"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d779b325cc8238227c47fbc53964c8cc9a941d5dbae87aa007a1f08f2f77b23"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:036ded36bedb727beeabc16dc1dad7cb154b3fa444e936a03b67a86dc6a5066e"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245550f5a1ac98504147cba96ffec8fabc22b610742e9150138e5d60774686d7"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ff7c23ba0a88cb7b104281a99476cccadf29de2a0ef5ce864959a52675b1ca83"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e37caa8cdb3b7cf24786451a0bdb853f6347b8b92005eeb64225ae1db54d1c2b"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2f48ab00181600ee266a095fe815134eb456163f7d6699f525dee471f312cf"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e5fc7484fa7dce57e25063b0ec9638ff02a908304f861d81ea49273e43838c1"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d3c10228d6cf6fe2b63d2e7985e94f6916fa46940df46b70449e9ff9297bd3d1"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:5d9e40f32745db28c1ef7aad23f6fc458dc1e29945bd6781060f0d15628b8ddf"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:35a8d1a24b5936b35c5003313bc177403d8bdef0f8b24f28b1c4a255f94ea992"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6099263f526efff9cf3883dfef505518730f7a7a93049b1d90d42e50a22b4793"}, - {file = "rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3"}, + {file = "rpds_py-0.26.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:4c70c70f9169692b36307a95f3d8c0a9fcd79f7b4a383aad5eaa0e9718b79b37"}, + {file = "rpds_py-0.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:777c62479d12395bfb932944e61e915741e364c843afc3196b694db3d669fcd0"}, + {file = "rpds_py-0.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec671691e72dff75817386aa02d81e708b5a7ec0dec6669ec05213ff6b77e1bd"}, + {file = "rpds_py-0.26.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a1cb5d6ce81379401bbb7f6dbe3d56de537fb8235979843f0d53bc2e9815a79"}, + {file = 
"rpds_py-0.26.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f789e32fa1fb6a7bf890e0124e7b42d1e60d28ebff57fe806719abb75f0e9a3"}, + {file = "rpds_py-0.26.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c55b0a669976cf258afd718de3d9ad1b7d1fe0a91cd1ab36f38b03d4d4aeaaf"}, + {file = "rpds_py-0.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c70d9ec912802ecfd6cd390dadb34a9578b04f9bcb8e863d0a7598ba5e9e7ccc"}, + {file = "rpds_py-0.26.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3021933c2cb7def39d927b9862292e0f4c75a13d7de70eb0ab06efed4c508c19"}, + {file = "rpds_py-0.26.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8a7898b6ca3b7d6659e55cdac825a2e58c638cbf335cde41f4619e290dd0ad11"}, + {file = "rpds_py-0.26.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:12bff2ad9447188377f1b2794772f91fe68bb4bbfa5a39d7941fbebdbf8c500f"}, + {file = "rpds_py-0.26.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:191aa858f7d4902e975d4cf2f2d9243816c91e9605070aeb09c0a800d187e323"}, + {file = "rpds_py-0.26.0-cp310-cp310-win32.whl", hash = "sha256:b37a04d9f52cb76b6b78f35109b513f6519efb481d8ca4c321f6a3b9580b3f45"}, + {file = "rpds_py-0.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:38721d4c9edd3eb6670437d8d5e2070063f305bfa2d5aa4278c51cedcd508a84"}, + {file = "rpds_py-0.26.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:9e8cb77286025bdb21be2941d64ac6ca016130bfdcd228739e8ab137eb4406ed"}, + {file = "rpds_py-0.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5e09330b21d98adc8ccb2dbb9fc6cb434e8908d4c119aeaa772cb1caab5440a0"}, + {file = "rpds_py-0.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9c1b92b774b2e68d11193dc39620d62fd8ab33f0a3c77ecdabe19c179cdbc1"}, + {file = "rpds_py-0.26.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:824e6d3503ab990d7090768e4dfd9e840837bae057f212ff9f4f05ec6d1975e7"}, + {file = "rpds_py-0.26.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ad7fd2258228bf288f2331f0a6148ad0186b2e3643055ed0db30990e59817a6"}, + {file = "rpds_py-0.26.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0dc23bbb3e06ec1ea72d515fb572c1fea59695aefbffb106501138762e1e915e"}, + {file = "rpds_py-0.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d80bf832ac7b1920ee29a426cdca335f96a2b5caa839811803e999b41ba9030d"}, + {file = "rpds_py-0.26.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0919f38f5542c0a87e7b4afcafab6fd2c15386632d249e9a087498571250abe3"}, + {file = "rpds_py-0.26.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d422b945683e409000c888e384546dbab9009bb92f7c0b456e217988cf316107"}, + {file = "rpds_py-0.26.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:77a7711fa562ba2da1aa757e11024ad6d93bad6ad7ede5afb9af144623e5f76a"}, + {file = "rpds_py-0.26.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:238e8c8610cb7c29460e37184f6799547f7e09e6a9bdbdab4e8edb90986a2318"}, + {file = "rpds_py-0.26.0-cp311-cp311-win32.whl", hash = "sha256:893b022bfbdf26d7bedb083efeea624e8550ca6eb98bf7fea30211ce95b9201a"}, + {file = "rpds_py-0.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:87a5531de9f71aceb8af041d72fc4cab4943648d91875ed56d2e629bef6d4c03"}, + {file = "rpds_py-0.26.0-cp311-cp311-win_arm64.whl", hash = "sha256:de2713f48c1ad57f89ac25b3cb7daed2156d8e822cf0eca9b96a6f990718cc41"}, + {file = "rpds_py-0.26.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:894514d47e012e794f1350f076c427d2347ebf82f9b958d554d12819849a369d"}, + {file = "rpds_py-0.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc921b96fa95a097add244da36a1d9e4f3039160d1d30f1b35837bf108c21136"}, + {file = "rpds_py-0.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3e1157659470aa42a75448b6e943c895be8c70531c43cb78b9ba990778955582"}, + {file = "rpds_py-0.26.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:521ccf56f45bb3a791182dc6b88ae5f8fa079dd705ee42138c76deb1238e554e"}, + {file = "rpds_py-0.26.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9def736773fd56b305c0eef698be5192c77bfa30d55a0e5885f80126c4831a15"}, + {file = "rpds_py-0.26.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cdad4ea3b4513b475e027be79e5a0ceac8ee1c113a1a11e5edc3c30c29f964d8"}, + {file = "rpds_py-0.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82b165b07f416bdccf5c84546a484cc8f15137ca38325403864bfdf2b5b72f6a"}, + {file = "rpds_py-0.26.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d04cab0a54b9dba4d278fe955a1390da3cf71f57feb78ddc7cb67cbe0bd30323"}, + {file = "rpds_py-0.26.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:79061ba1a11b6a12743a2b0f72a46aa2758613d454aa6ba4f5a265cc48850158"}, + {file = "rpds_py-0.26.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f405c93675d8d4c5ac87364bb38d06c988e11028a64b52a47158a355079661f3"}, + {file = "rpds_py-0.26.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dafd4c44b74aa4bed4b250f1aed165b8ef5de743bcca3b88fc9619b6087093d2"}, + {file = "rpds_py-0.26.0-cp312-cp312-win32.whl", hash = "sha256:3da5852aad63fa0c6f836f3359647870e21ea96cf433eb393ffa45263a170d44"}, + {file = "rpds_py-0.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf47cfdabc2194a669dcf7a8dbba62e37a04c5041d2125fae0233b720da6f05c"}, + {file = "rpds_py-0.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:20ab1ae4fa534f73647aad289003f1104092890849e0266271351922ed5574f8"}, + {file = "rpds_py-0.26.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:696764a5be111b036256c0b18cd29783fab22154690fc698062fc1b0084b511d"}, + {file = "rpds_py-0.26.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:1e6c15d2080a63aaed876e228efe4f814bc7889c63b1e112ad46fdc8b368b9e1"}, + {file = "rpds_py-0.26.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390e3170babf42462739a93321e657444f0862c6d722a291accc46f9d21ed04e"}, + {file = "rpds_py-0.26.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7da84c2c74c0f5bc97d853d9e17bb83e2dcafcff0dc48286916001cc114379a1"}, + {file = "rpds_py-0.26.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c5fe114a6dd480a510b6d3661d09d67d1622c4bf20660a474507aaee7eeeee9"}, + {file = "rpds_py-0.26.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3100b3090269f3a7ea727b06a6080d4eb7439dca4c0e91a07c5d133bb1727ea7"}, + {file = "rpds_py-0.26.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c03c9b0c64afd0320ae57de4c982801271c0c211aa2d37f3003ff5feb75bb04"}, + {file = "rpds_py-0.26.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5963b72ccd199ade6ee493723d18a3f21ba7d5b957017607f815788cef50eaf1"}, + {file = "rpds_py-0.26.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9da4e873860ad5bab3291438525cae80169daecbfafe5657f7f5fb4d6b3f96b9"}, + {file = "rpds_py-0.26.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5afaddaa8e8c7f1f7b4c5c725c0070b6eed0228f705b90a1732a48e84350f4e9"}, + {file = "rpds_py-0.26.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4916dc96489616a6f9667e7526af8fa693c0fdb4f3acb0e5d9f4400eb06a47ba"}, + {file = "rpds_py-0.26.0-cp313-cp313-win32.whl", hash = "sha256:2a343f91b17097c546b93f7999976fd6c9d5900617aa848c81d794e062ab302b"}, + {file = "rpds_py-0.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:0a0b60701f2300c81b2ac88a5fb893ccfa408e1c4a555a77f908a2596eb875a5"}, + {file = "rpds_py-0.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:257d011919f133a4746958257f2c75238e3ff54255acd5e3e11f3ff41fd14256"}, + {file = 
"rpds_py-0.26.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:529c8156d7506fba5740e05da8795688f87119cce330c244519cf706a4a3d618"}, + {file = "rpds_py-0.26.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f53ec51f9d24e9638a40cabb95078ade8c99251945dad8d57bf4aabe86ecee35"}, + {file = "rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab504c4d654e4a29558eaa5bb8cea5fdc1703ea60a8099ffd9c758472cf913f"}, + {file = "rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd0641abca296bc1a00183fe44f7fced8807ed49d501f188faa642d0e4975b83"}, + {file = "rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69b312fecc1d017b5327afa81d4da1480f51c68810963a7336d92203dbb3d4f1"}, + {file = "rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c741107203954f6fc34d3066d213d0a0c40f7bb5aafd698fb39888af277c70d8"}, + {file = "rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc3e55a7db08dc9a6ed5fb7103019d2c1a38a349ac41901f9f66d7f95750942f"}, + {file = "rpds_py-0.26.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e851920caab2dbcae311fd28f4313c6953993893eb5c1bb367ec69d9a39e7ed"}, + {file = "rpds_py-0.26.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dfbf280da5f876d0b00c81f26bedce274e72a678c28845453885a9b3c22ae632"}, + {file = "rpds_py-0.26.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:1cc81d14ddfa53d7f3906694d35d54d9d3f850ef8e4e99ee68bc0d1e5fed9a9c"}, + {file = "rpds_py-0.26.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dca83c498b4650a91efcf7b88d669b170256bf8017a5db6f3e06c2bf031f57e0"}, + {file = "rpds_py-0.26.0-cp313-cp313t-win32.whl", hash = "sha256:4d11382bcaf12f80b51d790dee295c56a159633a8e81e6323b16e55d81ae37e9"}, + {file = "rpds_py-0.26.0-cp313-cp313t-win_amd64.whl", hash = 
"sha256:ff110acded3c22c033e637dd8896e411c7d3a11289b2edf041f86663dbc791e9"}, + {file = "rpds_py-0.26.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:da619979df60a940cd434084355c514c25cf8eb4cf9a508510682f6c851a4f7a"}, + {file = "rpds_py-0.26.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ea89a2458a1a75f87caabefe789c87539ea4e43b40f18cff526052e35bbb4fdf"}, + {file = "rpds_py-0.26.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feac1045b3327a45944e7dcbeb57530339f6b17baff154df51ef8b0da34c8c12"}, + {file = "rpds_py-0.26.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b818a592bd69bfe437ee8368603d4a2d928c34cffcdf77c2e761a759ffd17d20"}, + {file = "rpds_py-0.26.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a8b0dd8648709b62d9372fc00a57466f5fdeefed666afe3fea5a6c9539a0331"}, + {file = "rpds_py-0.26.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6d3498ad0df07d81112aa6ec6c95a7e7b1ae00929fb73e7ebee0f3faaeabad2f"}, + {file = "rpds_py-0.26.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24a4146ccb15be237fdef10f331c568e1b0e505f8c8c9ed5d67759dac58ac246"}, + {file = "rpds_py-0.26.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a9a63785467b2d73635957d32a4f6e73d5e4df497a16a6392fa066b753e87387"}, + {file = "rpds_py-0.26.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:de4ed93a8c91debfd5a047be327b7cc8b0cc6afe32a716bbbc4aedca9e2a83af"}, + {file = "rpds_py-0.26.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:caf51943715b12af827696ec395bfa68f090a4c1a1d2509eb4e2cb69abbbdb33"}, + {file = "rpds_py-0.26.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4a59e5bc386de021f56337f757301b337d7ab58baa40174fb150accd480bc953"}, + {file = "rpds_py-0.26.0-cp314-cp314-win32.whl", hash = "sha256:92c8db839367ef16a662478f0a2fe13e15f2227da3c1430a782ad0f6ee009ec9"}, + {file = 
"rpds_py-0.26.0-cp314-cp314-win_amd64.whl", hash = "sha256:b0afb8cdd034150d4d9f53926226ed27ad15b7f465e93d7468caaf5eafae0d37"}, + {file = "rpds_py-0.26.0-cp314-cp314-win_arm64.whl", hash = "sha256:ca3f059f4ba485d90c8dc75cb5ca897e15325e4e609812ce57f896607c1c0867"}, + {file = "rpds_py-0.26.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5afea17ab3a126006dc2f293b14ffc7ef3c85336cf451564a0515ed7648033da"}, + {file = "rpds_py-0.26.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:69f0c0a3df7fd3a7eec50a00396104bb9a843ea6d45fcc31c2d5243446ffd7a7"}, + {file = "rpds_py-0.26.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:801a71f70f9813e82d2513c9a96532551fce1e278ec0c64610992c49c04c2dad"}, + {file = "rpds_py-0.26.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df52098cde6d5e02fa75c1f6244f07971773adb4a26625edd5c18fee906fa84d"}, + {file = "rpds_py-0.26.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bc596b30f86dc6f0929499c9e574601679d0341a0108c25b9b358a042f51bca"}, + {file = "rpds_py-0.26.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9dfbe56b299cf5875b68eb6f0ebaadc9cac520a1989cac0db0765abfb3709c19"}, + {file = "rpds_py-0.26.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac64f4b2bdb4ea622175c9ab7cf09444e412e22c0e02e906978b3b488af5fde8"}, + {file = "rpds_py-0.26.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:181ef9b6bbf9845a264f9aa45c31836e9f3c1f13be565d0d010e964c661d1e2b"}, + {file = "rpds_py-0.26.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:49028aa684c144ea502a8e847d23aed5e4c2ef7cadfa7d5eaafcb40864844b7a"}, + {file = "rpds_py-0.26.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:e5d524d68a474a9688336045bbf76cb0def88549c1b2ad9dbfec1fb7cfbe9170"}, + {file = "rpds_py-0.26.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:c1851f429b822831bd2edcbe0cfd12ee9ea77868f8d3daf267b189371671c80e"}, + {file = "rpds_py-0.26.0-cp314-cp314t-win32.whl", hash = "sha256:7bdb17009696214c3b66bb3590c6d62e14ac5935e53e929bcdbc5a495987a84f"}, + {file = "rpds_py-0.26.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f14440b9573a6f76b4ee4770c13f0b5921f71dde3b6fcb8dabbefd13b7fe05d7"}, + {file = "rpds_py-0.26.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:7a48af25d9b3c15684059d0d1fc0bc30e8eee5ca521030e2bffddcab5be40226"}, + {file = "rpds_py-0.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0c71c2f6bf36e61ee5c47b2b9b5d47e4d1baad6426bfed9eea3e858fc6ee8806"}, + {file = "rpds_py-0.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d815d48b1804ed7867b539236b6dd62997850ca1c91cad187f2ddb1b7bbef19"}, + {file = "rpds_py-0.26.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84cfbd4d4d2cdeb2be61a057a258d26b22877266dd905809e94172dff01a42ae"}, + {file = "rpds_py-0.26.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fbaa70553ca116c77717f513e08815aec458e6b69a028d4028d403b3bc84ff37"}, + {file = "rpds_py-0.26.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39bfea47c375f379d8e87ab4bb9eb2c836e4f2069f0f65731d85e55d74666387"}, + {file = "rpds_py-0.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1533b7eb683fb5f38c1d68a3c78f5fdd8f1412fa6b9bf03b40f450785a0ab915"}, + {file = "rpds_py-0.26.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c5ab0ee51f560d179b057555b4f601b7df909ed31312d301b99f8b9fc6028284"}, + {file = "rpds_py-0.26.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e5162afc9e0d1f9cae3b577d9c29ddbab3505ab39012cb794d94a005825bde21"}, + {file = "rpds_py-0.26.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:43f10b007033f359bc3fa9cd5e6c1e76723f056ffa9a6b5c117cc35720a80292"}, + {file = "rpds_py-0.26.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:e3730a48e5622e598293eee0762b09cff34dd3f271530f47b0894891281f051d"}, + {file = "rpds_py-0.26.0-cp39-cp39-win32.whl", hash = "sha256:4b1f66eb81eab2e0ff5775a3a312e5e2e16bf758f7b06be82fb0d04078c7ac51"}, + {file = "rpds_py-0.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:519067e29f67b5c90e64fb1a6b6e9d2ec0ba28705c51956637bac23a2f4ddae1"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3c0909c5234543ada2515c05dc08595b08d621ba919629e94427e8e03539c958"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c1fb0cda2abcc0ac62f64e2ea4b4e64c57dfd6b885e693095460c61bde7bb18e"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84d142d2d6cf9b31c12aa4878d82ed3b2324226270b89b676ac62ccd7df52d08"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a547e21c5610b7e9093d870be50682a6a6cf180d6da0f42c47c306073bfdbbf6"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:35e9a70a0f335371275cdcd08bc5b8051ac494dd58bff3bbfb421038220dc871"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0dfa6115c6def37905344d56fb54c03afc49104e2ca473d5dedec0f6606913b4"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:313cfcd6af1a55a286a3c9a25f64af6d0e46cf60bc5798f1db152d97a216ff6f"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f7bf2496fa563c046d05e4d232d7b7fd61346e2402052064b773e5c378bf6f73"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:aa81873e2c8c5aa616ab8e017a481a96742fdf9313c40f14338ca7dbf50cb55f"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = 
"sha256:68ffcf982715f5b5b7686bdd349ff75d422e8f22551000c24b30eaa1b7f7ae84"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6188de70e190847bb6db3dc3981cbadff87d27d6fe9b4f0e18726d55795cee9b"}, + {file = "rpds_py-0.26.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1c962145c7473723df9722ba4c058de12eb5ebedcb4e27e7d902920aa3831ee8"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f61a9326f80ca59214d1cceb0a09bb2ece5b2563d4e0cd37bfd5515c28510674"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:183f857a53bcf4b1b42ef0f57ca553ab56bdd170e49d8091e96c51c3d69ca696"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:941c1cfdf4799d623cf3aa1d326a6b4fdb7a5799ee2687f3516738216d2262fb"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72a8d9564a717ee291f554eeb4bfeafe2309d5ec0aa6c475170bdab0f9ee8e88"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:511d15193cbe013619dd05414c35a7dedf2088fcee93c6bbb7c77859765bd4e8"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aea1f9741b603a8d8fedb0ed5502c2bc0accbc51f43e2ad1337fe7259c2b77a5"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4019a9d473c708cf2f16415688ef0b4639e07abaa569d72f74745bbeffafa2c7"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:093d63b4b0f52d98ebae33b8c50900d3d67e0666094b1be7a12fffd7f65de74b"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:2abe21d8ba64cded53a2a677e149ceb76dcf44284202d737178afe7ba540c1eb"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = 
"sha256:4feb7511c29f8442cbbc28149a92093d32e815a28aa2c50d333826ad2a20fdf0"}, + {file = "rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e99685fc95d386da368013e7fb4269dd39c30d99f812a8372d62f244f662709c"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a90a13408a7a856b87be8a9f008fff53c5080eea4e4180f6c2e546e4a972fb5d"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3ac51b65e8dc76cf4949419c54c5528adb24fc721df722fd452e5fbc236f5c40"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59b2093224a18c6508d95cfdeba8db9cbfd6f3494e94793b58972933fcee4c6d"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f01a5d6444a3258b00dc07b6ea4733e26f8072b788bef750baa37b370266137"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b6e2c12160c72aeda9d1283e612f68804621f448145a210f1bf1d79151c47090"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb28c1f569f8d33b2b5dcd05d0e6ef7005d8639c54c2f0be824f05aedf715255"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1766b5724c3f779317d5321664a343c07773c8c5fd1532e4039e6cc7d1a815be"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b6d9e5a2ed9c4988c8f9b28b3bc0e3e5b1aaa10c28d210a594ff3a8c02742daf"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:b5f7a446ddaf6ca0fad9a5535b56fbfc29998bf0e0b450d174bbec0d600e1d72"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:eed5ac260dd545fbc20da5f4f15e7efe36a55e0e7cf706e4ec005b491a9546a0"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = 
"sha256:582462833ba7cee52e968b0341b85e392ae53d44c0f9af6a5927c80e539a8b67"}, + {file = "rpds_py-0.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:69a607203441e07e9a8a529cff1d5b73f6a160f22db1097211e6212a68567d11"}, + {file = "rpds_py-0.26.0.tar.gz", hash = "sha256:20dae58a859b0906f0685642e591056f1e787f3a8b39c8e8749a45dc7d26bdb0"}, ] [[package]] @@ -4020,27 +4060,27 @@ files = [ [[package]] name = "smart-open" -version = "7.1.0" -description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" +version = "7.3.0.post1" +description = "Utils for streaming large files (S3, HDFS, GCS, SFTP, Azure Blob Storage, gzip, bz2, zst...)" optional = false -python-versions = "<4.0,>=3.7" +python-versions = "<4.0,>=3.8" groups = ["main"] files = [ - {file = "smart_open-7.1.0-py3-none-any.whl", hash = "sha256:4b8489bb6058196258bafe901730c7db0dcf4f083f316e97269c66f45502055b"}, - {file = "smart_open-7.1.0.tar.gz", hash = "sha256:a4f09f84f0f6d3637c6543aca7b5487438877a21360e7368ccf1f704789752ba"}, + {file = "smart_open-7.3.0.post1-py3-none-any.whl", hash = "sha256:c73661a2c24bf045c1e04e08fffc585b59af023fe783d57896f590489db66fb4"}, + {file = "smart_open-7.3.0.post1.tar.gz", hash = "sha256:ce6a3d9bc1afbf6234ad13c010b77f8cd36d24636811e3c52c3b5160f5214d1e"}, ] [package.dependencies] wrapt = "*" [package.extras] -all = ["azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "paramiko", "requests", "zstandard"] +all = ["smart_open[azure,gcs,http,s3,ssh,webhdfs,zst]"] azure = ["azure-common", "azure-core", "azure-storage-blob"] gcs = ["google-cloud-storage (>=2.6.0)"] http = ["requests"] s3 = ["boto3"] ssh = ["paramiko"] -test = ["awscli", "azure-common", "azure-core", "azure-storage-blob", "boto3", "google-cloud-storage (>=2.6.0)", "moto[server]", "numpy", "paramiko", "pyopenssl", "pytest", "pytest-benchmark", "pytest-rerunfailures", "requests", "responses", "zstandard"] +test = ["awscli", "moto[server]", "numpy", 
"pyopenssl", "pytest", "pytest-rerunfailures", "pytest_benchmark", "responses", "smart_open[all]"] webhdfs = ["requests"] zst = ["zstandard"] @@ -4351,26 +4391,26 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-python-dateutil" -version = "2.9.0.20250516" +version = "2.9.0.20250708" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.9" groups = ["test"] files = [ - {file = "types_python_dateutil-2.9.0.20250516-py3-none-any.whl", hash = "sha256:2b2b3f57f9c6a61fba26a9c0ffb9ea5681c9b83e69cd897c6b5f668d9c0cab93"}, - {file = "types_python_dateutil-2.9.0.20250516.tar.gz", hash = "sha256:13e80d6c9c47df23ad773d54b2826bd52dbbb41be87c3f339381c1700ad21ee5"}, + {file = "types_python_dateutil-2.9.0.20250708-py3-none-any.whl", hash = "sha256:4d6d0cc1cc4d24a2dc3816024e502564094497b713f7befda4d5bc7a8e3fd21f"}, + {file = "types_python_dateutil-2.9.0.20250708.tar.gz", hash = "sha256:ccdbd75dab2d6c9696c350579f34cffe2c281e4c5f27a585b2a2438dd1d5c8ab"}, ] [[package]] name = "typing-extensions" -version = "4.14.0" +version = "4.14.1" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" groups = ["main", "test"] files = [ - {file = "typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af"}, - {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"}, + {file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, + {file = "typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, ] [[package]] @@ -4417,14 +4457,14 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake [[package]] name = "urllib3" -version = "2.4.0" +version = "2.5.0" 
description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" groups = ["main", "docs", "test"] files = [ - {file = "urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813"}, - {file = "urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466"}, + {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, + {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, ] [package.extras] @@ -4750,4 +4790,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "6720576cf9ff57c7bb15b97e268bb414218f6a053e7e0a5bdd45d022c0847111" +content-hash = "3bcd24a524ab4189cd5e48a4f343fc21bf53fabe48ad701e13609b5483b89760" diff --git a/pyproject.toml b/pyproject.toml index 71610478..1a0b8d6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,14 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = ">= 2.10.6" ipywidgets = "8.1.2" +odh-kuberay-client = {version = "0.0.0.dev40", source = "testpypi"} + +[[tool.poetry.source]] +name = "pypi" + +[[tool.poetry.source]] +name = "testpypi" +url = "https://test.pypi.org/simple/" [tool.poetry.group.docs] optional = true diff --git a/src/codeflare_sdk/__init__.py b/src/codeflare_sdk/__init__.py index 9ab5c745..95753a59 100644 --- a/src/codeflare_sdk/__init__.py +++ b/src/codeflare_sdk/__init__.py @@ -10,6 +10,7 @@ AWManager, AppWrapperStatus, RayJobClient, + RayJob, ) from .common.widgets import view_clusters diff --git a/src/codeflare_sdk/ray/__init__.py b/src/codeflare_sdk/ray/__init__.py index ab55cc82..7c8e84da 100644 --- a/src/codeflare_sdk/ray/__init__.py +++ b/src/codeflare_sdk/ray/__init__.py @@ -4,6 +4,10 @@ RayJobClient, ) +from .rayjobs import ( + RayJob, +) + from .cluster import ( 
Cluster, ClusterConfiguration, diff --git a/src/codeflare_sdk/ray/cluster/build_ray_cluster.py b/src/codeflare_sdk/ray/cluster/build_ray_cluster.py index e8b68919..7b2ed79c 100644 --- a/src/codeflare_sdk/ray/cluster/build_ray_cluster.py +++ b/src/codeflare_sdk/ray/cluster/build_ray_cluster.py @@ -136,6 +136,7 @@ def build_ray_cluster(cluster: "codeflare_sdk.ray.cluster.Cluster"): "enableIngress": False, "rayStartParams": { "dashboard-host": "0.0.0.0", + "dashboard-port": "8265", "block": "true", "num-gpus": str(head_gpu_count), "resources": head_resources, @@ -245,6 +246,7 @@ def get_labels(cluster: "codeflare_sdk.ray.cluster.Cluster"): """ labels = { "controller-tools.k8s.io": "1.0", + "ray.io/cluster": cluster.config.name, # Enforced label always present } if cluster.config.labels != {}: labels.update(cluster.config.labels) diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index 31b80418..ed81749b 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -20,8 +20,12 @@ from time import sleep from typing import List, Optional, Tuple, Dict +import copy -from ray.job_submission import JobSubmissionClient +from ray.job_submission import JobSubmissionClient, JobStatus +import time +import uuid +import warnings from ...common.kubernetes_cluster.auth import ( config_check, @@ -57,7 +61,6 @@ from kubernetes.client.rest import ApiException from kubernetes.client.rest import ApiException -import warnings CF_SDK_FIELD_MANAGER = "codeflare-sdk" @@ -762,6 +765,7 @@ def get_cluster( head_extended_resource_requests=head_extended_resources, worker_extended_resource_requests=worker_extended_resources, ) + # Ignore the warning here for the lack of a ClusterConfiguration with warnings.catch_warnings(): warnings.filterwarnings( diff --git a/src/codeflare_sdk/ray/cluster/test_cluster.py b/src/codeflare_sdk/ray/cluster/test_cluster.py index 6475f7a8..164b3a81 100644 --- 
a/src/codeflare_sdk/ray/cluster/test_cluster.py +++ b/src/codeflare_sdk/ray/cluster/test_cluster.py @@ -956,5 +956,11 @@ def test_cluster_namespace_type_error(mocker): # Make sure to always keep this function last def test_cleanup(): - os.remove(f"{aw_dir}test-all-params.yaml") - os.remove(f"{aw_dir}aw-all-params.yaml") + # Remove files only if they exist + test_file = f"{aw_dir}test-all-params.yaml" + if os.path.exists(test_file): + os.remove(test_file) + + aw_file = f"{aw_dir}aw-all-params.yaml" + if os.path.exists(aw_file): + os.remove(aw_file) diff --git a/src/codeflare_sdk/ray/rayjobs/__init__.py b/src/codeflare_sdk/ray/rayjobs/__init__.py new file mode 100644 index 00000000..d9cbae34 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/__init__.py @@ -0,0 +1 @@ +from .rayjob import RayJob diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py new file mode 100644 index 00000000..e7a9a588 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -0,0 +1,111 @@ +""" +RayJob client for submitting and managing Ray jobs using the odh-kuberay-client. +""" + +import logging +from typing import Dict, Any, Optional +from odh_kuberay_client.kuberay_job_api import RayjobApi + +# Set up logging +logger = logging.getLogger(__name__) + + +class RayJob: + """ + A client for managing Ray jobs using the KubeRay operator. + + This class provides a simplified interface for submitting and managing + Ray jobs in a Kubernetes cluster with the KubeRay operator installed. + """ + + def __init__( + self, + job_name: str, + cluster_name: str, + namespace: str = "default", + entrypoint: str = "None", + runtime_env: Optional[Dict[str, Any]] = None, + ): + """ + Initialize a RayJob instance. 
+ + Args: + name: The name for the Ray job + namespace: The Kubernetes namespace to submit the job to (default: "default") + cluster_name: The name of the Ray cluster to submit the job to + **kwargs: Additional configuration options + """ + self.name = job_name + self.namespace = namespace + self.cluster_name = cluster_name + self.entrypoint = entrypoint + self.runtime_env = runtime_env + + # Initialize the KubeRay job API client + self._api = RayjobApi() + + logger.info(f"Initialized RayJob: {self.name} in namespace: {self.namespace}") + + def submit( + self, + ) -> str: + """ + Submit the Ray job to the Kubernetes cluster. + + Args: + entrypoint: The Python script or command to run + runtime_env: Ray runtime environment configuration (optional) + + Returns: + The job ID/name if submission was successful + + Raises: + RuntimeError: If the job has already been submitted or submission fails + """ + # Build the RayJob custom resource + rayjob_cr = self._build_rayjob_cr( + entrypoint=self.entrypoint, + runtime_env=self.runtime_env, + ) + + # Submit the job + logger.info( + f"Submitting RayJob {self.name} to RayCluster {self.cluster_name} in namespace {self.namespace}" + ) + result = self._api.submit_job(k8s_namespace=self.namespace, job=rayjob_cr) + + if result: + logger.info(f"Successfully submitted RayJob {self.name}") + return self.name + else: + raise RuntimeError(f"Failed to submit RayJob {self.name}") + + def _build_rayjob_cr( + self, + entrypoint: str, + runtime_env: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + """ + Build the RayJob custom resource specification. + + This creates a minimal RayJob CR that can be extended later. 
+ """ + # Basic RayJob custom resource structure + rayjob_cr = { + "apiVersion": "ray.io/v1", + "kind": "RayJob", + "metadata": { + "name": self.name, + "namespace": self.namespace, + }, + "spec": { + "entrypoint": entrypoint, + "clusterSelector": {"ray.io/cluster": self.cluster_name}, + }, + } + + # Add runtime environment if specified + if runtime_env: + rayjob_cr["spec"]["runtimeEnvYAML"] = str(runtime_env) + + return rayjob_cr diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py new file mode 100644 index 00000000..1136e6e5 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -0,0 +1,88 @@ +# Copyright 2024 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from unittest.mock import MagicMock +from codeflare_sdk.ray.rayjobs.rayjob import RayJob + + +def test_rayjob_submit_success(mocker): + """Test successful RayJob submission.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + # Configure the mock to return success when submit is called + mock_api_instance.submit.return_value = {"metadata": {"name": "test-rayjob"}} + + # Create RayJob instance + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-ray-cluster", + namespace="test-namespace", + entrypoint="python -c 'print(\"hello world\")'", + runtime_env={"pip": ["requests"]}, + ) + + # Submit the job + job_id = rayjob.submit() + + # Assertions + assert job_id == "test-rayjob" + + # Verify the API was called with correct parameters + mock_api_instance.submit_job.assert_called_once() + call_args = mock_api_instance.submit_job.call_args + + # Check the namespace parameter + assert call_args.kwargs["k8s_namespace"] == "test-namespace" + + # Check the job custom resource + job_cr = call_args.kwargs["job"] + assert job_cr["metadata"]["name"] == "test-rayjob" + assert job_cr["metadata"]["namespace"] == "test-namespace" + assert job_cr["spec"]["entrypoint"] == "python -c 'print(\"hello world\")'" + assert job_cr["spec"]["clusterSelector"]["ray.io/cluster"] == "test-ray-cluster" + assert job_cr["spec"]["runtimeEnvYAML"] == "{'pip': ['requests']}" + + +def test_rayjob_submit_failure(mocker): + """Test RayJob submission failure.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = 
mock_api_instance + + # Configure the mock to return failure (False/None) when submit_job is called + mock_api_instance.submit_job.return_value = None + + # Create a RayJob instance + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-ray-cluster", + namespace="default", + entrypoint="python script.py", + runtime_env={"pip": ["numpy"]}, + ) + + # Test that RuntimeError is raised on failure + with pytest.raises(RuntimeError, match="Failed to submit RayJob test-rayjob"): + rayjob.submit() diff --git a/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml b/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml index aa097dd9..3d710bfc 100644 --- a/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml +++ b/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml @@ -19,6 +19,7 @@ spec: controller-tools.k8s.io: '1.0' key1: value1 key2: value2 + ray.io/cluster: aw-all-params name: aw-all-params namespace: ns spec: @@ -38,6 +39,7 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 + dashboard-port: '8265' num-gpus: '1' resources: '"{\"TPU\": 2}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/kueue/aw_kueue.yaml b/tests/test_cluster_yamls/kueue/aw_kueue.yaml index f5f16406..7f72d25b 100644 --- a/tests/test_cluster_yamls/kueue/aw_kueue.yaml +++ b/tests/test_cluster_yamls/kueue/aw_kueue.yaml @@ -13,6 +13,7 @@ spec: metadata: labels: controller-tools.k8s.io: '1.0' + ray.io/cluster: unit-test-aw-kueue name: unit-test-aw-kueue namespace: ns spec: @@ -32,6 +33,7 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 + dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml index d6db4f4a..7a5a62ba 100644 --- a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml +++ b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml @@ -13,6 +13,7 @@ spec: metadata: 
labels: controller-tools.k8s.io: '1.0' + ray.io/cluster: unit-test-cluster-kueue name: unit-test-cluster-kueue namespace: ns spec: @@ -32,6 +33,7 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 + dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/ray/default-appwrapper.yaml b/tests/test_cluster_yamls/ray/default-appwrapper.yaml index 27828163..734f3d33 100644 --- a/tests/test_cluster_yamls/ray/default-appwrapper.yaml +++ b/tests/test_cluster_yamls/ray/default-appwrapper.yaml @@ -11,6 +11,7 @@ spec: metadata: labels: controller-tools.k8s.io: '1.0' + ray.io/cluster: default-appwrapper name: default-appwrapper namespace: ns spec: @@ -30,6 +31,7 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 + dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/ray/default-ray-cluster.yaml b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml index d2e6b05a..cc5f2ada 100644 --- a/tests/test_cluster_yamls/ray/default-ray-cluster.yaml +++ b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml @@ -3,6 +3,7 @@ kind: RayCluster metadata: labels: controller-tools.k8s.io: '1.0' + ray.io/cluster: default-cluster name: default-cluster namespace: ns spec: @@ -22,6 +23,7 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 + dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/ray/unit-test-all-params.yaml b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml index ee0878c1..213a082a 100644 --- a/tests/test_cluster_yamls/ray/unit-test-all-params.yaml +++ b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml @@ -10,6 +10,7 @@ metadata: key1: value1 key2: value2 kueue.x-k8s.io/queue-name: local-queue-default + ray.io/cluster: test-all-params name: test-all-params namespace: ns spec: @@ -29,6 +30,7 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 + 
dashboard-port: '8265' num-gpus: '1' resources: '"{\"TPU\": 2}"' serviceType: ClusterIP From fd36665244cbbf01cacd77d911426a6d0277e9e8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 30 Jul 2025 09:20:40 +0000 Subject: [PATCH 02/33] Updated coverage.svg --- coverage.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coverage.svg b/coverage.svg index a8c7e72a..59d64b37 100644 --- a/coverage.svg +++ b/coverage.svg @@ -15,7 +15,7 @@ coverage coverage - 92% - 92% + 93% + 93% From 5b908db4c54814f97136e27eeb38c0f08fc26116 Mon Sep 17 00:00:00 2001 From: Pat O'Connor Date: Tue, 29 Jul 2025 18:21:27 +0100 Subject: [PATCH 03/33] feat(RHOAIENG-26590): Report RayJob status via SDK Signed-off-by: Pat O'Connor --- src/codeflare_sdk/ray/__init__.py | 3 + src/codeflare_sdk/ray/rayjobs/__init__.py | 1 + src/codeflare_sdk/ray/rayjobs/pretty_print.py | 116 +++++++ src/codeflare_sdk/ray/rayjobs/rayjob.py | 95 +++++- src/codeflare_sdk/ray/rayjobs/status.py | 64 ++++ .../ray/rayjobs/test_pretty_print.py | 262 ++++++++++++++++ src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 2 +- src/codeflare_sdk/ray/rayjobs/test_status.py | 290 ++++++++++++++++++ 8 files changed, 830 insertions(+), 3 deletions(-) create mode 100644 src/codeflare_sdk/ray/rayjobs/pretty_print.py create mode 100644 src/codeflare_sdk/ray/rayjobs/status.py create mode 100644 src/codeflare_sdk/ray/rayjobs/test_pretty_print.py create mode 100644 src/codeflare_sdk/ray/rayjobs/test_status.py diff --git a/src/codeflare_sdk/ray/__init__.py b/src/codeflare_sdk/ray/__init__.py index 7c8e84da..b2278a05 100644 --- a/src/codeflare_sdk/ray/__init__.py +++ b/src/codeflare_sdk/ray/__init__.py @@ -6,6 +6,9 @@ from .rayjobs import ( RayJob, + RayJobDeploymentStatus, + CodeflareRayJobStatus, + RayJobInfo, ) from .cluster import ( diff --git a/src/codeflare_sdk/ray/rayjobs/__init__.py b/src/codeflare_sdk/ray/rayjobs/__init__.py index d9cbae34..47b573af 100644 --- a/src/codeflare_sdk/ray/rayjobs/__init__.py 
+++ b/src/codeflare_sdk/ray/rayjobs/__init__.py @@ -1 +1,2 @@ from .rayjob import RayJob +from .status import RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo diff --git a/src/codeflare_sdk/ray/rayjobs/pretty_print.py b/src/codeflare_sdk/ray/rayjobs/pretty_print.py new file mode 100644 index 00000000..9bc89b88 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/pretty_print.py @@ -0,0 +1,116 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This sub-module exists primarily to be used internally by the RayJob object +(in the rayjob sub-module) for pretty-printing job status and details. +""" + +from rich.console import Console +from rich.table import Table +from rich.panel import Panel +from typing import Tuple, Optional + +from .status import RayJobDeploymentStatus, RayJobInfo + + +def print_job_status(job_info: RayJobInfo): + """ + Pretty print the job status in a format similar to cluster status. 
+ """ + status_display, header_color = _get_status_display(job_info.status) + + # Create main info table + table = _create_info_table(header_color, job_info.name, status_display) + table.add_row(f"[bold]Job ID:[/bold] {job_info.job_id}") + table.add_row(f"[bold]Status:[/bold] {job_info.status.value}") + table.add_row(f"[bold]RayCluster:[/bold] {job_info.cluster_name}") + table.add_row(f"[bold]Namespace:[/bold] {job_info.namespace}") + + # Add timing information if available + if job_info.start_time: + table.add_row(f"[bold]Started:[/bold] {job_info.start_time}") + + # Add attempt counts if there are failures + if job_info.failed_attempts > 0: + table.add_row(f"[bold]Failed Attempts:[/bold] {job_info.failed_attempts}") + + _print_table_in_panel(table) + + +def print_no_job_found(job_name: str, namespace: str): + """ + Print a message when no job is found. + """ + # Create table with error message + table = _create_info_table( + "[white on red][bold]Name", job_name, "[bold red]No RayJob found" + ) + table.add_row() + table.add_row("Please run rayjob.submit() to submit a job.") + table.add_row() + table.add_row(f"[bold]Namespace:[/bold] {namespace}") + + _print_table_in_panel(table) + + +def _get_status_display(status: RayJobDeploymentStatus) -> Tuple[str, str]: + """ + Get the display string and header color for a given status. 
+ + Returns: + Tuple of (status_display, header_color) + """ + status_mapping = { + RayJobDeploymentStatus.COMPLETE: ( + "Complete :white_heavy_check_mark:", + "[white on green][bold]Name", + ), + RayJobDeploymentStatus.RUNNING: ("Running :gear:", "[white on blue][bold]Name"), + RayJobDeploymentStatus.FAILED: ("Failed :x:", "[white on red][bold]Name"), + RayJobDeploymentStatus.SUSPENDED: ( + "Suspended :pause_button:", + "[white on yellow][bold]Name", + ), + } + + return status_mapping.get( + status, ("Unknown :question:", "[white on red][bold]Name") + ) + + +def _create_info_table(header_color: str, name: str, status_display: str) -> Table: + """ + Create a standardized info table with header and status. + + Returns: + Table with header row, name/status row, and empty separator row + """ + table = Table(box=None, show_header=False) + table.add_row(header_color) + table.add_row("[bold underline]" + name, status_display) + table.add_row() # Empty separator row + return table + + +def _print_table_in_panel(table: Table): + """ + Print a table wrapped in a consistent panel format. + """ + console = Console() + main_table = Table( + box=None, title="[bold] :package: CodeFlare RayJob Status :package:" + ) + main_table.add_row(Panel.fit(table)) + console.print(main_table) diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index e7a9a588..ac2210a2 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -1,11 +1,32 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ RayJob client for submitting and managing Ray jobs using the odh-kuberay-client. """ import logging -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, Tuple from odh_kuberay_client.kuberay_job_api import RayjobApi +from .status import ( + RayJobDeploymentStatus, + CodeflareRayJobStatus, + RayJobInfo, +) +from . import pretty_print + # Set up logging logger = logging.getLogger(__name__) @@ -15,7 +36,7 @@ class RayJob: A client for managing Ray jobs using the KubeRay operator. This class provides a simplified interface for submitting and managing - Ray jobs in a Kubernetes cluster with the KubeRay operator installed. + RayJob CRs (using the KubeRay RayJob python client). """ def __init__( @@ -109,3 +130,73 @@ def _build_rayjob_cr( rayjob_cr["spec"]["runtimeEnvYAML"] = str(runtime_env) return rayjob_cr + + def status( + self, print_to_console: bool = True + ) -> Tuple[CodeflareRayJobStatus, bool]: + """ + Get the status of the Ray job. 
+ + Args: + print_to_console (bool): Whether to print formatted status to console (default: True) + + Returns: + Tuple of (CodeflareRayJobStatus, ready: bool) where ready indicates job completion + """ + status_data = self._api.get_job_status( + name=self.name, k8s_namespace=self.namespace + ) + + if not status_data: + if print_to_console: + pretty_print.print_no_job_found(self.name, self.namespace) + return CodeflareRayJobStatus.UNKNOWN, False + + # Map deployment status to our enums + deployment_status_str = status_data.get("jobDeploymentStatus", "Unknown") + + try: + deployment_status = RayJobDeploymentStatus(deployment_status_str) + except ValueError: + deployment_status = RayJobDeploymentStatus.UNKNOWN + + # Create RayJobInfo dataclass + job_info = RayJobInfo( + name=self.name, + job_id=status_data.get("jobId", ""), + status=deployment_status, + namespace=self.namespace, + cluster_name=self.cluster_name, + start_time=status_data.get("startTime"), + end_time=status_data.get("endTime"), + failed_attempts=status_data.get("failed", 0), + succeeded_attempts=status_data.get("succeeded", 0), + ) + + # Map to CodeFlare status and determine readiness + codeflare_status, ready = self._map_to_codeflare_status(deployment_status) + + if print_to_console: + pretty_print.print_job_status(job_info) + + return codeflare_status, ready + + def _map_to_codeflare_status( + self, deployment_status: RayJobDeploymentStatus + ) -> Tuple[CodeflareRayJobStatus, bool]: + """ + Map deployment status to CodeFlare status and determine readiness. 
+ + Returns: + Tuple of (CodeflareRayJobStatus, ready: bool) + """ + status_mapping = { + RayJobDeploymentStatus.COMPLETE: (CodeflareRayJobStatus.COMPLETE, True), + RayJobDeploymentStatus.RUNNING: (CodeflareRayJobStatus.RUNNING, False), + RayJobDeploymentStatus.FAILED: (CodeflareRayJobStatus.FAILED, False), + RayJobDeploymentStatus.SUSPENDED: (CodeflareRayJobStatus.SUSPENDED, False), + } + + return status_mapping.get( + deployment_status, (CodeflareRayJobStatus.UNKNOWN, False) + ) diff --git a/src/codeflare_sdk/ray/rayjobs/status.py b/src/codeflare_sdk/ray/rayjobs/status.py new file mode 100644 index 00000000..027ed09c --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/status.py @@ -0,0 +1,64 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The status sub-module defines Enums containing information for Ray job +deployment states and CodeFlare job states, as well as +dataclasses to store information for Ray jobs. +""" + +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +class RayJobDeploymentStatus(Enum): + """ + Defines the possible deployment states of a Ray job (from the KubeRay RayJob API). + """ + + COMPLETE = "Complete" + RUNNING = "Running" + FAILED = "Failed" + SUSPENDED = "Suspended" + UNKNOWN = "Unknown" + + +class CodeflareRayJobStatus(Enum): + """ + Defines the possible reportable states of a CodeFlare Ray job. 
+ """ + + COMPLETE = 1 + RUNNING = 2 + FAILED = 3 + SUSPENDED = 4 + UNKNOWN = 5 + + +@dataclass +class RayJobInfo: + """ + For storing information about a Ray job. + """ + + name: str + job_id: str + status: RayJobDeploymentStatus + namespace: str + cluster_name: str + start_time: Optional[str] = None + end_time: Optional[str] = None + failed_attempts: int = 0 + succeeded_attempts: int = 0 diff --git a/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py b/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py new file mode 100644 index 00000000..dbfd7caf --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py @@ -0,0 +1,262 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from codeflare_sdk.ray.rayjobs.pretty_print import ( + _get_status_display, + print_job_status, + print_no_job_found, +) +from codeflare_sdk.ray.rayjobs.status import RayJobDeploymentStatus, RayJobInfo +from unittest.mock import MagicMock, call + + +def test_get_status_display(): + """ + Test the _get_status_display function. 
+ """ + # Test Complete status + display, color = _get_status_display(RayJobDeploymentStatus.COMPLETE) + assert display == "Complete :white_heavy_check_mark:" + assert color == "[white on green][bold]Name" + + # Test Running status + display, color = _get_status_display(RayJobDeploymentStatus.RUNNING) + assert display == "Running :gear:" + assert color == "[white on blue][bold]Name" + + # Test Failed status + display, color = _get_status_display(RayJobDeploymentStatus.FAILED) + assert display == "Failed :x:" + assert color == "[white on red][bold]Name" + + # Test Suspended status + display, color = _get_status_display(RayJobDeploymentStatus.SUSPENDED) + assert display == "Suspended :pause_button:" + assert color == "[white on yellow][bold]Name" + + # Test Unknown status + display, color = _get_status_display(RayJobDeploymentStatus.UNKNOWN) + assert display == "Unknown :question:" + assert color == "[white on red][bold]Name" + + +def test_print_job_status_running_format(mocker): + """ + Test the print_job_status function format for a running job. 
+ """ + # Mock Rich components to verify format + mock_console = MagicMock() + mock_inner_table = MagicMock() + mock_main_table = MagicMock() + mock_panel = MagicMock() + + # Mock Table to return different instances for inner and main tables + table_instances = [mock_inner_table, mock_main_table] + mock_table_class = MagicMock(side_effect=table_instances) + + mocker.patch( + "codeflare_sdk.ray.rayjobs.pretty_print.Console", return_value=mock_console + ) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Table", mock_table_class) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Panel", mock_panel) + + # Create test job info for running job + job_info = RayJobInfo( + name="test-job", + job_id="test-job-abc123", + status=RayJobDeploymentStatus.RUNNING, + namespace="test-ns", + cluster_name="test-cluster", + start_time="2025-07-28T11:37:07Z", + failed_attempts=0, + succeeded_attempts=0, + ) + + # Call the function + print_job_status(job_info) + + # Verify both Table calls + expected_table_calls = [ + call(box=None, show_header=False), # Inner content table + call( + box=None, title="[bold] :package: CodeFlare RayJob Status :package:" + ), # Main wrapper table + ] + mock_table_class.assert_has_calls(expected_table_calls) + + # Verify inner table rows are added in correct order and format (versus our hard-coded version of this for cluster) + expected_calls = [ + call("[white on blue][bold]Name"), # Header with blue color for running + call( + "[bold underline]test-job", "Running :gear:" + ), # Name and status with gear emoji + call(), # Empty separator row + call("[bold]Job ID:[/bold] test-job-abc123"), + call("[bold]Status:[/bold] Running"), + call("[bold]RayCluster:[/bold] test-cluster"), + call("[bold]Namespace:[/bold] test-ns"), + call("[bold]Started:[/bold] 2025-07-28T11:37:07Z"), + ] + mock_inner_table.add_row.assert_has_calls(expected_calls) + + # Verify Panel is created with inner table + mock_panel.fit.assert_called_once_with(mock_inner_table) + + # 
Verify main table gets the panel + mock_main_table.add_row.assert_called_once_with(mock_panel.fit.return_value) + + # Verify console prints the main table + mock_console.print.assert_called_once_with(mock_main_table) + + +def test_print_job_status_complete_format(mocker): + """ + Test the print_job_status function format for a completed job. + """ + # Mock Rich components + mock_console = MagicMock() + mock_inner_table = MagicMock() + mock_main_table = MagicMock() + mock_panel = MagicMock() + + # Mock Table to return different instances + table_instances = [mock_inner_table, mock_main_table] + mock_table_class = MagicMock(side_effect=table_instances) + + mocker.patch( + "codeflare_sdk.ray.rayjobs.pretty_print.Console", return_value=mock_console + ) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Table", mock_table_class) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Panel", mock_panel) + + # Create test job info for completed job + job_info = RayJobInfo( + name="completed-job", + job_id="completed-job-xyz789", + status=RayJobDeploymentStatus.COMPLETE, + namespace="prod-ns", + cluster_name="prod-cluster", + start_time="2025-07-28T11:37:07Z", + failed_attempts=0, + succeeded_attempts=1, + ) + + # Call the function + print_job_status(job_info) + + # Verify correct header color for completed job (green) (versus our hard-coded version of this for cluster) + expected_calls = [ + call("[white on green][bold]Name"), # Green header for complete + call( + "[bold underline]completed-job", "Complete :white_heavy_check_mark:" + ), # Checkmark emoji + call(), # Empty separator + call("[bold]Job ID:[/bold] completed-job-xyz789"), + call("[bold]Status:[/bold] Complete"), + call("[bold]RayCluster:[/bold] prod-cluster"), + call("[bold]Namespace:[/bold] prod-ns"), + call("[bold]Started:[/bold] 2025-07-28T11:37:07Z"), + ] + mock_inner_table.add_row.assert_has_calls(expected_calls) + + +def test_print_job_status_failed_with_attempts_format(mocker): + """ + Test the 
print_job_status function format for a failed job with attempts. + """ + # Mock Rich components + mock_console = MagicMock() + mock_inner_table = MagicMock() + mock_main_table = MagicMock() + mock_panel = MagicMock() + + # Mock Table to return different instances + table_instances = [mock_inner_table, mock_main_table] + mock_table_class = MagicMock(side_effect=table_instances) + + mocker.patch( + "codeflare_sdk.ray.rayjobs.pretty_print.Console", return_value=mock_console + ) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Table", mock_table_class) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Panel", mock_panel) + + # Create test job info with failures + job_info = RayJobInfo( + name="failing-job", + job_id="failing-job-fail123", + status=RayJobDeploymentStatus.FAILED, + namespace="test-ns", + cluster_name="test-cluster", + start_time="2025-07-28T11:37:07Z", + failed_attempts=3, # Has failures + succeeded_attempts=0, + ) + + # Call the function + print_job_status(job_info) + + # Verify correct formatting including failure attempts (versus our hard-coded version of this for cluster) + expected_calls = [ + call("[white on red][bold]Name"), # Red header for failed + call("[bold underline]failing-job", "Failed :x:"), # X emoji for failed + call(), # Empty separator + call("[bold]Job ID:[/bold] failing-job-fail123"), + call("[bold]Status:[/bold] Failed"), + call("[bold]RayCluster:[/bold] test-cluster"), + call("[bold]Namespace:[/bold] test-ns"), + call("[bold]Started:[/bold] 2025-07-28T11:37:07Z"), + call("[bold]Failed Attempts:[/bold] 3"), # Failed attempts should be shown + ] + mock_inner_table.add_row.assert_has_calls(expected_calls) + + +def test_print_no_job_found_format(mocker): + """ + Test the print_no_job_found function format. 
+ """ + # Mock Rich components + mock_console = MagicMock() + mock_inner_table = MagicMock() + mock_main_table = MagicMock() + mock_panel = MagicMock() + + # Mock Table to return different instances + table_instances = [mock_inner_table, mock_main_table] + mock_table_class = MagicMock(side_effect=table_instances) + + mocker.patch( + "codeflare_sdk.ray.rayjobs.pretty_print.Console", return_value=mock_console + ) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Table", mock_table_class) + mocker.patch("codeflare_sdk.ray.rayjobs.pretty_print.Panel", mock_panel) + + # Call the function + print_no_job_found("missing-job", "test-namespace") + + # Verify error message format (versus our hard-coded version of this for cluster) + expected_calls = [ + call("[white on red][bold]Name"), # Red header for error + call( + "[bold underline]missing-job", "[bold red]No RayJob found" + ), # Error message in red + call(), # Empty separator + call(), # Another empty row + call("Please run rayjob.submit() to submit a job."), # Helpful hint + call(), # Empty separator + call("[bold]Namespace:[/bold] test-namespace"), + ] + mock_inner_table.add_row.assert_has_calls(expected_calls) + + # Verify Panel is used + mock_panel.fit.assert_called_once_with(mock_inner_table) diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 1136e6e5..5429f303 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -1,4 +1,4 @@ -# Copyright 2024 IBM, Red Hat +# Copyright 2025 IBM, Red Hat # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/src/codeflare_sdk/ray/rayjobs/test_status.py b/src/codeflare_sdk/ray/rayjobs/test_status.py new file mode 100644 index 00000000..6d2ce946 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/test_status.py @@ -0,0 +1,290 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from codeflare_sdk.ray.rayjobs.rayjob import RayJob +from codeflare_sdk.ray.rayjobs.status import ( + CodeflareRayJobStatus, + RayJobDeploymentStatus, + RayJobInfo, +) + + +def test_rayjob_status(mocker): + """ + Test the RayJob status method with different deployment statuses. 
+ """ + # Mock the RayjobApi to avoid actual Kubernetes calls + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = mock_api_class.return_value + + # Create a RayJob instance + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Test case 1: No job found + mock_api_instance.get_job_status.return_value = None + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.UNKNOWN + assert ready == False + + # Test case 2: Running job + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "Running", + "startTime": "2025-07-28T11:37:07Z", + "failed": 0, + "succeeded": 0, + "rayClusterName": "test-cluster", + } + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.RUNNING + assert ready == False + + # Test case 3: Complete job + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "Complete", + "startTime": "2025-07-28T11:37:07Z", + "endTime": "2025-07-28T11:42:30Z", + "failed": 0, + "succeeded": 1, + "rayClusterName": "test-cluster", + } + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.COMPLETE + assert ready == True + + # Test case 4: Failed job + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "Failed", + "startTime": "2025-07-28T11:37:07Z", + "endTime": "2025-07-28T11:42:30Z", + "failed": 1, + "succeeded": 0, + "rayClusterName": "test-cluster", + } + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.FAILED + assert ready == False + + # Test case 5: Suspended job + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "Suspended", + "startTime": 
"2025-07-28T11:37:07Z", + "failed": 0, + "succeeded": 0, + "rayClusterName": "test-cluster", + } + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.SUSPENDED + assert ready == False + + +def test_rayjob_status_unknown_deployment_status(mocker): + """ + Test handling of unknown deployment status from the API. + """ + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = mock_api_class.return_value + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Test with unrecognized deployment status + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "SomeNewStatus", # Unknown status + "startTime": "2025-07-28T11:37:07Z", + "failed": 0, + "succeeded": 0, + } + + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.UNKNOWN + assert ready == False + + +def test_rayjob_status_missing_fields(mocker): + """ + Test handling of API response with missing fields. + """ + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = mock_api_class.return_value + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Test with minimal API response (missing some fields) + mock_api_instance.get_job_status.return_value = { + # Missing jobId, failed, succeeded, etc. + "jobDeploymentStatus": "Running", + } + + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.RUNNING + assert ready == False + + +def test_map_to_codeflare_status(mocker): + """ + Test the _map_to_codeflare_status helper method directly. 
+ """ + # Mock the RayjobApi constructor to avoid authentication issues + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Test all deployment status mappings + status, ready = rayjob._map_to_codeflare_status(RayJobDeploymentStatus.COMPLETE) + assert status == CodeflareRayJobStatus.COMPLETE + assert ready == True + + status, ready = rayjob._map_to_codeflare_status(RayJobDeploymentStatus.RUNNING) + assert status == CodeflareRayJobStatus.RUNNING + assert ready == False + + status, ready = rayjob._map_to_codeflare_status(RayJobDeploymentStatus.FAILED) + assert status == CodeflareRayJobStatus.FAILED + assert ready == False + + status, ready = rayjob._map_to_codeflare_status(RayJobDeploymentStatus.SUSPENDED) + assert status == CodeflareRayJobStatus.SUSPENDED + assert ready == False + + status, ready = rayjob._map_to_codeflare_status(RayJobDeploymentStatus.UNKNOWN) + assert status == CodeflareRayJobStatus.UNKNOWN + assert ready == False + + +def test_rayjob_info_dataclass(): + """ + Test the RayJobInfo dataclass creation and field access. 
+ """ + job_info = RayJobInfo( + name="test-job", + job_id="test-job-abc123", + status=RayJobDeploymentStatus.RUNNING, + namespace="test-ns", + cluster_name="test-cluster", + start_time="2025-07-28T11:37:07Z", + failed_attempts=0, + succeeded_attempts=0, + ) + + # Test all fields are accessible + assert job_info.name == "test-job" + assert job_info.job_id == "test-job-abc123" + assert job_info.status == RayJobDeploymentStatus.RUNNING + assert job_info.namespace == "test-ns" + assert job_info.cluster_name == "test-cluster" + assert job_info.start_time == "2025-07-28T11:37:07Z" + assert job_info.end_time is None # Default value + assert job_info.failed_attempts == 0 + assert job_info.succeeded_attempts == 0 + + +def test_rayjob_status_print_no_job_found(mocker): + """ + Test that pretty_print.print_no_job_found is called when no job is found and print_to_console=True. + """ + # Mock the RayjobApi and pretty_print + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = mock_api_class.return_value + mock_print_no_job_found = mocker.patch( + "codeflare_sdk.ray.rayjobs.pretty_print.print_no_job_found" + ) + + # Create a RayJob instance + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # No job found scenario + mock_api_instance.get_job_status.return_value = None + + # Call status with print_to_console=True + status, ready = rayjob.status(print_to_console=True) + + # Verify the pretty print function was called + mock_print_no_job_found.assert_called_once_with("test-job", "test-ns") + assert status == CodeflareRayJobStatus.UNKNOWN + assert ready == False + + +def test_rayjob_status_print_job_found(mocker): + """ + Test that pretty_print.print_job_status is called when job is found and print_to_console=True. 
+ """ + # Mock the RayjobApi and pretty_print + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = mock_api_class.return_value + mock_print_job_status = mocker.patch( + "codeflare_sdk.ray.rayjobs.pretty_print.print_job_status" + ) + + # Create a RayJob instance + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Job found scenario + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "Running", + "startTime": "2025-07-28T11:37:07Z", + "failed": 0, + "succeeded": 0, + "rayClusterName": "test-cluster", + } + + # Call status with print_to_console=True + status, ready = rayjob.status(print_to_console=True) + + # Verify the pretty print function was called + mock_print_job_status.assert_called_once() + # Verify the RayJobInfo object passed to print_job_status + call_args = mock_print_job_status.call_args[0][0] # First positional argument + assert call_args.name == "test-job" + assert call_args.job_id == "test-job-abc123" + assert call_args.status == RayJobDeploymentStatus.RUNNING + assert call_args.namespace == "test-ns" + assert call_args.cluster_name == "test-cluster" + + assert status == CodeflareRayJobStatus.RUNNING + assert ready == False From 2ec5f6278c937500589da7f1e8e3bd28d06d46df Mon Sep 17 00:00:00 2001 From: Pat O'Connor Date: Tue, 29 Jul 2025 12:33:39 +0100 Subject: [PATCH 04/33] feat(RHOAIENG-26487): Cluster lifecycling via RayJob Signed-off-by: Pat O'Connor --- src/codeflare_sdk/ray/rayjobs/pretty_print.py | 1 + src/codeflare_sdk/ray/rayjobs/rayjob.py | 168 +++++++-- .../ray/rayjobs/test_pretty_print.py | 3 + src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 324 +++++++++++++++++- 4 files changed, 460 insertions(+), 36 deletions(-) diff --git a/src/codeflare_sdk/ray/rayjobs/pretty_print.py b/src/codeflare_sdk/ray/rayjobs/pretty_print.py index 9bc89b88..34e8dfa1 100644 --- 
a/src/codeflare_sdk/ray/rayjobs/pretty_print.py +++ b/src/codeflare_sdk/ray/rayjobs/pretty_print.py @@ -40,6 +40,7 @@ def print_job_status(job_info: RayJobInfo): # Add timing information if available if job_info.start_time: + table.add_row() table.add_row(f"[bold]Started:[/bold] {job_info.start_time}") # Add attempt counts if there are failures diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index ac2210a2..0d966b0e 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -20,6 +20,10 @@ from typing import Dict, Any, Optional, Tuple from odh_kuberay_client.kuberay_job_api import RayjobApi +from ..cluster.cluster import Cluster +from ..cluster.config import ClusterConfiguration +from ..cluster.build_ray_cluster import build_ray_cluster + from .status import ( RayJobDeploymentStatus, CodeflareRayJobStatus, @@ -27,7 +31,7 @@ ) from . import pretty_print -# Set up logging + logger = logging.getLogger(__name__) @@ -42,74 +46,110 @@ class RayJob: def __init__( self, job_name: str, - cluster_name: str, + cluster_name: Optional[str] = None, + cluster_config: Optional[ClusterConfiguration] = None, namespace: str = "default", - entrypoint: str = "None", + entrypoint: Optional[str] = None, runtime_env: Optional[Dict[str, Any]] = None, + shutdown_after_job_finishes: bool = True, + ttl_seconds_after_finished: int = 0, + active_deadline_seconds: Optional[int] = None, ): """ Initialize a RayJob instance. 
Args: - name: The name for the Ray job - namespace: The Kubernetes namespace to submit the job to (default: "default") - cluster_name: The name of the Ray cluster to submit the job to - **kwargs: Additional configuration options + job_name: The name for the Ray job + cluster_name: The name of an existing Ray cluster (optional if cluster_config provided) + cluster_config: Configuration for creating a new cluster (optional if cluster_name provided) + namespace: The Kubernetes namespace (default: "default") + entrypoint: The Python script or command to run (required for submission) + runtime_env: Ray runtime environment configuration (optional) + shutdown_after_job_finishes: Whether to automatically cleanup the cluster after job completion (default: True) + ttl_seconds_after_finished: Seconds to wait before cleanup after job finishes (default: 0) + active_deadline_seconds: Maximum time the job can run before being terminated (optional) """ + # Validate input parameters + if cluster_name is None and cluster_config is None: + raise ValueError("Either cluster_name or cluster_config must be provided") + + if cluster_name is not None and cluster_config is not None: + raise ValueError("Cannot specify both cluster_name and cluster_config") + self.name = job_name self.namespace = namespace - self.cluster_name = cluster_name self.entrypoint = entrypoint self.runtime_env = runtime_env + self.shutdown_after_job_finishes = shutdown_after_job_finishes + self.ttl_seconds_after_finished = ttl_seconds_after_finished + self.active_deadline_seconds = active_deadline_seconds + + # Cluster configuration + self._cluster_name = cluster_name + self._cluster_config = cluster_config + + # Determine cluster name for the job + if cluster_config is not None: + # Ensure cluster config has the same namespace as the job + if cluster_config.namespace is None: + cluster_config.namespace = namespace + elif cluster_config.namespace != namespace: + logger.warning( + f"Cluster config namespace 
({cluster_config.namespace}) differs from job namespace ({namespace})" + ) + + self.cluster_name = cluster_config.name or f"{job_name}-cluster" + # Update the cluster config name if it wasn't set + if not cluster_config.name: + cluster_config.name = self.cluster_name + else: + self.cluster_name = cluster_name # Initialize the KubeRay job API client self._api = RayjobApi() logger.info(f"Initialized RayJob: {self.name} in namespace: {self.namespace}") - def submit( - self, - ) -> str: + def submit(self) -> str: """ Submit the Ray job to the Kubernetes cluster. - Args: - entrypoint: The Python script or command to run - runtime_env: Ray runtime environment configuration (optional) + The RayJob CRD will automatically: + - Create a new cluster if cluster_config was provided + - Use existing cluster if cluster_name was provided + - Clean up resources based on shutdown_after_job_finishes setting Returns: The job ID/name if submission was successful Raises: - RuntimeError: If the job has already been submitted or submission fails + ValueError: If entrypoint is not provided + RuntimeError: If job submission fails """ + # Validate required parameters + if not self.entrypoint: + raise ValueError("entrypoint must be provided to submit a RayJob") + # Build the RayJob custom resource - rayjob_cr = self._build_rayjob_cr( - entrypoint=self.entrypoint, - runtime_env=self.runtime_env, - ) + rayjob_cr = self._build_rayjob_cr() - # Submit the job - logger.info( - f"Submitting RayJob {self.name} to RayCluster {self.cluster_name} in namespace {self.namespace}" - ) + # Submit the job - KubeRay operator handles everything else + logger.info(f"Submitting RayJob {self.name} to KubeRay operator") result = self._api.submit_job(k8s_namespace=self.namespace, job=rayjob_cr) if result: logger.info(f"Successfully submitted RayJob {self.name}") + if self.shutdown_after_job_finishes: + logger.info( + f"Cluster will be automatically cleaned up {self.ttl_seconds_after_finished}s after job completion" 
+ ) return self.name else: raise RuntimeError(f"Failed to submit RayJob {self.name}") - def _build_rayjob_cr( - self, - entrypoint: str, - runtime_env: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + def _build_rayjob_cr(self) -> Dict[str, Any]: """ - Build the RayJob custom resource specification. - - This creates a minimal RayJob CR that can be extended later. + Build the RayJob custom resource specification using native RayJob capabilities. """ # Basic RayJob custom resource structure rayjob_cr = { @@ -120,17 +160,75 @@ def _build_rayjob_cr( "namespace": self.namespace, }, "spec": { - "entrypoint": entrypoint, - "clusterSelector": {"ray.io/cluster": self.cluster_name}, + "entrypoint": self.entrypoint, + "shutdownAfterJobFinishes": self.shutdown_after_job_finishes, + "ttlSecondsAfterFinished": self.ttl_seconds_after_finished, }, } + # Add active deadline if specified + if self.active_deadline_seconds: + rayjob_cr["spec"]["activeDeadlineSeconds"] = self.active_deadline_seconds + # Add runtime environment if specified - if runtime_env: - rayjob_cr["spec"]["runtimeEnvYAML"] = str(runtime_env) + if self.runtime_env: + rayjob_cr["spec"]["runtimeEnvYAML"] = str(self.runtime_env) + + # Configure cluster: either use existing or create new + if self._cluster_config is not None: + # Use rayClusterSpec to create a new cluster - leverage existing build logic + ray_cluster_spec = self._build_ray_cluster_spec() + rayjob_cr["spec"]["rayClusterSpec"] = ray_cluster_spec + logger.info(f"RayJob will create new cluster: {self.cluster_name}") + else: + # Use clusterSelector to reference existing cluster + rayjob_cr["spec"]["clusterSelector"] = {"ray.io/cluster": self.cluster_name} + logger.info(f"RayJob will use existing cluster: {self.cluster_name}") return rayjob_cr + def _build_ray_cluster_spec(self) -> Dict[str, Any]: + """ + Build the RayCluster spec from ClusterConfiguration using existing build_ray_cluster logic. 
+ + Returns: + Dict containing the RayCluster spec for embedding in RayJob + """ + if not self._cluster_config: + raise RuntimeError("No cluster configuration provided") + + # Create a shallow copy of the cluster config to avoid modifying the original + import copy + + temp_config = copy.copy(self._cluster_config) + + # Ensure we get a RayCluster (not AppWrapper) and don't write to file + temp_config.appwrapper = False + temp_config.write_to_file = False + + # Create a minimal Cluster object for the build process + from ..cluster.cluster import Cluster + + temp_cluster = Cluster.__new__(Cluster) # Create without calling __init__ + temp_cluster.config = temp_config + + """ + For now, RayJob with a new/auto-created cluster will not work with Kueue. + This is due to the Kueue label not being propagated to the RayCluster. + """ + + # Use the existing build_ray_cluster function to generate the RayCluster + ray_cluster_dict = build_ray_cluster(temp_cluster) + + # Extract just the RayCluster spec - RayJob CRD doesn't support metadata in rayClusterSpec + # Note: CodeFlare Operator should still create dashboard routes for the RayCluster + ray_cluster_spec = ray_cluster_dict["spec"] + + logger.info( + f"Built RayCluster spec using existing build logic for cluster: {self.cluster_name}" + ) + return ray_cluster_spec + def status( self, print_to_console: bool = True ) -> Tuple[CodeflareRayJobStatus, bool]: diff --git a/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py b/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py index dbfd7caf..3bbe8bee 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py +++ b/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py @@ -106,6 +106,7 @@ def test_print_job_status_running_format(mocker): call("[bold]Status:[/bold] Running"), call("[bold]RayCluster:[/bold] test-cluster"), call("[bold]Namespace:[/bold] test-ns"), + call(), # Empty row before timing info call("[bold]Started:[/bold] 2025-07-28T11:37:07Z"), ] 
mock_inner_table.add_row.assert_has_calls(expected_calls) @@ -166,6 +167,7 @@ def test_print_job_status_complete_format(mocker): call("[bold]Status:[/bold] Complete"), call("[bold]RayCluster:[/bold] prod-cluster"), call("[bold]Namespace:[/bold] prod-ns"), + call(), # Empty row before timing info call("[bold]Started:[/bold] 2025-07-28T11:37:07Z"), ] mock_inner_table.add_row.assert_has_calls(expected_calls) @@ -215,6 +217,7 @@ def test_print_job_status_failed_with_attempts_format(mocker): call("[bold]Status:[/bold] Failed"), call("[bold]RayCluster:[/bold] test-cluster"), call("[bold]Namespace:[/bold] test-ns"), + call(), # Empty row before timing info call("[bold]Started:[/bold] 2025-07-28T11:37:07Z"), call("[bold]Failed Attempts:[/bold] 3"), # Failed attempts should be shown ] diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 5429f303..7554ca4c 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -13,8 +13,9 @@ # limitations under the License. 
import pytest -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from codeflare_sdk.ray.rayjobs.rayjob import RayJob +from codeflare_sdk.ray.cluster.config import ClusterConfiguration def test_rayjob_submit_success(mocker): @@ -86,3 +87,324 @@ def test_rayjob_submit_failure(mocker): # Test that RuntimeError is raised on failure with pytest.raises(RuntimeError, match="Failed to submit RayJob test-rayjob"): rayjob.submit() + + +def test_rayjob_init_validation_both_provided(mocker): + """Test that providing both cluster_name and cluster_config raises error.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + cluster_config = ClusterConfiguration(name="test-cluster", namespace="test") + + with pytest.raises( + ValueError, match="Cannot specify both cluster_name and cluster_config" + ): + RayJob( + job_name="test-job", + cluster_name="existing-cluster", + cluster_config=cluster_config, + entrypoint="python script.py", + ) + + +def test_rayjob_init_validation_neither_provided(mocker): + """Test that providing neither cluster_name nor cluster_config raises error.""" + # Mock kubernetes config loading (though this should fail before reaching it) + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely (though this should fail before reaching it) + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + with pytest.raises( + ValueError, match="Either cluster_name or cluster_config must be provided" + ): + RayJob(job_name="test-job", entrypoint="python script.py") + + +def test_rayjob_init_with_cluster_config(mocker): + """Test RayJob initialization with cluster configuration for auto-creation.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + cluster_config = ClusterConfiguration( + name="auto-cluster", namespace="test-namespace", num_workers=2 + ) + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + ) + + assert rayjob.name == "test-job" + assert rayjob.cluster_name == "auto-cluster" + assert rayjob._cluster_config == cluster_config + assert rayjob._cluster_name is None + + +def test_rayjob_cluster_name_generation(mocker): + """Test that cluster names are generated when config has empty name.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + cluster_config = ClusterConfiguration( + name="", # Empty name should trigger generation + namespace="test-namespace", + num_workers=1, + ) + + rayjob = RayJob( + job_name="my-job", cluster_config=cluster_config, entrypoint="python script.py" + ) + + assert rayjob.cluster_name == "my-job-cluster" + assert cluster_config.name == "my-job-cluster" # Should be updated + + +def test_rayjob_cluster_config_namespace_none(mocker): + """Test that cluster config namespace is set when None.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + cluster_config = ClusterConfiguration( + name="test-cluster", + namespace=None, # This should be set to job namespace + num_workers=1, + ) + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + namespace="job-namespace", + entrypoint="python script.py", + ) + + assert cluster_config.namespace == "job-namespace" + assert rayjob.namespace == "job-namespace" + + +def test_rayjob_with_active_deadline_seconds(mocker): + """Test RayJob CR generation with active deadline seconds.""" + # Mock kubernetes config loading 
+ mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python main.py", + active_deadline_seconds=30, + ) + + rayjob_cr = rayjob._build_rayjob_cr() + + assert rayjob_cr["spec"]["activeDeadlineSeconds"] == 30 + + +def test_build_ray_cluster_spec_no_config_error(mocker): + """Test _build_ray_cluster_spec raises error when no cluster config.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Create RayJob with cluster_name (no cluster_config) + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python script.py", + ) + + # Line 198: Should raise RuntimeError when trying to build spec without config + with pytest.raises(RuntimeError, match="No cluster configuration provided"): + rayjob._build_ray_cluster_spec() + + +@patch("codeflare_sdk.ray.rayjobs.rayjob.build_ray_cluster") +def test_build_ray_cluster_spec(mock_build_ray_cluster, mocker): + """Test _build_ray_cluster_spec method.""" + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + mock_ray_cluster = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": {"name": "test-cluster", "namespace": "test"}, + "spec": { + "rayVersion": "2.9.0", + "headGroupSpec": {"replicas": 1}, + "workerGroupSpecs": [{"replicas": 2}], + }, + } + mock_build_ray_cluster.return_value = mock_ray_cluster + + cluster_config = ClusterConfiguration( + name="test-cluster", namespace="test", num_workers=2 + ) + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + ) + + spec = 
rayjob._build_ray_cluster_spec() + + # Should return only the spec part, not metadata + assert spec == mock_ray_cluster["spec"] + assert "metadata" not in spec + + # Verify build_ray_cluster was called with correct parameters + mock_build_ray_cluster.assert_called_once() + call_args = mock_build_ray_cluster.call_args[0][0] + assert call_args.config.appwrapper is False + assert call_args.config.write_to_file is False + + +def test_build_rayjob_cr_with_existing_cluster(mocker): + """Test _build_rayjob_cr method with existing cluster.""" + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + namespace="test-namespace", + entrypoint="python main.py", + shutdown_after_job_finishes=False, + ttl_seconds_after_finished=300, + ) + + rayjob_cr = rayjob._build_rayjob_cr() + + # Check basic structure + assert rayjob_cr["apiVersion"] == "ray.io/v1" + assert rayjob_cr["kind"] == "RayJob" + assert rayjob_cr["metadata"]["name"] == "test-job" + + # Check lifecycle parameters + spec = rayjob_cr["spec"] + assert spec["entrypoint"] == "python main.py" + assert spec["shutdownAfterJobFinishes"] is False + assert spec["ttlSecondsAfterFinished"] == 300 + + # Should use clusterSelector for existing cluster + assert spec["clusterSelector"]["ray.io/cluster"] == "existing-cluster" + assert "rayClusterSpec" not in spec + + +@patch("codeflare_sdk.ray.rayjobs.rayjob.build_ray_cluster") +def test_build_rayjob_cr_with_auto_cluster(mock_build_ray_cluster, mocker): + """Test _build_rayjob_cr method with auto-created cluster.""" + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + mock_ray_cluster = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": {"name": "auto-cluster", "namespace": "test"}, + 
"spec": { + "rayVersion": "2.9.0", + "headGroupSpec": {"replicas": 1}, + "workerGroupSpecs": [{"replicas": 2}], + }, + } + mock_build_ray_cluster.return_value = mock_ray_cluster + + cluster_config = ClusterConfiguration( + name="auto-cluster", namespace="test-namespace", num_workers=2 + ) + + rayjob = RayJob( + job_name="test-job", cluster_config=cluster_config, entrypoint="python main.py" + ) + + rayjob_cr = rayjob._build_rayjob_cr() + + # Should use rayClusterSpec for auto-created cluster + assert rayjob_cr["spec"]["rayClusterSpec"] == mock_ray_cluster["spec"] + assert "clusterSelector" not in rayjob_cr["spec"] + + +def test_submit_validation_no_entrypoint(mocker): + """Test that submit() raises error when entrypoint is None.""" + mocker.patch("kubernetes.config.load_kube_config") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=None, # No entrypoint provided + ) + + with pytest.raises( + ValueError, match="entrypoint must be provided to submit a RayJob" + ): + rayjob.submit() + + +@patch("codeflare_sdk.ray.rayjobs.rayjob.build_ray_cluster") +def test_submit_with_auto_cluster(mock_build_ray_cluster, mocker): + """Test successful submission with auto-created cluster.""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_ray_cluster = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "spec": { + "rayVersion": "2.9.0", + "headGroupSpec": {"replicas": 1}, + "workerGroupSpecs": [{"replicas": 1}], + }, + } + mock_build_ray_cluster.return_value = mock_ray_cluster + + # Mock the RayjobApi + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + mock_api_instance.submit_job.return_value = True + + cluster_config = ClusterConfiguration( + name="auto-cluster", namespace="test", num_workers=1 + ) + + rayjob = RayJob( + job_name="test-job", + 
cluster_config=cluster_config, + entrypoint="python script.py", + ) + + result = rayjob.submit() + + assert result == "test-job" + + # Verify the correct RayJob CR was submitted + mock_api_instance.submit_job.assert_called_once() + call_args = mock_api_instance.submit_job.call_args + + job_cr = call_args.kwargs["job"] + assert "rayClusterSpec" in job_cr["spec"] + assert job_cr["spec"]["rayClusterSpec"] == mock_ray_cluster["spec"] From a4d21221272967175858e60f11eede334b6178f6 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Tue, 12 Aug 2025 20:46:01 +0100 Subject: [PATCH 05/33] feat(RHOAIENG-26487): rayjob lifecycled cluster improvements and tests --- poetry.lock | 42 +- pyproject.toml | 2 +- src/codeflare_sdk/__init__.py | 1 + src/codeflare_sdk/common/kueue/kueue.py | 3 +- src/codeflare_sdk/common/utils/__init__.py | 7 + src/codeflare_sdk/common/utils/k8s_utils.py | 37 + src/codeflare_sdk/common/utils/test_demos.py | 57 ++ .../common/utils/test_k8s_utils.py | 255 +++++++ src/codeflare_sdk/common/utils/utils.py | 11 + .../common/widgets/test_widgets.py | 4 +- src/codeflare_sdk/common/widgets/widgets.py | 6 +- src/codeflare_sdk/ray/__init__.py | 1 + .../ray/cluster/build_ray_cluster.py | 16 +- src/codeflare_sdk/ray/cluster/cluster.py | 28 +- .../ray/cluster/test_build_ray_cluster.py | 2 +- src/codeflare_sdk/ray/rayjobs/__init__.py | 2 +- src/codeflare_sdk/ray/rayjobs/config.py | 457 +++++++++++++ src/codeflare_sdk/ray/rayjobs/rayjob.py | 165 ++--- src/codeflare_sdk/ray/rayjobs/test_config.py | 82 +++ src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 641 ++++++++++++++++-- 20 files changed, 1619 insertions(+), 200 deletions(-) create mode 100644 src/codeflare_sdk/common/utils/k8s_utils.py create mode 100644 src/codeflare_sdk/common/utils/test_demos.py create mode 100644 src/codeflare_sdk/common/utils/test_k8s_utils.py create mode 100644 src/codeflare_sdk/ray/rayjobs/config.py create mode 100644 src/codeflare_sdk/ray/rayjobs/test_config.py diff --git a/poetry.lock 
b/poetry.lock index 293df340..e8380cf6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2411,26 +2411,6 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] -[[package]] -name = "odh-kuberay-client" -version = "0.0.0.dev40" -description = "Python SDK for Kuberay client" -optional = false -python-versions = ">=3.11,<4.0" -groups = ["main"] -files = [ - {file = "odh_kuberay_client-0.0.0.dev40-py3-none-any.whl", hash = "sha256:547daaa07ff3687b75dc844473b0897822d3aa4803aed865037ddf41da22f593"}, - {file = "odh_kuberay_client-0.0.0.dev40.tar.gz", hash = "sha256:a4ec11aff244099256cbca0628d8dbb4c5fe48e09966a6b75b412895aebd4834"}, -] - -[package.dependencies] -kubernetes = ">=25.0.0" - -[package.source] -type = "legacy" -url = "https://test.pypi.org/simple" -reference = "testpypi" - [[package]] name = "opencensus" version = "0.11.4" @@ -3403,6 +3383,26 @@ files = [ [package.dependencies] pytest = ">=7.0.0" +[[package]] +name = "python-client" +version = "0.0.0-dev" +description = "Python Client for Kuberay" +optional = false +python-versions = "^3.11" +groups = ["main"] +files = [] +develop = false + +[package.dependencies] +kubernetes = ">=25.0.0" + +[package.source] +type = "git" +url = "https://github.com/ray-project/kuberay.git" +reference = "d1e750d9beac612ad455b951c1a789f971409ab3" +resolved_reference = "d1e750d9beac612ad455b951c1a789f971409ab3" +subdirectory = "clients/python-client" + [[package]] name = "python-dateutil" version = "3.9.0" @@ -4790,4 +4790,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "3bcd24a524ab4189cd5e48a4f343fc21bf53fabe48ad701e13609b5483b89760" +content-hash = "d854f6abffad6c08100fdfeeb53d41fac01ef253f3d3b07cae3608d44768d4ee" diff --git a/pyproject.toml b/pyproject.toml index 1a0b8d6c..7664398a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = 
">= 2.10.6" ipywidgets = "8.1.2" -odh-kuberay-client = {version = "0.0.0.dev40", source = "testpypi"} +python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "d1e750d9beac612ad455b951c1a789f971409ab3" } [[tool.poetry.source]] name = "pypi" diff --git a/src/codeflare_sdk/__init__.py b/src/codeflare_sdk/__init__.py index 95753a59..f9a06524 100644 --- a/src/codeflare_sdk/__init__.py +++ b/src/codeflare_sdk/__init__.py @@ -11,6 +11,7 @@ AppWrapperStatus, RayJobClient, RayJob, + RayJobClusterConfig, ) from .common.widgets import view_clusters diff --git a/src/codeflare_sdk/common/kueue/kueue.py b/src/codeflare_sdk/common/kueue/kueue.py index 00f3364a..a721713e 100644 --- a/src/codeflare_sdk/common/kueue/kueue.py +++ b/src/codeflare_sdk/common/kueue/kueue.py @@ -18,6 +18,8 @@ from kubernetes import client from kubernetes.client.exceptions import ApiException +from ...common.utils import get_current_namespace + def get_default_kueue_name(namespace: str) -> Optional[str]: """ @@ -81,7 +83,6 @@ def list_local_queues( List[dict]: A list of dictionaries containing the name of the local queue and the available flavors """ - from ...ray.cluster.cluster import get_current_namespace if namespace is None: # pragma: no cover namespace = get_current_namespace() diff --git a/src/codeflare_sdk/common/utils/__init__.py b/src/codeflare_sdk/common/utils/__init__.py index e69de29b..e662bf5e 100644 --- a/src/codeflare_sdk/common/utils/__init__.py +++ b/src/codeflare_sdk/common/utils/__init__.py @@ -0,0 +1,7 @@ +""" +Common utilities for the CodeFlare SDK. +""" + +from .k8s_utils import get_current_namespace + +__all__ = ["get_current_namespace"] diff --git a/src/codeflare_sdk/common/utils/k8s_utils.py b/src/codeflare_sdk/common/utils/k8s_utils.py new file mode 100644 index 00000000..57eccf2d --- /dev/null +++ b/src/codeflare_sdk/common/utils/k8s_utils.py @@ -0,0 +1,37 @@ +""" +Kubernetes utility functions for the CodeFlare SDK. 
+""" + +import os +from kubernetes import config +from ..kubernetes_cluster import config_check, _kube_api_error_handling + + +def get_current_namespace(): + """ + Retrieves the current Kubernetes namespace. + + This function attempts to detect the current namespace by: + 1. First checking if running inside a pod (reading from service account namespace file) + 2. Falling back to reading from the current kubeconfig context + + Returns: + str: + The current namespace or None if not found. + """ + if os.path.isfile("/var/run/secrets/kubernetes.io/serviceaccount/namespace"): + try: + file = open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") + active_context = file.readline().strip("\n") + return active_context + except Exception as e: + print("Unable to find current namespace") + print("trying to gather from current context") + try: + _, active_context = config.list_kube_config_contexts(config_check()) + except Exception as e: + return _kube_api_error_handling(e) + try: + return active_context["context"]["namespace"] + except KeyError: + return None diff --git a/src/codeflare_sdk/common/utils/test_demos.py b/src/codeflare_sdk/common/utils/test_demos.py new file mode 100644 index 00000000..9124cbec --- /dev/null +++ b/src/codeflare_sdk/common/utils/test_demos.py @@ -0,0 +1,57 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Tests for demos module. 
+""" + +import pytest +import tempfile +from pathlib import Path +from unittest.mock import patch, MagicMock +from codeflare_sdk.common.utils.demos import copy_demo_nbs + + +class TestCopyDemoNbs: + """Test cases for copy_demo_nbs function.""" + + def test_copy_demo_nbs_directory_exists_error(self): + """Test that FileExistsError is raised when directory exists and overwrite=False.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create a subdirectory that will conflict + conflict_dir = Path(temp_dir) / "demo-notebooks" + conflict_dir.mkdir() + + with pytest.raises(FileExistsError, match="Directory.*already exists"): + copy_demo_nbs(dir=str(conflict_dir), overwrite=False) + + def test_copy_demo_nbs_overwrite_true(self): + """Test that overwrite=True allows copying to existing directory.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create a subdirectory that will conflict + conflict_dir = Path(temp_dir) / "demo-notebooks" + conflict_dir.mkdir() + + # Mock the demo_dir to point to a real directory + with patch("codeflare_sdk.common.utils.demos.demo_dir", temp_dir): + # Should not raise an error with overwrite=True + copy_demo_nbs(dir=str(conflict_dir), overwrite=True) + + def test_copy_demo_nbs_default_parameters(self): + """Test copy_demo_nbs with default parameters.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Mock the demo_dir to point to a real directory + with patch("codeflare_sdk.common.utils.demos.demo_dir", temp_dir): + # Should work with default parameters + copy_demo_nbs(dir=temp_dir, overwrite=True) diff --git a/src/codeflare_sdk/common/utils/test_k8s_utils.py b/src/codeflare_sdk/common/utils/test_k8s_utils.py new file mode 100644 index 00000000..fcd0623d --- /dev/null +++ b/src/codeflare_sdk/common/utils/test_k8s_utils.py @@ -0,0 +1,255 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Tests for k8s_utils module. +""" + +import pytest +from unittest.mock import mock_open, patch, MagicMock +from codeflare_sdk.common.utils.k8s_utils import get_current_namespace + + +class TestGetCurrentNamespace: + """Test cases for get_current_namespace function.""" + + def test_get_current_namespace_incluster_success(self): + """Test successful namespace detection from in-cluster service account.""" + mock_file_content = "test-namespace\n" + + with patch("os.path.isfile", return_value=True): + with patch("builtins.open", mock_open(read_data=mock_file_content)): + result = get_current_namespace() + + assert result == "test-namespace" + + def test_get_current_namespace_incluster_file_read_error(self): + """Test handling of file read errors when reading service account namespace.""" + with patch("os.path.isfile", return_value=True): + with patch("builtins.open", side_effect=IOError("File read error")): + with patch("builtins.print") as mock_print: + # Mock config_check to avoid kubeconfig fallback + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + side_effect=Exception("Config error"), + ): + with patch( + "codeflare_sdk.common.utils.k8s_utils._kube_api_error_handling", + return_value=None, + ): + result = get_current_namespace() + + assert result is None + # Should see both error messages: in-cluster failure and kubeconfig fallback + mock_print.assert_any_call("Unable to find current namespace") + mock_print.assert_any_call("trying to gather from current context") + + def 
test_get_current_namespace_incluster_file_open_error(self): + """Test handling of file open errors when reading service account namespace.""" + with patch("os.path.isfile", return_value=True): + with patch( + "builtins.open", side_effect=PermissionError("Permission denied") + ): + with patch("builtins.print") as mock_print: + # Mock config_check to avoid kubeconfig fallback + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + side_effect=Exception("Config error"), + ): + with patch( + "codeflare_sdk.common.utils.k8s_utils._kube_api_error_handling", + return_value=None, + ): + result = get_current_namespace() + + assert result is None + # Should see both error messages: in-cluster failure and kubeconfig fallback + mock_print.assert_any_call("Unable to find current namespace") + mock_print.assert_any_call("trying to gather from current context") + + def test_get_current_namespace_kubeconfig_success(self): + """Test successful namespace detection from kubeconfig context.""" + mock_contexts = [ + {"name": "context1", "context": {"namespace": "default"}}, + {"name": "context2", "context": {"namespace": "test-namespace"}}, + ] + mock_active_context = { + "name": "context2", + "context": {"namespace": "test-namespace"}, + } + + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + return_value="~/.kube/config", + ): + with patch( + "kubernetes.config.list_kube_config_contexts", + return_value=(mock_contexts, mock_active_context), + ): + result = get_current_namespace() + + assert result == "test-namespace" + mock_print.assert_called_with("trying to gather from current context") + + def test_get_current_namespace_kubeconfig_no_namespace_in_context(self): + """Test handling when kubeconfig context has no namespace field.""" + mock_contexts = [ + {"name": "context1", "context": {}}, + {"name": "context2", "context": {"cluster": "test-cluster"}}, 
+ ] + mock_active_context = { + "name": "context2", + "context": {"cluster": "test-cluster"}, + } + + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + return_value="~/.kube/config", + ): + with patch( + "kubernetes.config.list_kube_config_contexts", + return_value=(mock_contexts, mock_active_context), + ): + result = get_current_namespace() + + assert result is None + mock_print.assert_called_with("trying to gather from current context") + + def test_get_current_namespace_kubeconfig_config_check_error(self): + """Test handling when config_check raises an exception.""" + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + side_effect=Exception("Config error"), + ): + with patch( + "codeflare_sdk.common.utils.k8s_utils._kube_api_error_handling", + return_value=None, + ) as mock_error_handler: + result = get_current_namespace() + + assert result is None + mock_print.assert_called_with("trying to gather from current context") + mock_error_handler.assert_called_once() + + def test_get_current_namespace_kubeconfig_list_contexts_error(self): + """Test handling when list_kube_config_contexts raises an exception.""" + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + return_value="~/.kube/config", + ): + with patch( + "kubernetes.config.list_kube_config_contexts", + side_effect=Exception("Context error"), + ): + with patch( + "codeflare_sdk.common.utils.k8s_utils._kube_api_error_handling", + return_value=None, + ) as mock_error_handler: + result = get_current_namespace() + + assert result is None + mock_print.assert_called_with("trying to gather from current context") + mock_error_handler.assert_called_once() + + def 
test_get_current_namespace_kubeconfig_key_error(self): + """Test handling when accessing context namespace raises KeyError.""" + mock_contexts = [{"name": "context1", "context": {"namespace": "default"}}] + mock_active_context = {"name": "context1"} # Missing 'context' key + + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + return_value="~/.kube/config", + ): + with patch( + "kubernetes.config.list_kube_config_contexts", + return_value=(mock_contexts, mock_active_context), + ): + result = get_current_namespace() + + assert result is None + mock_print.assert_called_with("trying to gather from current context") + + def test_get_current_namespace_fallback_flow(self): + """Test the complete fallback flow from in-cluster to kubeconfig.""" + # First attempt: in-cluster file doesn't exist + # Second attempt: kubeconfig context has namespace + mock_contexts = [ + {"name": "context1", "context": {"namespace": "fallback-namespace"}} + ] + mock_active_context = { + "name": "context1", + "context": {"namespace": "fallback-namespace"}, + } + + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + return_value="~/.kube/config", + ): + with patch( + "kubernetes.config.list_kube_config_contexts", + return_value=(mock_contexts, mock_active_context), + ): + result = get_current_namespace() + + assert result == "fallback-namespace" + mock_print.assert_called_with("trying to gather from current context") + + def test_get_current_namespace_complete_failure(self): + """Test complete failure scenario where no namespace can be detected.""" + with patch("os.path.isfile", return_value=False): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + side_effect=Exception("Config error"), + ): + with 
patch( + "codeflare_sdk.common.utils.k8s_utils._kube_api_error_handling", + return_value=None, + ): + result = get_current_namespace() + + assert result is None + mock_print.assert_called_with("trying to gather from current context") + + def test_get_current_namespace_mixed_errors(self): + """Test scenario with mixed error conditions.""" + # In-cluster file exists but read fails, then kubeconfig also fails + with patch("os.path.isfile", return_value=True): + with patch("builtins.open", side_effect=IOError("File read error")): + with patch("builtins.print") as mock_print: + with patch( + "codeflare_sdk.common.utils.k8s_utils.config_check", + side_effect=Exception("Config error"), + ): + with patch( + "codeflare_sdk.common.utils.k8s_utils._kube_api_error_handling", + return_value=None, + ): + result = get_current_namespace() + + assert result is None + # Should see both error messages + assert mock_print.call_count >= 2 diff --git a/src/codeflare_sdk/common/utils/utils.py b/src/codeflare_sdk/common/utils/utils.py index f876e924..7e30b994 100644 --- a/src/codeflare_sdk/common/utils/utils.py +++ b/src/codeflare_sdk/common/utils/utils.py @@ -19,6 +19,17 @@ ) +def update_image(image) -> str: + """ + The update_image() function automatically sets the image config parameter to a preset image based on Python version if not specified. + This now points to the centralized function in utils.py. + """ + if not image: + # Pull the image based on the matching Python version (or output a warning if not supported) + image = get_ray_image_for_python_version(warn_on_unsupported=True) + return image + + def get_ray_image_for_python_version(python_version=None, warn_on_unsupported=True): """ Get the appropriate Ray image for a given Python version. 
diff --git a/src/codeflare_sdk/common/widgets/test_widgets.py b/src/codeflare_sdk/common/widgets/test_widgets.py index f88d8eb2..33beca5c 100644 --- a/src/codeflare_sdk/common/widgets/test_widgets.py +++ b/src/codeflare_sdk/common/widgets/test_widgets.py @@ -106,7 +106,7 @@ def test_view_clusters(mocker, capsys): # Prepare to run view_clusters when notebook environment is detected mocker.patch("codeflare_sdk.common.widgets.widgets.is_notebook", return_value=True) mock_get_current_namespace = mocker.patch( - "codeflare_sdk.ray.cluster.cluster.get_current_namespace", + "codeflare_sdk.common.utils.get_current_namespace", return_value="default", ) namespace = mock_get_current_namespace.return_value @@ -250,7 +250,7 @@ def test_ray_cluster_manager_widgets_init(mocker, capsys): return_value=test_ray_clusters_df, ) mocker.patch( - "codeflare_sdk.ray.cluster.cluster.get_current_namespace", + "codeflare_sdk.common.utils.get_current_namespace", return_value=namespace, ) mock_delete_cluster = mocker.patch( diff --git a/src/codeflare_sdk/common/widgets/widgets.py b/src/codeflare_sdk/common/widgets/widgets.py index 36d896e8..91295fa9 100644 --- a/src/codeflare_sdk/common/widgets/widgets.py +++ b/src/codeflare_sdk/common/widgets/widgets.py @@ -26,6 +26,8 @@ import ipywidgets as widgets from IPython.display import display, HTML, Javascript import pandas as pd + +from ...common.utils import get_current_namespace from ...ray.cluster.config import ClusterConfiguration from ...ray.cluster.status import RayClusterStatus from ..kubernetes_cluster import _kube_api_error_handling @@ -43,8 +45,6 @@ class RayClusterManagerWidgets: """ def __init__(self, ray_clusters_df: pd.DataFrame, namespace: str = None): - from ...ray.cluster.cluster import get_current_namespace - # Data self.ray_clusters_df = ray_clusters_df self.namespace = get_current_namespace() if not namespace else namespace @@ -353,7 +353,7 @@ def view_clusters(namespace: str = None): ) return # Exit function if not in Jupyter 
Notebook - from ...ray.cluster.cluster import get_current_namespace + from ...common.utils import get_current_namespace if not namespace: namespace = get_current_namespace() diff --git a/src/codeflare_sdk/ray/__init__.py b/src/codeflare_sdk/ray/__init__.py index b2278a05..806ed9a4 100644 --- a/src/codeflare_sdk/ray/__init__.py +++ b/src/codeflare_sdk/ray/__init__.py @@ -6,6 +6,7 @@ from .rayjobs import ( RayJob, + RayJobClusterConfig, RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo, diff --git a/src/codeflare_sdk/ray/cluster/build_ray_cluster.py b/src/codeflare_sdk/ray/cluster/build_ray_cluster.py index 7b2ed79c..936f3275 100644 --- a/src/codeflare_sdk/ray/cluster/build_ray_cluster.py +++ b/src/codeflare_sdk/ray/cluster/build_ray_cluster.py @@ -21,7 +21,7 @@ from ...common.kubernetes_cluster import get_api_client, config_check from kubernetes.client.exceptions import ApiException from ...common.utils.constants import RAY_VERSION -from ...common.utils.utils import get_ray_image_for_python_version +from ...common.utils.utils import update_image import codeflare_sdk import os @@ -96,9 +96,6 @@ ), ] -# Use centralized mapping from constants (so that we only have to update constants.py) -SUPPORTED_PYTHON_VERSIONS = constants.SUPPORTED_PYTHON_VERSIONS - # RayCluster/AppWrapper builder function def build_ray_cluster(cluster: "codeflare_sdk.ray.cluster.Cluster"): @@ -271,17 +268,6 @@ def with_nb_annotations(annotations: dict): # Head/Worker container related functions -def update_image(image) -> str: - """ - The update_image() function automatically sets the image config parameter to a preset image based on Python version if not specified. - This now points to the centralized function in utils.py. 
- """ - if not image: - # Pull the image based on the matching Python version (or output a warning if not supported) - image = get_ray_image_for_python_version(warn_on_unsupported=True) - return image - - def get_pod_spec( cluster: "codeflare_sdk.ray.cluster.Cluster", containers: List, diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index ed81749b..037f6bdf 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -27,6 +27,8 @@ import uuid import warnings +from ...common.utils import get_current_namespace + from ...common.kubernetes_cluster.auth import ( config_check, get_api_client, @@ -640,32 +642,6 @@ def list_all_queued( return resources -def get_current_namespace(): # pragma: no cover - """ - Retrieves the current Kubernetes namespace. - - Returns: - str: - The current namespace or None if not found. - """ - if os.path.isfile("/var/run/secrets/kubernetes.io/serviceaccount/namespace"): - try: - file = open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") - active_context = file.readline().strip("\n") - return active_context - except Exception as e: - print("Unable to find current namespace") - print("trying to gather from current context") - try: - _, active_context = config.list_kube_config_contexts(config_check()) - except Exception as e: - return _kube_api_error_handling(e) - try: - return active_context["context"]["namespace"] - except KeyError: - return None - - def get_cluster( cluster_name: str, namespace: str = "default", diff --git a/src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py b/src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py index f970d945..3a7947d3 100644 --- a/src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py +++ b/src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py @@ -40,7 +40,7 @@ def test_gen_names_without_name(mocker): def test_update_image_without_supported_python_version(mocker): # Mock 
SUPPORTED_PYTHON_VERSIONS mocker.patch.dict( - "codeflare_sdk.ray.cluster.build_ray_cluster.SUPPORTED_PYTHON_VERSIONS", + "codeflare_sdk.common.utils.constants.SUPPORTED_PYTHON_VERSIONS", { "3.11": "ray-py3.11", "3.12": "ray-py3.12", diff --git a/src/codeflare_sdk/ray/rayjobs/__init__.py b/src/codeflare_sdk/ray/rayjobs/__init__.py index 47b573af..756fad91 100644 --- a/src/codeflare_sdk/ray/rayjobs/__init__.py +++ b/src/codeflare_sdk/ray/rayjobs/__init__.py @@ -1,2 +1,2 @@ -from .rayjob import RayJob +from .rayjob import RayJob, RayJobClusterConfig from .status import RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py new file mode 100644 index 00000000..96b59046 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -0,0 +1,457 @@ +# Copyright 2022 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The config sub-module contains the definition of the RayJobClusterConfigV2 dataclass, +which is used to specify resource requirements and other details when creating a +Cluster object. 
+""" + +import pathlib +from dataclasses import dataclass, field, fields +from typing import Dict, List, Optional, Union, get_args, get_origin, Any +from kubernetes.client import ( + V1ConfigMapVolumeSource, + V1KeyToPath, + V1Toleration, + V1Volume, + V1VolumeMount, + V1ObjectMeta, + V1Container, + V1ContainerPort, + V1Lifecycle, + V1ExecAction, + V1LifecycleHandler, + V1EnvVar, + V1PodTemplateSpec, + V1PodSpec, + V1ResourceRequirements, +) + +import logging + +from ...common.utils.constants import RAY_VERSION +from ...common.utils.utils import update_image + +logger = logging.getLogger(__name__) + +dir = pathlib.Path(__file__).parent.parent.resolve() + +# https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html +DEFAULT_ACCELERATORS = { + "nvidia.com/gpu": "GPU", + "intel.com/gpu": "GPU", + "amd.com/gpu": "GPU", + "aws.amazon.com/neuroncore": "neuron_cores", + "google.com/tpu": "TPU", + "habana.ai/gaudi": "HPU", + "huawei.com/Ascend910": "NPU", + "huawei.com/Ascend310": "NPU", +} + +# Default volume mounts for CA certificates +DEFAULT_VOLUME_MOUNTS = [ + V1VolumeMount( + mount_path="/etc/pki/tls/certs/odh-trusted-ca-bundle.crt", + name="odh-trusted-ca-cert", + sub_path="odh-trusted-ca-bundle.crt", + ), + V1VolumeMount( + mount_path="/etc/ssl/certs/odh-trusted-ca-bundle.crt", + name="odh-trusted-ca-cert", + sub_path="odh-trusted-ca-bundle.crt", + ), + V1VolumeMount( + mount_path="/etc/pki/tls/certs/odh-ca-bundle.crt", + name="odh-ca-cert", + sub_path="odh-ca-bundle.crt", + ), + V1VolumeMount( + mount_path="/etc/ssl/certs/odh-ca-bundle.crt", + name="odh-ca-cert", + sub_path="odh-ca-bundle.crt", + ), +] + +# Default volumes for CA certificates +DEFAULT_VOLUMES = [ + V1Volume( + name="odh-trusted-ca-cert", + config_map=V1ConfigMapVolumeSource( + name="odh-trusted-ca-bundle", + items=[V1KeyToPath(key="ca-bundle.crt", path="odh-trusted-ca-bundle.crt")], + optional=True, + ), + ), + V1Volume( + name="odh-ca-cert", + config_map=V1ConfigMapVolumeSource( + 
name="odh-trusted-ca-bundle", + items=[V1KeyToPath(key="odh-ca-bundle.crt", path="odh-ca-bundle.crt")], + optional=True, + ), + ), +] + + +@dataclass +class RayJobClusterConfig: + """ + This dataclass is used to specify resource requirements and other details for RayJobs. + The cluster name and namespace are automatically derived from the RayJob configuration. + + Args: + head_accelerators: + A dictionary of extended resource requests for the head node. ex: {"nvidia.com/gpu": 1} + head_tolerations: + List of tolerations for head nodes. + num_workers: + The number of workers to create. + worker_tolerations: + List of tolerations for worker nodes. + envs: + A dictionary of environment variables to set for the cluster. + image: + The image to use for the cluster. + image_pull_secrets: + A list of image pull secrets to use for the cluster. + labels: + A dictionary of labels to apply to the cluster. + worker_accelerators: + A dictionary of extended resource requests for each worker. ex: {"nvidia.com/gpu": 1} + accelerator_configs: + A dictionary of custom resource mappings to map extended resource requests to RayCluster resource names. + Defaults to DEFAULT_ACCELERATORS but can be overridden with custom mappings. + local_queue: + The name of the queue to use for the cluster. + annotations: + A dictionary of annotations to apply to the cluster. 
+ volumes: + A list of V1Volume objects to add to the Cluster + volume_mounts: + A list of V1VolumeMount objects to add to the Cluster + """ + + head_cpu_requests: Union[int, str] = 2 + head_cpu_limits: Union[int, str] = 2 + head_memory_requests: Union[int, str] = 8 + head_memory_limits: Union[int, str] = 8 + head_accelerators: Dict[str, Union[str, int]] = field(default_factory=dict) + head_tolerations: Optional[List[V1Toleration]] = None + worker_cpu_requests: Union[int, str] = 1 + worker_cpu_limits: Union[int, str] = 1 + num_workers: int = 1 + worker_memory_requests: Union[int, str] = 2 + worker_memory_limits: Union[int, str] = 2 + worker_tolerations: Optional[List[V1Toleration]] = None + envs: Dict[str, str] = field(default_factory=dict) + image: str = "" + image_pull_secrets: List[str] = field(default_factory=list) + labels: Dict[str, str] = field(default_factory=dict) + worker_accelerators: Dict[str, Union[str, int]] = field(default_factory=dict) + accelerator_configs: Dict[str, str] = field( + default_factory=lambda: DEFAULT_ACCELERATORS.copy() + ) + local_queue: Optional[str] = None + annotations: Dict[str, str] = field(default_factory=dict) + volumes: list[V1Volume] = field(default_factory=list) + volume_mounts: list[V1VolumeMount] = field(default_factory=list) + + def __post_init__(self): + self._validate_types() + self._memory_to_string() + self._validate_gpu_config(self.head_accelerators) + self._validate_gpu_config(self.worker_accelerators) + + def _validate_gpu_config(self, gpu_config: Dict[str, int]): + for k in gpu_config.keys(): + if k not in self.accelerator_configs.keys(): + raise ValueError( + f"GPU configuration '{k}' not found in accelerator_configs, available resources are {list(self.accelerator_configs.keys())}, to add more supported resources use accelerator_configs. i.e. 
accelerator_configs = {{'{k}': 'FOO_BAR'}}" + ) + + def _memory_to_string(self): + if isinstance(self.head_memory_requests, int): + self.head_memory_requests = f"{self.head_memory_requests}G" + if isinstance(self.head_memory_limits, int): + self.head_memory_limits = f"{self.head_memory_limits}G" + if isinstance(self.worker_memory_requests, int): + self.worker_memory_requests = f"{self.worker_memory_requests}G" + if isinstance(self.worker_memory_limits, int): + self.worker_memory_limits = f"{self.worker_memory_limits}G" + + def _validate_types(self): + """Validate the types of all fields in the RayJobClusterConfig dataclass.""" + errors = [] + for field_info in fields(self): + value = getattr(self, field_info.name) + expected_type = field_info.type + if not self._is_type(value, expected_type): + errors.append(f"'{field_info.name}' should be of type {expected_type}.") + + if errors: + raise TypeError("Type validation failed:\n" + "\n".join(errors)) + + @staticmethod + def _is_type(value, expected_type): + """Check if the value matches the expected type.""" + + def check_type(value, expected_type): + origin_type = get_origin(expected_type) + args = get_args(expected_type) + if origin_type is Union: + return any(check_type(value, union_type) for union_type in args) + if origin_type is list: + if value is not None: + return all(check_type(elem, args[0]) for elem in (value or [])) + else: + return True + if origin_type is dict: + if value is not None: + return all( + check_type(k, args[0]) and check_type(v, args[1]) + for k, v in value.items() + ) + else: + return True + if origin_type is tuple: + return all(check_type(elem, etype) for elem, etype in zip(value, args)) + if expected_type is int: + return isinstance(value, int) and not isinstance(value, bool) + if expected_type is bool: + return isinstance(value, bool) + return isinstance(value, expected_type) + + return check_type(value, expected_type) + + def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, 
Any]: + """ + Build the RayCluster spec from RayJobClusterConfig for embedding in RayJob. + + Args: + self: The cluster configuration object (RayJobClusterConfig) + cluster_name: The name for the cluster (derived from RayJob name) + + Returns: + Dict containing the RayCluster spec for embedding in RayJob + """ + ray_cluster_spec = { + "rayVersion": RAY_VERSION, + "enableInTreeAutoscaling": False, + "headGroupSpec": self._build_head_group_spec(), + "workerGroupSpecs": [self._build_worker_group_spec(cluster_name)], + } + + return ray_cluster_spec + + def _build_head_group_spec(self) -> Dict[str, Any]: + """Build the head group specification.""" + return { + "serviceType": "ClusterIP", + "enableIngress": False, + "rayStartParams": self._build_head_ray_params(), + "template": V1PodTemplateSpec( + metadata=V1ObjectMeta(annotations=self.annotations), + spec=self._build_pod_spec(self._build_head_container(), is_head=True), + ), + } + + def _build_worker_group_spec(self, cluster_name: str) -> Dict[str, Any]: + """Build the worker group specification.""" + return { + "replicas": self.num_workers, + "minReplicas": self.num_workers, + "maxReplicas": self.num_workers, + "groupName": f"worker-group-{cluster_name}", + "rayStartParams": self._build_worker_ray_params(), + "template": V1PodTemplateSpec( + metadata=V1ObjectMeta(annotations=self.annotations), + spec=self._build_pod_spec( + self._build_worker_container(), + is_head=False, + ), + ), + } + + def _build_head_ray_params(self) -> Dict[str, str]: + """Build Ray start parameters for head node.""" + params = { + "dashboard-host": "0.0.0.0", + "dashboard-port": "8265", + "block": "true", + } + + # Add GPU count if specified + if self.head_accelerators: + gpu_count = sum( + count + for resource_type, count in self.head_accelerators.items() + if "gpu" in resource_type.lower() + ) + if gpu_count > 0: + params["num-gpus"] = str(gpu_count) + + return params + + def _build_worker_ray_params(self) -> Dict[str, str]: + """Build Ray 
start parameters for worker nodes.""" + params = { + "block": "true", + } + + # Add GPU count if specified + if self.worker_accelerators: + gpu_count = sum( + count + for resource_type, count in self.worker_accelerators.items() + if "gpu" in resource_type.lower() + ) + if gpu_count > 0: + params["num-gpus"] = str(gpu_count) + + return params + + def _build_head_container(self) -> V1Container: + """Build the head container specification.""" + container = V1Container( + name="ray-head", + image=update_image(self.image), + image_pull_policy="IfNotPresent", # Always IfNotPresent for RayJobs + ports=[ + V1ContainerPort(name="gcs", container_port=6379), + V1ContainerPort(name="dashboard", container_port=8265), + V1ContainerPort(name="client", container_port=10001), + ], + lifecycle=V1Lifecycle( + pre_stop=V1LifecycleHandler( + _exec=V1ExecAction(command=["/bin/sh", "-c", "ray stop"]) + ) + ), + resources=self._build_resource_requirements( + self.head_cpu_requests, + self.head_cpu_limits, + self.head_memory_requests, + self.head_memory_limits, + self.head_accelerators, + ), + volume_mounts=self._generate_volume_mounts(), + ) + + # Add environment variables if specified + if hasattr(self, "envs") and self.envs: + container.env = self._build_env_vars() + + return container + + def _build_worker_container(self) -> V1Container: + """Build the worker container specification.""" + container = V1Container( + name="ray-worker", + image=update_image(self.image), + image_pull_policy="IfNotPresent", # Always IfNotPresent for RayJobs + lifecycle=V1Lifecycle( + pre_stop=V1LifecycleHandler( + _exec=V1ExecAction(command=["/bin/sh", "-c", "ray stop"]) + ) + ), + resources=self._build_resource_requirements( + self.worker_cpu_requests, + self.worker_cpu_limits, + self.worker_memory_requests, + self.worker_memory_limits, + self.worker_accelerators, + ), + volume_mounts=self._generate_volume_mounts(), + ) + + # Add environment variables if specified + if hasattr(self, "envs") and self.envs: 
+ container.env = self._build_env_vars() + + return container + + def _build_resource_requirements( + self, + cpu_requests: Union[int, str], + cpu_limits: Union[int, str], + memory_requests: Union[int, str], + memory_limits: Union[int, str], + extended_resource_requests: Dict[str, Union[int, str]] = None, + ) -> V1ResourceRequirements: + """Build Kubernetes resource requirements.""" + resource_requirements = V1ResourceRequirements( + requests={"cpu": cpu_requests, "memory": memory_requests}, + limits={"cpu": cpu_limits, "memory": memory_limits}, + ) + + # Add extended resources (e.g., GPUs) + if extended_resource_requests: + for resource_type, amount in extended_resource_requests.items(): + resource_requirements.limits[resource_type] = amount + resource_requirements.requests[resource_type] = amount + + return resource_requirements + + def _build_pod_spec(self, container: V1Container, is_head: bool) -> V1PodSpec: + """Build the pod specification.""" + pod_spec = V1PodSpec( + containers=[container], + volumes=self._generate_volumes(), + restart_policy="Never", # RayJobs should not restart + ) + + # Add tolerations if specified + if is_head and hasattr(self, "head_tolerations") and self.head_tolerations: + pod_spec.tolerations = self.head_tolerations + elif ( + not is_head + and hasattr(self, "worker_tolerations") + and self.worker_tolerations + ): + pod_spec.tolerations = self.worker_tolerations + + # Add image pull secrets if specified + if hasattr(self, "image_pull_secrets") and self.image_pull_secrets: + from kubernetes.client import V1LocalObjectReference + + pod_spec.image_pull_secrets = [ + V1LocalObjectReference(name=secret) + for secret in self.image_pull_secrets + ] + + return pod_spec + + def _generate_volume_mounts(self) -> list: + """Generate volume mounts for the container.""" + volume_mounts = DEFAULT_VOLUME_MOUNTS.copy() + + # Add custom volume mounts if specified + if hasattr(self, "volume_mounts") and self.volume_mounts: + 
volume_mounts.extend(self.volume_mounts) + + return volume_mounts + + def _generate_volumes(self) -> list: + """Generate volumes for the pod.""" + volumes = DEFAULT_VOLUMES.copy() + + # Add custom volumes if specified + if hasattr(self, "volumes") and self.volumes: + volumes.extend(self.volumes) + + return volumes + + def _build_env_vars(self) -> list: + """Build environment variables list.""" + return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()] diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 0d966b0e..ab0899d2 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -13,16 +13,16 @@ # limitations under the License. """ -RayJob client for submitting and managing Ray jobs using the odh-kuberay-client. +RayJob client for submitting and managing Ray jobs using the kuberay python client. """ import logging from typing import Dict, Any, Optional, Tuple -from odh_kuberay_client.kuberay_job_api import RayjobApi +from python_client.kuberay_job_api import RayjobApi -from ..cluster.cluster import Cluster -from ..cluster.config import ClusterConfiguration -from ..cluster.build_ray_cluster import build_ray_cluster +from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + +from ...common.utils import get_current_namespace from .status import ( RayJobDeploymentStatus, @@ -46,12 +46,12 @@ class RayJob: def __init__( self, job_name: str, + entrypoint: str, cluster_name: Optional[str] = None, - cluster_config: Optional[ClusterConfiguration] = None, - namespace: str = "default", - entrypoint: Optional[str] = None, + cluster_config: Optional[RayJobClusterConfig] = None, + namespace: Optional[str] = None, runtime_env: Optional[Dict[str, Any]] = None, - shutdown_after_job_finishes: bool = True, + shutdown_after_job_finishes: Optional[bool] = None, ttl_seconds_after_finished: int = 0, active_deadline_seconds: Optional[int] = None, ): @@ -60,50 +60,85 @@ 
def __init__( Args: job_name: The name for the Ray job + entrypoint: The Python script or command to run (required) cluster_name: The name of an existing Ray cluster (optional if cluster_config provided) cluster_config: Configuration for creating a new cluster (optional if cluster_name provided) - namespace: The Kubernetes namespace (default: "default") - entrypoint: The Python script or command to run (required for submission) + namespace: The Kubernetes namespace (auto-detected if not specified) runtime_env: Ray runtime environment configuration (optional) - shutdown_after_job_finishes: Whether to automatically cleanup the cluster after job completion (default: True) + shutdown_after_job_finishes: Whether to shut down cluster after job finishes (optional) ttl_seconds_after_finished: Seconds to wait before cleanup after job finishes (default: 0) active_deadline_seconds: Maximum time the job can run before being terminated (optional) + + Note: + shutdown_after_job_finishes is automatically detected but can be overridden: + - True if cluster_config is provided (new cluster will be cleaned up) + - False if cluster_name is provided (existing cluster will not be shut down) + - User can explicitly set this value to override auto-detection """ - # Validate input parameters if cluster_name is None and cluster_config is None: - raise ValueError("Either cluster_name or cluster_config must be provided") + raise ValueError( + "❌ Configuration Error: You must provide either 'cluster_name' (for existing cluster) " + "or 'cluster_config' (to create new cluster), but not both." + ) if cluster_name is not None and cluster_config is not None: - raise ValueError("Cannot specify both cluster_name and cluster_config") + raise ValueError( + "❌ Configuration Error: You cannot specify both 'cluster_name' and 'cluster_config'. 
" + "Choose one approach:\n" + "• Use 'cluster_name' to connect to an existing cluster\n" + "• Use 'cluster_config' to create a new cluster" + ) + + if cluster_config is None and cluster_name is None: + raise ValueError( + "❌ Configuration Error: When not providing 'cluster_config', 'cluster_name' is required " + "to specify which existing cluster to use." + ) self.name = job_name - self.namespace = namespace self.entrypoint = entrypoint self.runtime_env = runtime_env - self.shutdown_after_job_finishes = shutdown_after_job_finishes self.ttl_seconds_after_finished = ttl_seconds_after_finished self.active_deadline_seconds = active_deadline_seconds - # Cluster configuration + # Auto-set shutdown_after_job_finishes based on cluster_config presence + # If cluster_config is provided, we want to clean up the cluster after job finishes + # If using existing cluster, we don't want to shut it down + # User can override this behavior by explicitly setting shutdown_after_job_finishes + if shutdown_after_job_finishes is not None: + self.shutdown_after_job_finishes = shutdown_after_job_finishes + elif cluster_config is not None: + self.shutdown_after_job_finishes = True + else: + self.shutdown_after_job_finishes = False + + if namespace is None: + detected_namespace = get_current_namespace() + if detected_namespace: + self.namespace = detected_namespace + logger.info(f"Auto-detected namespace: {self.namespace}") + else: + raise ValueError( + "❌ Configuration Error: Could not auto-detect Kubernetes namespace. " + "Please explicitly specify the 'namespace' parameter. 
" + ) + else: + self.namespace = namespace + self._cluster_name = cluster_name self._cluster_config = cluster_config - # Determine cluster name for the job if cluster_config is not None: - # Ensure cluster config has the same namespace as the job - if cluster_config.namespace is None: - cluster_config.namespace = namespace - elif cluster_config.namespace != namespace: - logger.warning( - f"Cluster config namespace ({cluster_config.namespace}) differs from job namespace ({namespace})" - ) - - self.cluster_name = cluster_config.name or f"{job_name}-cluster" - # Update the cluster config name if it wasn't set - if not cluster_config.name: - cluster_config.name = self.cluster_name + self.cluster_name = f"{job_name}-cluster" + logger.info(f"Creating new cluster: {self.cluster_name}") else: + # Using existing cluster: cluster_name must be provided + if cluster_name is None: + raise ValueError( + "❌ Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'" + ) self.cluster_name = cluster_name + logger.info(f"Using existing cluster: {self.cluster_name}") # Initialize the KubeRay job API client self._api = RayjobApi() @@ -111,21 +146,6 @@ def __init__( logger.info(f"Initialized RayJob: {self.name} in namespace: {self.namespace}") def submit(self) -> str: - """ - Submit the Ray job to the Kubernetes cluster. 
- - The RayJob CRD will automatically: - - Create a new cluster if cluster_config was provided - - Use existing cluster if cluster_name was provided - - Clean up resources based on shutdown_after_job_finishes setting - - Returns: - The job ID/name if submission was successful - - Raises: - ValueError: If entrypoint is not provided - RuntimeError: If job submission fails - """ # Validate required parameters if not self.entrypoint: raise ValueError("entrypoint must be provided to submit a RayJob") @@ -176,9 +196,16 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: # Configure cluster: either use existing or create new if self._cluster_config is not None: - # Use rayClusterSpec to create a new cluster - leverage existing build logic - ray_cluster_spec = self._build_ray_cluster_spec() + ray_cluster_spec = self._cluster_config.build_ray_cluster_spec( + cluster_name=self.cluster_name + ) + + logger.info( + f"Built RayCluster spec using RayJob-specific builder for cluster: {self.cluster_name}" + ) + rayjob_cr["spec"]["rayClusterSpec"] = ray_cluster_spec + logger.info(f"RayJob will create new cluster: {self.cluster_name}") else: # Use clusterSelector to reference existing cluster @@ -187,48 +214,6 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: return rayjob_cr - def _build_ray_cluster_spec(self) -> Dict[str, Any]: - """ - Build the RayCluster spec from ClusterConfiguration using existing build_ray_cluster logic. 
- - Returns: - Dict containing the RayCluster spec for embedding in RayJob - """ - if not self._cluster_config: - raise RuntimeError("No cluster configuration provided") - - # Create a shallow copy of the cluster config to avoid modifying the original - import copy - - temp_config = copy.copy(self._cluster_config) - - # Ensure we get a RayCluster (not AppWrapper) and don't write to file - temp_config.appwrapper = False - temp_config.write_to_file = False - - # Create a minimal Cluster object for the build process - from ..cluster.cluster import Cluster - - temp_cluster = Cluster.__new__(Cluster) # Create without calling __init__ - temp_cluster.config = temp_config - - """ - For now, RayJob with a new/auto-created cluster will not work with Kueue. - This is due to the Kueue label not being propagated to the RayCluster. - """ - - # Use the existing build_ray_cluster function to generate the RayCluster - ray_cluster_dict = build_ray_cluster(temp_cluster) - - # Extract just the RayCluster spec - RayJob CRD doesn't support metadata in rayClusterSpec - # Note: CodeFlare Operator should still create dashboard routes for the RayCluster - ray_cluster_spec = ray_cluster_dict["spec"] - - logger.info( - f"Built RayCluster spec using existing build logic for cluster: {self.cluster_name}" - ) - return ray_cluster_spec - def status( self, print_to_console: bool = True ) -> Tuple[CodeflareRayJobStatus, bool]: diff --git a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test_config.py new file mode 100644 index 00000000..cefe9606 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/test_config.py @@ -0,0 +1,82 @@ +""" +Tests for the simplified RayJobClusterConfig accelerator_configs behavior. 
+""" + +import pytest +from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig, DEFAULT_ACCELERATORS + + +def test_accelerator_configs_defaults_to_default_accelerators(): + """Test that accelerator_configs defaults to DEFAULT_ACCELERATORS.copy()""" + config = RayJobClusterConfig() + + # Should have all the default accelerators + assert "nvidia.com/gpu" in config.accelerator_configs + assert "intel.com/gpu" in config.accelerator_configs + assert "google.com/tpu" in config.accelerator_configs + + # Should be a copy, not the same object + assert config.accelerator_configs is not DEFAULT_ACCELERATORS + assert config.accelerator_configs == DEFAULT_ACCELERATORS + + +def test_accelerator_configs_can_be_overridden(): + """Test that users can override accelerator_configs with custom mappings""" + custom_configs = { + "nvidia.com/gpu": "GPU", + "custom.com/accelerator": "CUSTOM_ACCELERATOR", + } + + config = RayJobClusterConfig(accelerator_configs=custom_configs) + + # Should have custom configs + assert config.accelerator_configs == custom_configs + assert "custom.com/accelerator" in config.accelerator_configs + assert "nvidia.com/gpu" in config.accelerator_configs + + # Should NOT have other defaults + assert "intel.com/gpu" not in config.accelerator_configs + assert "google.com/tpu" not in config.accelerator_configs + + +def test_accelerator_configs_can_extend_defaults(): + """Test that users can extend defaults by providing additional configs""" + extended_configs = { + **DEFAULT_ACCELERATORS, + "custom.com/accelerator": "CUSTOM_ACCEL", + } + + config = RayJobClusterConfig(accelerator_configs=extended_configs) + + # Should have all defaults plus custom + assert "nvidia.com/gpu" in config.accelerator_configs + assert "intel.com/gpu" in config.accelerator_configs + assert "custom.com/accelerator" in config.accelerator_configs + assert config.accelerator_configs["custom.com/accelerator"] == "CUSTOM_ACCEL" + + +def test_gpu_validation_works_with_defaults(): + 
"""Test that GPU validation works with default accelerator configs""" + config = RayJobClusterConfig(head_accelerators={"nvidia.com/gpu": 1}) + + # Should not raise any errors + assert config.head_accelerators == {"nvidia.com/gpu": 1} + + +def test_gpu_validation_works_with_custom_configs(): + """Test that GPU validation works with custom accelerator configs""" + config = RayJobClusterConfig( + accelerator_configs={"custom.com/accelerator": "CUSTOM_ACCEL"}, + head_accelerators={"custom.com/accelerator": 1}, + ) + + # Should not raise any errors + assert config.head_accelerators == {"custom.com/accelerator": 1} + + +def test_gpu_validation_fails_with_unsupported_accelerator(): + """Test that GPU validation fails with unsupported accelerators""" + with pytest.raises( + ValueError, match="GPU configuration 'unsupported.com/accelerator' not found" + ): + RayJobClusterConfig(head_accelerators={"unsupported.com/accelerator": 1}) diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 7554ca4c..970f0159 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -14,6 +14,8 @@ import pytest from unittest.mock import MagicMock, patch +from codeflare_sdk.common.utils.constants import CUDA_RUNTIME_IMAGE, RAY_VERSION + from codeflare_sdk.ray.rayjobs.rayjob import RayJob from codeflare_sdk.ray.cluster.config import ClusterConfiguration @@ -100,7 +102,8 @@ def test_rayjob_init_validation_both_provided(mocker): cluster_config = ClusterConfiguration(name="test-cluster", namespace="test") with pytest.raises( - ValueError, match="Cannot specify both cluster_name and cluster_config" + ValueError, + match="❌ Configuration Error: You cannot specify both 'cluster_name' and 'cluster_config'", ): RayJob( job_name="test-job", @@ -119,7 +122,8 @@ def test_rayjob_init_validation_neither_provided(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") with pytest.raises( - 
ValueError, match="Either cluster_name or cluster_config must be provided" + ValueError, + match="❌ Configuration Error: You must provide either 'cluster_name'", ): RayJob(job_name="test-job", entrypoint="python script.py") @@ -140,10 +144,11 @@ def test_rayjob_init_with_cluster_config(mocker): job_name="test-job", cluster_config=cluster_config, entrypoint="python script.py", + namespace="test-namespace", ) assert rayjob.name == "test-job" - assert rayjob.cluster_name == "auto-cluster" + assert rayjob.cluster_name == "test-job-cluster" # Generated from job name assert rayjob._cluster_config == cluster_config assert rayjob._cluster_name is None @@ -163,11 +168,13 @@ def test_rayjob_cluster_name_generation(mocker): ) rayjob = RayJob( - job_name="my-job", cluster_config=cluster_config, entrypoint="python script.py" + job_name="my-job", + cluster_config=cluster_config, + entrypoint="python script.py", + namespace="test-namespace", ) assert rayjob.cluster_name == "my-job-cluster" - assert cluster_config.name == "my-job-cluster" # Should be updated def test_rayjob_cluster_config_namespace_none(mocker): @@ -191,7 +198,6 @@ def test_rayjob_cluster_config_namespace_none(mocker): entrypoint="python script.py", ) - assert cluster_config.namespace == "job-namespace" assert rayjob.namespace == "job-namespace" @@ -229,15 +235,20 @@ def test_build_ray_cluster_spec_no_config_error(mocker): job_name="test-job", cluster_name="existing-cluster", entrypoint="python script.py", + namespace="test-namespace", ) - # Line 198: Should raise RuntimeError when trying to build spec without config - with pytest.raises(RuntimeError, match="No cluster configuration provided"): - rayjob._build_ray_cluster_spec() + # Since we removed _build_ray_cluster_spec method, this test is no longer applicable + # The method is now called internally by _build_rayjob_cr when needed + # We can test this by calling _build_rayjob_cr instead + rayjob_cr = rayjob._build_rayjob_cr() + + # Should use clusterSelector 
for existing cluster + assert rayjob_cr["spec"]["clusterSelector"]["ray.io/cluster"] == "existing-cluster" + assert "rayClusterSpec" not in rayjob_cr["spec"] -@patch("codeflare_sdk.ray.rayjobs.rayjob.build_ray_cluster") -def test_build_ray_cluster_spec(mock_build_ray_cluster, mocker): +def test_build_ray_cluster_spec(mocker): """Test _build_ray_cluster_spec method.""" mocker.patch("kubernetes.config.load_kube_config") @@ -249,34 +260,38 @@ def test_build_ray_cluster_spec(mock_build_ray_cluster, mocker): "kind": "RayCluster", "metadata": {"name": "test-cluster", "namespace": "test"}, "spec": { - "rayVersion": "2.9.0", + "rayVersion": RAY_VERSION, "headGroupSpec": {"replicas": 1}, "workerGroupSpecs": [{"replicas": 2}], }, } - mock_build_ray_cluster.return_value = mock_ray_cluster + # Use RayJobClusterConfig which has the build_ray_cluster_spec method + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig - cluster_config = ClusterConfiguration( - name="test-cluster", namespace="test", num_workers=2 + cluster_config = RayJobClusterConfig(num_workers=2) + + # Mock the method that will be called + mocker.patch.object( + cluster_config, "build_ray_cluster_spec", return_value=mock_ray_cluster["spec"] ) rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, entrypoint="python script.py", + namespace="test-namespace", ) - spec = rayjob._build_ray_cluster_spec() + # Test the integration through _build_rayjob_cr + rayjob_cr = rayjob._build_rayjob_cr() - # Should return only the spec part, not metadata - assert spec == mock_ray_cluster["spec"] - assert "metadata" not in spec + # Should have rayClusterSpec + assert "rayClusterSpec" in rayjob_cr["spec"] - # Verify build_ray_cluster was called with correct parameters - mock_build_ray_cluster.assert_called_once() - call_args = mock_build_ray_cluster.call_args[0][0] - assert call_args.config.appwrapper is False - assert call_args.config.write_to_file is False + # Verify build_ray_cluster_spec was called on 
the cluster config + cluster_config.build_ray_cluster_spec.assert_called_once_with( + cluster_name="test-job-cluster" + ) def test_build_rayjob_cr_with_existing_cluster(mocker): @@ -291,7 +306,6 @@ def test_build_rayjob_cr_with_existing_cluster(mocker): cluster_name="existing-cluster", namespace="test-namespace", entrypoint="python main.py", - shutdown_after_job_finishes=False, ttl_seconds_after_finished=300, ) @@ -305,6 +319,7 @@ def test_build_rayjob_cr_with_existing_cluster(mocker): # Check lifecycle parameters spec = rayjob_cr["spec"] assert spec["entrypoint"] == "python main.py" + # shutdownAfterJobFinishes should be False when using existing cluster (auto-set) assert spec["shutdownAfterJobFinishes"] is False assert spec["ttlSecondsAfterFinished"] == 300 @@ -313,8 +328,7 @@ def test_build_rayjob_cr_with_existing_cluster(mocker): assert "rayClusterSpec" not in spec -@patch("codeflare_sdk.ray.rayjobs.rayjob.build_ray_cluster") -def test_build_rayjob_cr_with_auto_cluster(mock_build_ray_cluster, mocker): +def test_build_rayjob_cr_with_auto_cluster(mocker): """Test _build_rayjob_cr method with auto-created cluster.""" mocker.patch("kubernetes.config.load_kube_config") @@ -326,19 +340,26 @@ def test_build_rayjob_cr_with_auto_cluster(mock_build_ray_cluster, mocker): "kind": "RayCluster", "metadata": {"name": "auto-cluster", "namespace": "test"}, "spec": { - "rayVersion": "2.9.0", + "rayVersion": RAY_VERSION, "headGroupSpec": {"replicas": 1}, "workerGroupSpecs": [{"replicas": 2}], }, } - mock_build_ray_cluster.return_value = mock_ray_cluster + # Use RayJobClusterConfig and mock its build_ray_cluster_spec method + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig - cluster_config = ClusterConfiguration( - name="auto-cluster", namespace="test-namespace", num_workers=2 + cluster_config = RayJobClusterConfig(num_workers=2) + + # Mock the method that will be called + mocker.patch.object( + cluster_config, "build_ray_cluster_spec", 
return_value=mock_ray_cluster["spec"] ) rayjob = RayJob( - job_name="test-job", cluster_config=cluster_config, entrypoint="python main.py" + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python main.py", + namespace="test-namespace", ) rayjob_cr = rayjob._build_rayjob_cr() @@ -357,6 +378,7 @@ def test_submit_validation_no_entrypoint(mocker): job_name="test-job", cluster_name="test-cluster", entrypoint=None, # No entrypoint provided + namespace="test-namespace", ) with pytest.raises( @@ -365,8 +387,7 @@ def test_submit_validation_no_entrypoint(mocker): rayjob.submit() -@patch("codeflare_sdk.ray.rayjobs.rayjob.build_ray_cluster") -def test_submit_with_auto_cluster(mock_build_ray_cluster, mocker): +def test_submit_with_auto_cluster(mocker): """Test successful submission with auto-created cluster.""" mocker.patch("kubernetes.config.load_kube_config") @@ -374,27 +395,32 @@ def test_submit_with_auto_cluster(mock_build_ray_cluster, mocker): "apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": { - "rayVersion": "2.9.0", + "rayVersion": RAY_VERSION, "headGroupSpec": {"replicas": 1}, "workerGroupSpecs": [{"replicas": 1}], }, } - mock_build_ray_cluster.return_value = mock_ray_cluster - # Mock the RayjobApi mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance mock_api_instance.submit_job.return_value = True - cluster_config = ClusterConfiguration( - name="auto-cluster", namespace="test", num_workers=1 + # Use RayJobClusterConfig and mock its build_ray_cluster_spec method + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig(num_workers=1) + + # Mock the method that will be called + mocker.patch.object( + cluster_config, "build_ray_cluster_spec", return_value=mock_ray_cluster["spec"] ) rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, entrypoint="python script.py", + 
namespace="test-namespace", ) result = rayjob.submit() @@ -408,3 +434,540 @@ def test_submit_with_auto_cluster(mock_build_ray_cluster, mocker): job_cr = call_args.kwargs["job"] assert "rayClusterSpec" in job_cr["spec"] assert job_cr["spec"]["rayClusterSpec"] == mock_ray_cluster["spec"] + + +def test_namespace_auto_detection_success(mocker): + """Test successful namespace auto-detection.""" + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="detected-ns", + ) + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", entrypoint="python script.py", cluster_name="test-cluster" + ) + + assert rayjob.namespace == "detected-ns" + + +def test_namespace_auto_detection_fallback(mocker): + """Test that namespace auto-detection failure raises an error.""" + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", return_value=None + ) + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + with pytest.raises(ValueError, match="Could not auto-detect Kubernetes namespace"): + RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="test-cluster", + ) + + +def test_namespace_explicit_override(mocker): + """Test that explicit namespace overrides auto-detection.""" + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="detected-ns", + ) + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="test-cluster", + namespace="explicit-ns", + ) + + assert rayjob.namespace == "explicit-ns" + + +def test_shutdown_behavior_with_cluster_config(mocker): + """Test that shutdown_after_job_finishes is True when cluster_config is provided.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig() + + rayjob = RayJob( 
+ job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + namespace="test-namespace", + ) + + assert rayjob.shutdown_after_job_finishes is True + + +def test_shutdown_behavior_with_existing_cluster(mocker): + """Test that shutdown_after_job_finishes is False when using existing cluster.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="existing-cluster", + namespace="test-namespace", + ) + + assert rayjob.shutdown_after_job_finishes is False + + +def test_rayjob_with_rayjob_cluster_config(mocker): + """Test RayJob with the new RayJobClusterConfig.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig( + num_workers=2, + head_cpu_requests="500m", + head_memory_requests="512Mi", + ) + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + namespace="test-namespace", + ) + + assert rayjob._cluster_config == cluster_config + assert rayjob.cluster_name == "test-job-cluster" # Generated from job name + + +def test_rayjob_cluster_config_validation(mocker): + """Test validation of RayJobClusterConfig parameters.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + # Test with minimal valid config + cluster_config = RayJobClusterConfig() + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + namespace="test-namespace", + ) + + assert rayjob._cluster_config is not None + + +def test_rayjob_missing_entrypoint_validation(mocker): + """Test that RayJob requires entrypoint for submission.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Should raise an error during construction + with pytest.raises( + 
TypeError, match="missing 1 required positional argument: 'entrypoint'" + ): + RayJob( + job_name="test-job", + cluster_name="test-cluster", + # No entrypoint provided + ) + + +def test_build_ray_cluster_spec_integration(mocker): + """Test integration with the new build_ray_cluster_spec method.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi class entirely + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig() + + # Mock the build_ray_cluster_spec method on the cluster config + mock_spec = {"spec": "test-spec"} + mocker.patch.object( + cluster_config, "build_ray_cluster_spec", return_value=mock_spec + ) + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + namespace="test-namespace", + ) + + # Build the RayJob CR + rayjob_cr = rayjob._build_rayjob_cr() + + # Verify the method was called correctly + cluster_config.build_ray_cluster_spec.assert_called_once_with( + cluster_name="test-job-cluster" + ) + + # Verify the spec is included in the RayJob CR + assert "rayClusterSpec" in rayjob_cr["spec"] + assert rayjob_cr["spec"]["rayClusterSpec"] == mock_spec + + +def test_rayjob_with_runtime_env(mocker): + """Test RayJob with runtime environment configuration.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + runtime_env = {"pip": ["numpy", "pandas"]} + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="test-cluster", + runtime_env=runtime_env, + namespace="test-namespace", + ) + + assert rayjob.runtime_env == runtime_env + + # Verify runtime env is included in the CR + rayjob_cr = rayjob._build_rayjob_cr() + assert rayjob_cr["spec"]["runtimeEnvYAML"] == str(runtime_env) + + +def test_rayjob_with_active_deadline_and_ttl(mocker): + """Test RayJob with both active deadline and 
TTL settings.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="test-cluster", + active_deadline_seconds=300, + ttl_seconds_after_finished=600, + namespace="test-namespace", + ) + + assert rayjob.active_deadline_seconds == 300 + assert rayjob.ttl_seconds_after_finished == 600 + + # Verify both are included in the CR + rayjob_cr = rayjob._build_rayjob_cr() + assert rayjob_cr["spec"]["activeDeadlineSeconds"] == 300 + assert rayjob_cr["spec"]["ttlSecondsAfterFinished"] == 600 + + +def test_rayjob_cluster_name_generation_with_config(mocker): + """Test cluster name generation when using cluster_config.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig() + + rayjob = RayJob( + job_name="my-job", + entrypoint="python script.py", + cluster_config=cluster_config, + namespace="test-namespace", # Explicitly specify namespace + ) + + assert rayjob.cluster_name == "my-job-cluster" + # Note: cluster_config.name is not set in RayJob (it's only for resource config) + # The cluster name is generated independently for the RayJob + + +def test_rayjob_namespace_propagation_to_cluster_config(mocker): + """Test that job namespace is propagated to cluster config when None.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + from codeflare_sdk.ray.rayjobs.rayjob import get_current_namespace + + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="detected-ns", + ) + + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig() + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + ) + + assert rayjob.namespace == "detected-ns" + + +def test_rayjob_error_handling_invalid_cluster_config(mocker): + """Test 
error handling with invalid cluster configuration.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + with pytest.raises(ValueError): + RayJob( + job_name="test-job", + entrypoint="python script.py", + ) + + +def test_rayjob_constructor_parameter_validation(mocker): + """Test constructor parameter validation.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Test with valid parameters + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="test-cluster", + namespace="test-ns", + runtime_env={"pip": ["numpy"]}, + ttl_seconds_after_finished=300, + active_deadline_seconds=600, + ) + + assert rayjob.name == "test-job" + assert rayjob.entrypoint == "python script.py" + assert rayjob.cluster_name == "test-cluster" + assert rayjob.namespace == "test-ns" + assert rayjob.runtime_env == {"pip": ["numpy"]} + assert rayjob.ttl_seconds_after_finished == 300 + assert rayjob.active_deadline_seconds == 600 + + +def test_build_ray_cluster_spec_function(mocker): + """Test the build_ray_cluster_spec method directly.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + # Create a test cluster config + cluster_config = RayJobClusterConfig( + num_workers=2, + head_cpu_requests="500m", + head_memory_requests="512Mi", + worker_cpu_requests="250m", + worker_memory_requests="256Mi", + ) + + # Build the spec using the method on the cluster config + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify basic structure + assert "rayVersion" in spec + assert "enableInTreeAutoscaling" in spec + assert "headGroupSpec" in spec + assert "workerGroupSpecs" in spec + + # Verify head group spec + head_spec = spec["headGroupSpec"] + assert head_spec["serviceType"] == "ClusterIP" + assert head_spec["enableIngress"] is False + assert "rayStartParams" in head_spec + assert "template" in head_spec + + # Verify worker group spec + worker_specs = spec["workerGroupSpecs"] + assert len(worker_specs) 
== 1 + worker_spec = worker_specs[0] + assert worker_spec["replicas"] == 2 + assert worker_spec["minReplicas"] == 2 + assert worker_spec["maxReplicas"] == 2 + assert worker_spec["groupName"] == "worker-group-test-cluster" + + +def test_build_ray_cluster_spec_with_accelerators(mocker): + """Test build_ray_cluster_spec with GPU accelerators.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + # Create a test cluster config with GPU accelerators + cluster_config = RayJobClusterConfig( + head_accelerators={"nvidia.com/gpu": 1}, + worker_accelerators={"nvidia.com/gpu": 2}, + ) + + # Build the spec using the method on the cluster config + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify head group has GPU parameters + head_spec = spec["headGroupSpec"] + head_params = head_spec["rayStartParams"] + assert "num-gpus" in head_params + assert head_params["num-gpus"] == "1" + + # Verify worker group has GPU parameters + worker_specs = spec["workerGroupSpecs"] + worker_spec = worker_specs[0] + worker_params = worker_spec["rayStartParams"] + assert "num-gpus" in worker_params + assert worker_params["num-gpus"] == "2" + + +def test_build_ray_cluster_spec_with_custom_volumes(mocker): + """Test build_ray_cluster_spec with custom volumes and volume mounts.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from kubernetes.client import V1Volume, V1VolumeMount + + # Create custom volumes and volume mounts + custom_volume = V1Volume(name="custom-data", empty_dir={}) + custom_volume_mount = V1VolumeMount(name="custom-data", mount_path="/data") + + # Create a test cluster config with custom volumes + cluster_config = RayJobClusterConfig( + volumes=[custom_volume], + volume_mounts=[custom_volume_mount], + ) + + # Build the spec using the method on the cluster config + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify custom volumes are included + head_spec = spec["headGroupSpec"] + head_pod_spec = 
head_spec["template"].spec # Access the spec attribute + # Note: We can't easily check DEFAULT_VOLUMES length since they're now part of the class + assert len(head_pod_spec.volumes) > 0 + + # Verify custom volume mounts are included + head_container = head_pod_spec.containers[0] # Access the containers attribute + # Note: We can't easily check DEFAULT_VOLUME_MOUNTS length since they're now part of the class + assert len(head_container.volume_mounts) > 0 + + +def test_build_ray_cluster_spec_with_environment_variables(mocker): + """Test build_ray_cluster_spec with environment variables.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + # Create a test cluster config with environment variables + cluster_config = RayJobClusterConfig( + envs={"CUDA_VISIBLE_DEVICES": "0", "RAY_DISABLE_IMPORT_WARNING": "1"}, + ) + + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify environment variables are included in head container + head_spec = spec["headGroupSpec"] + head_pod_spec = head_spec["template"].spec + head_container = head_pod_spec.containers[0] + assert hasattr(head_container, "env") + env_vars = {env.name: env.value for env in head_container.env} + assert env_vars["CUDA_VISIBLE_DEVICES"] == "0" + assert env_vars["RAY_DISABLE_IMPORT_WARNING"] == "1" + + # Verify environment variables are included in worker container + worker_specs = spec["workerGroupSpecs"] + worker_spec = worker_specs[0] + worker_pod_spec = worker_spec["template"].spec + worker_container = worker_pod_spec.containers[0] + + assert hasattr(worker_container, "env") + worker_env_vars = {env.name: env.value for env in worker_container.env} + assert worker_env_vars["CUDA_VISIBLE_DEVICES"] == "0" + assert worker_env_vars["RAY_DISABLE_IMPORT_WARNING"] == "1" + + +def test_build_ray_cluster_spec_with_tolerations(mocker): + """Test build_ray_cluster_spec with tolerations.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from kubernetes.client 
import V1Toleration + + # Create test tolerations + head_toleration = V1Toleration( + key="node-role.kubernetes.io/master", operator="Exists", effect="NoSchedule" + ) + worker_toleration = V1Toleration( + key="nvidia.com/gpu", operator="Exists", effect="NoSchedule" + ) + + # Create a test cluster config with tolerations + cluster_config = RayJobClusterConfig( + head_tolerations=[head_toleration], + worker_tolerations=[worker_toleration], + ) + + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify head tolerations + head_spec = spec["headGroupSpec"] + head_pod_spec = head_spec["template"].spec # Access the spec attribute + assert hasattr(head_pod_spec, "tolerations") + assert len(head_pod_spec.tolerations) == 1 + assert head_pod_spec.tolerations[0].key == "node-role.kubernetes.io/master" + + # Verify worker tolerations + worker_specs = spec["workerGroupSpecs"] + worker_spec = worker_specs[0] + worker_pod_spec = worker_spec["template"].spec # Access the spec attribute + assert hasattr(worker_pod_spec, "tolerations") + assert len(worker_pod_spec.tolerations) == 1 + assert worker_pod_spec.tolerations[0].key == "nvidia.com/gpu" + + +def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): + """Test build_ray_cluster_spec with image pull secrets.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + # Create a test cluster config with image pull secrets + cluster_config = RayJobClusterConfig( + image_pull_secrets=["my-registry-secret", "another-secret"] + ) + + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify image pull secrets are included in head pod + head_spec = spec["headGroupSpec"] + head_pod_spec = head_spec["template"].spec # Access the spec attribute + assert hasattr(head_pod_spec, "image_pull_secrets") + + head_secrets = head_pod_spec.image_pull_secrets + assert len(head_secrets) == 2 + assert head_secrets[0].name == "my-registry-secret" + assert head_secrets[1].name == "another-secret" 
+ + # Verify image pull secrets are included in worker pod + worker_specs = spec["workerGroupSpecs"] + worker_spec = worker_specs[0] + worker_pod_spec = worker_spec["template"].spec + assert hasattr(worker_pod_spec, "image_pull_secrets") + + worker_secrets = worker_pod_spec.image_pull_secrets + assert len(worker_secrets) == 2 + assert worker_secrets[0].name == "my-registry-secret" + assert worker_secrets[1].name == "another-secret" + + +def test_rayjob_user_override_shutdown_behavior(mocker): + """Test that user can override the auto-detected shutdown behavior.""" + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Test 1: User overrides shutdown to True even when using existing cluster + rayjob_existing_override = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="existing-cluster", + shutdown_after_job_finishes=True, # User override + namespace="test-namespace", # Explicitly specify namespace + ) + + assert rayjob_existing_override.shutdown_after_job_finishes is True + + # Test 2: User overrides shutdown to False even when creating new cluster + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + cluster_config = RayJobClusterConfig() + + rayjob_new_override = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + shutdown_after_job_finishes=False, # User override + namespace="test-namespace", # Explicitly specify namespace + ) + + assert rayjob_new_override.shutdown_after_job_finishes is False + + # Test 3: User override takes precedence over auto-detection + rayjob_override_priority = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_config=cluster_config, + shutdown_after_job_finishes=True, # Should override auto-detection + namespace="test-namespace", # Explicitly specify namespace + ) + + assert rayjob_override_priority.shutdown_after_job_finishes is True From ee389206321b0ebcbe509a756422b437d0331cdf Mon Sep 17 00:00:00 2001 From: 
Pat O'Connor Date: Fri, 1 Aug 2025 14:56:25 +0100 Subject: [PATCH 06/33] task(RHOAIENG-26481): Existing cluster RayJob demo notebook Signed-off-by: Pat O'Connor --- .../4_rayjob_existing_cluster.ipynb | 212 ++++++++++++++++++ .../4_rayjob_existing_cluster.ipynb | 212 ++++++++++++++++++ 2 files changed, 424 insertions(+) create mode 100644 demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb create mode 100644 demo-notebooks/guided-demos/preview_nbs/4_rayjob_existing_cluster.ipynb diff --git a/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb b/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb new file mode 100644 index 00000000..5348099c --- /dev/null +++ b/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9259e514", + "metadata": {}, + "source": [ + "# Submitting RayJobs against an existing RayCluster\n", + "\n", + "In this notebook, we will go through the basics of using the SDK to:\n", + " * Spin up a Ray cluster with our desired resources\n", + " * Verify the status of this cluster\n", + " * Submit a RayJob against that cluster\n", + " * Verify the status of this job" + ] + }, + { + "cell_type": "markdown", + "id": "18136ea7", + "metadata": {}, + "source": [ + "## Creating the RayCluster" + ] + }, + { + "cell_type": "markdown", + "id": "a1c2545d", + "metadata": {}, + "source": [ + "First, we'll need to import the relevant CodeFlare SDK packages. You can do this by executing the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51e18292", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import Cluster, ClusterConfiguration, RayJob, TokenAuthentication" + ] + }, + { + "cell_type": "markdown", + "id": "649c5911", + "metadata": {}, + "source": [ + "Execute the below cell to authenticate the notebook via OpenShift." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc364888", + "metadata": {}, + "outputs": [], + "source": [ + "auth = TokenAuthentication(\n", + " token = \"XXXXX\",\n", + " server = \"XXXXX\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "markdown", + "id": "5581eca9", + "metadata": {}, + "source": [ + "Next we'll need to initialize our RayCluster and apply it. You can do this by executing the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3094c60a", + "metadata": {}, + "outputs": [], + "source": [ + "cluster = Cluster(ClusterConfiguration(\n", + " name='rayjob-cluster',\n", + " head_extended_resource_requests={'nvidia.com/gpu':0},\n", + " worker_extended_resource_requests={'nvidia.com/gpu':0},\n", + " num_workers=2,\n", + " worker_cpu_requests=1,\n", + " worker_cpu_limits=1,\n", + " worker_memory_requests=4,\n", + " worker_memory_limits=4,\n", + "\n", + "))\n", + "\n", + "cluster.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "f3612de2", + "metadata": {}, + "source": [ + "We can check the status of our cluster by executing the below cell. If it's not up immediately, run the cell a few more times until you see that it's in a 'running' state." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96d92f93", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.status()" + ] + }, + { + "cell_type": "markdown", + "id": "a0e2a650", + "metadata": {}, + "source": [ + "## Creating and Submitting the RayJob" + ] + }, + { + "cell_type": "markdown", + "id": "4cf03419", + "metadata": {}, + "source": [ + "Now we can create the RayJob that we want to submit against the running cluster. The process is quite similar to how we initialize and apply the cluster. 
\n", + "In this context, we need to use the `cluster_name` variable to point it to our existing cluster.\n", + "\n", + "For the sake of demonstration, the job we'll submit via the `entrypoint` is a single python command. In standard practice this would be pointed to a python training script.\n", + "\n", + "We'll then call the `submit()` function to run the job against our cluster.\n", + "\n", + "You can run the below cell to achieve this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94edca70", + "metadata": {}, + "outputs": [], + "source": [ + "rayjob = RayJob(\n", + " job_name=\"sdk-test-job\",\n", + " cluster_name=\"rayjob-cluster\",\n", + " namespace=\"rhods-notebooks\",\n", + " entrypoint=\"python -c 'import time; time.sleep(20)'\",\n", + ")\n", + "\n", + "rayjob.submit()" + ] + }, + { + "cell_type": "markdown", + "id": "30a8899a", + "metadata": {}, + "source": [ + "We can observe the status of the RayJob in the same way as the RayCluster by invoking the `status()` function via the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3283b09c", + "metadata": {}, + "outputs": [], + "source": [ + "rayjob.status()" + ] + }, + { + "cell_type": "markdown", + "id": "9f3c9c9f", + "metadata": {}, + "source": [ + "This function will output different tables based on the RayJob's current status. You can re-run the cell multiple times to observe the changes as you need to. Once you've observed that the job has been completed, you can shut down the cluster we created earlier by executing the below cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b11e379", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.down()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/demo-notebooks/guided-demos/preview_nbs/4_rayjob_existing_cluster.ipynb b/demo-notebooks/guided-demos/preview_nbs/4_rayjob_existing_cluster.ipynb new file mode 100644 index 00000000..5348099c --- /dev/null +++ b/demo-notebooks/guided-demos/preview_nbs/4_rayjob_existing_cluster.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9259e514", + "metadata": {}, + "source": [ + "# Submitting RayJobs against an existing RayCluster\n", + "\n", + "In this notebook, we will go through the basics of using the SDK to:\n", + " * Spin up a Ray cluster with our desired resources\n", + " * Verify the status of this cluster\n", + " * Submit a RayJob against that cluster\n", + " * Verify the status of this job" + ] + }, + { + "cell_type": "markdown", + "id": "18136ea7", + "metadata": {}, + "source": [ + "## Creating the RayCluster" + ] + }, + { + "cell_type": "markdown", + "id": "a1c2545d", + "metadata": {}, + "source": [ + "First, we'll need to import the relevant CodeFlare SDK packages. You can do this by executing the below cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51e18292", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import Cluster, ClusterConfiguration, RayJob, TokenAuthentication" + ] + }, + { + "cell_type": "markdown", + "id": "649c5911", + "metadata": {}, + "source": [ + "Execute the below cell to authenticate the notebook via OpenShift." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc364888", + "metadata": {}, + "outputs": [], + "source": [ + "auth = TokenAuthentication(\n", + " token = \"XXXXX\",\n", + " server = \"XXXXX\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "markdown", + "id": "5581eca9", + "metadata": {}, + "source": [ + "Next we'll need to initialize our RayCluster and apply it. You can do this by executing the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3094c60a", + "metadata": {}, + "outputs": [], + "source": [ + "cluster = Cluster(ClusterConfiguration(\n", + " name='rayjob-cluster',\n", + " head_extended_resource_requests={'nvidia.com/gpu':0},\n", + " worker_extended_resource_requests={'nvidia.com/gpu':0},\n", + " num_workers=2,\n", + " worker_cpu_requests=1,\n", + " worker_cpu_limits=1,\n", + " worker_memory_requests=4,\n", + " worker_memory_limits=4,\n", + "\n", + "))\n", + "\n", + "cluster.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "f3612de2", + "metadata": {}, + "source": [ + "We can check the status of our cluster by executing the below cell. If it's not up immediately, run the cell a few more times until you see that it's in a 'running' state." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96d92f93", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.status()" + ] + }, + { + "cell_type": "markdown", + "id": "a0e2a650", + "metadata": {}, + "source": [ + "## Creating and Submitting the RayJob" + ] + }, + { + "cell_type": "markdown", + "id": "4cf03419", + "metadata": {}, + "source": [ + "Now we can create the RayJob that we want to submit against the running cluster. The process is quite similar to how we initialize and apply the cluster. \n", + "In this context, we need to use the `cluster_name` variable to point it to our existing cluster.\n", + "\n", + "For the sake of demonstration, the job we'll submit via the `entrypoint` is a single python command. In standard practice this would be pointed to a python training script.\n", + "\n", + "We'll then call the `submit()` function to run the job against our cluster.\n", + "\n", + "You can run the below cell to achieve this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94edca70", + "metadata": {}, + "outputs": [], + "source": [ + "rayjob = RayJob(\n", + " job_name=\"sdk-test-job\",\n", + " cluster_name=\"rayjob-cluster\",\n", + " namespace=\"rhods-notebooks\",\n", + " entrypoint=\"python -c 'import time; time.sleep(20)'\",\n", + ")\n", + "\n", + "rayjob.submit()" + ] + }, + { + "cell_type": "markdown", + "id": "30a8899a", + "metadata": {}, + "source": [ + "We can observe the status of the RayJob in the same way as the RayCluster by invoking the `status()` function via the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3283b09c", + "metadata": {}, + "outputs": [], + "source": [ + "rayjob.status()" + ] + }, + { + "cell_type": "markdown", + "id": "9f3c9c9f", + "metadata": {}, + "source": [ + "This function will output different tables based on the RayJob's current status. You can re-run the cell multiple times to observe the changes as you need to. 
Once you've observed that the job has been completed, you can shut down the cluster we created earlier by executing the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b11e379", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.down()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From f1d2ef2d638d04a671a48d5acda240e44bf4ed94 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Tue, 12 Aug 2025 21:28:35 +0100 Subject: [PATCH 07/33] feat(RHOAIENG-26482): add gcs fault tolerance --- src/codeflare_sdk/ray/rayjobs/__init__.py | 1 + src/codeflare_sdk/ray/rayjobs/config.py | 62 +++++++++++++++++++- src/codeflare_sdk/ray/rayjobs/rayjob.py | 1 - src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 21 +++++++ 4 files changed, 83 insertions(+), 2 deletions(-) diff --git a/src/codeflare_sdk/ray/rayjobs/__init__.py b/src/codeflare_sdk/ray/rayjobs/__init__.py index 756fad91..c415c606 100644 --- a/src/codeflare_sdk/ray/rayjobs/__init__.py +++ b/src/codeflare_sdk/ray/rayjobs/__init__.py @@ -1,2 +1,3 @@ from .rayjob import RayJob, RayJobClusterConfig from .status import RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo +from .config import RayJobClusterConfig diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index 96b59046..50a56610 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -13,7 +13,7 @@ # limitations under the License. 
""" -The config sub-module contains the definition of the RayJobClusterConfigV2 dataclass, +The config sub-module contains the definition of the RayJobClusterConfig dataclass, which is used to specify resource requirements and other details when creating a Cluster object. """ @@ -141,6 +141,14 @@ class RayJobClusterConfig: A list of V1Volume objects to add to the Cluster volume_mounts: A list of V1VolumeMount objects to add to the Cluster + enable_gcs_ft: + A boolean indicating whether to enable GCS fault tolerance. + redis_address: + The address of the Redis server to use for GCS fault tolerance, required when enable_gcs_ft is True. + redis_password_secret: + Kubernetes secret reference containing Redis password. ex: {"name": "secret-name", "key": "password-key"} + external_storage_namespace: + The storage namespace to use for GCS fault tolerance. By default, KubeRay sets it to the UID of RayCluster. """ head_cpu_requests: Union[int, str] = 2 @@ -167,8 +175,33 @@ class RayJobClusterConfig: annotations: Dict[str, str] = field(default_factory=dict) volumes: list[V1Volume] = field(default_factory=list) volume_mounts: list[V1VolumeMount] = field(default_factory=list) + enable_gcs_ft: bool = False + redis_address: Optional[str] = None + redis_password_secret: Optional[Dict[str, str]] = None + external_storage_namespace: Optional[str] = None def __post_init__(self): + if self.enable_gcs_ft: + if not self.redis_address: + raise ValueError( + "redis_address must be provided when enable_gcs_ft is True" + ) + + if self.redis_password_secret and not isinstance( + self.redis_password_secret, dict + ): + raise ValueError( + "redis_password_secret must be a dictionary with 'name' and 'key' fields" + ) + + if self.redis_password_secret and ( + "name" not in self.redis_password_secret + or "key" not in self.redis_password_secret + ): + raise ValueError( + "redis_password_secret must contain both 'name' and 'key' fields" + ) + self._validate_types() self._memory_to_string() 
self._validate_gpu_config(self.head_accelerators) @@ -253,6 +286,11 @@ def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]: "workerGroupSpecs": [self._build_worker_group_spec(cluster_name)], } + # Add GCS fault tolerance if enabled + if self.enable_gcs_ft: + gcs_ft_options = self._build_gcs_ft_options() + ray_cluster_spec["gcsFaultToleranceOptions"] = gcs_ft_options + return ray_cluster_spec def _build_head_group_spec(self) -> Dict[str, Any]: @@ -455,3 +493,25 @@ def _generate_volumes(self) -> list: def _build_env_vars(self) -> list: """Build environment variables list.""" return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()] + + def _build_gcs_ft_options(self) -> Dict[str, Any]: + """Build GCS fault tolerance options.""" + gcs_ft_options = {"redisAddress": self.redis_address} + + if ( + hasattr(self, "external_storage_namespace") + and self.external_storage_namespace + ): + gcs_ft_options["externalStorageNamespace"] = self.external_storage_namespace + + if hasattr(self, "redis_password_secret") and self.redis_password_secret: + gcs_ft_options["redisPassword"] = { + "valueFrom": { + "secretKeyRef": { + "name": self.redis_password_secret["name"], + "key": self.redis_password_secret["key"], + } + } + } + + return gcs_ft_options diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index ab0899d2..93b3ed71 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -140,7 +140,6 @@ def __init__( self.cluster_name = cluster_name logger.info(f"Using existing cluster: {self.cluster_name}") - # Initialize the KubeRay job API client self._api = RayjobApi() logger.info(f"Initialized RayJob: {self.name} in namespace: {self.namespace}") diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 970f0159..c1ebaaa8 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ 
b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -971,3 +971,24 @@ def test_rayjob_user_override_shutdown_behavior(mocker): ) assert rayjob_override_priority.shutdown_after_job_finishes is True + + +def test_build_ray_cluster_spec_with_gcs_ft(mocker): + """Test build_ray_cluster_spec with GCS fault tolerance enabled.""" + from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + + # Create a test cluster config with GCS FT enabled + cluster_config = RayJobClusterConfig( + enable_gcs_ft=True, + redis_address="redis://redis-service:6379", + external_storage_namespace="storage-ns", + ) + + # Build the spec using the method on the cluster config + spec = cluster_config.build_ray_cluster_spec("test-cluster") + + # Verify GCS fault tolerance options + assert "gcsFaultToleranceOptions" in spec + gcs_ft = spec["gcsFaultToleranceOptions"] + assert gcs_ft["redisAddress"] == "redis://redis-service:6379" + assert gcs_ft["externalStorageNamespace"] == "storage-ns" From 327919247507fba31e085abef9147b3c99498df3 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 13 Aug 2025 17:07:34 +0100 Subject: [PATCH 08/33] feat(RHOAIENG-26482): disable usage stats and rename RayJobClusterConfig --- codecov.yml | 15 ++++ src/codeflare_sdk/__init__.py | 2 +- src/codeflare_sdk/ray/__init__.py | 2 +- src/codeflare_sdk/ray/rayjobs/__init__.py | 4 +- src/codeflare_sdk/ray/rayjobs/config.py | 12 +-- src/codeflare_sdk/ray/rayjobs/rayjob.py | 4 +- src/codeflare_sdk/ray/rayjobs/test_config.py | 67 +++++++++++++++-- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 78 ++++++++++---------- 8 files changed, 126 insertions(+), 58 deletions(-) diff --git a/codecov.yml b/codecov.yml index 550965e6..fab28aee 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,3 +1,18 @@ ignore: - "**/*.ipynb" - "demo-notebooks/**" + - "**/__init__.py" + +coverage: + precision: 2 + round: down + status: + project: + default: + target: auto + threshold: 2.5% + patch: + default: + target: 85% + threshold: 2.5% + diff --git 
a/src/codeflare_sdk/__init__.py b/src/codeflare_sdk/__init__.py index f9a06524..a27702e7 100644 --- a/src/codeflare_sdk/__init__.py +++ b/src/codeflare_sdk/__init__.py @@ -11,7 +11,7 @@ AppWrapperStatus, RayJobClient, RayJob, - RayJobClusterConfig, + ManagedClusterConfig, ) from .common.widgets import view_clusters diff --git a/src/codeflare_sdk/ray/__init__.py b/src/codeflare_sdk/ray/__init__.py index 806ed9a4..7bd0b2c8 100644 --- a/src/codeflare_sdk/ray/__init__.py +++ b/src/codeflare_sdk/ray/__init__.py @@ -6,7 +6,7 @@ from .rayjobs import ( RayJob, - RayJobClusterConfig, + ManagedClusterConfig, RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo, diff --git a/src/codeflare_sdk/ray/rayjobs/__init__.py b/src/codeflare_sdk/ray/rayjobs/__init__.py index c415c606..cd6b4123 100644 --- a/src/codeflare_sdk/ray/rayjobs/__init__.py +++ b/src/codeflare_sdk/ray/rayjobs/__init__.py @@ -1,3 +1,3 @@ -from .rayjob import RayJob, RayJobClusterConfig +from .rayjob import RayJob, ManagedClusterConfig from .status import RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo -from .config import RayJobClusterConfig +from .config import ManagedClusterConfig diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index 50a56610..7a8e14e6 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -13,7 +13,7 @@ # limitations under the License. """ -The config sub-module contains the definition of the RayJobClusterConfig dataclass, +The config sub-module contains the definition of the ManagedClusterConfig dataclass, which is used to specify resource requirements and other details when creating a Cluster object. """ @@ -106,7 +106,7 @@ @dataclass -class RayJobClusterConfig: +class ManagedClusterConfig: """ This dataclass is used to specify resource requirements and other details for RayJobs. The cluster name and namespace are automatically derived from the RayJob configuration. 
@@ -181,6 +181,8 @@ class RayJobClusterConfig: external_storage_namespace: Optional[str] = None def __post_init__(self): + self.envs["RAY_USAGE_STATS_ENABLED"] = "0" + if self.enable_gcs_ft: if not self.redis_address: raise ValueError( @@ -225,7 +227,7 @@ def _memory_to_string(self): self.worker_memory_limits = f"{self.worker_memory_limits}G" def _validate_types(self): - """Validate the types of all fields in the RayJobClusterConfig dataclass.""" + """Validate the types of all fields in the ManagedClusterConfig dataclass.""" errors = [] for field_info in fields(self): value = getattr(self, field_info.name) @@ -270,10 +272,10 @@ def check_type(value, expected_type): def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]: """ - Build the RayCluster spec from RayJobClusterConfig for embedding in RayJob. + Build the RayCluster spec from ManagedClusterConfig for embedding in RayJob. Args: - self: The cluster configuration object (RayJobClusterConfig) + self: The cluster configuration object (ManagedClusterConfig) cluster_name: The name for the cluster (derived from RayJob name) Returns: diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 93b3ed71..a1577d91 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -20,7 +20,7 @@ from typing import Dict, Any, Optional, Tuple from python_client.kuberay_job_api import RayjobApi -from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig +from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from ...common.utils import get_current_namespace @@ -48,7 +48,7 @@ def __init__( job_name: str, entrypoint: str, cluster_name: Optional[str] = None, - cluster_config: Optional[RayJobClusterConfig] = None, + cluster_config: Optional[ManagedClusterConfig] = None, namespace: Optional[str] = None, runtime_env: Optional[Dict[str, Any]] = None, shutdown_after_job_finishes: Optional[bool] = None, diff --git 
a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test_config.py index cefe9606..80736295 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_config.py +++ b/src/codeflare_sdk/ray/rayjobs/test_config.py @@ -1,14 +1,14 @@ """ -Tests for the simplified RayJobClusterConfig accelerator_configs behavior. +Tests for the simplified ManagedClusterConfig accelerator_configs behavior. """ import pytest -from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig, DEFAULT_ACCELERATORS +from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig, DEFAULT_ACCELERATORS def test_accelerator_configs_defaults_to_default_accelerators(): """Test that accelerator_configs defaults to DEFAULT_ACCELERATORS.copy()""" - config = RayJobClusterConfig() + config = ManagedClusterConfig() # Should have all the default accelerators assert "nvidia.com/gpu" in config.accelerator_configs @@ -27,7 +27,7 @@ def test_accelerator_configs_can_be_overridden(): "custom.com/accelerator": "CUSTOM_ACCELERATOR", } - config = RayJobClusterConfig(accelerator_configs=custom_configs) + config = ManagedClusterConfig(accelerator_configs=custom_configs) # Should have custom configs assert config.accelerator_configs == custom_configs @@ -46,7 +46,7 @@ def test_accelerator_configs_can_extend_defaults(): "custom.com/accelerator": "CUSTOM_ACCEL", } - config = RayJobClusterConfig(accelerator_configs=extended_configs) + config = ManagedClusterConfig(accelerator_configs=extended_configs) # Should have all defaults plus custom assert "nvidia.com/gpu" in config.accelerator_configs @@ -57,7 +57,7 @@ def test_accelerator_configs_can_extend_defaults(): def test_gpu_validation_works_with_defaults(): """Test that GPU validation works with default accelerator configs""" - config = RayJobClusterConfig(head_accelerators={"nvidia.com/gpu": 1}) + config = ManagedClusterConfig(head_accelerators={"nvidia.com/gpu": 1}) # Should not raise any errors assert config.head_accelerators == {"nvidia.com/gpu": 
1} @@ -65,7 +65,7 @@ def test_gpu_validation_works_with_defaults(): def test_gpu_validation_works_with_custom_configs(): """Test that GPU validation works with custom accelerator configs""" - config = RayJobClusterConfig( + config = ManagedClusterConfig( accelerator_configs={"custom.com/accelerator": "CUSTOM_ACCEL"}, head_accelerators={"custom.com/accelerator": 1}, ) @@ -79,4 +79,55 @@ def test_gpu_validation_fails_with_unsupported_accelerator(): with pytest.raises( ValueError, match="GPU configuration 'unsupported.com/accelerator' not found" ): - RayJobClusterConfig(head_accelerators={"unsupported.com/accelerator": 1}) + ManagedClusterConfig(head_accelerators={"unsupported.com/accelerator": 1}) + + +def test_ray_usage_stats_always_disabled_by_default(): + """Test that RAY_USAGE_STATS_ENABLED is always set to '0' by default""" + config = ManagedClusterConfig() + + # Should always have the environment variable set to "0" + assert "RAY_USAGE_STATS_ENABLED" in config.envs + assert config.envs["RAY_USAGE_STATS_ENABLED"] == "0" + + +def test_ray_usage_stats_overwrites_user_env(): + """Test that RAY_USAGE_STATS_ENABLED is always set to '0' even if user specifies it""" + # User tries to enable usage stats + config = ManagedClusterConfig(envs={"RAY_USAGE_STATS_ENABLED": "1"}) + + # Should still be disabled (our setting takes precedence) + assert "RAY_USAGE_STATS_ENABLED" in config.envs + assert config.envs["RAY_USAGE_STATS_ENABLED"] == "0" + + +def test_ray_usage_stats_overwrites_user_env_string(): + """Test that RAY_USAGE_STATS_ENABLED is always set to '0' even if user specifies it as string""" + # User tries to enable usage stats with string + config = ManagedClusterConfig(envs={"RAY_USAGE_STATS_ENABLED": "true"}) + + # Should still be disabled (our setting takes precedence) + assert "RAY_USAGE_STATS_ENABLED" in config.envs + assert config.envs["RAY_USAGE_STATS_ENABLED"] == "0" + + +def test_ray_usage_stats_with_other_user_envs(): + """Test that RAY_USAGE_STATS_ENABLED 
is set correctly while preserving other user envs""" + # User sets other environment variables + user_envs = { + "CUSTOM_VAR": "custom_value", + "ANOTHER_VAR": "another_value", + "RAY_USAGE_STATS_ENABLED": "1", # This should be overwritten + } + + config = ManagedClusterConfig(envs=user_envs) + + # Our setting should take precedence + assert config.envs["RAY_USAGE_STATS_ENABLED"] == "0" + + # Other user envs should be preserved + assert config.envs["CUSTOM_VAR"] == "custom_value" + assert config.envs["ANOTHER_VAR"] == "another_value" + + # Total count should be correct (3 user envs) + assert len(config.envs) == 3 diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index c1ebaaa8..1ecd4b48 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -265,10 +265,10 @@ def test_build_ray_cluster_spec(mocker): "workerGroupSpecs": [{"replicas": 2}], }, } - # Use RayJobClusterConfig which has the build_ray_cluster_spec method - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + # Use ManagedClusterConfig which has the build_ray_cluster_spec method + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig(num_workers=2) + cluster_config = ManagedClusterConfig(num_workers=2) # Mock the method that will be called mocker.patch.object( @@ -345,10 +345,10 @@ def test_build_rayjob_cr_with_auto_cluster(mocker): "workerGroupSpecs": [{"replicas": 2}], }, } - # Use RayJobClusterConfig and mock its build_ray_cluster_spec method - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + # Use ManagedClusterConfig and mock its build_ray_cluster_spec method + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig(num_workers=2) + cluster_config = ManagedClusterConfig(num_workers=2) # Mock the method that will be called mocker.patch.object( @@ -406,10 +406,10 
@@ def test_submit_with_auto_cluster(mocker): mock_api_class.return_value = mock_api_instance mock_api_instance.submit_job.return_value = True - # Use RayJobClusterConfig and mock its build_ray_cluster_spec method - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + # Use ManagedClusterConfig and mock its build_ray_cluster_spec method + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig(num_workers=1) + cluster_config = ManagedClusterConfig(num_workers=1) # Mock the method that will be called mocker.patch.object( @@ -488,9 +488,9 @@ def test_shutdown_behavior_with_cluster_config(mocker): """Test that shutdown_after_job_finishes is True when cluster_config is provided.""" mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig() + cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", @@ -517,12 +517,12 @@ def test_shutdown_behavior_with_existing_cluster(mocker): def test_rayjob_with_rayjob_cluster_config(mocker): - """Test RayJob with the new RayJobClusterConfig.""" + """Test RayJob with the new ManagedClusterConfig.""" mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( num_workers=2, head_cpu_requests="500m", head_memory_requests="512Mi", @@ -540,13 +540,13 @@ def test_rayjob_with_rayjob_cluster_config(mocker): def test_rayjob_cluster_config_validation(mocker): - """Test validation of RayJobClusterConfig parameters.""" + """Test validation of ManagedClusterConfig parameters.""" mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - from codeflare_sdk.ray.rayjobs.config 
import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig # Test with minimal valid config - cluster_config = RayJobClusterConfig() + cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", @@ -581,9 +581,9 @@ def test_build_ray_cluster_spec_integration(mocker): # Mock the RayjobApi class entirely mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig() + cluster_config = ManagedClusterConfig() # Mock the build_ray_cluster_spec method on the cluster config mock_spec = {"spec": "test-spec"} @@ -658,9 +658,9 @@ def test_rayjob_cluster_name_generation_with_config(mocker): """Test cluster name generation when using cluster_config.""" mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig() + cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="my-job", @@ -685,9 +685,9 @@ def test_rayjob_namespace_propagation_to_cluster_config(mocker): return_value="detected-ns", ) - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig() + cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", @@ -735,10 +735,10 @@ def test_rayjob_constructor_parameter_validation(mocker): def test_build_ray_cluster_spec_function(mocker): """Test the build_ray_cluster_spec method directly.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig # Create a test cluster config - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( 
num_workers=2, head_cpu_requests="500m", head_memory_requests="512Mi", @@ -774,10 +774,10 @@ def test_build_ray_cluster_spec_function(mocker): def test_build_ray_cluster_spec_with_accelerators(mocker): """Test build_ray_cluster_spec with GPU accelerators.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig # Create a test cluster config with GPU accelerators - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( head_accelerators={"nvidia.com/gpu": 1}, worker_accelerators={"nvidia.com/gpu": 2}, ) @@ -801,7 +801,7 @@ def test_build_ray_cluster_spec_with_accelerators(mocker): def test_build_ray_cluster_spec_with_custom_volumes(mocker): """Test build_ray_cluster_spec with custom volumes and volume mounts.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from kubernetes.client import V1Volume, V1VolumeMount # Create custom volumes and volume mounts @@ -809,7 +809,7 @@ def test_build_ray_cluster_spec_with_custom_volumes(mocker): custom_volume_mount = V1VolumeMount(name="custom-data", mount_path="/data") # Create a test cluster config with custom volumes - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( volumes=[custom_volume], volume_mounts=[custom_volume_mount], ) @@ -831,10 +831,10 @@ def test_build_ray_cluster_spec_with_custom_volumes(mocker): def test_build_ray_cluster_spec_with_environment_variables(mocker): """Test build_ray_cluster_spec with environment variables.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig # Create a test cluster config with environment variables - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( envs={"CUDA_VISIBLE_DEVICES": "0", "RAY_DISABLE_IMPORT_WARNING": "1"}, ) @@ -863,7 +863,7 @@ def 
test_build_ray_cluster_spec_with_environment_variables(mocker): def test_build_ray_cluster_spec_with_tolerations(mocker): """Test build_ray_cluster_spec with tolerations.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from kubernetes.client import V1Toleration # Create test tolerations @@ -875,7 +875,7 @@ def test_build_ray_cluster_spec_with_tolerations(mocker): ) # Create a test cluster config with tolerations - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( head_tolerations=[head_toleration], worker_tolerations=[worker_toleration], ) @@ -900,10 +900,10 @@ def test_build_ray_cluster_spec_with_tolerations(mocker): def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): """Test build_ray_cluster_spec with image pull secrets.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig # Create a test cluster config with image pull secrets - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( image_pull_secrets=["my-registry-secret", "another-secret"] ) @@ -947,9 +947,9 @@ def test_rayjob_user_override_shutdown_behavior(mocker): assert rayjob_existing_override.shutdown_after_job_finishes is True # Test 2: User overrides shutdown to False even when creating new cluster - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = RayJobClusterConfig() + cluster_config = ManagedClusterConfig() rayjob_new_override = RayJob( job_name="test-job", @@ -975,10 +975,10 @@ def test_rayjob_user_override_shutdown_behavior(mocker): def test_build_ray_cluster_spec_with_gcs_ft(mocker): """Test build_ray_cluster_spec with GCS fault tolerance enabled.""" - from codeflare_sdk.ray.rayjobs.config import RayJobClusterConfig + from codeflare_sdk.ray.rayjobs.config 
import ManagedClusterConfig # Create a test cluster config with GCS FT enabled - cluster_config = RayJobClusterConfig( + cluster_config = ManagedClusterConfig( enable_gcs_ft=True, redis_address="redis://redis-service:6379", external_storage_namespace="storage-ns", From 17b8a4195363778bbb274125e54834c1e35e8358 Mon Sep 17 00:00:00 2001 From: lilylinh Date: Wed, 20 Aug 2025 12:18:40 +0100 Subject: [PATCH 09/33] feat(RHOAIENG-29330):Deny RayCluster creation with Ray Version mismatches fixed --- .../common/utils/test_validation.py | 224 ++++++++++++++++++ src/codeflare_sdk/common/utils/validation.py | 134 +++++++++++ src/codeflare_sdk/ray/cluster/config.py | 1 + src/codeflare_sdk/ray/cluster/test_config.py | 4 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 43 +++- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 153 +++++++++++- 6 files changed, 556 insertions(+), 3 deletions(-) create mode 100644 src/codeflare_sdk/common/utils/test_validation.py create mode 100644 src/codeflare_sdk/common/utils/validation.py diff --git a/src/codeflare_sdk/common/utils/test_validation.py b/src/codeflare_sdk/common/utils/test_validation.py new file mode 100644 index 00000000..20416d00 --- /dev/null +++ b/src/codeflare_sdk/common/utils/test_validation.py @@ -0,0 +1,224 @@ +# Copyright 2022-2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from codeflare_sdk.common.utils.validation import ( + extract_ray_version_from_image, + validate_ray_version_compatibility, +) +from codeflare_sdk.common.utils.constants import RAY_VERSION + + +class TestRayVersionDetection: + """Test Ray version detection from container image names.""" + + def test_extract_ray_version_standard_format(self): + """Test extraction from standard Ray image formats.""" + # Standard format + assert extract_ray_version_from_image("ray:2.47.1") == "2.47.1" + assert extract_ray_version_from_image("ray:2.46.0") == "2.46.0" + assert extract_ray_version_from_image("ray:1.13.0") == "1.13.0" + + def test_extract_ray_version_with_registry(self): + """Test extraction from images with registry prefixes.""" + assert extract_ray_version_from_image("quay.io/ray:2.47.1") == "2.47.1" + assert ( + extract_ray_version_from_image("docker.io/rayproject/ray:2.47.1") + == "2.47.1" + ) + assert ( + extract_ray_version_from_image("gcr.io/my-project/ray:2.47.1") == "2.47.1" + ) + + def test_extract_ray_version_with_suffixes(self): + """Test extraction from images with version suffixes.""" + assert ( + extract_ray_version_from_image("quay.io/modh/ray:2.47.1-py311-cu121") + == "2.47.1" + ) + assert extract_ray_version_from_image("ray:2.47.1-py311") == "2.47.1" + assert extract_ray_version_from_image("ray:2.47.1-gpu") == "2.47.1" + assert extract_ray_version_from_image("ray:2.47.1-rocm62") == "2.47.1" + + def test_extract_ray_version_complex_registry_paths(self): + """Test extraction from complex registry paths.""" + assert ( + extract_ray_version_from_image("quay.io/modh/ray:2.47.1-py311-cu121") + == "2.47.1" + ) + assert ( + extract_ray_version_from_image("registry.company.com/team/ray:2.47.1") + == "2.47.1" + ) + + def test_extract_ray_version_no_version_found(self): + """Test cases where no version can be extracted.""" + # SHA-based tags + assert ( + extract_ray_version_from_image( + 
"quay.io/modh/ray@sha256:6d076aeb38ab3c34a6a2ef0f58dc667089aa15826fa08a73273c629333e12f1e" + ) + is None + ) + + # Non-semantic versions + assert extract_ray_version_from_image("ray:latest") is None + assert extract_ray_version_from_image("ray:nightly") is None + assert ( + extract_ray_version_from_image("ray:v2.47") is None + ) # Missing patch version + + # Non-Ray images + assert extract_ray_version_from_image("python:3.11") is None + assert extract_ray_version_from_image("ubuntu:20.04") is None + + # Empty or None + assert extract_ray_version_from_image("") is None + assert extract_ray_version_from_image(None) is None + + def test_extract_ray_version_edge_cases(self): + """Test edge cases for version extraction.""" + # Version with 'v' prefix should not match our pattern + assert extract_ray_version_from_image("ray:v2.47.1") is None + + # Multiple version-like patterns - should match the first valid one + assert ( + extract_ray_version_from_image("registry/ray:2.47.1-based-on-1.0.0") + == "2.47.1" + ) + + +class TestRayVersionValidation: + """Test Ray version compatibility validation.""" + + def test_validate_compatible_versions(self): + """Test validation with compatible Ray versions.""" + # Exact match + is_compatible, is_warning, message = validate_ray_version_compatibility( + f"ray:{RAY_VERSION}" + ) + assert is_compatible is True + assert is_warning is False + assert "Ray versions match" in message + + # With registry and suffixes + is_compatible, is_warning, message = validate_ray_version_compatibility( + f"quay.io/modh/ray:{RAY_VERSION}-py311-cu121" + ) + assert is_compatible is True + assert is_warning is False + assert "Ray versions match" in message + + def test_validate_incompatible_versions(self): + """Test validation with incompatible Ray versions.""" + # Different version + is_compatible, is_warning, message = validate_ray_version_compatibility( + "ray:2.46.0" + ) + assert is_compatible is False + assert is_warning is False + assert "Ray version 
mismatch detected" in message + assert "CodeFlare SDK uses Ray" in message + assert "runtime image uses Ray" in message + + # Older version + is_compatible, is_warning, message = validate_ray_version_compatibility( + "ray:1.13.0" + ) + assert is_compatible is False + assert is_warning is False + assert "Ray version mismatch detected" in message + + def test_validate_empty_image(self): + """Test validation with no custom image (should use default).""" + # Empty string + is_compatible, is_warning, message = validate_ray_version_compatibility("") + assert is_compatible is True + assert is_warning is False + assert "Using default Ray image compatible with SDK" in message + + # None + is_compatible, is_warning, message = validate_ray_version_compatibility(None) + assert is_compatible is True + assert is_warning is False + assert "Using default Ray image compatible with SDK" in message + + def test_validate_unknown_version(self): + """Test validation when version cannot be determined.""" + # SHA-based image + is_compatible, is_warning, message = validate_ray_version_compatibility( + "quay.io/modh/ray@sha256:6d076aeb38ab3c34a6a2ef0f58dc667089aa15826fa08a73273c629333e12f1e" + ) + assert is_compatible is True + assert is_warning is True + assert "Cannot determine Ray version" in message + + # Custom image without version + is_compatible, is_warning, message = validate_ray_version_compatibility( + "my-custom-ray:latest" + ) + assert is_compatible is True + assert is_warning is True + assert "Cannot determine Ray version" in message + + def test_validate_custom_sdk_version(self): + """Test validation with custom SDK version.""" + # Compatible with custom SDK version + is_compatible, is_warning, message = validate_ray_version_compatibility( + "ray:2.46.0", "2.46.0" + ) + assert is_compatible is True + assert is_warning is False + assert "Ray versions match" in message + + # Incompatible with custom SDK version + is_compatible, is_warning, message = 
validate_ray_version_compatibility( + "ray:2.47.1", "2.46.0" + ) + assert is_compatible is False + assert is_warning is False + assert "CodeFlare SDK uses Ray 2.46.0" in message + assert "runtime image uses Ray 2.47.1" in message + + def test_validate_message_content(self): + """Test that validation messages contain expected guidance.""" + # Mismatch message should contain helpful guidance + is_compatible, is_warning, message = validate_ray_version_compatibility( + "ray:2.46.0" + ) + assert is_compatible is False + assert is_warning is False + assert "compatibility issues" in message.lower() + assert "unexpected behavior" in message.lower() + assert "please use a runtime image" in message.lower() + assert "update your sdk version" in message.lower() + + def test_semantic_version_comparison(self): + """Test that semantic version comparison works correctly.""" + # Test that 2.10.0 > 2.9.1 (would fail with string comparison) + is_compatible, is_warning, message = validate_ray_version_compatibility( + "ray:2.10.0", "2.9.1" + ) + assert is_compatible is False + assert is_warning is False + assert "CodeFlare SDK uses Ray 2.9.1" in message + assert "runtime image uses Ray 2.10.0" in message + + # Test that 2.9.1 < 2.10.0 (would fail with string comparison) + is_compatible, is_warning, message = validate_ray_version_compatibility( + "ray:2.9.1", "2.10.0" + ) + assert is_compatible is False + assert is_warning is False + assert "CodeFlare SDK uses Ray 2.10.0" in message + assert "runtime image uses Ray 2.9.1" in message diff --git a/src/codeflare_sdk/common/utils/validation.py b/src/codeflare_sdk/common/utils/validation.py new file mode 100644 index 00000000..ec749f7c --- /dev/null +++ b/src/codeflare_sdk/common/utils/validation.py @@ -0,0 +1,134 @@ +# Copyright 2022-2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Validation utilities for the CodeFlare SDK. + +This module contains validation functions used across the SDK for ensuring +configuration compatibility and correctness. +""" + +import logging +import re +from typing import Optional, Tuple +from packaging.version import Version, InvalidVersion +from .constants import RAY_VERSION + +logger = logging.getLogger(__name__) + + +def extract_ray_version_from_image(image_name: str) -> Optional[str]: + """ + Extract Ray version from a container image name. + + Supports various image naming patterns: + - quay.io/modh/ray:2.47.1-py311-cu121 + - ray:2.47.1 + - some-registry/ray:2.47.1-py311 + - quay.io/modh/ray@sha256:... (falls back to None) + + Args: + image_name: The container image name/tag + + Returns: + The extracted Ray version, or None if not found + """ + if not image_name: + return None + + # Pattern to match semantic version after ray: or ray/ + # Looks for patterns like ray:2.47.1, ray:2.47.1-py311, etc. + patterns = [ + r"ray:(\d+\.\d+\.\d+)", # ray:2.47.1 + r"ray/[^:]*:(\d+\.\d+\.\d+)", # registry/ray:2.47.1 + r"/ray:(\d+\.\d+\.\d+)", # any-registry/ray:2.47.1 + ] + + for pattern in patterns: + match = re.search(pattern, image_name) + if match: + return match.group(1) + + # If we can't extract version, return None to indicate unknown + return None + + +def validate_ray_version_compatibility( + image_name: str, sdk_ray_version: str = RAY_VERSION +) -> Tuple[bool, bool, str]: + """ + Validate that the Ray version in the runtime image matches the SDK's Ray version. 
+ + Args: + image_name: The container image name/tag + sdk_ray_version: The Ray version used by the CodeFlare SDK + + Returns: + tuple: (is_compatible, is_warning, message) + - is_compatible: True if versions match or cannot be determined, False if mismatch + - is_warning: True if this is a warning (non-fatal), False otherwise + - message: Descriptive message about the validation result + """ + if not image_name: + # No custom image specified, will use default - this is compatible + logger.debug("Using default Ray image compatible with SDK") + return True, False, "Using default Ray image compatible with SDK" + + image_ray_version = extract_ray_version_from_image(image_name) + + if image_ray_version is None: + # Cannot determine version from image name, issue a warning but allow + return ( + True, + True, + f"Cannot determine Ray version from image '{image_name}'. Please ensure it's compatible with Ray {sdk_ray_version}", + ) + + # Use semantic version comparison for robust version checking + try: + sdk_version = Version(sdk_ray_version) + image_version = Version(image_ray_version) + + if image_version != sdk_version: + # Version mismatch detected + message = ( + f"Ray version mismatch detected!\n" + f"CodeFlare SDK uses Ray {sdk_ray_version}, but runtime image uses Ray {image_ray_version}.\n" + f"This mismatch can cause compatibility issues and unexpected behavior.\n" + f"Please use a runtime image with Ray {sdk_ray_version} or update your SDK version." 
+ ) + return False, False, message + except InvalidVersion as e: + # If version parsing fails, fall back to string comparison with a warning + logger.warning( + f"Failed to parse version for comparison ({e}), falling back to string comparison" + ) + if image_ray_version != sdk_ray_version: + message = ( + f"Ray version mismatch detected!\n" + f"CodeFlare SDK uses Ray {sdk_ray_version}, but runtime image uses Ray {image_ray_version}.\n" + f"This mismatch can cause compatibility issues and unexpected behavior.\n" + f"Please use a runtime image with Ray {sdk_ray_version} or update your SDK version." + ) + return False, False, message + + # Versions match + logger.debug( + f"Ray version validation successful: SDK and runtime image both use Ray {sdk_ray_version}" + ) + return ( + True, + False, + f"Ray versions match: SDK and runtime image both use Ray {sdk_ray_version}", + ) diff --git a/src/codeflare_sdk/ray/cluster/config.py b/src/codeflare_sdk/ray/cluster/config.py index dc61de2a..561e2aa4 100644 --- a/src/codeflare_sdk/ray/cluster/config.py +++ b/src/codeflare_sdk/ray/cluster/config.py @@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Union, get_args, get_origin from kubernetes.client import V1Toleration, V1Volume, V1VolumeMount + dir = pathlib.Path(__file__).parent.parent.resolve() # https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html diff --git a/src/codeflare_sdk/ray/cluster/test_config.py b/src/codeflare_sdk/ray/cluster/test_config.py index e405bc5b..31d8be8c 100644 --- a/src/codeflare_sdk/ray/cluster/test_config.py +++ b/src/codeflare_sdk/ray/cluster/test_config.py @@ -1,4 +1,4 @@ -# Copyright 2024 IBM, Red Hat +# Copyright 2022-2025 IBM, Red Hat # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -20,11 +20,13 @@ get_template_variables, ) from codeflare_sdk.ray.cluster.cluster import ClusterConfiguration, Cluster +from codeflare_sdk.common.utils.constants import RAY_VERSION from pathlib import Path import filecmp import pytest import os import yaml +import warnings parent = Path(__file__).resolve().parents[4] # project directory expected_clusters_dir = f"{parent}/tests/test_cluster_yamls" diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index a1577d91..67f8db43 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -1,4 +1,4 @@ -# Copyright 2025 IBM, Red Hat +# Copyright 2022-2025 IBM, Red Hat # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,12 +17,14 @@ """ import logging +import warnings from typing import Dict, Any, Optional, Tuple from python_client.kuberay_job_api import RayjobApi from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from ...common.utils import get_current_namespace +from ...common.utils.validation import validate_ray_version_compatibility from .status import ( RayJobDeploymentStatus, @@ -149,6 +151,9 @@ def submit(self) -> str: if not self.entrypoint: raise ValueError("entrypoint must be provided to submit a RayJob") + # Validate Ray version compatibility for both cluster_config and runtime_env + self._validate_ray_version_compatibility() + # Build the RayJob custom resource rayjob_cr = self._build_rayjob_cr() @@ -213,6 +218,42 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: return rayjob_cr + def _validate_ray_version_compatibility(self): + """ + Validate Ray version compatibility for cluster_config image only. + Raises ValueError if there is a version mismatch. 
+ """ + # Validate cluster_config image if creating new cluster + if self._cluster_config is not None: + self._validate_cluster_config_image() + + def _validate_cluster_config_image(self): + """ + Validate that the Ray version in cluster_config image matches the SDK's Ray version. + """ + if not hasattr(self._cluster_config, "image"): + logger.debug( + "No image attribute found in cluster config, skipping validation" + ) + return + + image = self._cluster_config.image + if not image: + logger.debug("Cluster config image is empty, skipping validation") + return + + if not isinstance(image, str): + logger.warning( + f"Cluster config image should be a string, got {type(image).__name__}: {image}" + ) + return # Skip validation for malformed image + + is_compatible, is_warning, message = validate_ray_version_compatibility(image) + if not is_compatible: + raise ValueError(f"Cluster config image: {message}") + elif is_warning: + warnings.warn(f"Cluster config image: {message}") + def status( self, print_to_console: bool = True ) -> Tuple[CodeflareRayJobStatus, bool]: diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 1ecd4b48..6827ed03 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -1,4 +1,4 @@ -# Copyright 2025 IBM, Red Hat +# Copyright 2022-2025 IBM, Red Hat # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ from codeflare_sdk.ray.rayjobs.rayjob import RayJob from codeflare_sdk.ray.cluster.config import ClusterConfiguration +from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig def test_rayjob_submit_success(mocker): @@ -992,3 +993,153 @@ def test_build_ray_cluster_spec_with_gcs_ft(mocker): gcs_ft = spec["gcsFaultToleranceOptions"] assert gcs_ft["redisAddress"] == "redis://redis-service:6379" assert gcs_ft["externalStorageNamespace"] == "storage-ns" + + +class TestRayVersionValidation: + """Test Ray version validation in RayJob.""" + + def test_submit_with_cluster_config_compatible_image_passes(self, mocker): + """Test that submission passes with compatible cluster_config image.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + mock_api_instance.submit_job.return_value = True + + cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + namespace="test-namespace", + entrypoint="python script.py", + ) + + # Should not raise any validation errors + result = rayjob.submit() + assert result == "test-job" + + def test_submit_with_cluster_config_incompatible_image_fails(self, mocker): + """Test that submission fails with incompatible cluster_config image.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + cluster_config = ManagedClusterConfig(image="ray:2.8.0") # Different version + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + namespace="test-namespace", + entrypoint="python script.py", + ) + + # Should raise ValueError for version mismatch + with pytest.raises( + ValueError, match="Cluster 
config image: Ray version mismatch detected" + ): + rayjob.submit() + + def test_validate_ray_version_compatibility_method(self, mocker): + """Test the _validate_ray_version_compatibility method directly.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python script.py", + ) + + # Test with no cluster_config (should not raise) + rayjob._validate_ray_version_compatibility() # Should not raise + + # Test with compatible cluster_config version + rayjob._cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") + rayjob._validate_ray_version_compatibility() # Should not raise + + # Test with incompatible cluster_config version + rayjob._cluster_config = ManagedClusterConfig(image="ray:2.8.0") + with pytest.raises( + ValueError, match="Cluster config image: Ray version mismatch detected" + ): + rayjob._validate_ray_version_compatibility() + + # Test with unknown cluster_config version (should warn but not fail) + rayjob._cluster_config = ManagedClusterConfig(image="custom-image:latest") + with pytest.warns( + UserWarning, match="Cluster config image: Cannot determine Ray version" + ): + rayjob._validate_ray_version_compatibility() + + def test_validate_cluster_config_image_method(self, mocker): + """Test the _validate_cluster_config_image method directly.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + rayjob = RayJob( + job_name="test-job", + cluster_config=ManagedClusterConfig(), + namespace="test-namespace", + entrypoint="python script.py", + ) + + # Test with no image (should not raise) + 
rayjob._validate_cluster_config_image() # Should not raise + + # Test with compatible image + rayjob._cluster_config.image = f"ray:{RAY_VERSION}" + rayjob._validate_cluster_config_image() # Should not raise + + # Test with incompatible image + rayjob._cluster_config.image = "ray:2.8.0" + with pytest.raises( + ValueError, match="Cluster config image: Ray version mismatch detected" + ): + rayjob._validate_cluster_config_image() + + # Test with unknown image (should warn but not fail) + rayjob._cluster_config.image = "custom-image:latest" + with pytest.warns( + UserWarning, match="Cluster config image: Cannot determine Ray version" + ): + rayjob._validate_cluster_config_image() + + def test_validate_cluster_config_image_edge_cases(self, mocker): + """Test edge cases in _validate_cluster_config_image method.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + rayjob = RayJob( + job_name="test-job", + cluster_config=ManagedClusterConfig(), + namespace="test-namespace", + entrypoint="python script.py", + ) + + # Test with None image (should not raise) + rayjob._cluster_config.image = None + rayjob._validate_cluster_config_image() # Should not raise + + # Test with empty string image (should not raise) + rayjob._cluster_config.image = "" + rayjob._validate_cluster_config_image() # Should not raise + + # Test with non-string image (should log warning and skip) + rayjob._cluster_config.image = 123 + rayjob._validate_cluster_config_image() # Should log warning and not raise + + # Test with cluster config that has no image attribute + class MockClusterConfig: + pass + + rayjob._cluster_config = MockClusterConfig() + rayjob._validate_cluster_config_image() # Should not raise From 331e2f3d02bd332971b201b2ec1c365ef236fe5c Mon Sep 17 00:00:00 2001 From: lilylinh Date: Mon, 25 Aug 2025 12:34:39 +0100 Subject: 
[PATCH 10/33] Delete unsued code in config and test_config --- src/codeflare_sdk/ray/cluster/config.py | 1 - src/codeflare_sdk/ray/cluster/test_config.py | 2 -- src/codeflare_sdk/ray/rayjobs/rayjob.py | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/codeflare_sdk/ray/cluster/config.py b/src/codeflare_sdk/ray/cluster/config.py index 561e2aa4..dc61de2a 100644 --- a/src/codeflare_sdk/ray/cluster/config.py +++ b/src/codeflare_sdk/ray/cluster/config.py @@ -24,7 +24,6 @@ from typing import Dict, List, Optional, Union, get_args, get_origin from kubernetes.client import V1Toleration, V1Volume, V1VolumeMount - dir = pathlib.Path(__file__).parent.parent.resolve() # https://docs.ray.io/en/latest/ray-core/scheduling/accelerators.html diff --git a/src/codeflare_sdk/ray/cluster/test_config.py b/src/codeflare_sdk/ray/cluster/test_config.py index 31d8be8c..9f880df7 100644 --- a/src/codeflare_sdk/ray/cluster/test_config.py +++ b/src/codeflare_sdk/ray/cluster/test_config.py @@ -20,13 +20,11 @@ get_template_variables, ) from codeflare_sdk.ray.cluster.cluster import ClusterConfiguration, Cluster -from codeflare_sdk.common.utils.constants import RAY_VERSION from pathlib import Path import filecmp import pytest import os import yaml -import warnings parent = Path(__file__).resolve().parents[4] # project directory expected_clusters_dir = f"{parent}/tests/test_cluster_yamls" diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 67f8db43..6230a0e1 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -220,7 +220,7 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: def _validate_ray_version_compatibility(self): """ - Validate Ray version compatibility for cluster_config image only. + Validate Ray version compatibility for cluster_config image. Raises ValueError if there is a version mismatch. 
""" # Validate cluster_config image if creating new cluster From c5f121e0ec61cbc8b6fe23ac21561c2a9eb3c0fa Mon Sep 17 00:00:00 2001 From: Pat O'Connor Date: Tue, 19 Aug 2025 16:53:31 +0100 Subject: [PATCH 11/33] feat(RHOAIENG-29391): Store entrypoint scripts in configMaps Signed-off-by: Pat O'Connor --- src/codeflare_sdk/ray/rayjobs/config.py | 94 ++- src/codeflare_sdk/ray/rayjobs/rayjob.py | 283 ++++++- src/codeflare_sdk/ray/rayjobs/test_config.py | 39 + src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 729 +++++++++++++++++++ 4 files changed, 1143 insertions(+), 2 deletions(-) diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index 7a8e14e6..d335da51 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -20,7 +20,7 @@ import pathlib from dataclasses import dataclass, field, fields -from typing import Dict, List, Optional, Union, get_args, get_origin, Any +from typing import Dict, List, Optional, Union, get_args, get_origin, Any, Tuple from kubernetes.client import ( V1ConfigMapVolumeSource, V1KeyToPath, @@ -517,3 +517,95 @@ def _build_gcs_ft_options(self) -> Dict[str, Any]: } return gcs_ft_options + + def add_script_volumes( + self, configmap_name: str, mount_path: str = "/home/ray/scripts" + ): + """ + Add script volume and mount references to cluster configuration. 
+ + Args: + configmap_name: Name of the ConfigMap containing scripts + mount_path: Where to mount scripts in containers (default: /home/ray/scripts) + """ + # Check if script volume already exists + volume_name = "ray-job-scripts" + existing_volume = next( + (v for v in self.volumes if getattr(v, "name", None) == volume_name), None + ) + if existing_volume: + logger.debug(f"Script volume '{volume_name}' already exists, skipping...") + return + + # Check if script mount already exists + existing_mount = next( + (m for m in self.volume_mounts if getattr(m, "name", None) == volume_name), + None, + ) + if existing_mount: + logger.debug( + f"Script volume mount '{volume_name}' already exists, skipping..." + ) + return + + # Add script volume to cluster configuration + script_volume = V1Volume( + name=volume_name, config_map=V1ConfigMapVolumeSource(name=configmap_name) + ) + self.volumes.append(script_volume) + + # Add script volume mount to cluster configuration + script_mount = V1VolumeMount(name=volume_name, mount_path=mount_path) + self.volume_mounts.append(script_mount) + + logger.info( + f"Added script volume '{configmap_name}' to cluster config: mount_path={mount_path}" + ) + + def validate_configmap_size(self, scripts: Dict[str, str]) -> None: + total_size = sum(len(content.encode("utf-8")) for content in scripts.values()) + if total_size > 1024 * 1024: # 1MB + raise ValueError( + f"ConfigMap size exceeds 1MB limit. 
Total size: {total_size} bytes" + ) + + def build_script_configmap_spec( + self, job_name: str, namespace: str, scripts: Dict[str, str] + ) -> Dict[str, Any]: + """ + Build ConfigMap specification for scripts + + Args: + job_name: Name of the RayJob (used for ConfigMap naming) + namespace: Kubernetes namespace + scripts: Dictionary of script_name -> script_content + + Returns: + Dict: ConfigMap specification ready for Kubernetes API + """ + configmap_name = f"{job_name}-scripts" + return { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": configmap_name, "namespace": namespace}, + "data": scripts, + } + + def build_script_volume_specs( + self, configmap_name: str, mount_path: str = "/home/ray/scripts" + ) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """ + Build volume and mount specifications for scripts + + Args: + configmap_name: Name of the ConfigMap containing scripts + mount_path: Where to mount scripts in containers + + Returns: + Tuple of (volume_spec, mount_spec) as dictionaries + """ + volume_spec = {"name": "ray-job-scripts", "configMap": {"name": configmap_name}} + + mount_spec = {"name": "ray-job-scripts", "mountPath": mount_path} + + return volume_spec, mount_spec diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 6230a0e1..acf93fdc 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -18,9 +18,14 @@ import logging import warnings +import os +import re +import ast from typing import Dict, Any, Optional, Tuple +from kubernetes import client +from ...common.kubernetes_cluster.auth import get_api_client from python_client.kuberay_job_api import RayjobApi - +from python_client.kuberay_cluster_api import RayClusterApi from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from ...common.utils import get_current_namespace @@ -36,6 +41,8 @@ logger = logging.getLogger(__name__) +mount_path = "/home/ray/scripts" + class RayJob: """ @@ 
-143,6 +150,7 @@ def __init__( logger.info(f"Using existing cluster: {self.cluster_name}") self._api = RayjobApi() + self._cluster_api = RayClusterApi() logger.info(f"Initialized RayJob: {self.name} in namespace: {self.namespace}") @@ -153,6 +161,17 @@ def submit(self) -> str: # Validate Ray version compatibility for both cluster_config and runtime_env self._validate_ray_version_compatibility() + # Automatically handle script files for new clusters + if self._cluster_config is not None: + scripts = self._extract_script_files_from_entrypoint() + if scripts: + self._handle_script_volumes_for_new_cluster(scripts) + + # Handle script files for existing clusters + elif self._cluster_name: + scripts = self._extract_script_files_from_entrypoint() + if scripts: + self._handle_script_volumes_for_existing_cluster(scripts) # Build the RayJob custom resource rayjob_cr = self._build_rayjob_cr() @@ -323,3 +342,265 @@ def _map_to_codeflare_status( return status_mapping.get( deployment_status, (CodeflareRayJobStatus.UNKNOWN, False) ) + + def _extract_script_files_from_entrypoint(self) -> Optional[Dict[str, str]]: + """ + Extract local Python script files from entrypoint command, plus their dependencies. 
+ + Returns: + Dict of {script_name: script_content} if local scripts found, None otherwise + """ + if not self.entrypoint: + return None + + scripts = {} + # mount_path = "/home/ray/scripts" + processed_files = set() # Avoid infinite loops + + # Look for Python file patterns in entrypoint (e.g., "python script.py", "python /path/to/script.py") + python_file_pattern = r"(?:python\s+)?([./\w/]+\.py)" + matches = re.findall(python_file_pattern, self.entrypoint) + + # Process main scripts from entrypoint files + for script_path in matches: + self._process_script_and_imports( + script_path, scripts, mount_path, processed_files + ) + + # Update entrypoint paths to use mounted locations + for script_path in matches: + if script_path in [os.path.basename(s) for s in processed_files]: + old_path = script_path + new_path = f"{mount_path}/{os.path.basename(script_path)}" + self.entrypoint = self.entrypoint.replace(old_path, new_path) + + return scripts if scripts else None + + def _process_script_and_imports( + self, + script_path: str, + scripts: Dict[str, str], + mount_path: str, + processed_files: set, + ): + """Recursively process a script and its local imports""" + if script_path in processed_files: + return + + # Check if it's a local file (not already a container path) + if script_path.startswith("/home/ray/") or not os.path.isfile(script_path): + return + + processed_files.add(script_path) + + try: + with open(script_path, "r") as f: + script_content = f.read() + + script_name = os.path.basename(script_path) + scripts[script_name] = script_content + + logger.info( + f"Found local script: {script_path} -> will mount at {mount_path}/{script_name}" + ) + + # Parse imports in this script to find dependencies + self._find_local_imports( + script_content, + script_path, + lambda path: self._process_script_and_imports( + path, scripts, mount_path, processed_files + ), + ) + + except (IOError, OSError) as e: + logger.warning(f"Could not read script file {script_path}: {e}") 
+ + def _find_local_imports( + self, script_content: str, script_path: str, process_callback + ): + """ + Find local Python imports in script content and process them. + + Args: + script_content: The content of the Python script + script_path: Path to the current script (for relative imports) + process_callback: Function to call for each found local import + """ + + try: + # Parse the Python AST to find imports + tree = ast.parse(script_content) + script_dir = os.path.dirname(os.path.abspath(script_path)) + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + # Handle: import module_name + for alias in node.names: + potential_file = os.path.join(script_dir, f"{alias.name}.py") + if os.path.isfile(potential_file): + process_callback(potential_file) + + elif isinstance(node, ast.ImportFrom): + # Handle: from module_name import something + if node.module: + potential_file = os.path.join(script_dir, f"{node.module}.py") + if os.path.isfile(potential_file): + process_callback(potential_file) + + except (SyntaxError, ValueError) as e: + logger.debug(f"Could not parse imports from {script_path}: {e}") + + def _handle_script_volumes_for_new_cluster(self, scripts: Dict[str, str]): + """Handle script volumes for new clusters (uses ManagedClusterConfig).""" + # Validate ConfigMap size before creation + self._cluster_config.validate_configmap_size(scripts) + + # Build ConfigMap spec using config.py + configmap_spec = self._cluster_config.build_script_configmap_spec( + job_name=self.name, namespace=self.namespace, scripts=scripts + ) + + # Create ConfigMap via Kubernetes API + configmap_name = self._create_configmap_from_spec(configmap_spec) + + # Add volumes to cluster config (config.py handles spec building) + self._cluster_config.add_script_volumes( + configmap_name=configmap_name, mount_path="/home/ray/scripts" + ) + + def _handle_script_volumes_for_existing_cluster(self, scripts: Dict[str, str]): + """Handle script volumes for existing clusters (updates 
RayCluster CR).""" + # Create config builder for utility methods + config_builder = ManagedClusterConfig() + + # Validate ConfigMap size before creation + config_builder.validate_configmap_size(scripts) + + # Build ConfigMap spec using config.py + configmap_spec = config_builder.build_script_configmap_spec( + job_name=self.name, namespace=self.namespace, scripts=scripts + ) + + # Create ConfigMap via Kubernetes API + configmap_name = self._create_configmap_from_spec(configmap_spec) + + # Update existing RayCluster + self._update_existing_cluster_for_scripts(configmap_name, config_builder) + + def _create_configmap_from_spec(self, configmap_spec: Dict[str, Any]) -> str: + """ + Create ConfigMap from specification via Kubernetes API. + + Args: + configmap_spec: ConfigMap specification dictionary + + Returns: + str: Name of the created ConfigMap + """ + + configmap_name = configmap_spec["metadata"]["name"] + + # Convert dict spec to V1ConfigMap + configmap = client.V1ConfigMap( + metadata=client.V1ObjectMeta(**configmap_spec["metadata"]), + data=configmap_spec["data"], + ) + + # Create ConfigMap via Kubernetes API + k8s_api = client.CoreV1Api(get_api_client()) + try: + k8s_api.create_namespaced_config_map( + namespace=self.namespace, body=configmap + ) + logger.info( + f"Created ConfigMap '{configmap_name}' with {len(configmap_spec['data'])} scripts" + ) + except client.ApiException as e: + if e.status == 409: # Already exists + logger.info(f"ConfigMap '{configmap_name}' already exists, updating...") + k8s_api.replace_namespaced_config_map( + name=configmap_name, namespace=self.namespace, body=configmap + ) + else: + raise RuntimeError( + f"Failed to create ConfigMap '{configmap_name}': {e}" + ) + + return configmap_name + + # Note: This only works once the pods have been restarted as the configmaps won't be picked up until then :/ + def _update_existing_cluster_for_scripts( + self, configmap_name: str, config_builder: ManagedClusterConfig + ): + """ + Update existing 
RayCluster to add script volumes and mounts. + + Args: + configmap_name: Name of the ConfigMap containing scripts + config_builder: ManagedClusterConfig instance for building specs + """ + + # Get existing RayCluster + api_instance = client.CustomObjectsApi(get_api_client()) + try: + ray_cluster = self._cluster_api.get_ray_cluster( + name=self.cluster_name, + k8s_namespace=self.namespace, + ) + except client.ApiException as e: + raise RuntimeError(f"Failed to get RayCluster '{self.cluster_name}': {e}") + + # Build script volume and mount specifications using config.py + script_volume, script_mount = config_builder.build_script_volume_specs( + configmap_name=configmap_name, mount_path="/home/ray/scripts" + ) + + # Helper function to check for duplicate volumes/mounts + def volume_exists(volumes_list, volume_name): + return any(v.get("name") == volume_name for v in volumes_list) + + def mount_exists(mounts_list, mount_name): + return any(m.get("name") == mount_name for m in mounts_list) + + # Add volumes and mounts to head group + head_spec = ray_cluster["spec"]["headGroupSpec"]["template"]["spec"] + if "volumes" not in head_spec: + head_spec["volumes"] = [] + if not volume_exists(head_spec["volumes"], script_volume["name"]): + head_spec["volumes"].append(script_volume) + + head_container = head_spec["containers"][0] # Ray head container + if "volumeMounts" not in head_container: + head_container["volumeMounts"] = [] + if not mount_exists(head_container["volumeMounts"], script_mount["name"]): + head_container["volumeMounts"].append(script_mount) + + # Add volumes and mounts to worker groups + for worker_group in ray_cluster["spec"]["workerGroupSpecs"]: + worker_spec = worker_group["template"]["spec"] + if "volumes" not in worker_spec: + worker_spec["volumes"] = [] + if not volume_exists(worker_spec["volumes"], script_volume["name"]): + worker_spec["volumes"].append(script_volume) + + worker_container = worker_spec["containers"][0] # Ray worker container + if 
"volumeMounts" not in worker_container: + worker_container["volumeMounts"] = [] + if not mount_exists(worker_container["volumeMounts"], script_mount["name"]): + worker_container["volumeMounts"].append(script_mount) + + # Update the RayCluster + try: + self._cluster_api.patch_ray_cluster( + name=self.cluster_name, + ray_patch=ray_cluster, + k8s_namespace=self.namespace, + ) + logger.info( + f"Updated RayCluster '{self.cluster_name}' with script volumes from ConfigMap '{configmap_name}'" + ) + except client.ApiException as e: + raise RuntimeError( + f"Failed to update RayCluster '{self.cluster_name}': {e}" + ) diff --git a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test_config.py index 80736295..7d7864c5 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_config.py +++ b/src/codeflare_sdk/ray/rayjobs/test_config.py @@ -131,3 +131,42 @@ def test_ray_usage_stats_with_other_user_envs(): # Total count should be correct (3 user envs) assert len(config.envs) == 3 + + +def test_add_script_volumes_existing_volume_early_return(): + """Test add_script_volumes early return when volume already exists.""" + from kubernetes.client import V1Volume, V1ConfigMapVolumeSource + + config = ManagedClusterConfig() + + # Pre-add a volume with same name + existing_volume = V1Volume( + name="ray-job-scripts", + config_map=V1ConfigMapVolumeSource(name="existing-scripts"), + ) + config.volumes.append(existing_volume) + + # Should return early and not add duplicate + config.add_script_volumes(configmap_name="new-scripts") + + # Should still have only one volume, no mount added + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 0 + + +def test_add_script_volumes_existing_mount_early_return(): + """Test add_script_volumes early return when mount already exists.""" + from kubernetes.client import V1VolumeMount + + config = ManagedClusterConfig() + + # Pre-add a mount with same name + existing_mount = V1VolumeMount(name="ray-job-scripts", 
mount_path="/existing/path") + config.volume_mounts.append(existing_mount) + + # Should return early and not add duplicate + config.add_script_volumes(configmap_name="new-scripts") + + # Should still have only one mount, no volume added + assert len(config.volumes) == 0 + assert len(config.volume_mounts) == 1 diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 6827ed03..0afd62d5 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -13,6 +13,7 @@ # limitations under the License. import pytest +import os from unittest.mock import MagicMock, patch from codeflare_sdk.common.utils.constants import CUDA_RUNTIME_IMAGE, RAY_VERSION @@ -31,6 +32,9 @@ def test_rayjob_submit_success(mocker): mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance + # Mock the RayClusterApi class + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + # Configure the mock to return success when submit is called mock_api_instance.submit.return_value = {"metadata": {"name": "test-rayjob"}} @@ -75,6 +79,9 @@ def test_rayjob_submit_failure(mocker): mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance + # Mock the RayClusterApi class + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + # Configure the mock to return failure (False/None) when submit_job is called mock_api_instance.submit_job.return_value = None @@ -1143,3 +1150,725 @@ class MockClusterConfig: rayjob._cluster_config = MockClusterConfig() rayjob._validate_cluster_config_image() # Should not raise + + +def test_extract_script_files_from_entrypoint_single_script(mocker, tmp_path): + """Test extracting a single script file from entrypoint.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Create a test script + test_script = 
tmp_path / "test_script.py" + test_script.write_text("print('Hello World!')") + + # Change to temp directory for test + original_cwd = os.getcwd() + os.chdir(tmp_path) + + try: + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint=f"python {test_script.name}", + namespace="test-namespace", + ) + + scripts = rayjob._extract_script_files_from_entrypoint() + + assert scripts is not None + assert test_script.name in scripts + assert scripts[test_script.name] == "print('Hello World!')" + assert f"/home/ray/scripts/{test_script.name}" in rayjob.entrypoint + finally: + os.chdir(original_cwd) + + +def test_extract_script_files_with_dependencies(mocker, tmp_path): + """Test extracting script files with local dependencies.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Create main script and dependency + main_script = tmp_path / "main.py" + main_script.write_text( + """ +import helper +from utils import calculate + +def main(): + helper.do_something() + result = calculate(42) + print(f"Result: {result}") + +if __name__ == "__main__": + main() +""" + ) + + helper_script = tmp_path / "helper.py" + helper_script.write_text( + """ +def do_something(): + print("Doing something...") +""" + ) + + utils_script = tmp_path / "utils.py" + utils_script.write_text( + """ +def calculate(x): + return x * 2 +""" + ) + + # Change to temp directory for test + original_cwd = os.getcwd() + os.chdir(tmp_path) + + try: + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python main.py", + namespace="test-namespace", + ) + + scripts = rayjob._extract_script_files_from_entrypoint() + + assert scripts is not None + assert len(scripts) == 3 + assert "main.py" in scripts + assert "helper.py" in scripts + assert "utils.py" in scripts + + # Verify content + assert "import helper" in scripts["main.py"] + 
assert "def do_something" in scripts["helper.py"] + assert "def calculate" in scripts["utils.py"] + + finally: + os.chdir(original_cwd) + + +def test_extract_script_files_no_local_scripts(mocker): + """Test entrypoint with no local script files.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python -c 'print(\"hello world\")'", + namespace="test-namespace", + ) + + scripts = rayjob._extract_script_files_from_entrypoint() + + assert scripts is None + + +def test_extract_script_files_nonexistent_script(mocker): + """Test entrypoint referencing non-existent script.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python nonexistent.py", + namespace="test-namespace", + ) + + scripts = rayjob._extract_script_files_from_entrypoint() + + assert scripts is None + + +def test_build_script_configmap_spec(): + """Test building ConfigMap specification for scripts.""" + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + config = ManagedClusterConfig() + scripts = {"main.py": "print('main')", "helper.py": "print('helper')"} + + spec = config.build_script_configmap_spec( + job_name="test-job", namespace="test-namespace", scripts=scripts + ) + + assert spec["apiVersion"] == "v1" + assert spec["kind"] == "ConfigMap" + assert spec["metadata"]["name"] == "test-job-scripts" + assert spec["metadata"]["namespace"] == "test-namespace" + assert spec["data"] == scripts + + +def test_build_script_volume_specs(): + """Test building volume and mount specifications for scripts.""" + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + 
config = ManagedClusterConfig() + + volume_spec, mount_spec = config.build_script_volume_specs( + configmap_name="test-scripts", mount_path="/custom/path" + ) + + assert volume_spec["name"] == "ray-job-scripts" + assert volume_spec["configMap"]["name"] == "test-scripts" + + assert mount_spec["name"] == "ray-job-scripts" + assert mount_spec["mountPath"] == "/custom/path" + + +def test_add_script_volumes(): + """Test adding script volumes to cluster configuration.""" + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + config = ManagedClusterConfig() + + # Initially no volumes + assert len(config.volumes) == 0 + assert len(config.volume_mounts) == 0 + + config.add_script_volumes(configmap_name="test-scripts") + + # Should have added one volume and one mount + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 1 + + volume = config.volumes[0] + mount = config.volume_mounts[0] + + assert volume.name == "ray-job-scripts" + assert volume.config_map.name == "test-scripts" + + assert mount.name == "ray-job-scripts" + assert mount.mount_path == "/home/ray/scripts" + + +def test_add_script_volumes_duplicate_prevention(): + """Test that adding script volumes twice doesn't create duplicates.""" + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + config = ManagedClusterConfig() + + # Add volumes twice + config.add_script_volumes(configmap_name="test-scripts") + config.add_script_volumes(configmap_name="test-scripts") + + # Should still have only one of each + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 1 + + +def test_create_configmap_from_spec(mocker): + """Test creating ConfigMap via Kubernetes API.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock Kubernetes API + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_api_instance = MagicMock() + 
mock_k8s_api.return_value = mock_api_instance + + # Mock get_api_client + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + result = rayjob._create_configmap_from_spec(configmap_spec) + + assert result == "test-scripts" + mock_api_instance.create_namespaced_config_map.assert_called_once() + + +def test_create_configmap_already_exists(mocker): + """Test creating ConfigMap when it already exists (409 conflict).""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock Kubernetes API + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_api_instance = MagicMock() + mock_k8s_api.return_value = mock_api_instance + + # Mock get_api_client + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + # Mock API exception for conflict + from kubernetes.client import ApiException + + mock_api_instance.create_namespaced_config_map.side_effect = ApiException( + status=409 + ) + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + result = rayjob._create_configmap_from_spec(configmap_spec) + + assert result == "test-scripts" + mock_api_instance.create_namespaced_config_map.assert_called_once() + mock_api_instance.replace_namespaced_config_map.assert_called_once() + + +def test_handle_script_volumes_for_new_cluster(mocker, 
tmp_path): + """Test handling script volumes for new cluster creation.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock ConfigMap creation + mock_create = mocker.patch.object(RayJob, "_create_configmap_from_spec") + mock_create.return_value = "test-job-scripts" + + # Create test script + test_script = tmp_path / "test.py" + test_script.write_text("print('test')") + + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + cluster_config = ManagedClusterConfig() + + original_cwd = os.getcwd() + os.chdir(tmp_path) + + try: + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python test.py", + namespace="test-namespace", + ) + + scripts = {"test.py": "print('test')"} + rayjob._handle_script_volumes_for_new_cluster(scripts) + + # Verify ConfigMap creation was called + mock_create.assert_called_once() + + # Verify volumes were added to cluster config + assert len(cluster_config.volumes) == 1 + assert len(cluster_config.volume_mounts) == 1 + + finally: + os.chdir(original_cwd) + + +def test_ast_parsing_import_detection(mocker, tmp_path): + """Test AST parsing correctly detects import statements.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Create scripts with different import patterns + main_script = tmp_path / "main.py" + main_script.write_text( + """# Different import patterns +import helper +from utils import func1, func2 +from local_module import MyClass +import os # Standard library - should be ignored +import non_existent # Non-local - should be ignored +""" + ) + + helper_script = tmp_path / "helper.py" + helper_script.write_text("def helper_func(): pass") + + utils_script = tmp_path / "utils.py" + utils_script.write_text( + """def func1(): pass +def 
func2(): pass +""" + ) + + local_module_script = tmp_path / "local_module.py" + local_module_script.write_text("class MyClass: pass") + + original_cwd = os.getcwd() + os.chdir(tmp_path) + + try: + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python main.py", + namespace="test-namespace", + ) + + scripts = rayjob._extract_script_files_from_entrypoint() + + # Should find all local dependencies + assert scripts is not None + assert len(scripts) == 4 # main + 3 dependencies + assert "main.py" in scripts + assert "helper.py" in scripts + assert "utils.py" in scripts + assert "local_module.py" in scripts + + finally: + os.chdir(original_cwd) + + +def test_rayjob_submit_with_scripts_new_cluster(mocker, tmp_path): + """Test RayJob submission with script detection for new cluster.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + mock_api_instance.submit_job.return_value = True + + # Mock ConfigMap creation + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_k8s_instance = MagicMock() + mock_k8s_api.return_value = mock_k8s_instance + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + # Create test script + test_script = tmp_path / "test.py" + test_script.write_text("print('Hello from script!')") + + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + cluster_config = ManagedClusterConfig() + + original_cwd = os.getcwd() + os.chdir(tmp_path) + + try: + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python test.py", + namespace="test-namespace", + ) + + # Submit should detect scripts and handle them + result = rayjob.submit() + + assert result == "test-job" + + # Verify ConfigMap was created + 
mock_k8s_instance.create_namespaced_config_map.assert_called_once() + + # Verify volumes were added + assert len(cluster_config.volumes) == 1 + assert len(cluster_config.volume_mounts) == 1 + + # Verify entrypoint was updated + assert "/home/ray/scripts/test.py" in rayjob.entrypoint + + finally: + os.chdir(original_cwd) + + +def test_process_script_and_imports_io_error(mocker, tmp_path): + """Test _process_script_and_imports handles IO errors gracefully.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + scripts = {} + processed_files = set() + + # Mock os.path.isfile to return True but open() to raise IOError + mocker.patch("os.path.isfile", return_value=True) + mocker.patch("builtins.open", side_effect=IOError("Permission denied")) + + # Should handle the error gracefully and not crash + rayjob._process_script_and_imports( + "test.py", scripts, "/home/ray/scripts", processed_files + ) + + # Should add to processed_files but not to scripts (due to error) + assert "test.py" in processed_files + assert len(scripts) == 0 + + +def test_process_script_and_imports_container_path_skip(mocker): + """Test that scripts already in container paths are skipped.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + scripts = {} + processed_files = set() + + # Test script path already in container + rayjob._process_script_and_imports( + "/home/ray/scripts/test.py", scripts, "/home/ray/scripts", processed_files + ) + + # Should skip processing + assert len(scripts) == 0 + assert len(processed_files) == 0 + + +def 
test_process_script_and_imports_already_processed(mocker, tmp_path): + """Test that already processed scripts are skipped (infinite loop prevention).""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + scripts = {} + processed_files = {"test.py"} # Already processed + + # Should return early without processing + rayjob._process_script_and_imports( + "test.py", scripts, "/home/ray/scripts", processed_files + ) + + # Should remain unchanged + assert len(scripts) == 0 + assert processed_files == {"test.py"} + + +def test_find_local_imports_syntax_error(mocker): + """Test _find_local_imports handles syntax errors gracefully.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + # Invalid Python syntax + invalid_script_content = "import helper\ndef invalid_syntax(" + + mock_callback = mocker.Mock() + + # Should handle syntax error gracefully + rayjob._find_local_imports(invalid_script_content, "test.py", mock_callback) + + # Callback should not be called due to syntax error + mock_callback.assert_not_called() + + +def test_create_configmap_api_error_non_409(mocker): + """Test _create_configmap_from_spec handles non-409 API errors.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock Kubernetes API with 500 error + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_api_instance = mocker.Mock() + mock_k8s_api.return_value = mock_api_instance + + from kubernetes.client import ApiException + + 
mock_api_instance.create_namespaced_config_map.side_effect = ApiException( + status=500 + ) + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + # Should raise RuntimeError for non-409 API errors + with pytest.raises(RuntimeError, match="Failed to create ConfigMap"): + rayjob._create_configmap_from_spec(configmap_spec) + + +def test_update_existing_cluster_get_cluster_error(mocker): + """Test _update_existing_cluster_for_scripts handles get cluster errors.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_rayjob_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock RayClusterApi with error + mock_cluster_api_class = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi" + ) + mock_cluster_api_instance = mocker.Mock() + mock_cluster_api_class.return_value = mock_cluster_api_instance + + from kubernetes.client import ApiException + + mock_cluster_api_instance.get_ray_cluster.side_effect = ApiException(status=404) + + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + config_builder = ManagedClusterConfig() + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + # Should raise RuntimeError when getting cluster fails + with pytest.raises(RuntimeError, match="Failed to get RayCluster"): + rayjob._update_existing_cluster_for_scripts("test-scripts", config_builder) + + +def test_update_existing_cluster_patch_error(mocker): + """Test _update_existing_cluster_for_scripts handles patch errors.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_rayjob_api = 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock RayClusterApi + mock_cluster_api_class = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi" + ) + mock_cluster_api_instance = mocker.Mock() + mock_cluster_api_class.return_value = mock_cluster_api_instance + + # Mock successful get but failed patch + mock_cluster_api_instance.get_ray_cluster.return_value = { + "spec": { + "headGroupSpec": { + "template": { + "spec": {"volumes": [], "containers": [{"volumeMounts": []}]} + } + }, + "workerGroupSpecs": [ + { + "template": { + "spec": {"volumes": [], "containers": [{"volumeMounts": []}]} + } + } + ], + } + } + + from kubernetes.client import ApiException + + mock_cluster_api_instance.patch_ray_cluster.side_effect = ApiException(status=500) + + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + config_builder = ManagedClusterConfig() + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + # Should raise RuntimeError when patching fails + with pytest.raises(RuntimeError, match="Failed to update RayCluster"): + rayjob._update_existing_cluster_for_scripts("test-scripts", config_builder) + + +def test_extract_script_files_empty_entrypoint(mocker): + """Test script extraction with empty entrypoint.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="", # Empty entrypoint + namespace="test-namespace", + ) + + scripts = rayjob._extract_script_files_from_entrypoint() + + assert scripts is None + + +def test_add_script_volumes_existing_volume_skip(): + """Test add_script_volumes skips when volume already exists (missing coverage).""" + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + from kubernetes.client import V1Volume, V1ConfigMapVolumeSource 
+ + config = ManagedClusterConfig() + + # Pre-add a volume with same name + existing_volume = V1Volume( + name="ray-job-scripts", + config_map=V1ConfigMapVolumeSource(name="existing-scripts"), + ) + config.volumes.append(existing_volume) + + # Should skip adding duplicate volume + config.add_script_volumes(configmap_name="new-scripts") + + # Should still have only one volume + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 0 # Mount not added due to volume skip + + +def test_add_script_volumes_existing_mount_skip(): + """Test add_script_volumes skips when mount already exists (missing coverage).""" + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + from kubernetes.client import V1VolumeMount + + config = ManagedClusterConfig() + + # Pre-add a mount with same name + existing_mount = V1VolumeMount(name="ray-job-scripts", mount_path="/existing/path") + config.volume_mounts.append(existing_mount) + + # Should skip adding duplicate mount + config.add_script_volumes(configmap_name="new-scripts") + + # Should still have only one mount and no volume added + assert len(config.volumes) == 0 # Volume not added due to mount skip + assert len(config.volume_mounts) == 1 From f09fc017c9fac39862ad8a04798a7e9f09dbf67c Mon Sep 17 00:00:00 2001 From: Pat O'Connor Date: Wed, 27 Aug 2025 13:27:17 +0100 Subject: [PATCH 12/33] added kubeconfig loads to test Signed-off-by: Pat O'Connor --- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 31 ++++++++++++++++++++ src/codeflare_sdk/ray/rayjobs/test_status.py | 14 +++++++++ 2 files changed, 45 insertions(+) diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 0afd62d5..ff7a2639 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -446,11 +446,13 @@ def test_submit_with_auto_cluster(mocker): def test_namespace_auto_detection_success(mocker): """Test successful namespace auto-detection.""" 
+ mocker.patch("kubernetes.config.load_kube_config") mocker.patch( "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", return_value="detected-ns", ) mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") rayjob = RayJob( job_name="test-job", entrypoint="python script.py", cluster_name="test-cluster" @@ -461,10 +463,12 @@ def test_namespace_auto_detection_success(mocker): def test_namespace_auto_detection_fallback(mocker): """Test that namespace auto-detection failure raises an error.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch( "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", return_value=None ) mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") with pytest.raises(ValueError, match="Could not auto-detect Kubernetes namespace"): RayJob( @@ -476,11 +480,13 @@ def test_namespace_auto_detection_fallback(mocker): def test_namespace_explicit_override(mocker): """Test that explicit namespace overrides auto-detection.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch( "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", return_value="detected-ns", ) mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") rayjob = RayJob( job_name="test-job", @@ -494,7 +500,9 @@ def test_namespace_explicit_override(mocker): def test_shutdown_behavior_with_cluster_config(mocker): """Test that shutdown_after_job_finishes is True when cluster_config is provided.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig @@ -512,7 +520,9 @@ def test_shutdown_behavior_with_cluster_config(mocker): def 
test_shutdown_behavior_with_existing_cluster(mocker): """Test that shutdown_after_job_finishes is False when using existing cluster.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") rayjob = RayJob( job_name="test-job", @@ -526,7 +536,9 @@ def test_shutdown_behavior_with_existing_cluster(mocker): def test_rayjob_with_rayjob_cluster_config(mocker): """Test RayJob with the new ManagedClusterConfig.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig @@ -549,7 +561,9 @@ def test_rayjob_with_rayjob_cluster_config(mocker): def test_rayjob_cluster_config_validation(mocker): """Test validation of ManagedClusterConfig parameters.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig @@ -621,7 +635,9 @@ def test_build_ray_cluster_spec_integration(mocker): def test_rayjob_with_runtime_env(mocker): """Test RayJob with runtime environment configuration.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") runtime_env = {"pip": ["numpy", "pandas"]} @@ -642,7 +658,9 @@ def test_rayjob_with_runtime_env(mocker): def test_rayjob_with_active_deadline_and_ttl(mocker): """Test RayJob with both active deadline and TTL settings.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") rayjob = RayJob( job_name="test-job", @@ -664,7 
+682,9 @@ def test_rayjob_with_active_deadline_and_ttl(mocker): def test_rayjob_cluster_name_generation_with_config(mocker): """Test cluster name generation when using cluster_config.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig @@ -684,7 +704,9 @@ def test_rayjob_cluster_name_generation_with_config(mocker): def test_rayjob_namespace_propagation_to_cluster_config(mocker): """Test that job namespace is propagated to cluster config when None.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") from codeflare_sdk.ray.rayjobs.rayjob import get_current_namespace @@ -719,7 +741,9 @@ def test_rayjob_error_handling_invalid_cluster_config(mocker): def test_rayjob_constructor_parameter_validation(mocker): """Test constructor parameter validation.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") # Test with valid parameters rayjob = RayJob( @@ -941,7 +965,9 @@ def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): def test_rayjob_user_override_shutdown_behavior(mocker): """Test that user can override the auto-detected shutdown behavior.""" + mocker.patch("kubernetes.config.load_kube_config") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") # Test 1: User overrides shutdown to True even when using existing cluster rayjob_existing_override = RayJob( @@ -1009,6 +1035,7 @@ def test_submit_with_cluster_config_compatible_image_passes(self, mocker): """Test that submission passes with compatible cluster_config image.""" 
mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance mock_api_instance.submit_job.return_value = True @@ -1030,6 +1057,7 @@ def test_submit_with_cluster_config_incompatible_image_fails(self, mocker): """Test that submission fails with incompatible cluster_config image.""" mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance @@ -1052,6 +1080,7 @@ def test_validate_ray_version_compatibility_method(self, mocker): """Test the _validate_ray_version_compatibility method directly.""" mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance @@ -1087,6 +1116,7 @@ def test_validate_cluster_config_image_method(self, mocker): """Test the _validate_cluster_config_image method directly.""" mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = MagicMock() mock_api_class.return_value = mock_api_instance @@ -1122,6 +1152,7 @@ def test_validate_cluster_config_image_edge_cases(self, mocker): """Test edge cases in _validate_cluster_config_image method.""" mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = MagicMock() 
mock_api_class.return_value = mock_api_instance diff --git a/src/codeflare_sdk/ray/rayjobs/test_status.py b/src/codeflare_sdk/ray/rayjobs/test_status.py index 6d2ce946..f3ed7ef8 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_status.py +++ b/src/codeflare_sdk/ray/rayjobs/test_status.py @@ -24,8 +24,11 @@ def test_rayjob_status(mocker): """ Test the RayJob status method with different deployment statuses. """ + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") # Mock the RayjobApi to avoid actual Kubernetes calls mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = mock_api_class.return_value # Create a RayJob instance @@ -101,7 +104,9 @@ def test_rayjob_status_unknown_deployment_status(mocker): """ Test handling of unknown deployment status from the API. """ + mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = mock_api_class.return_value rayjob = RayJob( @@ -129,7 +134,9 @@ def test_rayjob_status_missing_fields(mocker): """ Test handling of API response with missing fields. """ + mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = mock_api_class.return_value rayjob = RayJob( @@ -154,8 +161,11 @@ def test_map_to_codeflare_status(mocker): """ Test the _map_to_codeflare_status helper method directly. 
""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") # Mock the RayjobApi constructor to avoid authentication issues mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") rayjob = RayJob( job_name="test-job", @@ -217,8 +227,10 @@ def test_rayjob_status_print_no_job_found(mocker): """ Test that pretty_print.print_no_job_found is called when no job is found and print_to_console=True. """ + mocker.patch("kubernetes.config.load_kube_config") # Mock the RayjobApi and pretty_print mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = mock_api_class.return_value mock_print_no_job_found = mocker.patch( "codeflare_sdk.ray.rayjobs.pretty_print.print_no_job_found" @@ -248,8 +260,10 @@ def test_rayjob_status_print_job_found(mocker): """ Test that pretty_print.print_job_status is called when job is found and print_to_console=True. 
""" + mocker.patch("kubernetes.config.load_kube_config") # Mock the RayjobApi and pretty_print mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") mock_api_instance = mock_api_class.return_value mock_print_job_status = mocker.patch( "codeflare_sdk.ray.rayjobs.pretty_print.print_job_status" From b3c597b0db3d1457ebd7e4490d282c2d369d956a Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 13 Aug 2025 18:40:52 +0100 Subject: [PATCH 13/33] feat(RHOAIENG-26488): add lifecycled RayCluster demo notebook for RayJobs --- .../5_rayjob_lifecycled_cluster.ipynb | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb diff --git a/demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb b/demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb new file mode 100644 index 00000000..7f4fdc57 --- /dev/null +++ b/demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb @@ -0,0 +1,158 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9259e514", + "metadata": {}, + "source": [ + "# Submitting a RayJob which lifecycles its own RayCluster\n", + "\n", + "In this notebook, we will go through the basics of using the SDK to:\n", + " * Define a RayCluster configuration\n", + " * Use this configuration alongside a RayJob definition\n", + " * Submit the RayJob, and allow Kuberay Operator to lifecycle the RayCluster for the RayJob" + ] + }, + { + "cell_type": "markdown", + "id": "18136ea7", + "metadata": {}, + "source": [ + "## Defining and Submitting the RayJob" + ] + }, + { + "cell_type": "markdown", + "id": "a1c2545d", + "metadata": {}, + "source": [ + "First, we'll need to import the relevant CodeFlare SDK packages. You can do this by executing the below cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51e18292", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import RayJob, ManagedClusterConfig, TokenAuthentication" + ] + }, + { + "cell_type": "markdown", + "id": "649c5911", + "metadata": {}, + "source": [ + "Execute the below cell to authenticate the notebook via OpenShift.\n", + "\n", + "**TODO: Add guide to authenticate locally.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc364888", + "metadata": {}, + "outputs": [], + "source": [ + "auth = TokenAuthentication(\n", + " token = \"XXXXX\",\n", + " server = \"XXXXX\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "markdown", + "id": "5581eca9", + "metadata": {}, + "source": [ + "Next we'll need to define the ManagedClusterConfig. Kuberay will use this to spin up a short-lived RayCluster that will only exist as long as the job does." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3094c60a", + "metadata": {}, + "outputs": [], + "source": [ + "cluster_config = ManagedClusterConfig(\n", + " num_workers=2,\n", + " worker_cpu_requests=1,\n", + " worker_cpu_limits=1,\n", + " worker_memory_requests=4,\n", + " worker_memory_limits=4,\n", + " head_accelerators={'nvidia.com/gpu': 0},\n", + " worker_accelerators={'nvidia.com/gpu': 0},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "02a2b32b", + "metadata": {}, + "source": [ + "Lastly we can pass the ManagedClusterConfig into the RayJob and submit it. You do not need to worry about tearing down the cluster when the job has completed, that is handled for you!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e905ccea", + "metadata": {}, + "outputs": [], + "source": [ + "job = RayJob(\n", + " job_name=\"demo-rayjob\",\n", + " entrypoint=\"python -c 'print(\\\"Hello from RayJob!\\\")'\",\n", + " cluster_config=cluster_config,\n", + " namespace=\"your-namespace\"\n", + ")\n", + "\n", + "job.submit()" + ] + }, + { + "cell_type": "markdown", + "id": "f3612de2", + "metadata": {}, + "source": [ + "We can check the status of our job by executing the below cell. If it's not up immediately, run the cell a few more times until you see that it's in a 'running' state." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96d92f93", + "metadata": {}, + "outputs": [], + "source": [ + "job.status()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 723efb5b16d29d35febedaff3c6d2b5e79217317 Mon Sep 17 00:00:00 2001 From: Pawel Paszki Date: Fri, 29 Aug 2025 10:27:24 +0100 Subject: [PATCH 14/33] test: e2e rayjob --- tests/e2e/rayjob/__init__.py | 0 .../e2e/rayjob/existing_cluster_oauth_test.py | 139 ++++++++++++++ .../rayjob/lifecycled_cluster_oauth_test.py | 170 ++++++++++++++++++ .../ray_version_validation_oauth_test.py | 145 +++++++++++++++ 4 files changed, 454 insertions(+) create mode 100644 tests/e2e/rayjob/__init__.py create mode 100644 tests/e2e/rayjob/existing_cluster_oauth_test.py create mode 100644 tests/e2e/rayjob/lifecycled_cluster_oauth_test.py create mode 100644 tests/e2e/rayjob/ray_version_validation_oauth_test.py diff --git a/tests/e2e/rayjob/__init__.py b/tests/e2e/rayjob/__init__.py new file mode 
100644 index 00000000..e69de29b diff --git a/tests/e2e/rayjob/existing_cluster_oauth_test.py b/tests/e2e/rayjob/existing_cluster_oauth_test.py new file mode 100644 index 00000000..5face339 --- /dev/null +++ b/tests/e2e/rayjob/existing_cluster_oauth_test.py @@ -0,0 +1,139 @@ +import pytest +import sys +import os +from time import sleep + +# Add the parent directory to the path to import support +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from support import * + +from codeflare_sdk import ( + Cluster, + ClusterConfiguration, + TokenAuthentication, +) +from codeflare_sdk.ray.rayjobs import RayJob +from codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus + +# This test creates a Ray Cluster and then submits a RayJob against the existing cluster on OpenShift + + +@pytest.mark.openshift +class TestRayJobExistingClusterOauth: + def setup_method(self): + initialize_kubernetes_client(self) + + def teardown_method(self): + delete_namespace(self) + delete_kueue_resources(self) + + def test_rayjob_against_existing_cluster_oauth(self): + self.setup_method() + create_namespace(self) + create_kueue_resources(self) + self.run_rayjob_against_existing_cluster_oauth() + + def run_rayjob_against_existing_cluster_oauth(self): + ray_image = get_ray_image() + + auth = TokenAuthentication( + token=run_oc_command(["whoami", "--show-token=true"]), + server=run_oc_command(["whoami", "--show-server=true"]), + skip_tls=True, + ) + auth.login() + + cluster_name = "existing-cluster" + + cluster = Cluster( + ClusterConfiguration( + name=cluster_name, + namespace=self.namespace, + num_workers=1, + head_cpu_requests="500m", + head_cpu_limits="500m", + worker_cpu_requests=1, + worker_cpu_limits=1, + worker_memory_requests=1, + worker_memory_limits=4, + image=ray_image, + write_to_file=True, + verify_tls=False, + ) + ) + + cluster.apply() + cluster.status() + cluster.wait_ready() + cluster.status() + cluster.details() + + print(f"Ray cluster '{cluster_name}' is 
ready!") + + job_name = "existing-cluster-rayjob" + + rayjob = RayJob( + job_name=job_name, + cluster_name=cluster_name, + namespace=self.namespace, + entrypoint="python -c \"import ray; ray.init(); print('Hello from RayJob!'); print(f'Ray version: {ray.__version__}'); import time; time.sleep(30); print('RayJob completed successfully!')\"", + runtime_env={ + "pip": ["torch", "pytorch-lightning", "torchmetrics", "torchvision"], + "env_vars": get_setup_env_variables(ACCELERATOR="cpu"), + }, + shutdown_after_job_finishes=False, + ) + + # Submit the job + print( + f"Submitting RayJob '{job_name}' against existing cluster '{cluster_name}'" + ) + submission_result = rayjob.submit() + assert ( + submission_result == job_name + ), f"Job submission failed, expected {job_name}, got {submission_result}" + print(f"Successfully submitted RayJob '{job_name}'!") + + # Monitor the job status until completion + self.monitor_rayjob_completion(rayjob) + + # Cleanup - manually tear down the cluster since job won't do it + print("🧹 Cleaning up Ray cluster") + cluster.down() + + def monitor_rayjob_completion(self, rayjob: RayJob, timeout: int = 900): + """ + Monitor a RayJob until it completes or fails. 
+ Args: + rayjob: The RayJob instance to monitor + timeout: Maximum time to wait in seconds (default: 15 minutes) + """ + print(f"Monitoring RayJob '{rayjob.name}' status...") + + elapsed_time = 0 + check_interval = 10 # Check every 10 seconds + + while elapsed_time < timeout: + status, ready = rayjob.status(print_to_console=True) + + # Check if job has completed (either successfully or failed) + if status == CodeflareRayJobStatus.COMPLETE: + print(f"RayJob '{rayjob.name}' completed successfully!") + return + elif status == CodeflareRayJobStatus.FAILED: + raise AssertionError(f"RayJob '{rayjob.name}' failed!") + elif status == CodeflareRayJobStatus.RUNNING: + print(f"RayJob '{rayjob.name}' is still running...") + elif status == CodeflareRayJobStatus.UNKNOWN: + print(f"RayJob '{rayjob.name}' status is unknown") + + # Wait before next check + sleep(check_interval) + elapsed_time += check_interval + + # If we reach here, the job has timed out + final_status, _ = rayjob.status(print_to_console=True) + raise TimeoutError( + f"RayJob '{rayjob.name}' did not complete within {timeout} seconds. 
" + f"Final status: {final_status}" + ) diff --git a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py new file mode 100644 index 00000000..54186de3 --- /dev/null +++ b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py @@ -0,0 +1,170 @@ +import pytest +import sys +import os +from time import sleep + +# Add the parent directory to the path to import support +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from support import * + +from codeflare_sdk import ( + TokenAuthentication, + RayJob, + ManagedClusterConfig, +) +from codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus + +# This test creates a RayJob that will create and lifecycle its own cluster on OpenShift + + +@pytest.mark.openshift +class TestRayJobLifecycledClusterOauth: + def setup_method(self): + initialize_kubernetes_client(self) + + def teardown_method(self): + delete_namespace(self) + delete_kueue_resources(self) + + def test_rayjob_with_lifecycled_cluster_oauth(self): + self.setup_method() + create_namespace(self) + create_kueue_resources(self) + self.run_rayjob_with_lifecycled_cluster_oauth() + + def run_rayjob_with_lifecycled_cluster_oauth(self): + ray_image = get_ray_image() + + auth = TokenAuthentication( + token=run_oc_command(["whoami", "--show-token=true"]), + server=run_oc_command(["whoami", "--show-server=true"]), + skip_tls=True, + ) + auth.login() + + job_name = "lifecycled-cluster-rayjob" + + # Create cluster configuration for auto-creation + cluster_config = ManagedClusterConfig( + head_cpu_requests="500m", + head_cpu_limits="500m", + head_memory_requests=1, + head_memory_limits=4, + num_workers=1, + worker_cpu_requests="500m", + worker_cpu_limits="500m", + worker_memory_requests=1, + worker_memory_limits=4, + image=ray_image, + ) + + # Create RayJob with embedded cluster - will auto-create and manage cluster lifecycle + rayjob = RayJob( + job_name=job_name, + cluster_config=cluster_config, # This triggers 
auto-cluster creation + namespace=self.namespace, + entrypoint="python -c \"import ray; ray.init(); print('Hello from auto-created cluster!'); print(f'Ray version: {ray.__version__}'); import time; time.sleep(30); print('RayJob completed successfully!')\"", + runtime_env={ + "pip": ["torch", "pytorch-lightning", "torchmetrics", "torchvision"], + "env_vars": get_setup_env_variables(ACCELERATOR="cpu"), + }, + shutdown_after_job_finishes=True, # Auto-cleanup cluster after job finishes + ttl_seconds_after_finished=30, # Wait 30s after job completion before cleanup + ) + + # Submit the job + print( + f"Submitting RayJob '{job_name}' with auto-cluster creation and lifecycle management" + ) + submission_result = rayjob.submit() + assert ( + submission_result == job_name + ), f"Job submission failed, expected {job_name}, got {submission_result}" + print( + f"Successfully submitted RayJob '{job_name}' with cluster '{rayjob.cluster_name}'!" + ) + + # Monitor the job status until completion + self.monitor_rayjob_completion(rayjob) + + # Verify cluster auto-cleanup + print("🔍 Verifying cluster auto-cleanup after job completion...") + self.verify_cluster_cleanup(rayjob.cluster_name, timeout=60) + + def monitor_rayjob_completion(self, rayjob: RayJob, timeout: int = 900): + """ + Monitor a RayJob until it completes or fails. 
+ Args: + rayjob: The RayJob instance to monitor + timeout: Maximum time to wait in seconds (default: 15 minutes) + """ + print(f"Monitoring RayJob '{rayjob.name}' status...") + + elapsed_time = 0 + check_interval = 10 # Check every 10 seconds + + while elapsed_time < timeout: + status, ready = rayjob.status(print_to_console=True) + + # Check if job has completed (either successfully or failed) + if status == CodeflareRayJobStatus.COMPLETE: + print(f"RayJob '{rayjob.name}' completed successfully!") + return + elif status == CodeflareRayJobStatus.FAILED: + raise AssertionError(f"RayJob '{rayjob.name}' failed!") + elif status == CodeflareRayJobStatus.RUNNING: + print(f"RayJob '{rayjob.name}' is still running...") + elif status == CodeflareRayJobStatus.UNKNOWN: + print(f"RayJob '{rayjob.name}' status is unknown") + + # Wait before next check + sleep(check_interval) + elapsed_time += check_interval + + # If we reach here, the job has timed out + final_status, _ = rayjob.status(print_to_console=True) + raise TimeoutError( + f"RayJob '{rayjob.name}' did not complete within {timeout} seconds. " + f"Final status: {final_status}" + ) + + def verify_cluster_cleanup(self, cluster_name: str, timeout: int = 60): + """ + Verify that the cluster created by the RayJob has been cleaned up. 
+ Args: + cluster_name: The name of the cluster to check for cleanup + timeout: Maximum time to wait for cleanup in seconds (default: 1 minute) + """ + from kubernetes import client + import kubernetes.client.rest + + elapsed_time = 0 + check_interval = 5 # Check every 5 seconds + + while elapsed_time < timeout: + try: + # Try to get the RayCluster resource + custom_api = client.CustomObjectsApi() + custom_api.get_namespaced_custom_object( + group="ray.io", + version="v1", + namespace=self.namespace, + plural="rayclusters", + name=cluster_name, + ) + print(f"Cluster '{cluster_name}' still exists, waiting for cleanup...") + sleep(check_interval) + elapsed_time += check_interval + except kubernetes.client.rest.ApiException as e: + if e.status == 404: + print( + f"✅ Cluster '{cluster_name}' has been successfully cleaned up!" + ) + return + else: + raise e + + # If we reach here, the cluster was not cleaned up in time + raise TimeoutError( + f"Cluster '{cluster_name}' was not cleaned up within {timeout} seconds" + ) diff --git a/tests/e2e/rayjob/ray_version_validation_oauth_test.py b/tests/e2e/rayjob/ray_version_validation_oauth_test.py new file mode 100644 index 00000000..68c69aee --- /dev/null +++ b/tests/e2e/rayjob/ray_version_validation_oauth_test.py @@ -0,0 +1,145 @@ +import pytest +import sys +import os + +# Add the parent directory to the path to import support +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from support import * + +from codeflare_sdk import ( + TokenAuthentication, + RayJob, + ManagedClusterConfig, +) + +# This test validates Ray version compatibility checking for RayJob with cluster lifecycling scenarios + + +@pytest.mark.openshift +class TestRayJobRayVersionValidationOauth: + def setup_method(self): + initialize_kubernetes_client(self) + + def teardown_method(self): + delete_namespace(self) + delete_kueue_resources(self) + + def _create_basic_managed_cluster_config( + self, ray_image: str + ) -> ManagedClusterConfig: + 
"""Helper method to create basic managed cluster configuration.""" + return ManagedClusterConfig( + head_cpu_requests="500m", + head_cpu_limits="500m", + head_memory_requests=1, + head_memory_limits=2, + num_workers=1, + worker_cpu_requests="500m", + worker_cpu_limits="500m", + worker_memory_requests=1, + worker_memory_limits=2, + image=ray_image, + ) + + def test_rayjob_lifecycled_cluster_incompatible_ray_version_oauth(self): + """Test that RayJob creation fails when cluster config specifies incompatible Ray version.""" + self.setup_method() + create_namespace(self) + create_kueue_resources(self) + self.run_rayjob_lifecycled_cluster_incompatible_version() + + def run_rayjob_lifecycled_cluster_incompatible_version(self): + """Test Ray version validation with cluster lifecycling using incompatible image.""" + auth = TokenAuthentication( + token=run_oc_command(["whoami", "--show-token=true"]), + server=run_oc_command(["whoami", "--show-server=true"]), + skip_tls=True, + ) + auth.login() + + job_name = "incompatible-lifecycle-rayjob" + + # Create cluster configuration with incompatible Ray version (2.46.1 instead of expected 2.47.1) + incompatible_ray_image = "quay.io/modh/ray:2.46.1-py311-cu121" + + print( + f"Creating RayJob with incompatible Ray image in cluster config: {incompatible_ray_image}" + ) + + cluster_config = self._create_basic_managed_cluster_config( + incompatible_ray_image + ) + + # Create RayJob with incompatible cluster config - this should fail during submission + rayjob = RayJob( + job_name=job_name, + cluster_config=cluster_config, + namespace=self.namespace, + entrypoint="python -c 'print(\"This should not run due to version mismatch\")'", + shutdown_after_job_finishes=True, + ttl_seconds_after_finished=30, + ) + + print( + f"Attempting to submit RayJob '{job_name}' with incompatible Ray version..." 
+ ) + + # This should fail during submission due to Ray version validation + with pytest.raises(ValueError, match="Ray version mismatch detected"): + rayjob.submit() + + print( + "✅ Ray version validation correctly prevented RayJob submission with incompatible cluster config!" + ) + + def test_rayjob_lifecycled_cluster_unknown_ray_version_oauth(self): + """Test that RayJob creation succeeds with warning when Ray version cannot be determined.""" + self.setup_method() + create_namespace(self) + create_kueue_resources(self) + self.run_rayjob_lifecycled_cluster_unknown_version() + + def run_rayjob_lifecycled_cluster_unknown_version(self): + """Test Ray version validation with unknown image (should warn but not fail).""" + auth = TokenAuthentication( + token=run_oc_command(["whoami", "--show-token=true"]), + server=run_oc_command(["whoami", "--show-server=true"]), + skip_tls=True, + ) + auth.login() + + job_name = "unknown-version-rayjob" + + # Use an image where Ray version cannot be determined (SHA digest) + unknown_ray_image = "quay.io/modh/ray@sha256:6d076aeb38ab3c34a6a2ef0f58dc667089aa15826fa08a73273c629333e12f1e" + + print( + f"Creating RayJob with image where Ray version cannot be determined: {unknown_ray_image}" + ) + + cluster_config = self._create_basic_managed_cluster_config(unknown_ray_image) + + # Create RayJob with unknown version image - this should succeed with warning + rayjob = RayJob( + job_name=job_name, + cluster_config=cluster_config, + namespace=self.namespace, + entrypoint="python -c 'print(\"Testing unknown Ray version scenario\")'", + shutdown_after_job_finishes=True, + ttl_seconds_after_finished=30, + ) + + print(f"Attempting to submit RayJob '{job_name}' with unknown Ray version...") + + # This should succeed but with a warning + with pytest.warns(UserWarning, match="Cannot determine Ray version"): + submission_result = rayjob.submit() + + assert ( + submission_result == job_name + ), f"Job submission failed, expected {job_name}, got 
{submission_result}" + + print("✅ RayJob submission succeeded with warning for unknown Ray version!") + print( + f"Note: RayJob '{job_name}' was submitted successfully but may need manual cleanup." + ) From 52a351a04ec7ea8b1130534c9aa1528f556b66d5 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 27 Aug 2025 18:31:33 +0100 Subject: [PATCH 15/33] RHOAIENG-30720: Remove GCS FT for Lifecycled RayClusters --- codecov.yml | 1 - src/codeflare_sdk/common/utils/constants.py | 1 + src/codeflare_sdk/ray/rayjobs/config.py | 68 +------------------- src/codeflare_sdk/ray/rayjobs/rayjob.py | 14 ++-- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 39 ++--------- 5 files changed, 16 insertions(+), 107 deletions(-) diff --git a/codecov.yml b/codecov.yml index fab28aee..4494dcd4 100644 --- a/codecov.yml +++ b/codecov.yml @@ -15,4 +15,3 @@ coverage: default: target: 85% threshold: 2.5% - diff --git a/src/codeflare_sdk/common/utils/constants.py b/src/codeflare_sdk/common/utils/constants.py index fcd064d6..7e6147f6 100644 --- a/src/codeflare_sdk/common/utils/constants.py +++ b/src/codeflare_sdk/common/utils/constants.py @@ -12,3 +12,4 @@ "3.11": CUDA_PY311_RUNTIME_IMAGE, "3.12": CUDA_PY312_RUNTIME_IMAGE, } +MOUNT_PATH = "/home/ray/scripts" diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index d335da51..2b2f9131 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -41,7 +41,7 @@ import logging -from ...common.utils.constants import RAY_VERSION +from ...common.utils.constants import MOUNT_PATH, RAY_VERSION from ...common.utils.utils import update_image logger = logging.getLogger(__name__) @@ -141,14 +141,6 @@ class ManagedClusterConfig: A list of V1Volume objects to add to the Cluster volume_mounts: A list of V1VolumeMount objects to add to the Cluster - enable_gcs_ft: - A boolean indicating whether to enable GCS fault tolerance. 
- redis_address: - The address of the Redis server to use for GCS fault tolerance, required when enable_gcs_ft is True. - redis_password_secret: - Kubernetes secret reference containing Redis password. ex: {"name": "secret-name", "key": "password-key"} - external_storage_namespace: - The storage namespace to use for GCS fault tolerance. By default, KubeRay sets it to the UID of RayCluster. """ head_cpu_requests: Union[int, str] = 2 @@ -175,35 +167,10 @@ class ManagedClusterConfig: annotations: Dict[str, str] = field(default_factory=dict) volumes: list[V1Volume] = field(default_factory=list) volume_mounts: list[V1VolumeMount] = field(default_factory=list) - enable_gcs_ft: bool = False - redis_address: Optional[str] = None - redis_password_secret: Optional[Dict[str, str]] = None - external_storage_namespace: Optional[str] = None def __post_init__(self): self.envs["RAY_USAGE_STATS_ENABLED"] = "0" - if self.enable_gcs_ft: - if not self.redis_address: - raise ValueError( - "redis_address must be provided when enable_gcs_ft is True" - ) - - if self.redis_password_secret and not isinstance( - self.redis_password_secret, dict - ): - raise ValueError( - "redis_password_secret must be a dictionary with 'name' and 'key' fields" - ) - - if self.redis_password_secret and ( - "name" not in self.redis_password_secret - or "key" not in self.redis_password_secret - ): - raise ValueError( - "redis_password_secret must contain both 'name' and 'key' fields" - ) - self._validate_types() self._memory_to_string() self._validate_gpu_config(self.head_accelerators) @@ -288,11 +255,6 @@ def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]: "workerGroupSpecs": [self._build_worker_group_spec(cluster_name)], } - # Add GCS fault tolerance if enabled - if self.enable_gcs_ft: - gcs_ft_options = self._build_gcs_ft_options() - ray_cluster_spec["gcsFaultToleranceOptions"] = gcs_ft_options - return ray_cluster_spec def _build_head_group_spec(self) -> Dict[str, Any]: @@ -496,31 +458,7 
@@ def _build_env_vars(self) -> list: """Build environment variables list.""" return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()] - def _build_gcs_ft_options(self) -> Dict[str, Any]: - """Build GCS fault tolerance options.""" - gcs_ft_options = {"redisAddress": self.redis_address} - - if ( - hasattr(self, "external_storage_namespace") - and self.external_storage_namespace - ): - gcs_ft_options["externalStorageNamespace"] = self.external_storage_namespace - - if hasattr(self, "redis_password_secret") and self.redis_password_secret: - gcs_ft_options["redisPassword"] = { - "valueFrom": { - "secretKeyRef": { - "name": self.redis_password_secret["name"], - "key": self.redis_password_secret["key"], - } - } - } - - return gcs_ft_options - - def add_script_volumes( - self, configmap_name: str, mount_path: str = "/home/ray/scripts" - ): + def add_script_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH): """ Add script volume and mount references to cluster configuration. 
@@ -592,7 +530,7 @@ def build_script_configmap_spec( } def build_script_volume_specs( - self, configmap_name: str, mount_path: str = "/home/ray/scripts" + self, configmap_name: str, mount_path: str = MOUNT_PATH ) -> Tuple[Dict[str, Any], Dict[str, Any]]: """ Build volume and mount specifications for scripts diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index acf93fdc..072f5153 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -22,6 +22,7 @@ import re import ast from typing import Dict, Any, Optional, Tuple +from codeflare_sdk.common.utils.constants import MOUNT_PATH from kubernetes import client from ...common.kubernetes_cluster.auth import get_api_client from python_client.kuberay_job_api import RayjobApi @@ -41,8 +42,6 @@ logger = logging.getLogger(__name__) -mount_path = "/home/ray/scripts" - class RayJob: """ @@ -354,7 +353,6 @@ def _extract_script_files_from_entrypoint(self) -> Optional[Dict[str, str]]: return None scripts = {} - # mount_path = "/home/ray/scripts" processed_files = set() # Avoid infinite loops # Look for Python file patterns in entrypoint (e.g., "python script.py", "python /path/to/script.py") @@ -364,14 +362,14 @@ def _extract_script_files_from_entrypoint(self) -> Optional[Dict[str, str]]: # Process main scripts from entrypoint files for script_path in matches: self._process_script_and_imports( - script_path, scripts, mount_path, processed_files + script_path, scripts, MOUNT_PATH, processed_files ) # Update entrypoint paths to use mounted locations for script_path in matches: if script_path in [os.path.basename(s) for s in processed_files]: old_path = script_path - new_path = f"{mount_path}/{os.path.basename(script_path)}" + new_path = f"{MOUNT_PATH}/{os.path.basename(script_path)}" self.entrypoint = self.entrypoint.replace(old_path, new_path) return scripts if scripts else None @@ -466,7 +464,7 @@ def 
_handle_script_volumes_for_new_cluster(self, scripts: Dict[str, str]): # Add volumes to cluster config (config.py handles spec building) self._cluster_config.add_script_volumes( - configmap_name=configmap_name, mount_path="/home/ray/scripts" + configmap_name=configmap_name, mount_path=MOUNT_PATH ) def _handle_script_volumes_for_existing_cluster(self, scripts: Dict[str, str]): @@ -541,8 +539,6 @@ def _update_existing_cluster_for_scripts( config_builder: ManagedClusterConfig instance for building specs """ - # Get existing RayCluster - api_instance = client.CustomObjectsApi(get_api_client()) try: ray_cluster = self._cluster_api.get_ray_cluster( name=self.cluster_name, @@ -553,7 +549,7 @@ def _update_existing_cluster_for_scripts( # Build script volume and mount specifications using config.py script_volume, script_mount = config_builder.build_script_volume_specs( - configmap_name=configmap_name, mount_path="/home/ray/scripts" + configmap_name=configmap_name, mount_path=MOUNT_PATH ) # Helper function to check for duplicate volumes/mounts diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index ff7a2639..9b87cec5 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -15,7 +15,7 @@ import pytest import os from unittest.mock import MagicMock, patch -from codeflare_sdk.common.utils.constants import CUDA_RUNTIME_IMAGE, RAY_VERSION +from codeflare_sdk.common.utils.constants import MOUNT_PATH, RAY_VERSION from codeflare_sdk.ray.rayjobs.rayjob import RayJob from codeflare_sdk.ray.cluster.config import ClusterConfiguration @@ -1007,27 +1007,6 @@ def test_rayjob_user_override_shutdown_behavior(mocker): assert rayjob_override_priority.shutdown_after_job_finishes is True -def test_build_ray_cluster_spec_with_gcs_ft(mocker): - """Test build_ray_cluster_spec with GCS fault tolerance enabled.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - - # Create 
a test cluster config with GCS FT enabled - cluster_config = ManagedClusterConfig( - enable_gcs_ft=True, - redis_address="redis://redis-service:6379", - external_storage_namespace="storage-ns", - ) - - # Build the spec using the method on the cluster config - spec = cluster_config.build_ray_cluster_spec("test-cluster") - - # Verify GCS fault tolerance options - assert "gcsFaultToleranceOptions" in spec - gcs_ft = spec["gcsFaultToleranceOptions"] - assert gcs_ft["redisAddress"] == "redis://redis-service:6379" - assert gcs_ft["externalStorageNamespace"] == "storage-ns" - - class TestRayVersionValidation: """Test Ray version validation in RayJob.""" @@ -1210,7 +1189,7 @@ def test_extract_script_files_from_entrypoint_single_script(mocker, tmp_path): assert scripts is not None assert test_script.name in scripts assert scripts[test_script.name] == "print('Hello World!')" - assert f"/home/ray/scripts/{test_script.name}" in rayjob.entrypoint + assert f"{MOUNT_PATH}/{test_script.name}" in rayjob.entrypoint finally: os.chdir(original_cwd) @@ -1377,7 +1356,7 @@ def test_add_script_volumes(): assert volume.config_map.name == "test-scripts" assert mount.name == "ray-job-scripts" - assert mount.mount_path == "/home/ray/scripts" + assert mount.mount_path == MOUNT_PATH def test_add_script_volumes_duplicate_prevention(): @@ -1619,7 +1598,7 @@ def test_rayjob_submit_with_scripts_new_cluster(mocker, tmp_path): assert len(cluster_config.volume_mounts) == 1 # Verify entrypoint was updated - assert "/home/ray/scripts/test.py" in rayjob.entrypoint + assert f"{MOUNT_PATH}/test.py" in rayjob.entrypoint finally: os.chdir(original_cwd) @@ -1645,9 +1624,7 @@ def test_process_script_and_imports_io_error(mocker, tmp_path): mocker.patch("builtins.open", side_effect=IOError("Permission denied")) # Should handle the error gracefully and not crash - rayjob._process_script_and_imports( - "test.py", scripts, "/home/ray/scripts", processed_files - ) + rayjob._process_script_and_imports("test.py", 
scripts, MOUNT_PATH, processed_files) # Should add to processed_files but not to scripts (due to error) assert "test.py" in processed_files @@ -1671,7 +1648,7 @@ def test_process_script_and_imports_container_path_skip(mocker): # Test script path already in container rayjob._process_script_and_imports( - "/home/ray/scripts/test.py", scripts, "/home/ray/scripts", processed_files + f"{MOUNT_PATH}/test.py", scripts, MOUNT_PATH, processed_files ) # Should skip processing @@ -1695,9 +1672,7 @@ def test_process_script_and_imports_already_processed(mocker, tmp_path): processed_files = {"test.py"} # Already processed # Should return early without processing - rayjob._process_script_and_imports( - "test.py", scripts, "/home/ray/scripts", processed_files - ) + rayjob._process_script_and_imports("test.py", scripts, MOUNT_PATH, processed_files) # Should remain unchanged assert len(scripts) == 0 From 0d40b066a94255224c837e9ffef1c5ddd05c969b Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Fri, 29 Aug 2025 15:41:04 +0100 Subject: [PATCH 16/33] fix: update auth methods in rayjob notebooks --- .../4_rayjob_existing_cluster.ipynb | 19 +++++--------- ...cluster.ipynb => 5_submit_rayjob_cr.ipynb} | 26 +++++-------------- 2 files changed, 13 insertions(+), 32 deletions(-) rename demo-notebooks/guided-demos/{5_rayjob_lifecycled_cluster.ipynb => 5_submit_rayjob_cr.ipynb} (80%) diff --git a/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb b/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb index 5348099c..c0737db0 100644 --- a/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb +++ b/demo-notebooks/guided-demos/4_rayjob_existing_cluster.ipynb @@ -37,7 +37,7 @@ "metadata": {}, "outputs": [], "source": [ - "from codeflare_sdk import Cluster, ClusterConfiguration, RayJob, TokenAuthentication" + "from codeflare_sdk import Cluster, ClusterConfiguration, RayJob" ] }, { @@ -45,7 +45,7 @@ "id": "649c5911", "metadata": {}, "source": [ - "Execute the below cell to 
authenticate the notebook via OpenShift." + "Run the below `oc login` command using your Token and Server URL. Ensure the command is prepended by `!` and not `%`. This will work when running both locally and within RHOAI." ] }, { @@ -55,12 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "auth = TokenAuthentication(\n", - " token = \"XXXXX\",\n", - " server = \"XXXXX\",\n", - " skip_tls=False\n", - ")\n", - "auth.login()" + "!oc login --token= --server=" ] }, { @@ -116,7 +111,7 @@ "id": "a0e2a650", "metadata": {}, "source": [ - "## Creating and Submitting the RayJob" + "## Submitting the RayJob" ] }, { @@ -144,7 +139,7 @@ "rayjob = RayJob(\n", " job_name=\"sdk-test-job\",\n", " cluster_name=\"rayjob-cluster\",\n", - " namespace=\"rhods-notebooks\",\n", + " namespace=\"your-namespace\",\n", " entrypoint=\"python -c 'import time; time.sleep(20)'\",\n", ")\n", "\n", @@ -156,7 +151,7 @@ "id": "30a8899a", "metadata": {}, "source": [ - "We can observe the status of the RayJob in the same way as the RayCluster by invoking the `submit()` function via the below cell." + "We can observe the status of the RayJob in the same way as the RayCluster by invoking the `status()` function via the below cell." 
] }, { @@ -166,7 +161,7 @@ "metadata": {}, "outputs": [], "source": [ - "rayjob.submit()" + "rayjob.status()" ] }, { diff --git a/demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb b/demo-notebooks/guided-demos/5_submit_rayjob_cr.ipynb similarity index 80% rename from demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb rename to demo-notebooks/guided-demos/5_submit_rayjob_cr.ipynb index 7f4fdc57..1d9630b7 100644 --- a/demo-notebooks/guided-demos/5_rayjob_lifecycled_cluster.ipynb +++ b/demo-notebooks/guided-demos/5_submit_rayjob_cr.ipynb @@ -5,7 +5,7 @@ "id": "9259e514", "metadata": {}, "source": [ - "# Submitting a RayJob which lifecycles its own RayCluster\n", + "# Submitting a RayJob CR\n", "\n", "In this notebook, we will go through the basics of using the SDK to:\n", " * Define a RayCluster configuration\n", @@ -18,14 +18,7 @@ "id": "18136ea7", "metadata": {}, "source": [ - "## Defining and Submitting the RayJob" - ] - }, - { - "cell_type": "markdown", - "id": "a1c2545d", - "metadata": {}, - "source": [ + "## Defining and Submitting the RayJob\n", "First, we'll need to import the relevant CodeFlare SDK packages. You can do this by executing the below cell." ] }, @@ -36,7 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "from codeflare_sdk import RayJob, ManagedClusterConfig, TokenAuthentication" + "from codeflare_sdk import RayJob, ManagedClusterConfig" ] }, { @@ -44,9 +37,7 @@ "id": "649c5911", "metadata": {}, "source": [ - "Execute the below cell to authenticate the notebook via OpenShift.\n", - "\n", - "**TODO: Add guide to authenticate locally.**" + "Run the below `oc login` command using your Token and Server URL. Ensure the command is prepended by `!` and not `%`. This will work when running both locally and within RHOAI." 
] }, { @@ -56,12 +47,7 @@ "metadata": {}, "outputs": [], "source": [ - "auth = TokenAuthentication(\n", - " token = \"XXXXX\",\n", - " server = \"XXXXX\",\n", - " skip_tls=False\n", - ")\n", - "auth.login()" + "!oc login --token= --server=" ] }, { @@ -120,7 +106,7 @@ "id": "f3612de2", "metadata": {}, "source": [ - "We can check the status of our cluster by executing the below cell. If it's not up immediately, run the cell a few more times until you see that it's in a 'running' state." + "We can check the status of our job by executing the below cell. The status may appear as `unknown` for a time while the RayCluster spins up." ] }, { From 540c97b2d1436df6a1eb5e43420ab17f9cfb307e Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Mon, 1 Sep 2025 12:31:48 +0100 Subject: [PATCH 17/33] RHOAIENG-27792: Add stop and resubmit functions to RayJob --- pyproject.toml | 2 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 33 +++-- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 122 ++++++++++++++++++- 3 files changed, 146 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7664398a..dd2968d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = ">= 2.10.6" ipywidgets = "8.1.2" -python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "d1e750d9beac612ad455b951c1a789f971409ab3" } +python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "49419654418865a5838adc7f323f13d82454aa18" } [[tool.poetry.source]] name = "pypi" diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 072f5153..972aa139 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -154,29 +154,24 @@ def __init__( logger.info(f"Initialized RayJob: {self.name} in namespace: {self.namespace}") def submit(self) -> str: - # 
Validate required parameters if not self.entrypoint: - raise ValueError("entrypoint must be provided to submit a RayJob") + raise ValueError("Entrypoint must be provided to submit a RayJob") - # Validate Ray version compatibility for both cluster_config and runtime_env self._validate_ray_version_compatibility() + # Automatically handle script files for new clusters if self._cluster_config is not None: scripts = self._extract_script_files_from_entrypoint() if scripts: self._handle_script_volumes_for_new_cluster(scripts) - - # Handle script files for existing clusters elif self._cluster_name: scripts = self._extract_script_files_from_entrypoint() if scripts: self._handle_script_volumes_for_existing_cluster(scripts) - # Build the RayJob custom resource rayjob_cr = self._build_rayjob_cr() - # Submit the job - KubeRay operator handles everything else - logger.info(f"Submitting RayJob {self.name} to KubeRay operator") + logger.info(f"Submitting RayJob {self.name} to Kuberay operator") result = self._api.submit_job(k8s_namespace=self.namespace, job=rayjob_cr) if result: @@ -189,11 +184,31 @@ def submit(self) -> str: else: raise RuntimeError(f"Failed to submit RayJob {self.name}") + def stop(self): + """ + Suspend the Ray job. + """ + stopped = self._api.suspend_job(name=self.name, k8s_namespace=self.namespace) + if stopped: + logger.info(f"Successfully stopped the RayJob {self.name}") + return True + else: + raise RuntimeError(f"Failed to stop the RayJob {self.name}") + + def resubmit(self): + """ + Resubmit the Ray job. + """ + if self._api.resubmit_job(name=self.name, k8s_namespace=self.namespace): + logger.info(f"Successfully resubmitted the RayJob {self.name}") + return True + else: + raise RuntimeError(f"Failed to resubmit the RayJob {self.name}") + def _build_rayjob_cr(self) -> Dict[str, Any]: """ Build the RayJob custom resource specification using native RayJob capabilities. 
""" - # Basic RayJob custom resource structure rayjob_cr = { "apiVersion": "ray.io/v1", "kind": "RayJob", diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 9b87cec5..18973bfe 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -390,7 +390,7 @@ def test_submit_validation_no_entrypoint(mocker): ) with pytest.raises( - ValueError, match="entrypoint must be provided to submit a RayJob" + ValueError, match="Entrypoint must be provided to submit a RayJob" ): rayjob.submit() @@ -1878,3 +1878,123 @@ def test_add_script_volumes_existing_mount_skip(): # Should still have only one mount and no volume added assert len(config.volumes) == 0 # Volume not added due to mount skip assert len(config.volume_mounts) == 1 + + +def test_rayjob_stop_success(mocker, caplog): + """Test successful RayJob stop operation.""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + + mock_api_instance.suspend_job.return_value = { + "metadata": {"name": "test-rayjob"}, + "spec": {"suspend": True}, + } + + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python script.py", + ) + + with caplog.at_level("INFO"): + result = rayjob.stop() + + assert result is True + + mock_api_instance.suspend_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) + + # Verify success message was logged + assert "Successfully stopped the RayJob test-rayjob" in caplog.text + + +def test_rayjob_stop_failure(mocker): + """Test RayJob stop operation when API call fails.""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + + mock_api_instance.suspend_job.return_value = None + + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python script.py", + ) + + with pytest.raises(RuntimeError, match="Failed to stop the RayJob test-rayjob"): + rayjob.stop() + + mock_api_instance.suspend_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) + + +def test_rayjob_resubmit_success(mocker): + """Test successful RayJob resubmit operation.""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + + mock_api_instance.resubmit_job.return_value = { + "metadata": {"name": "test-rayjob"}, + "spec": {"suspend": False}, + } + + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python script.py", + ) + + result = rayjob.resubmit() + + assert result is True + + mock_api_instance.resubmit_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) + + +def test_rayjob_resubmit_failure(mocker): + """Test RayJob resubmit operation when API call fails.""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + + mock_api_instance.resubmit_job.return_value = None + + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + 
namespace="test-namespace", + entrypoint="python script.py", + ) + + with pytest.raises(RuntimeError, match="Failed to resubmit the RayJob test-rayjob"): + rayjob.resubmit() + + mock_api_instance.resubmit_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) From 08125024d1359d4cabf1da30d34e6d58b559c1da Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Mon, 1 Sep 2025 16:53:22 +0100 Subject: [PATCH 18/33] RHOAIENG-27792: Auto tear down training config map when job is deleted --- poetry.lock | 6 +- pyproject.toml | 2 +- src/codeflare_sdk/ray/rayjobs/config.py | 10 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 81 +++- src/codeflare_sdk/ray/rayjobs/test_config.py | 23 ++ src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 356 +++++++++++++++++- .../rayjob/lifecycled_cluster_oauth_test.py | 187 ++++----- 7 files changed, 520 insertions(+), 145 deletions(-) diff --git a/poetry.lock b/poetry.lock index e8380cf6..a81ccded 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3399,8 +3399,8 @@ kubernetes = ">=25.0.0" [package.source] type = "git" url = "https://github.com/ray-project/kuberay.git" -reference = "d1e750d9beac612ad455b951c1a789f971409ab3" -resolved_reference = "d1e750d9beac612ad455b951c1a789f971409ab3" +reference = "a16c0365e3b19a202d835097e1139eca9406b383" +resolved_reference = "a16c0365e3b19a202d835097e1139eca9406b383" subdirectory = "clients/python-client" [[package]] @@ -4790,4 +4790,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "d854f6abffad6c08100fdfeeb53d41fac01ef253f3d3b07cae3608d44768d4ee" +content-hash = "4634ae18b5b5f4bda0d926f60cc1c64e927b5435cc04a360ee0996436e91edbe" diff --git a/pyproject.toml b/pyproject.toml index dd2968d0..1c160bf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = ">= 2.10.6" ipywidgets = "8.1.2" -python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = 
"clients/python-client", rev = "49419654418865a5838adc7f323f13d82454aa18" } +python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "a16c0365e3b19a202d835097e1139eca9406b383" } [[tool.poetry.source]] name = "pypi" diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index 2b2f9131..fd8e199f 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -525,7 +525,15 @@ def build_script_configmap_spec( return { "apiVersion": "v1", "kind": "ConfigMap", - "metadata": {"name": configmap_name, "namespace": namespace}, + "metadata": { + "name": configmap_name, + "namespace": namespace, + "labels": { + "ray.io/job-name": job_name, + "app.kubernetes.io/managed-by": "codeflare-sdk", + "app.kubernetes.io/component": "rayjob-scripts", + }, + }, "data": scripts, } diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 972aa139..49ccafcb 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -159,16 +159,6 @@ def submit(self) -> str: self._validate_ray_version_compatibility() - # Automatically handle script files for new clusters - if self._cluster_config is not None: - scripts = self._extract_script_files_from_entrypoint() - if scripts: - self._handle_script_volumes_for_new_cluster(scripts) - elif self._cluster_name: - scripts = self._extract_script_files_from_entrypoint() - if scripts: - self._handle_script_volumes_for_existing_cluster(scripts) - rayjob_cr = self._build_rayjob_cr() logger.info(f"Submitting RayJob {self.name} to Kuberay operator") @@ -176,6 +166,17 @@ def submit(self) -> str: if result: logger.info(f"Successfully submitted RayJob {self.name}") + + # Handle script files after RayJob creation so we can set owner reference + if self._cluster_config is not None: + scripts = self._extract_script_files_from_entrypoint() + if scripts: 
+ self._handle_script_volumes_for_new_cluster(scripts, result) + elif self._cluster_name: + scripts = self._extract_script_files_from_entrypoint() + if scripts: + self._handle_script_volumes_for_existing_cluster(scripts, result) + if self.shutdown_after_job_finishes: logger.info( f"Cluster will be automatically cleaned up {self.ttl_seconds_after_finished}s after job completion" @@ -205,6 +206,17 @@ def resubmit(self): else: raise RuntimeError(f"Failed to resubmit the RayJob {self.name}") + def delete(self): + """ + Delete the Ray job. + """ + deleted = self._api.delete_job(name=self.name, k8s_namespace=self.namespace) + if deleted: + logger.info(f"Successfully deleted the RayJob {self.name}") + return True + else: + raise RuntimeError(f"Failed to delete the RayJob {self.name}") + def _build_rayjob_cr(self) -> Dict[str, Any]: """ Build the RayJob custom resource specification using native RayJob capabilities. @@ -464,7 +476,9 @@ def _find_local_imports( except (SyntaxError, ValueError) as e: logger.debug(f"Could not parse imports from {script_path}: {e}") - def _handle_script_volumes_for_new_cluster(self, scripts: Dict[str, str]): + def _handle_script_volumes_for_new_cluster( + self, scripts: Dict[str, str], rayjob_result: Dict[str, Any] = None + ): """Handle script volumes for new clusters (uses ManagedClusterConfig).""" # Validate ConfigMap size before creation self._cluster_config.validate_configmap_size(scripts) @@ -474,15 +488,17 @@ def _handle_script_volumes_for_new_cluster(self, scripts: Dict[str, str]): job_name=self.name, namespace=self.namespace, scripts=scripts ) - # Create ConfigMap via Kubernetes API - configmap_name = self._create_configmap_from_spec(configmap_spec) + # Create ConfigMap via Kubernetes API with owner reference + configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) # Add volumes to cluster config (config.py handles spec building) self._cluster_config.add_script_volumes( configmap_name=configmap_name, 
mount_path=MOUNT_PATH ) - def _handle_script_volumes_for_existing_cluster(self, scripts: Dict[str, str]): + def _handle_script_volumes_for_existing_cluster( + self, scripts: Dict[str, str], rayjob_result: Dict[str, Any] = None + ): """Handle script volumes for existing clusters (updates RayCluster CR).""" # Create config builder for utility methods config_builder = ManagedClusterConfig() @@ -495,18 +511,21 @@ def _handle_script_volumes_for_existing_cluster(self, scripts: Dict[str, str]): job_name=self.name, namespace=self.namespace, scripts=scripts ) - # Create ConfigMap via Kubernetes API - configmap_name = self._create_configmap_from_spec(configmap_spec) + # Create ConfigMap via Kubernetes API with owner reference + configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) # Update existing RayCluster self._update_existing_cluster_for_scripts(configmap_name, config_builder) - def _create_configmap_from_spec(self, configmap_spec: Dict[str, Any]) -> str: + def _create_configmap_from_spec( + self, configmap_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None + ) -> str: """ Create ConfigMap from specification via Kubernetes API. 
Args: configmap_spec: ConfigMap specification dictionary + rayjob_result: The result from RayJob creation containing UID Returns: str: Name of the created ConfigMap @@ -514,9 +533,35 @@ def _create_configmap_from_spec(self, configmap_spec: Dict[str, Any]) -> str: configmap_name = configmap_spec["metadata"]["name"] + metadata = client.V1ObjectMeta(**configmap_spec["metadata"]) + + # Add owner reference if we have the RayJob result + if ( + rayjob_result + and isinstance(rayjob_result, dict) + and rayjob_result.get("metadata", {}).get("uid") + ): + logger.info( + f"Adding owner reference to ConfigMap '{configmap_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" + ) + metadata.owner_references = [ + client.V1OwnerReference( + api_version="ray.io/v1", + kind="RayJob", + name=self.name, + uid=rayjob_result["metadata"]["uid"], + controller=True, + block_owner_deletion=True, + ) + ] + else: + logger.warning( + f"No valid RayJob result with UID found, ConfigMap '{configmap_name}' will not have owner reference. 
Result: {rayjob_result}" + ) + # Convert dict spec to V1ConfigMap configmap = client.V1ConfigMap( - metadata=client.V1ObjectMeta(**configmap_spec["metadata"]), + metadata=metadata, data=configmap_spec["data"], ) diff --git a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test_config.py index 7d7864c5..82e9464f 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_config.py +++ b/src/codeflare_sdk/ray/rayjobs/test_config.py @@ -170,3 +170,26 @@ def test_add_script_volumes_existing_mount_early_return(): # Should still have only one mount, no volume added assert len(config.volumes) == 0 assert len(config.volume_mounts) == 1 + + +def test_build_script_configmap_spec_labels(): + """Test that build_script_configmap_spec creates ConfigMap with correct labels.""" + config = ManagedClusterConfig() + + job_name = "test-job" + namespace = "test-namespace" + scripts = {"script.py": "print('hello')", "helper.py": "# helper code"} + + configmap_spec = config.build_script_configmap_spec(job_name, namespace, scripts) + + assert configmap_spec["apiVersion"] == "v1" + assert configmap_spec["kind"] == "ConfigMap" + assert configmap_spec["metadata"]["name"] == f"{job_name}-scripts" + assert configmap_spec["metadata"]["namespace"] == namespace + + labels = configmap_spec["metadata"]["labels"] + assert labels["ray.io/job-name"] == job_name + assert labels["app.kubernetes.io/managed-by"] == "codeflare-sdk" + assert labels["app.kubernetes.io/component"] == "rayjob-scripts" + + assert configmap_spec["data"] == scripts diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 18973bfe..7c4823f8 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -1376,16 +1376,13 @@ def test_add_script_volumes_duplicate_prevention(): def test_create_configmap_from_spec(mocker): """Test creating ConfigMap via Kubernetes API.""" - # Mock kubernetes config loading 
mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - # Mock Kubernetes API mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") mock_api_instance = MagicMock() mock_k8s_api.return_value = mock_api_instance - # Mock get_api_client mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") rayjob = RayJob( @@ -1410,19 +1407,15 @@ def test_create_configmap_from_spec(mocker): def test_create_configmap_already_exists(mocker): """Test creating ConfigMap when it already exists (409 conflict).""" - # Mock kubernetes config loading mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - # Mock Kubernetes API mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") mock_api_instance = MagicMock() mock_k8s_api.return_value = mock_api_instance - # Mock get_api_client mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") - # Mock API exception for conflict from kubernetes.client import ApiException mock_api_instance.create_namespaced_config_map.side_effect = ApiException( @@ -1450,17 +1443,178 @@ def test_create_configmap_already_exists(mocker): mock_api_instance.replace_namespaced_config_map.assert_called_once() +def test_create_configmap_with_owner_reference_basic(mocker, caplog): + """Test creating ConfigMap with owner reference from valid RayJob result.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock Kubernetes API + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_api_instance = MagicMock() + mock_k8s_api.return_value = mock_api_instance + + # Mock get_api_client + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + # Mock client.V1ObjectMeta and V1ConfigMap + mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") + 
mock_metadata_instance = MagicMock() + mock_v1_metadata.return_value = mock_metadata_instance + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "test-scripts", + "namespace": "test-namespace", + "labels": { + "ray.io/job-name": "test-job", + "app.kubernetes.io/managed-by": "codeflare-sdk", + "app.kubernetes.io/component": "rayjob-scripts", + }, + }, + "data": {"test.py": "print('test')"}, + } + + # Valid RayJob result with UID + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "a4dd4c5a-ab61-411d-b4d1-4abb5177422a", + } + } + + with caplog.at_level("INFO"): + result = rayjob._create_configmap_from_spec(configmap_spec, rayjob_result) + + assert result == "test-scripts" + + # Verify owner reference was set + expected_owner_ref = mocker.ANY # We'll check via the logs + assert ( + "Adding owner reference to ConfigMap 'test-scripts' with RayJob UID: a4dd4c5a-ab61-411d-b4d1-4abb5177422a" + in caplog.text + ) + + # Verify owner_references was set on metadata + assert mock_metadata_instance.owner_references is not None + mock_api_instance.create_namespaced_config_map.assert_called_once() + + +def test_create_configmap_without_owner_reference_no_uid(mocker, caplog): + """Test creating ConfigMap without owner reference when RayJob has no UID.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_api_instance = MagicMock() + mock_k8s_api.return_value = mock_api_instance + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") + mock_metadata_instance = MagicMock() + mock_v1_metadata.return_value = 
mock_metadata_instance + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + # RayJob result without UID + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + # No UID field + } + } + + with caplog.at_level("WARNING"): + result = rayjob._create_configmap_from_spec(configmap_spec, rayjob_result) + + assert result == "test-scripts" + + # Verify warning was logged and no owner reference was set + assert ( + "No valid RayJob result with UID found, ConfigMap 'test-scripts' will not have owner reference" + in caplog.text + ) + + # The important part is that the warning was logged, indicating no owner reference was set + mock_api_instance.create_namespaced_config_map.assert_called_once() + + +def test_create_configmap_with_invalid_rayjob_result(mocker, caplog): + """Test creating ConfigMap with None or invalid rayjob_result.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + + # Mock Kubernetes API + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_api_instance = MagicMock() + mock_k8s_api.return_value = mock_api_instance + + # Mock get_api_client + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + # Test with None + with caplog.at_level("WARNING"): + result = 
rayjob._create_configmap_from_spec(configmap_spec, None) + + assert result == "test-scripts" + assert "No valid RayJob result with UID found" in caplog.text + + # Test with string instead of dict + caplog.clear() + with caplog.at_level("WARNING"): + result = rayjob._create_configmap_from_spec(configmap_spec, "not-a-dict") + + assert result == "test-scripts" + assert "No valid RayJob result with UID found" in caplog.text + + def test_handle_script_volumes_for_new_cluster(mocker, tmp_path): """Test handling script volumes for new cluster creation.""" - # Mock kubernetes config loading mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - # Mock ConfigMap creation mock_create = mocker.patch.object(RayJob, "_create_configmap_from_spec") mock_create.return_value = "test-job-scripts" - # Create test script test_script = tmp_path / "test.py" test_script.write_text("print('test')") @@ -1482,10 +1636,8 @@ def test_handle_script_volumes_for_new_cluster(mocker, tmp_path): scripts = {"test.py": "print('test')"} rayjob._handle_script_volumes_for_new_cluster(scripts) - # Verify ConfigMap creation was called mock_create.assert_called_once() - # Verify volumes were added to cluster config assert len(cluster_config.volumes) == 1 assert len(cluster_config.volume_mounts) == 1 @@ -1495,11 +1647,9 @@ def test_handle_script_volumes_for_new_cluster(mocker, tmp_path): def test_ast_parsing_import_detection(mocker, tmp_path): """Test AST parsing correctly detects import statements.""" - # Mock kubernetes config loading mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - # Create scripts with different import patterns main_script = tmp_path / "main.py" main_script.write_text( """# Different import patterns @@ -1537,7 +1687,6 @@ def func2(): pass scripts = rayjob._extract_script_files_from_entrypoint() - # Should find all local dependencies 
assert scripts is not None assert len(scripts) == 4 # main + 3 dependencies assert "main.py" in scripts @@ -1549,6 +1698,73 @@ def func2(): pass os.chdir(original_cwd) +def test_script_handling_timing_after_rayjob_submission(mocker, tmp_path): + """Test that script handling happens after RayJob is submitted (not before).""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + submit_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-12345", + } + } + mock_api_instance.submit_job.return_value = submit_result + + mock_handle_new = mocker.patch.object( + RayJob, "_handle_script_volumes_for_new_cluster" + ) + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + + test_script = tmp_path / "test.py" + test_script.write_text("print('test')") + + call_order = [] + + def track_submit(*args, **kwargs): + call_order.append("submit_job") + return submit_result + + def track_handle_scripts(*args, **kwargs): + call_order.append("handle_scripts") + assert len(args) >= 2 + assert args[1] == submit_result # rayjob_result should be second arg + + mock_api_instance.submit_job.side_effect = track_submit + mock_handle_new.side_effect = track_handle_scripts + + original_cwd = os.getcwd() + try: + os.chdir(tmp_path) + + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + cluster_config = ManagedClusterConfig() + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python test.py", + namespace="test-namespace", + ) + + rayjob.submit() + finally: + os.chdir(original_cwd) + + assert call_order == ["submit_job", "handle_scripts"] + + mock_api_instance.submit_job.assert_called_once() + mock_handle_new.assert_called_once() + + mock_handle_new.assert_called_with({"test.py": "print('test')"}, submit_result) + + def 
test_rayjob_submit_with_scripts_new_cluster(mocker, tmp_path): """Test RayJob submission with script detection for new cluster.""" # Mock kubernetes config loading @@ -1674,11 +1890,119 @@ def test_process_script_and_imports_already_processed(mocker, tmp_path): # Should return early without processing rayjob._process_script_and_imports("test.py", scripts, MOUNT_PATH, processed_files) - # Should remain unchanged assert len(scripts) == 0 assert processed_files == {"test.py"} +def test_submit_with_scripts_owner_reference_integration(mocker, tmp_path, caplog): + """Integration test for submit() with local scripts to verify end-to-end owner reference flow.""" + # Mock kubernetes config loading + mocker.patch("kubernetes.config.load_kube_config") + + # Mock the RayjobApi + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + # RayJob submission returns result with UID + submit_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "unique-rayjob-uid-12345", + } + } + mock_api_instance.submit_job.return_value = submit_result + + # Mock Kubernetes ConfigMap API + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_k8s_instance = MagicMock() + mock_k8s_api.return_value = mock_k8s_instance + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + # Capture the ConfigMap that gets created + created_configmap = None + + def capture_configmap(namespace, body): + nonlocal created_configmap + created_configmap = body + return body + + mock_k8s_instance.create_namespaced_config_map.side_effect = capture_configmap + + # Create test scripts + test_script = tmp_path / "main.py" + test_script.write_text("import helper\nprint('main')") + + helper_script = tmp_path / "helper.py" + helper_script.write_text("def help(): print('helper')") + + # Change to temp directory for script detection + original_cwd = os.getcwd() + 
try: + os.chdir(tmp_path) + + from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig + + cluster_config = ManagedClusterConfig() + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python main.py", + namespace="test-namespace", + ) + + with caplog.at_level("INFO"): + result = rayjob.submit() + + assert result == "test-job" + + # Verify RayJob was submitted first + mock_api_instance.submit_job.assert_called_once() + + # Verify ConfigMap was created with owner reference + mock_k8s_instance.create_namespaced_config_map.assert_called_once() + assert created_configmap is not None + + # Verify owner reference was set correctly + assert hasattr(created_configmap.metadata, "owner_references") + assert created_configmap.metadata.owner_references is not None + assert len(created_configmap.metadata.owner_references) == 1 + + owner_ref = created_configmap.metadata.owner_references[0] + assert owner_ref.api_version == "ray.io/v1" + assert owner_ref.kind == "RayJob" + assert owner_ref.name == "test-job" + assert owner_ref.uid == "unique-rayjob-uid-12345" + assert owner_ref.controller is True + assert owner_ref.block_owner_deletion is True + + # Verify labels were set + assert created_configmap.metadata.labels["ray.io/job-name"] == "test-job" + assert ( + created_configmap.metadata.labels["app.kubernetes.io/managed-by"] + == "codeflare-sdk" + ) + assert ( + created_configmap.metadata.labels["app.kubernetes.io/component"] + == "rayjob-scripts" + ) + + # Verify scripts were included + assert "main.py" in created_configmap.data + assert "helper.py" in created_configmap.data + + # Verify log message + assert ( + "Adding owner reference to ConfigMap 'test-job-scripts' with RayJob UID: unique-rayjob-uid-12345" + in caplog.text + ) + + finally: + os.chdir(original_cwd) + + def test_find_local_imports_syntax_error(mocker): """Test _find_local_imports handles syntax errors gracefully.""" mocker.patch("kubernetes.config.load_kube_config") 
diff --git a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py index 54186de3..41dd5280 100644 --- a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py +++ b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py @@ -3,48 +3,41 @@ import os from time import sleep -# Add the parent directory to the path to import support sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from support import * -from codeflare_sdk import ( - TokenAuthentication, - RayJob, - ManagedClusterConfig, -) +from codeflare_sdk import RayJob, ManagedClusterConfig from codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus - -# This test creates a RayJob that will create and lifecycle its own cluster on OpenShift +import kubernetes.client.rest +from python_client.kuberay_job_api import RayjobApi +from python_client.kuberay_cluster_api import RayClusterApi @pytest.mark.openshift class TestRayJobLifecycledClusterOauth: + """Test RayJob with auto-created cluster lifecycle management on OpenShift.""" + def setup_method(self): initialize_kubernetes_client(self) def teardown_method(self): delete_namespace(self) - delete_kueue_resources(self) def test_rayjob_with_lifecycled_cluster_oauth(self): + """ + Test RayJob submission with embedded cluster configuration, including: + 1. Job submission with auto-cluster creation + 2. Job suspension (stop) and verification + 3. Job resumption (resubmit) and verification + 4. Job completion monitoring + 5. 
Automatic cluster cleanup after job deletion + """ self.setup_method() create_namespace(self) - create_kueue_resources(self) - self.run_rayjob_with_lifecycled_cluster_oauth() - - def run_rayjob_with_lifecycled_cluster_oauth(self): ray_image = get_ray_image() + self.job_api = RayjobApi() + job_name = "lifecycled-job" - auth = TokenAuthentication( - token=run_oc_command(["whoami", "--show-token=true"]), - server=run_oc_command(["whoami", "--show-server=true"]), - skip_tls=True, - ) - auth.login() - - job_name = "lifecycled-cluster-rayjob" - - # Create cluster configuration for auto-creation cluster_config = ManagedClusterConfig( head_cpu_requests="500m", head_cpu_limits="500m", @@ -58,113 +51,95 @@ def run_rayjob_with_lifecycled_cluster_oauth(self): image=ray_image, ) - # Create RayJob with embedded cluster - will auto-create and manage cluster lifecycle rayjob = RayJob( job_name=job_name, - cluster_config=cluster_config, # This triggers auto-cluster creation namespace=self.namespace, - entrypoint="python -c \"import ray; ray.init(); print('Hello from auto-created cluster!'); print(f'Ray version: {ray.__version__}'); import time; time.sleep(30); print('RayJob completed successfully!')\"", - runtime_env={ - "pip": ["torch", "pytorch-lightning", "torchmetrics", "torchvision"], - "env_vars": get_setup_env_variables(ACCELERATOR="cpu"), - }, - shutdown_after_job_finishes=True, # Auto-cleanup cluster after job finishes - ttl_seconds_after_finished=30, # Wait 30s after job completion before cleanup - ) - - # Submit the job - print( - f"Submitting RayJob '{job_name}' with auto-cluster creation and lifecycle management" + cluster_config=cluster_config, + entrypoint="python -c \"import ray; ray.init(); print('RayJob completed successfully')\"", + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + shutdown_after_job_finishes=True, ) - submission_result = rayjob.submit() - assert ( - submission_result == job_name - ), f"Job submission failed, expected 
{job_name}, got {submission_result}" - print( - f"Successfully submitted RayJob '{job_name}' with cluster '{rayjob.cluster_name}'!" - ) - - # Monitor the job status until completion - self.monitor_rayjob_completion(rayjob) - - # Verify cluster auto-cleanup - print("🔍 Verifying cluster auto-cleanup after job completion...") - self.verify_cluster_cleanup(rayjob.cluster_name, timeout=60) - - def monitor_rayjob_completion(self, rayjob: RayJob, timeout: int = 900): - """ - Monitor a RayJob until it completes or fails. - Args: - rayjob: The RayJob instance to monitor - timeout: Maximum time to wait in seconds (default: 15 minutes) - """ - print(f"Monitoring RayJob '{rayjob.name}' status...") + try: + # 1. Submit and wait for job to reach running state + assert rayjob.submit() == job_name + assert self.job_api.wait_until_job_running( + name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=60 + ), "Job did not reach running state" + + # 2. Stop (suspend) the job and + assert rayjob.stop(), "Job stop failed" + job_cr = self.job_api.get_job( + name=rayjob.name, k8s_namespace=rayjob.namespace + ) + assert job_cr["spec"]["suspend"] is True, "Job suspend not set to true" + + assert self._wait_for_job_status( + rayjob, "Suspended", timeout=30 + ), "Job did not reach Suspended state" + + # 3. Test Job Resubmission + assert rayjob.resubmit(), "Job resubmit failed" + job_cr = self.job_api.get_job( + name=rayjob.name, k8s_namespace=rayjob.namespace + ) + assert job_cr["spec"]["suspend"] is False, "Job suspend not set to false" + + assert self.job_api.wait_until_job_finished( + name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=300 + ), "Job did not complete" + + finally: + # 4. 
Delete the job and cleanup + assert rayjob.delete() + self.verify_cluster_cleanup(rayjob) + + def _wait_for_job_status( + self, + rayjob: RayJob, + expected_status: str, + timeout: int = 30, + ) -> bool: + """Wait for a job to reach a specific deployment status.""" elapsed_time = 0 - check_interval = 10 # Check every 10 seconds + check_interval = 2 while elapsed_time < timeout: - status, ready = rayjob.status(print_to_console=True) - - # Check if job has completed (either successfully or failed) - if status == CodeflareRayJobStatus.COMPLETE: - print(f"RayJob '{rayjob.name}' completed successfully!") - return - elif status == CodeflareRayJobStatus.FAILED: - raise AssertionError(f"RayJob '{rayjob.name}' failed!") - elif status == CodeflareRayJobStatus.RUNNING: - print(f"RayJob '{rayjob.name}' is still running...") - elif status == CodeflareRayJobStatus.UNKNOWN: - print(f"RayJob '{rayjob.name}' status is unknown") - - # Wait before next check + status = self.job_api.get_job_status( + name=rayjob.name, k8s_namespace=rayjob.namespace + ) + if status and status.get("jobDeploymentStatus") == expected_status: + return True + sleep(check_interval) elapsed_time += check_interval - # If we reach here, the job has timed out - final_status, _ = rayjob.status(print_to_console=True) - raise TimeoutError( - f"RayJob '{rayjob.name}' did not complete within {timeout} seconds. " - f"Final status: {final_status}" - ) - - def verify_cluster_cleanup(self, cluster_name: str, timeout: int = 60): - """ - Verify that the cluster created by the RayJob has been cleaned up. 
- Args: - cluster_name: The name of the cluster to check for cleanup - timeout: Maximum time to wait for cleanup in seconds (default: 1 minute) - """ - from kubernetes import client - import kubernetes.client.rest + return False + def verify_cluster_cleanup(self, rayjob: RayJob, timeout: int = 60): + """Verify that the cluster created by the RayJob has been cleaned up.""" elapsed_time = 0 - check_interval = 5 # Check every 5 seconds + check_interval = 5 + cluster_api = RayClusterApi() while elapsed_time < timeout: try: - # Try to get the RayCluster resource - custom_api = client.CustomObjectsApi() - custom_api.get_namespaced_custom_object( - group="ray.io", - version="v1", - namespace=self.namespace, - plural="rayclusters", - name=cluster_name, + cluster_info = cluster_api.get_ray_cluster( + name=rayjob.cluster_name, k8s_namespace=rayjob.namespace ) - print(f"Cluster '{cluster_name}' still exists, waiting for cleanup...") + # Cluster doesn't exist + if cluster_info is None: + return + sleep(check_interval) elapsed_time += check_interval + except kubernetes.client.rest.ApiException as e: if e.status == 404: - print( - f"✅ Cluster '{cluster_name}' has been successfully cleaned up!" 
- ) return else: raise e - # If we reach here, the cluster was not cleaned up in time raise TimeoutError( - f"Cluster '{cluster_name}' was not cleaned up within {timeout} seconds" + f"Cluster '{rayjob.cluster_name}' was not cleaned up within {timeout} seconds" ) From 125b01079d7bb80db9f2f6f5ed7d1cc04baaeec9 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Fri, 5 Sep 2025 09:00:25 +0100 Subject: [PATCH 19/33] RHOAIENG-27792: rayjob test improvements --- poetry.lock | 47 ++- pyproject.toml | 6 +- src/codeflare_sdk/ray/rayjobs/test_config.py | 34 ++ src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 349 ++++++++++++++---- .../rayjob/lifecycled_cluster_oauth_test.py | 7 +- 5 files changed, 362 insertions(+), 81 deletions(-) diff --git a/poetry.lock b/poetry.lock index a81ccded..828285ee 100644 --- a/poetry.lock +++ b/poetry.lock @@ -557,6 +557,18 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "chardet" +version = "5.2.0" +description = "Universal encoding detector for Python 3" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + [[package]] name = "charset-normalizer" version = "3.4.2" @@ -905,6 +917,27 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "diff-cover" +version = "9.6.0" +description = "Run coverage and linting reports on diffs" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "diff_cover-9.6.0-py3-none-any.whl", hash = "sha256:29fbeb52d77a0b8c811e5580d5dbf41801a838da2ed54319a599da8f7233c547"}, + {file = "diff_cover-9.6.0.tar.gz", hash = "sha256:75e5bc056dcaa68c6c87c9fb4e07c9e60daef15b6e8d034d56d2da9e2c84a872"}, +] + 
+[package.dependencies] +chardet = ">=3.0.0" +Jinja2 = ">=2.7.1" +pluggy = ">=0.13.1,<2" +Pygments = ">=2.19.1,<3.0.0" + +[package.extras] +toml = ["tomli (>=1.2.1)"] + [[package]] name = "distlib" version = "0.3.9" @@ -1558,7 +1591,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" -groups = ["docs", "test"] +groups = ["dev", "docs", "test"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -1928,7 +1961,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" -groups = ["docs", "test"] +groups = ["dev", "docs", "test"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -2737,7 +2770,7 @@ version = "1.6.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.9" -groups = ["test"] +groups = ["dev", "test"] files = [ {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, @@ -3293,7 +3326,7 @@ version = "2.19.2" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.8" -groups = ["main", "docs", "test"] +groups = ["main", "dev", "docs", "test"] files = [ {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, @@ -3399,8 +3432,8 @@ kubernetes = ">=25.0.0" [package.source] type = "git" url = "https://github.com/ray-project/kuberay.git" -reference = "a16c0365e3b19a202d835097e1139eca9406b383" -resolved_reference = "a16c0365e3b19a202d835097e1139eca9406b383" +reference = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" +resolved_reference = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" subdirectory = "clients/python-client" [[package]] @@ -4790,4 +4823,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "4634ae18b5b5f4bda0d926f60cc1c64e927b5435cc04a360ee0996436e91edbe" +content-hash = "9a052e3a816450844fa2cf3427e4660715977aca3b14561d1e9991899624b7c2" diff --git a/pyproject.toml b/pyproject.toml index 1c160bf6..6d13354e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = ">= 2.10.6" ipywidgets = "8.1.2" -python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "a16c0365e3b19a202d835097e1139eca9406b383" } +python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" } [[tool.poetry.source]] name = "pypi" @@ -59,6 +59,10 @@ pytest-mock = "3.11.1" pytest-timeout = "2.3.1" jupyterlab = "4.3.1" + +[tool.poetry.group.dev.dependencies] +diff-cover = "^9.6.0" + [tool.pytest.ini_options] filterwarnings = [ "ignore::DeprecationWarning:pkg_resources", diff --git a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test_config.py 
index 82e9464f..d19864ba 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_config.py +++ b/src/codeflare_sdk/ray/rayjobs/test_config.py @@ -82,6 +82,40 @@ def test_gpu_validation_fails_with_unsupported_accelerator(): ManagedClusterConfig(head_accelerators={"unsupported.com/accelerator": 1}) +def test_config_type_validation_errors(mocker): + """Test that type validation properly raises errors with incorrect types.""" + # Mock the _is_type method to return False for type checking + mocker.patch.object( + ManagedClusterConfig, + "_is_type", + side_effect=lambda value, expected_type: False, # Always fail type check + ) + + # This should raise TypeError during initialization + with pytest.raises(TypeError, match="Type validation failed"): + ManagedClusterConfig() + + +def test_config_is_type_method(): + """Test the _is_type static method for type checking.""" + # Test basic types + assert ManagedClusterConfig._is_type("test", str) is True + assert ManagedClusterConfig._is_type(123, int) is True + assert ManagedClusterConfig._is_type(123, str) is False + + # Test optional types (Union with None) + from typing import Optional + + assert ManagedClusterConfig._is_type(None, Optional[str]) is True + assert ManagedClusterConfig._is_type("test", Optional[str]) is True + assert ManagedClusterConfig._is_type(123, Optional[str]) is False + + # Test dict types + assert ManagedClusterConfig._is_type({}, dict) is True + assert ManagedClusterConfig._is_type({"key": "value"}, dict) is True + assert ManagedClusterConfig._is_type([], dict) is False + + def test_ray_usage_stats_always_disabled_by_default(): """Test that RAY_USAGE_STATS_ENABLED is always set to '0' by default""" config = ManagedClusterConfig() diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 7c4823f8..54ad61dd 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -20,6 +20,13 @@ from 
codeflare_sdk.ray.rayjobs.rayjob import RayJob from codeflare_sdk.ray.cluster.config import ClusterConfiguration from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig +from kubernetes.client import ( + V1Volume, + V1VolumeMount, + V1Toleration, + V1ConfigMapVolumeSource, + ApiException, +) def test_rayjob_submit_success(mocker): @@ -274,8 +281,6 @@ def test_build_ray_cluster_spec(mocker): }, } # Use ManagedClusterConfig which has the build_ray_cluster_spec method - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig(num_workers=2) # Mock the method that will be called @@ -353,9 +358,6 @@ def test_build_rayjob_cr_with_auto_cluster(mocker): "workerGroupSpecs": [{"replicas": 2}], }, } - # Use ManagedClusterConfig and mock its build_ray_cluster_spec method - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig(num_workers=2) # Mock the method that will be called @@ -415,8 +417,6 @@ def test_submit_with_auto_cluster(mocker): mock_api_instance.submit_job.return_value = True # Use ManagedClusterConfig and mock its build_ray_cluster_spec method - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig(num_workers=1) # Mock the method that will be called @@ -504,8 +504,6 @@ def test_shutdown_behavior_with_cluster_config(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() rayjob = RayJob( @@ -540,8 +538,6 @@ def test_rayjob_with_rayjob_cluster_config(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig( num_workers=2, 
head_cpu_requests="500m", @@ -565,8 +561,6 @@ def test_rayjob_cluster_config_validation(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - # Test with minimal valid config cluster_config = ManagedClusterConfig() @@ -603,8 +597,6 @@ def test_build_ray_cluster_spec_integration(mocker): # Mock the RayjobApi class entirely mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() # Mock the build_ray_cluster_spec method on the cluster config @@ -686,8 +678,6 @@ def test_rayjob_cluster_name_generation_with_config(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() rayjob = RayJob( @@ -708,15 +698,11 @@ def test_rayjob_namespace_propagation_to_cluster_config(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - from codeflare_sdk.ray.rayjobs.rayjob import get_current_namespace - mocker.patch( "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", return_value="detected-ns", ) - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() rayjob = RayJob( @@ -767,8 +753,6 @@ def test_rayjob_constructor_parameter_validation(mocker): def test_build_ray_cluster_spec_function(mocker): """Test the build_ray_cluster_spec method directly.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - # Create a test cluster config cluster_config = ManagedClusterConfig( num_workers=2, @@ -806,8 +790,6 @@ def test_build_ray_cluster_spec_function(mocker): def 
test_build_ray_cluster_spec_with_accelerators(mocker): """Test build_ray_cluster_spec with GPU accelerators.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - # Create a test cluster config with GPU accelerators cluster_config = ManagedClusterConfig( head_accelerators={"nvidia.com/gpu": 1}, @@ -833,9 +815,6 @@ def test_build_ray_cluster_spec_with_accelerators(mocker): def test_build_ray_cluster_spec_with_custom_volumes(mocker): """Test build_ray_cluster_spec with custom volumes and volume mounts.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - from kubernetes.client import V1Volume, V1VolumeMount - # Create custom volumes and volume mounts custom_volume = V1Volume(name="custom-data", empty_dir={}) custom_volume_mount = V1VolumeMount(name="custom-data", mount_path="/data") @@ -863,8 +842,6 @@ def test_build_ray_cluster_spec_with_custom_volumes(mocker): def test_build_ray_cluster_spec_with_environment_variables(mocker): """Test build_ray_cluster_spec with environment variables.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - # Create a test cluster config with environment variables cluster_config = ManagedClusterConfig( envs={"CUDA_VISIBLE_DEVICES": "0", "RAY_DISABLE_IMPORT_WARNING": "1"}, @@ -895,9 +872,6 @@ def test_build_ray_cluster_spec_with_environment_variables(mocker): def test_build_ray_cluster_spec_with_tolerations(mocker): """Test build_ray_cluster_spec with tolerations.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - from kubernetes.client import V1Toleration - # Create test tolerations head_toleration = V1Toleration( key="node-role.kubernetes.io/master", operator="Exists", effect="NoSchedule" @@ -932,8 +906,6 @@ def test_build_ray_cluster_spec_with_tolerations(mocker): def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): """Test build_ray_cluster_spec with image pull secrets.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - 
# Create a test cluster config with image pull secrets cluster_config = ManagedClusterConfig( image_pull_secrets=["my-registry-secret", "another-secret"] @@ -981,8 +953,6 @@ def test_rayjob_user_override_shutdown_behavior(mocker): assert rayjob_existing_override.shutdown_after_job_finishes is True # Test 2: User overrides shutdown to False even when creating new cluster - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() rayjob_new_override = RayJob( @@ -1300,8 +1270,6 @@ def test_extract_script_files_nonexistent_script(mocker): def test_build_script_configmap_spec(): """Test building ConfigMap specification for scripts.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - config = ManagedClusterConfig() scripts = {"main.py": "print('main')", "helper.py": "print('helper')"} @@ -1318,8 +1286,6 @@ def test_build_script_configmap_spec(): def test_build_script_volume_specs(): """Test building volume and mount specifications for scripts.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - config = ManagedClusterConfig() volume_spec, mount_spec = config.build_script_volume_specs( @@ -1335,8 +1301,6 @@ def test_build_script_volume_specs(): def test_add_script_volumes(): """Test adding script volumes to cluster configuration.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - config = ManagedClusterConfig() # Initially no volumes @@ -1361,8 +1325,6 @@ def test_add_script_volumes(): def test_add_script_volumes_duplicate_prevention(): """Test that adding script volumes twice doesn't create duplicates.""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - config = ManagedClusterConfig() # Add volumes twice @@ -1416,8 +1378,6 @@ def test_create_configmap_already_exists(mocker): mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") - from kubernetes.client import ApiException - 
mock_api_instance.create_namespaced_config_map.side_effect = ApiException( status=409 ) @@ -1618,8 +1578,6 @@ def test_handle_script_volumes_for_new_cluster(mocker, tmp_path): test_script = tmp_path / "test.py" test_script.write_text("print('test')") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() original_cwd = os.getcwd() @@ -1742,8 +1700,6 @@ def track_handle_scripts(*args, **kwargs): try: os.chdir(tmp_path) - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() rayjob = RayJob( @@ -1786,8 +1742,6 @@ def test_rayjob_submit_with_scripts_new_cluster(mocker, tmp_path): test_script = tmp_path / "test.py" test_script.write_text("print('Hello from script!')") - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() original_cwd = os.getcwd() @@ -1942,8 +1896,6 @@ def capture_configmap(namespace, body): try: os.chdir(tmp_path) - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - cluster_config = ManagedClusterConfig() rayjob = RayJob( @@ -2037,8 +1989,6 @@ def test_create_configmap_api_error_non_409(mocker): mock_api_instance = mocker.Mock() mock_k8s_api.return_value = mock_api_instance - from kubernetes.client import ApiException - mock_api_instance.create_namespaced_config_map.side_effect = ApiException( status=500 ) @@ -2076,12 +2026,8 @@ def test_update_existing_cluster_get_cluster_error(mocker): mock_cluster_api_instance = mocker.Mock() mock_cluster_api_class.return_value = mock_cluster_api_instance - from kubernetes.client import ApiException - mock_cluster_api_instance.get_ray_cluster.side_effect = ApiException(status=404) - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - config_builder = ManagedClusterConfig() rayjob = RayJob( @@ -2126,12 +2072,8 @@ def test_update_existing_cluster_patch_error(mocker): } } - from kubernetes.client import ApiException - 
mock_cluster_api_instance.patch_ray_cluster.side_effect = ApiException(status=500) - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - config_builder = ManagedClusterConfig() rayjob = RayJob( @@ -2165,9 +2107,6 @@ def test_extract_script_files_empty_entrypoint(mocker): def test_add_script_volumes_existing_volume_skip(): """Test add_script_volumes skips when volume already exists (missing coverage).""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - from kubernetes.client import V1Volume, V1ConfigMapVolumeSource - config = ManagedClusterConfig() # Pre-add a volume with same name @@ -2187,9 +2126,6 @@ def test_add_script_volumes_existing_volume_skip(): def test_add_script_volumes_existing_mount_skip(): """Test add_script_volumes skips when mount already exists (missing coverage).""" - from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig - from kubernetes.client import V1VolumeMount - config = ManagedClusterConfig() # Pre-add a mount with same name @@ -2322,3 +2258,274 @@ def test_rayjob_resubmit_failure(mocker): mock_api_instance.resubmit_job.assert_called_once_with( name="test-rayjob", k8s_namespace="test-namespace" ) + + +def test_rayjob_delete_success(mocker): + """Test successful RayJob deletion.""" + # Mock the API + mocker.patch("kubernetes.config.load_kube_config") + mock_api_instance = mocker.MagicMock() + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + rayjob = RayJob( + job_name="test-rayjob", + entrypoint="python script.py", + cluster_name="test-cluster", + ) + + mock_api_instance.delete_job.return_value = True + + result = rayjob.delete() + + assert result is True + mock_api_instance.delete_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) + + +def test_rayjob_delete_failure(mocker): + """Test failed 
RayJob deletion.""" + mock_api_instance = mocker.MagicMock() + mocker.patch("kubernetes.config.load_kube_config") + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + rayjob = RayJob( + job_name="test-rayjob", + entrypoint="python script.py", + cluster_name="test-cluster", + ) + + mock_api_instance.delete_job.return_value = False + + with pytest.raises(RuntimeError, match="Failed to delete the RayJob test-rayjob"): + rayjob.delete() + + mock_api_instance.delete_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) + + +def test_rayjob_init_both_none_error(mocker): + """Test RayJob initialization error when both cluster_name and cluster_config are None.""" + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + with pytest.raises( + ValueError, + match="Configuration Error: You must provide either 'cluster_name' .* or 'cluster_config'", + ): + RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name=None, + cluster_config=None, + ) + + +def test_rayjob_init_missing_cluster_name_with_no_config(mocker): + """Test RayJob initialization error when cluster_name is None without cluster_config.""" + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + with pytest.raises( + ValueError, + match="Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'", + ): + rayjob = RayJob.__new__(RayJob) + rayjob.name = "test-job" + rayjob.entrypoint = "python script.py" + rayjob.runtime_env = None + rayjob.ttl_seconds_after_finished = 0 + rayjob.active_deadline_seconds = None + rayjob.shutdown_after_job_finishes = False + rayjob.namespace = "test-namespace" + rayjob._cluster_name = None + rayjob._cluster_config = None + if 
rayjob._cluster_config is None and rayjob._cluster_name is None: + raise ValueError( + "❌ Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'" + ) + + +def test_handle_script_volumes_for_existing_cluster_direct_call(mocker): + """Test _handle_script_volumes_for_existing_cluster method directly.""" + # Mock APIs + mock_api_instance = mocker.MagicMock() + mock_cluster_api = mocker.MagicMock() + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi", return_value=mock_cluster_api + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + # Mock the Kubernetes API for ConfigMap creation + mock_k8s_api = mocker.MagicMock() + mocker.patch("kubernetes.client.CoreV1Api", return_value=mock_k8s_api) + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client", return_value=None) + + # Mock existing cluster + mock_cluster = { + "spec": { + "headGroupSpec": { + "template": { + "spec": {"containers": [{"volumeMounts": []}], "volumes": []} + } + }, + "workerGroupSpecs": [ + { + "template": { + "spec": {"containers": [{"volumeMounts": []}], "volumes": []} + } + } + ], + } + } + mock_cluster_api.get_ray_cluster.return_value = mock_cluster + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="existing-cluster", + ) + + scripts = {"test_script.py": "print('Hello World')"} + rayjob._handle_script_volumes_for_existing_cluster( + scripts, {"metadata": {"uid": "test-uid"}} + ) + + mock_k8s_api.create_namespaced_config_map.assert_called_once() + created_configmap = mock_k8s_api.create_namespaced_config_map.call_args[1]["body"] + assert "test_script.py" in created_configmap.data + + mock_cluster_api.patch_ray_cluster.assert_called_once_with( + name="existing-cluster", ray_patch=mock_cluster, k8s_namespace="test-namespace" + ) + + +def 
test_handle_script_volumes_for_existing_cluster_no_volumes_init(mocker): + """Test _handle_script_volumes_for_existing_cluster when volumes/mounts don't exist initially.""" + mock_api_instance = mocker.MagicMock() + mock_cluster_api = mocker.MagicMock() + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi", return_value=mock_cluster_api + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + mock_k8s_api = mocker.MagicMock() + mocker.patch("kubernetes.client.CoreV1Api", return_value=mock_k8s_api) + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client", return_value=None) + + # Mock existing cluster WITHOUT volumes/volumeMounts (to test initialization) + mock_cluster = { + "spec": { + "headGroupSpec": {"template": {"spec": {"containers": [{}]}}}, + "workerGroupSpecs": [{"template": {"spec": {"containers": [{}]}}}], + } + } + mock_cluster_api.get_ray_cluster.return_value = mock_cluster + + # Create RayJob with existing cluster + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="existing-cluster", + ) + + # Call the method directly with test scripts + scripts = {"test_script.py": "print('Hello World')"} + rayjob._handle_script_volumes_for_existing_cluster( + scripts, {"metadata": {"uid": "test-uid"}} + ) + + # Verify volumes and volumeMounts were initialized + patched_cluster = mock_cluster_api.patch_ray_cluster.call_args[1]["ray_patch"] + + # Check head group + head_spec = patched_cluster["spec"]["headGroupSpec"]["template"]["spec"] + assert "volumes" in head_spec + assert len(head_spec["volumes"]) == 1 + assert "volumeMounts" in head_spec["containers"][0] + assert len(head_spec["containers"][0]["volumeMounts"]) == 1 + + # Check worker group + worker_spec = patched_cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"] + assert "volumes" in 
worker_spec + assert len(worker_spec["volumes"]) == 1 + assert "volumeMounts" in worker_spec["containers"][0] + assert len(worker_spec["containers"][0]["volumeMounts"]) == 1 + + +def test_update_existing_cluster_for_scripts_api_errors(mocker): + """Test _update_existing_cluster_for_scripts error handling.""" + mock_api_instance = mocker.MagicMock() + mock_cluster_api = mocker.MagicMock() + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi", return_value=mock_cluster_api + ) + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + # Mock config builder + mock_config_builder = mocker.MagicMock() + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.ManagedClusterConfig", + return_value=mock_config_builder, + ) + + # Set up config builder to return valid specs + mock_config_builder.build_script_volume_specs.return_value = ( + {"name": "script-volume", "configMap": {"name": "test-configmap"}}, + {"name": "script-volume", "mountPath": "/home/ray/scripts"}, + ) + + # Mock cluster API to raise error + mock_cluster_api.get_ray_cluster.side_effect = ApiException( + status=404, reason="Not Found" + ) + + # Create RayJob + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + cluster_name="existing-cluster", + ) + + # Call the method directly + with pytest.raises( + RuntimeError, match="Failed to get RayCluster 'existing-cluster'" + ): + rayjob._update_existing_cluster_for_scripts( + "test-configmap", mock_config_builder + ) diff --git a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py index 41dd5280..7db71441 100644 --- a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py +++ b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py @@ -7,7 +7,10 @@ from support import * from codeflare_sdk import RayJob, ManagedClusterConfig -from 
codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus +from codeflare_sdk.ray.rayjobs.status import ( + CodeflareRayJobStatus, + RayJobDeploymentStatus, +) import kubernetes.client.rest from python_client.kuberay_job_api import RayjobApi from python_client.kuberay_cluster_api import RayClusterApi @@ -64,7 +67,7 @@ def test_rayjob_with_lifecycled_cluster_oauth(self): # 1. Submit and wait for job to reach running state assert rayjob.submit() == job_name assert self.job_api.wait_until_job_running( - name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=60 + name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=300 ), "Job did not reach running state" # 2. Stop (suspend) the job and From e8fc76b16c2b52812735d28776b6e3614bc69cbc Mon Sep 17 00:00:00 2001 From: Laura Fitzgerald Date: Mon, 6 Oct 2025 19:07:08 +0100 Subject: [PATCH 20/33] added codecov permissions --- .github/workflows/coverage-badge.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/coverage-badge.yaml b/.github/workflows/coverage-badge.yaml index 2c3b40fa..05084d31 100644 --- a/.github/workflows/coverage-badge.yaml +++ b/.github/workflows/coverage-badge.yaml @@ -9,6 +9,9 @@ on: jobs: report: + permissions: + contents: write + pull-requests: write runs-on: ubuntu-latest steps: From 0c4382d54c3e3ed8c2ea2516d1b296ea004e5169 Mon Sep 17 00:00:00 2001 From: Bryan Keane Date: Tue, 29 Jul 2025 17:24:45 +0100 Subject: [PATCH 21/33] feat(RHOAIENG-26480): Run RayJobs against existing RayClusters --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 828285ee..aa315f21 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" From eb871e25c6a32c6175567af626452c0050f1cc27 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Thu, 2 Oct 2025 12:53:48 +0100 Subject: [PATCH 22/33] RHOAIENG-32532: Add kueue integration and update tests --- .github/workflows/e2e_tests.yaml | 34 +- .github/workflows/rayjob_e2e_tests.yaml | 172 +++ src/codeflare_sdk/common/utils/k8s_utils.py | 6 +- .../ray/cluster/build_ray_cluster.py | 1 - src/codeflare_sdk/ray/cluster/cluster.py | 18 +- src/codeflare_sdk/ray/cluster/test_config.py | 2 +- src/codeflare_sdk/ray/rayjobs/config.py | 17 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 50 +- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 1057 ++++++----------- src/codeflare_sdk/ray/rayjobs/test_status.py | 69 ++ tests/e2e/cluster_apply_kind_test.py | 95 +- .../e2e/rayjob/existing_cluster_oauth_test.py | 139 --- .../rayjob/lifecycled_cluster_oauth_test.py | 148 --- .../ray_version_validation_oauth_test.py | 18 - .../rayjob/rayjob_existing_cluster_test.py | 111 ++ .../rayjob/rayjob_lifecycled_cluster_test.py | 163 +++ tests/e2e/support.py | 329 ++++- .../appwrapper/unit-test-all-params.yaml | 1 - tests/test_cluster_yamls/kueue/aw_kueue.yaml | 1 - .../kueue/ray_cluster_kueue.yaml | 1 - .../ray/default-appwrapper.yaml | 1 - .../ray/default-ray-cluster.yaml | 1 - .../ray/unit-test-all-params.yaml | 1 - 23 files changed, 1301 insertions(+), 1134 deletions(-) create mode 100644 .github/workflows/rayjob_e2e_tests.yaml delete mode 100644 tests/e2e/rayjob/existing_cluster_oauth_test.py delete mode 100644 tests/e2e/rayjob/lifecycled_cluster_oauth_test.py create mode 100644 tests/e2e/rayjob/rayjob_existing_cluster_test.py create mode 100644 tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index ba59a9e1..44bf1214 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -5,13 +5,13 @@ on: pull_request: branches: - main - - 'release-*' + - 
"release-*" - ray-jobs-feature paths-ignore: - - 'docs/**' - - '**.adoc' - - '**.md' - - 'LICENSE' + - "docs/**" + - "**.adoc" + - "**.md" + - "LICENSE" concurrency: group: ${{ github.head_ref }}-${{ github.workflow }} @@ -33,9 +33,9 @@ jobs: - name: Checkout common repo code uses: actions/checkout@v4 with: - repository: 'project-codeflare/codeflare-common' - ref: 'main' - path: 'common' + repository: "project-codeflare/codeflare-common" + ref: "main" + path: "common" - name: Checkout CodeFlare operator repository uses: actions/checkout@v4 @@ -46,7 +46,7 @@ jobs: - name: Set Go uses: actions/setup-go@v5 with: - go-version-file: './codeflare-operator/go.mod' + go-version-file: "./codeflare-operator/go.mod" cache-dependency-path: "./codeflare-operator/go.sum" - name: Set up gotestfmt @@ -76,7 +76,7 @@ jobs: run: | cd codeflare-operator echo Setting up CodeFlare stack - make setup-e2e + make setup-e2e KUEUE_VERSION=v0.13.4 KUBERAY_VERSION=v1.4.0 echo Deploying CodeFlare operator make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e" kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager @@ -95,6 +95,10 @@ jobs: kubectl create clusterrolebinding sdk-user-namespace-creator --clusterrole=namespace-creator --user=sdk-user kubectl create clusterrole raycluster-creator --verb=get,list,create,delete,patch --resource=rayclusters kubectl create clusterrolebinding sdk-user-raycluster-creator --clusterrole=raycluster-creator --user=sdk-user + kubectl create clusterrole rayjob-creator --verb=get,list,create,delete,patch --resource=rayjobs + kubectl create clusterrolebinding sdk-user-rayjob-creator --clusterrole=rayjob-creator --user=sdk-user + kubectl create clusterrole rayjob-status-reader --verb=get,list,patch,update --resource=rayjobs/status + kubectl create clusterrolebinding sdk-user-rayjob-status-reader --clusterrole=rayjob-status-reader --user=sdk-user kubectl create clusterrole appwrapper-creator 
--verb=get,list,create,delete,patch --resource=appwrappers kubectl create clusterrolebinding sdk-user-appwrapper-creator --clusterrole=appwrapper-creator --user=sdk-user kubectl create clusterrole resourceflavor-creator --verb=get,list,create,delete --resource=resourceflavors @@ -122,7 +126,7 @@ jobs: pip install poetry poetry install --with test,docs echo "Running e2e tests..." - poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1 + poetry run pytest -v -s ./tests/e2e/ -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1 env: GRPC_DNS_RESOLVER: "native" @@ -146,7 +150,13 @@ jobs: if: always() && steps.deploy.outcome == 'success' run: | echo "Printing KubeRay operator logs" - kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log + kubectl logs -n default --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log + + - name: Print Kueue controller logs + if: always() && steps.deploy.outcome == 'success' + run: | + echo "Printing Kueue controller logs" + kubectl logs -n kueue-system --tail -1 -l control-plane=controller-manager | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kueue.log - name: Export all KinD pod logs uses: ./common/github-actions/kind-export-logs diff --git a/.github/workflows/rayjob_e2e_tests.yaml b/.github/workflows/rayjob_e2e_tests.yaml new file mode 100644 index 00000000..c4856fd3 --- /dev/null +++ b/.github/workflows/rayjob_e2e_tests.yaml @@ -0,0 +1,172 @@ +# rayjob e2e tests workflow for CodeFlare-SDK +name: rayjob-e2e + +on: + pull_request: + branches: + - main + - "release-*" + - ray-jobs-feature + paths-ignore: + - "docs/**" + - "**.adoc" + - "**.md" + - "LICENSE" + +concurrency: + group: ${{ github.head_ref }}-${{ github.workflow }} + cancel-in-progress: true + +env: + CODEFLARE_OPERATOR_IMG: "quay.io/project-codeflare/codeflare-operator:dev" + +jobs: + kubernetes-rayjob: 
+ runs-on: gpu-t4-4-core + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Checkout common repo code + uses: actions/checkout@v4 + with: + repository: "project-codeflare/codeflare-common" + ref: "main" + path: "common" + + - name: Checkout CodeFlare operator repository + uses: actions/checkout@v4 + with: + repository: project-codeflare/codeflare-operator + path: codeflare-operator + + - name: Set Go + uses: actions/setup-go@v5 + with: + go-version-file: "./codeflare-operator/go.mod" + cache-dependency-path: "./codeflare-operator/go.sum" + + - name: Set up gotestfmt + uses: gotesttools/gotestfmt-action@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up specific Python version + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" # caching pip dependencies + + - name: Setup NVidia GPU environment for KinD + uses: ./common/github-actions/nvidia-gpu-setup + + - name: Setup and start KinD cluster + uses: ./common/github-actions/kind + with: + worker-nodes: 1 + + - name: Install NVidia GPU operator for KinD + uses: ./common/github-actions/nvidia-gpu-operator + + - name: Deploy CodeFlare stack + id: deploy + run: | + cd codeflare-operator + echo Setting up CodeFlare stack + make setup-e2e KUEUE_VERSION=v0.13.4 KUBERAY_VERSION=v1.4.0 + echo Deploying CodeFlare operator + make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e" + kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager + cd .. 
+ + - name: Add user to KinD + uses: ./common/github-actions/kind-add-user + with: + user-name: sdk-user + + - name: Configure RBAC for sdk user with limited permissions + run: | + kubectl create clusterrole list-ingresses --verb=get,list --resource=ingresses + kubectl create clusterrolebinding sdk-user-list-ingresses --clusterrole=list-ingresses --user=sdk-user + kubectl create clusterrole namespace-creator --verb=get,list,create,delete,patch --resource=namespaces + kubectl create clusterrolebinding sdk-user-namespace-creator --clusterrole=namespace-creator --user=sdk-user + kubectl create clusterrole raycluster-creator --verb=get,list,create,delete,patch --resource=rayclusters + kubectl create clusterrolebinding sdk-user-raycluster-creator --clusterrole=raycluster-creator --user=sdk-user + kubectl create clusterrole rayjob-creator --verb=get,list,create,delete,patch --resource=rayjobs + kubectl create clusterrolebinding sdk-user-rayjob-creator --clusterrole=rayjob-creator --user=sdk-user + kubectl create clusterrole rayjob-status-reader --verb=get,list,patch,update --resource=rayjobs/status + kubectl create clusterrolebinding sdk-user-rayjob-status-reader --clusterrole=rayjob-status-reader --user=sdk-user + kubectl create clusterrole appwrapper-creator --verb=get,list,create,delete,patch --resource=appwrappers + kubectl create clusterrolebinding sdk-user-appwrapper-creator --clusterrole=appwrapper-creator --user=sdk-user + kubectl create clusterrole resourceflavor-creator --verb=get,list,create,delete --resource=resourceflavors + kubectl create clusterrolebinding sdk-user-resourceflavor-creator --clusterrole=resourceflavor-creator --user=sdk-user + kubectl create clusterrole clusterqueue-creator --verb=get,list,create,delete,patch --resource=clusterqueues + kubectl create clusterrolebinding sdk-user-clusterqueue-creator --clusterrole=clusterqueue-creator --user=sdk-user + kubectl create clusterrole localqueue-creator --verb=get,list,create,delete,patch 
--resource=localqueues + kubectl create clusterrolebinding sdk-user-localqueue-creator --clusterrole=localqueue-creator --user=sdk-user + kubectl create clusterrole list-secrets --verb=get,list --resource=secrets + kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user + kubectl create clusterrole pod-creator --verb=get,list,watch --resource=pods + kubectl create clusterrolebinding sdk-user-pod-creator --clusterrole=pod-creator --user=sdk-user + kubectl create clusterrole service-reader --verb=get,list,watch --resource=services + kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user + kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward + kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user + kubectl config use-context sdk-user + + - name: Run RayJob E2E tests + run: | + export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }} + echo "CODEFLARE_TEST_OUTPUT_DIR=${CODEFLARE_TEST_OUTPUT_DIR}" >> $GITHUB_ENV + + set -euo pipefail + pip install poetry + poetry install --with test,docs + echo "Running RayJob e2e tests..." 
+ poetry run pytest -v -s ./tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1 + + - name: Switch to kind-cluster context to print logs + if: always() && steps.deploy.outcome == 'success' + run: kubectl config use-context kind-cluster + + - name: Print Pytest output log + if: always() && steps.deploy.outcome == 'success' + run: | + echo "Printing Pytest output logs" + cat ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log + + - name: Print CodeFlare operator logs + if: always() && steps.deploy.outcome == 'success' + run: | + echo "Printing CodeFlare operator logs" + kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${CODEFLARE_TEST_OUTPUT_DIR}/codeflare-operator.log + + - name: Print KubeRay operator logs + if: always() && steps.deploy.outcome == 'success' + run: | + echo "Printing KubeRay operator logs" + kubectl logs -n default --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log + + - name: Print Kueue controller logs + if: always() && steps.deploy.outcome == 'success' + run: | + echo "Printing Kueue controller logs" + kubectl logs -n kueue-system --tail -1 -l control-plane=controller-manager | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kueue.log + + - name: Export all KinD pod logs + uses: ./common/github-actions/kind-export-logs + if: always() && steps.deploy.outcome == 'success' + with: + output-directory: ${CODEFLARE_TEST_OUTPUT_DIR} + + - name: Upload logs + uses: actions/upload-artifact@v4 + if: always() && steps.deploy.outcome == 'success' + with: + name: logs + retention-days: 10 + path: | + ${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log diff --git a/src/codeflare_sdk/common/utils/k8s_utils.py b/src/codeflare_sdk/common/utils/k8s_utils.py index 57eccf2d..e2e03a5d 100644 --- a/src/codeflare_sdk/common/utils/k8s_utils.py +++ b/src/codeflare_sdk/common/utils/k8s_utils.py @@ -7,14 +7,10 @@ from ..kubernetes_cluster import 
config_check, _kube_api_error_handling -def get_current_namespace(): +def get_current_namespace(): # pragma: no cover """ Retrieves the current Kubernetes namespace. - This function attempts to detect the current namespace by: - 1. First checking if running inside a pod (reading from service account namespace file) - 2. Falling back to reading from the current kubeconfig context - Returns: str: The current namespace or None if not found. diff --git a/src/codeflare_sdk/ray/cluster/build_ray_cluster.py b/src/codeflare_sdk/ray/cluster/build_ray_cluster.py index 936f3275..6a3984b1 100644 --- a/src/codeflare_sdk/ray/cluster/build_ray_cluster.py +++ b/src/codeflare_sdk/ray/cluster/build_ray_cluster.py @@ -133,7 +133,6 @@ def build_ray_cluster(cluster: "codeflare_sdk.ray.cluster.Cluster"): "enableIngress": False, "rayStartParams": { "dashboard-host": "0.0.0.0", - "dashboard-port": "8265", "block": "true", "num-gpus": str(head_gpu_count), "resources": head_resources, diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index 037f6bdf..9509a8be 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -208,6 +208,10 @@ def apply(self, force=False): self._throw_for_no_raycluster() namespace = self.config.namespace name = self.config.name + + # Regenerate resource_yaml to reflect any configuration changes + self.resource_yaml = self.create_resource() + try: self.config_check() api_instance = client.CustomObjectsApi(get_api_client()) @@ -389,9 +393,14 @@ def is_dashboard_ready(self) -> bool: bool: True if the dashboard is ready, False otherwise. 
""" + + dashboard_uri = self.cluster_dashboard_uri() + if dashboard_uri is None: + return False + try: response = requests.get( - self.cluster_dashboard_uri(), + dashboard_uri, headers=self._client_headers, timeout=5, verify=self._client_verify_tls, @@ -399,6 +408,10 @@ def is_dashboard_ready(self) -> bool: except requests.exceptions.SSLError: # pragma no cover # SSL exception occurs when oauth ingress has been created but cluster is not up return False + except Exception: # pragma no cover + # Any other exception (connection errors, timeouts, etc.) + return False + if response.status_code == 200: return True else: @@ -506,6 +519,8 @@ def cluster_dashboard_uri(self) -> str: ): protocol = "https" if route["spec"].get("tls") else "http" return f"{protocol}://{route['spec']['host']}" + # No route found for this cluster + return "Dashboard not available yet, have you run cluster.up()?" else: try: api_instance = client.NetworkingV1Api(get_api_client()) @@ -785,6 +800,7 @@ def remove_autogenerated_fields(resource): del resource[key] else: remove_autogenerated_fields(resource[key]) + elif isinstance(resource, list): for item in resource: remove_autogenerated_fields(item) diff --git a/src/codeflare_sdk/ray/cluster/test_config.py b/src/codeflare_sdk/ray/cluster/test_config.py index 9f880df7..e405bc5b 100644 --- a/src/codeflare_sdk/ray/cluster/test_config.py +++ b/src/codeflare_sdk/ray/cluster/test_config.py @@ -1,4 +1,4 @@ -# Copyright 2022-2025 IBM, Red Hat +# Copyright 2024 IBM, Red Hat # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index fd8e199f..02ced875 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -133,10 +133,8 @@ class ManagedClusterConfig: accelerator_configs: A dictionary of custom resource mappings to map extended resource requests to RayCluster resource names. Defaults to DEFAULT_ACCELERATORS but can be overridden with custom mappings. - local_queue: - The name of the queue to use for the cluster. annotations: - A dictionary of annotations to apply to the cluster. + A dictionary of annotations to apply to the Job. volumes: A list of V1Volume objects to add to the Cluster volume_mounts: @@ -163,7 +161,6 @@ class ManagedClusterConfig: accelerator_configs: Dict[str, str] = field( default_factory=lambda: DEFAULT_ACCELERATORS.copy() ) - local_queue: Optional[str] = None annotations: Dict[str, str] = field(default_factory=dict) volumes: list[V1Volume] = field(default_factory=list) volume_mounts: list[V1VolumeMount] = field(default_factory=list) @@ -250,7 +247,6 @@ def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]: """ ray_cluster_spec = { "rayVersion": RAY_VERSION, - "enableInTreeAutoscaling": False, "headGroupSpec": self._build_head_group_spec(), "workerGroupSpecs": [self._build_worker_group_spec(cluster_name)], } @@ -290,7 +286,6 @@ def _build_head_ray_params(self) -> Dict[str, str]: """Build Ray start parameters for head node.""" params = { "dashboard-host": "0.0.0.0", - "dashboard-port": "8265", "block": "true", } @@ -348,12 +343,9 @@ def _build_head_container(self) -> V1Container: self.head_accelerators, ), volume_mounts=self._generate_volume_mounts(), + env=self._build_env_vars() if hasattr(self, "envs") and self.envs else None, ) - # Add environment variables if specified - if hasattr(self, "envs") and self.envs: - container.env = self._build_env_vars() - return container def _build_worker_container(self) -> 
V1Container: @@ -375,12 +367,9 @@ def _build_worker_container(self) -> V1Container: self.worker_accelerators, ), volume_mounts=self._generate_volume_mounts(), + env=self._build_env_vars() if hasattr(self, "envs") and self.envs else None, ) - # Add environment variables if specified - if hasattr(self, "envs") and self.envs: - container.env = self._build_env_vars() - return container def _build_resource_requirements( diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 49ccafcb..228f9bb0 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -22,6 +22,7 @@ import re import ast from typing import Dict, Any, Optional, Tuple +from codeflare_sdk.common.kueue.kueue import get_default_kueue_name from codeflare_sdk.common.utils.constants import MOUNT_PATH from kubernetes import client from ...common.kubernetes_cluster.auth import get_api_client @@ -59,9 +60,9 @@ def __init__( cluster_config: Optional[ManagedClusterConfig] = None, namespace: Optional[str] = None, runtime_env: Optional[Dict[str, Any]] = None, - shutdown_after_job_finishes: Optional[bool] = None, ttl_seconds_after_finished: int = 0, active_deadline_seconds: Optional[int] = None, + local_queue: Optional[str] = None, ): """ Initialize a RayJob instance. 
@@ -73,12 +74,11 @@ def __init__( cluster_config: Configuration for creating a new cluster (optional if cluster_name provided) namespace: The Kubernetes namespace (auto-detected if not specified) runtime_env: Ray runtime environment configuration (optional) - shutdown_after_job_finishes: Whether to shut down cluster after job finishes (optional) ttl_seconds_after_finished: Seconds to wait before cleanup after job finishes (default: 0) active_deadline_seconds: Maximum time the job can run before being terminated (optional) + local_queue: The Kueue LocalQueue to submit the job to (optional) Note: - shutdown_after_job_finishes is automatically detected but can be overridden: - True if cluster_config is provided (new cluster will be cleaned up) - False if cluster_name is provided (existing cluster will not be shut down) - User can explicitly set this value to override auto-detection @@ -108,17 +108,7 @@ def __init__( self.runtime_env = runtime_env self.ttl_seconds_after_finished = ttl_seconds_after_finished self.active_deadline_seconds = active_deadline_seconds - - # Auto-set shutdown_after_job_finishes based on cluster_config presence - # If cluster_config is provided, we want to clean up the cluster after job finishes - # If using existing cluster, we don't want to shut it down - # User can override this behavior by explicitly setting shutdown_after_job_finishes - if shutdown_after_job_finishes is not None: - self.shutdown_after_job_finishes = shutdown_after_job_finishes - elif cluster_config is not None: - self.shutdown_after_job_finishes = True - else: - self.shutdown_after_job_finishes = False + self.local_queue = local_queue if namespace is None: detected_namespace = get_current_namespace() @@ -177,10 +167,6 @@ def submit(self) -> str: if scripts: self._handle_script_volumes_for_existing_cluster(scripts, result) - if self.shutdown_after_job_finishes: - logger.info( - f"Cluster will be automatically cleaned up {self.ttl_seconds_after_finished}s after job 
completion" - ) return self.name else: raise RuntimeError(f"Failed to submit RayJob {self.name}") @@ -230,11 +216,37 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: }, "spec": { "entrypoint": self.entrypoint, - "shutdownAfterJobFinishes": self.shutdown_after_job_finishes, "ttlSecondsAfterFinished": self.ttl_seconds_after_finished, + "shutdownAfterJobFinishes": self._cluster_config is not None, }, } + labels = {} + # If cluster_config is provided, use the local_queue from the cluster_config + if self._cluster_config is not None: + if self.local_queue: + labels["kueue.x-k8s.io/queue-name"] = self.local_queue + else: + default_queue = get_default_kueue_name(self.namespace) + if default_queue: + labels["kueue.x-k8s.io/queue-name"] = default_queue + else: + # No default queue found, use "default" as fallback + labels["kueue.x-k8s.io/queue-name"] = "default" + logger.warning( + f"No default Kueue LocalQueue found in namespace '{self.namespace}'. " + f"Using 'default' as the queue name. If a LocalQueue named 'default' " + f"does not exist, the RayJob submission will fail. " + f"To fix this, please explicitly specify the 'local_queue' parameter." 
+ ) + + rayjob_cr["metadata"]["labels"] = labels + + # When using Kueue (queue label present), start with suspend=true + # Kueue will unsuspend the job once the workload is admitted + if labels.get("kueue.x-k8s.io/queue-name"): + rayjob_cr["spec"]["suspend"] = True + # Add active deadline if specified if self.active_deadline_seconds: rayjob_cr["spec"]["activeDeadlineSeconds"] = self.active_deadline_seconds diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 54ad61dd..829265d6 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -29,23 +29,52 @@ ) -def test_rayjob_submit_success(mocker): - """Test successful RayJob submission.""" - # Mock kubernetes config loading +# Global test setup that runs automatically for ALL tests +@pytest.fixture(autouse=True) +def auto_mock_setup(mocker): + """Automatically mock common dependencies for all tests.""" mocker.patch("kubernetes.config.load_kube_config") - # Mock the RayjobApi class entirely - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + # Always mock get_default_kueue_name to prevent K8s API calls + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", + return_value="default-queue", + ) + + mock_get_ns = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + mock_rayjob_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_rayjob_instance = MagicMock() + mock_rayjob_api.return_value = mock_rayjob_instance - # Mock the RayClusterApi class - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_cluster_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_cluster_instance = MagicMock() + mock_cluster_api.return_value = mock_cluster_instance + + mock_k8s_api = 
mocker.patch("kubernetes.client.CoreV1Api") + mock_k8s_instance = MagicMock() + mock_k8s_api.return_value = mock_k8s_instance + + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + + # Return the mocked instances so tests can configure them as needed + return { + "rayjob_api": mock_rayjob_instance, + "cluster_api": mock_cluster_instance, + "k8s_api": mock_k8s_instance, + "get_current_namespace": mock_get_ns, + } + + +def test_rayjob_submit_success(auto_mock_setup): + """Test successful RayJob submission.""" + mock_api_instance = auto_mock_setup["rayjob_api"] - # Configure the mock to return success when submit is called mock_api_instance.submit.return_value = {"metadata": {"name": "test-rayjob"}} - # Create RayJob instance rayjob = RayJob( job_name="test-rayjob", cluster_name="test-ray-cluster", @@ -54,20 +83,15 @@ def test_rayjob_submit_success(mocker): runtime_env={"pip": ["requests"]}, ) - # Submit the job job_id = rayjob.submit() - # Assertions assert job_id == "test-rayjob" - # Verify the API was called with correct parameters mock_api_instance.submit_job.assert_called_once() call_args = mock_api_instance.submit_job.call_args - # Check the namespace parameter assert call_args.kwargs["k8s_namespace"] == "test-namespace" - # Check the job custom resource job_cr = call_args.kwargs["job"] assert job_cr["metadata"]["name"] == "test-rayjob" assert job_cr["metadata"]["namespace"] == "test-namespace" @@ -76,23 +100,12 @@ def test_rayjob_submit_success(mocker): assert job_cr["spec"]["runtimeEnvYAML"] == "{'pip': ['requests']}" -def test_rayjob_submit_failure(mocker): +def test_rayjob_submit_failure(auto_mock_setup): """Test RayJob submission failure.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") + mock_api_instance = auto_mock_setup["rayjob_api"] - # Mock the RayjobApi class entirely - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - 
mock_api_class.return_value = mock_api_instance - - # Mock the RayClusterApi class - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - # Configure the mock to return failure (False/None) when submit_job is called mock_api_instance.submit_job.return_value = None - # Create a RayJob instance rayjob = RayJob( job_name="test-rayjob", cluster_name="test-ray-cluster", @@ -101,19 +114,12 @@ def test_rayjob_submit_failure(mocker): runtime_env={"pip": ["numpy"]}, ) - # Test that RuntimeError is raised on failure with pytest.raises(RuntimeError, match="Failed to submit RayJob test-rayjob"): rayjob.submit() -def test_rayjob_init_validation_both_provided(mocker): +def test_rayjob_init_validation_both_provided(auto_mock_setup): """Test that providing both cluster_name and cluster_config raises error.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - cluster_config = ClusterConfiguration(name="test-cluster", namespace="test") with pytest.raises( @@ -128,14 +134,8 @@ def test_rayjob_init_validation_both_provided(mocker): ) -def test_rayjob_init_validation_neither_provided(mocker): +def test_rayjob_init_validation_neither_provided(auto_mock_setup): """Test that providing neither cluster_name nor cluster_config raises error.""" - # Mock kubernetes config loading (though this should fail before reaching it) - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely (though this should fail before reaching it) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - with pytest.raises( ValueError, match="❌ Configuration Error: You must provide either 'cluster_name'", @@ -143,14 +143,8 @@ def test_rayjob_init_validation_neither_provided(mocker): RayJob(job_name="test-job", entrypoint="python script.py") -def test_rayjob_init_with_cluster_config(mocker): +def 
test_rayjob_init_with_cluster_config(auto_mock_setup): """Test RayJob initialization with cluster configuration for auto-creation.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - cluster_config = ClusterConfiguration( name="auto-cluster", namespace="test-namespace", num_workers=2 ) @@ -168,14 +162,8 @@ def test_rayjob_init_with_cluster_config(mocker): assert rayjob._cluster_name is None -def test_rayjob_cluster_name_generation(mocker): +def test_rayjob_cluster_name_generation(auto_mock_setup): """Test that cluster names are generated when config has empty name.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - cluster_config = ClusterConfiguration( name="", # Empty name should trigger generation namespace="test-namespace", @@ -192,14 +180,8 @@ def test_rayjob_cluster_name_generation(mocker): assert rayjob.cluster_name == "my-job-cluster" -def test_rayjob_cluster_config_namespace_none(mocker): +def test_rayjob_cluster_config_namespace_none(auto_mock_setup): """Test that cluster config namespace is set when None.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - cluster_config = ClusterConfiguration( name="test-cluster", namespace=None, # This should be set to job namespace @@ -216,14 +198,8 @@ def test_rayjob_cluster_config_namespace_none(mocker): assert rayjob.namespace == "job-namespace" -def test_rayjob_with_active_deadline_seconds(mocker): +def test_rayjob_with_active_deadline_seconds(auto_mock_setup): """Test RayJob CR generation with active deadline seconds.""" - # Mock kubernetes config loading - 
mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - rayjob = RayJob( job_name="test-job", cluster_name="test-cluster", @@ -237,15 +213,8 @@ def test_rayjob_with_active_deadline_seconds(mocker): assert rayjob_cr["spec"]["activeDeadlineSeconds"] == 30 -def test_build_ray_cluster_spec_no_config_error(mocker): +def test_build_ray_cluster_spec_no_config_error(auto_mock_setup): """Test _build_ray_cluster_spec raises error when no cluster config.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Create RayJob with cluster_name (no cluster_config) rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -253,22 +222,14 @@ def test_build_ray_cluster_spec_no_config_error(mocker): namespace="test-namespace", ) - # Since we removed _build_ray_cluster_spec method, this test is no longer applicable - # The method is now called internally by _build_rayjob_cr when needed - # We can test this by calling _build_rayjob_cr instead rayjob_cr = rayjob._build_rayjob_cr() - # Should use clusterSelector for existing cluster assert rayjob_cr["spec"]["clusterSelector"]["ray.io/cluster"] == "existing-cluster" assert "rayClusterSpec" not in rayjob_cr["spec"] -def test_build_ray_cluster_spec(mocker): +def test_build_ray_cluster_spec(mocker, auto_mock_setup): """Test _build_ray_cluster_spec method.""" - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") mock_ray_cluster = { "apiVersion": "ray.io/v1", @@ -280,10 +241,7 @@ def test_build_ray_cluster_spec(mocker): "workerGroupSpecs": [{"replicas": 2}], }, } - # Use ManagedClusterConfig which has the build_ray_cluster_spec method cluster_config = ManagedClusterConfig(num_workers=2) - - # 
Mock the method that will be called mocker.patch.object( cluster_config, "build_ray_cluster_spec", return_value=mock_ray_cluster["spec"] ) @@ -295,24 +253,16 @@ def test_build_ray_cluster_spec(mocker): namespace="test-namespace", ) - # Test the integration through _build_rayjob_cr rayjob_cr = rayjob._build_rayjob_cr() - # Should have rayClusterSpec assert "rayClusterSpec" in rayjob_cr["spec"] - - # Verify build_ray_cluster_spec was called on the cluster config cluster_config.build_ray_cluster_spec.assert_called_once_with( cluster_name="test-job-cluster" ) -def test_build_rayjob_cr_with_existing_cluster(mocker): +def test_build_rayjob_cr_with_existing_cluster(auto_mock_setup): """Test _build_rayjob_cr method with existing cluster.""" - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") rayjob = RayJob( job_name="test-job", @@ -324,30 +274,20 @@ def test_build_rayjob_cr_with_existing_cluster(mocker): rayjob_cr = rayjob._build_rayjob_cr() - # Check basic structure assert rayjob_cr["apiVersion"] == "ray.io/v1" assert rayjob_cr["kind"] == "RayJob" assert rayjob_cr["metadata"]["name"] == "test-job" - - # Check lifecycle parameters spec = rayjob_cr["spec"] assert spec["entrypoint"] == "python main.py" - # shutdownAfterJobFinishes should be False when using existing cluster (auto-set) assert spec["shutdownAfterJobFinishes"] is False assert spec["ttlSecondsAfterFinished"] == 300 - # Should use clusterSelector for existing cluster assert spec["clusterSelector"]["ray.io/cluster"] == "existing-cluster" assert "rayClusterSpec" not in spec -def test_build_rayjob_cr_with_auto_cluster(mocker): +def test_build_rayjob_cr_with_auto_cluster(mocker, auto_mock_setup): """Test _build_rayjob_cr method with auto-created cluster.""" - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - 
mock_ray_cluster = { "apiVersion": "ray.io/v1", "kind": "RayCluster", @@ -360,7 +300,6 @@ def test_build_rayjob_cr_with_auto_cluster(mocker): } cluster_config = ManagedClusterConfig(num_workers=2) - # Mock the method that will be called mocker.patch.object( cluster_config, "build_ray_cluster_spec", return_value=mock_ray_cluster["spec"] ) @@ -373,17 +312,12 @@ def test_build_rayjob_cr_with_auto_cluster(mocker): ) rayjob_cr = rayjob._build_rayjob_cr() - - # Should use rayClusterSpec for auto-created cluster assert rayjob_cr["spec"]["rayClusterSpec"] == mock_ray_cluster["spec"] assert "clusterSelector" not in rayjob_cr["spec"] -def test_submit_validation_no_entrypoint(mocker): +def test_submit_validation_no_entrypoint(auto_mock_setup): """Test that submit() raises error when entrypoint is None.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - rayjob = RayJob( job_name="test-job", cluster_name="test-cluster", @@ -397,9 +331,9 @@ def test_submit_validation_no_entrypoint(mocker): rayjob.submit() -def test_submit_with_auto_cluster(mocker): +def test_submit_with_auto_cluster(mocker, auto_mock_setup): """Test successful submission with auto-created cluster.""" - mocker.patch("kubernetes.config.load_kube_config") + mock_api_instance = auto_mock_setup["rayjob_api"] mock_ray_cluster = { "apiVersion": "ray.io/v1", @@ -410,16 +344,9 @@ def test_submit_with_auto_cluster(mocker): "workerGroupSpecs": [{"replicas": 1}], }, } - # Mock the RayjobApi - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance mock_api_instance.submit_job.return_value = True - # Use ManagedClusterConfig and mock its build_ray_cluster_spec method cluster_config = ManagedClusterConfig(num_workers=1) - - # Mock the method that will be called mocker.patch.object( cluster_config, "build_ray_cluster_spec", 
return_value=mock_ray_cluster["spec"] ) @@ -435,7 +362,6 @@ def test_submit_with_auto_cluster(mocker): assert result == "test-job" - # Verify the correct RayJob CR was submitted mock_api_instance.submit_job.assert_called_once() call_args = mock_api_instance.submit_job.call_args @@ -444,15 +370,9 @@ def test_submit_with_auto_cluster(mocker): assert job_cr["spec"]["rayClusterSpec"] == mock_ray_cluster["spec"] -def test_namespace_auto_detection_success(mocker): +def test_namespace_auto_detection_success(auto_mock_setup): """Test successful namespace auto-detection.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="detected-ns", - ) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + auto_mock_setup["get_current_namespace"].return_value = "detected-ns" rayjob = RayJob( job_name="test-job", entrypoint="python script.py", cluster_name="test-cluster" @@ -461,14 +381,9 @@ def test_namespace_auto_detection_success(mocker): assert rayjob.namespace == "detected-ns" -def test_namespace_auto_detection_fallback(mocker): +def test_namespace_auto_detection_fallback(auto_mock_setup): """Test that namespace auto-detection failure raises an error.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", return_value=None - ) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + auto_mock_setup["get_current_namespace"].return_value = None with pytest.raises(ValueError, match="Could not auto-detect Kubernetes namespace"): RayJob( @@ -478,15 +393,9 @@ def test_namespace_auto_detection_fallback(mocker): ) -def test_namespace_explicit_override(mocker): +def test_namespace_explicit_override(auto_mock_setup): """Test that explicit namespace overrides auto-detection.""" - 
mocker.patch("kubernetes.config.load_kube_config") - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="detected-ns", - ) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + auto_mock_setup["get_current_namespace"].return_value = "detected-ns" rayjob = RayJob( job_name="test-job", @@ -498,46 +407,8 @@ def test_namespace_explicit_override(mocker): assert rayjob.namespace == "explicit-ns" -def test_shutdown_behavior_with_cluster_config(mocker): - """Test that shutdown_after_job_finishes is True when cluster_config is provided.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - cluster_config = ManagedClusterConfig() - - rayjob = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_config=cluster_config, - namespace="test-namespace", - ) - - assert rayjob.shutdown_after_job_finishes is True - - -def test_shutdown_behavior_with_existing_cluster(mocker): - """Test that shutdown_after_job_finishes is False when using existing cluster.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - rayjob = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_name="existing-cluster", - namespace="test-namespace", - ) - - assert rayjob.shutdown_after_job_finishes is False - - -def test_rayjob_with_rayjob_cluster_config(mocker): +def test_rayjob_with_rayjob_cluster_config(auto_mock_setup): """Test RayJob with the new ManagedClusterConfig.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - cluster_config = ManagedClusterConfig( 
num_workers=2, head_cpu_requests="500m", @@ -555,13 +426,8 @@ def test_rayjob_with_rayjob_cluster_config(mocker): assert rayjob.cluster_name == "test-job-cluster" # Generated from job name -def test_rayjob_cluster_config_validation(mocker): +def test_rayjob_cluster_config_validation(auto_mock_setup): """Test validation of ManagedClusterConfig parameters.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - # Test with minimal valid config cluster_config = ManagedClusterConfig() rayjob = RayJob( @@ -574,32 +440,20 @@ def test_rayjob_cluster_config_validation(mocker): assert rayjob._cluster_config is not None -def test_rayjob_missing_entrypoint_validation(mocker): +def test_rayjob_missing_entrypoint_validation(auto_mock_setup): """Test that RayJob requires entrypoint for submission.""" - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Should raise an error during construction with pytest.raises( TypeError, match="missing 1 required positional argument: 'entrypoint'" ): RayJob( job_name="test-job", cluster_name="test-cluster", - # No entrypoint provided ) -def test_build_ray_cluster_spec_integration(mocker): +def test_build_ray_cluster_spec_integration(mocker, auto_mock_setup): """Test integration with the new build_ray_cluster_spec method.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi class entirely - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - cluster_config = ManagedClusterConfig() - - # Mock the build_ray_cluster_spec method on the cluster config mock_spec = {"spec": "test-spec"} mocker.patch.object( cluster_config, "build_ray_cluster_spec", return_value=mock_spec @@ -612,25 +466,17 @@ def test_build_ray_cluster_spec_integration(mocker): namespace="test-namespace", ) - # Build the RayJob CR rayjob_cr = rayjob._build_rayjob_cr() - # 
Verify the method was called correctly cluster_config.build_ray_cluster_spec.assert_called_once_with( cluster_name="test-job-cluster" ) - - # Verify the spec is included in the RayJob CR assert "rayClusterSpec" in rayjob_cr["spec"] assert rayjob_cr["spec"]["rayClusterSpec"] == mock_spec -def test_rayjob_with_runtime_env(mocker): +def test_rayjob_with_runtime_env(auto_mock_setup): """Test RayJob with runtime environment configuration.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - runtime_env = {"pip": ["numpy", "pandas"]} rayjob = RayJob( @@ -643,16 +489,12 @@ def test_rayjob_with_runtime_env(mocker): assert rayjob.runtime_env == runtime_env - # Verify runtime env is included in the CR rayjob_cr = rayjob._build_rayjob_cr() assert rayjob_cr["spec"]["runtimeEnvYAML"] == str(runtime_env) -def test_rayjob_with_active_deadline_and_ttl(mocker): +def test_rayjob_with_active_deadline_and_ttl(auto_mock_setup): """Test RayJob with both active deadline and TTL settings.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") rayjob = RayJob( job_name="test-job", @@ -666,17 +508,13 @@ def test_rayjob_with_active_deadline_and_ttl(mocker): assert rayjob.active_deadline_seconds == 300 assert rayjob.ttl_seconds_after_finished == 600 - # Verify both are included in the CR rayjob_cr = rayjob._build_rayjob_cr() assert rayjob_cr["spec"]["activeDeadlineSeconds"] == 300 assert rayjob_cr["spec"]["ttlSecondsAfterFinished"] == 600 -def test_rayjob_cluster_name_generation_with_config(mocker): +def test_rayjob_cluster_name_generation_with_config(auto_mock_setup): """Test cluster name generation when using cluster_config.""" - mocker.patch("kubernetes.config.load_kube_config") - 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") cluster_config = ManagedClusterConfig() @@ -688,20 +526,11 @@ def test_rayjob_cluster_name_generation_with_config(mocker): ) assert rayjob.cluster_name == "my-job-cluster" - # Note: cluster_config.name is not set in RayJob (it's only for resource config) - # The cluster name is generated independently for the RayJob -def test_rayjob_namespace_propagation_to_cluster_config(mocker): +def test_rayjob_namespace_propagation_to_cluster_config(auto_mock_setup): """Test that job namespace is propagated to cluster config when None.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="detected-ns", - ) + auto_mock_setup["get_current_namespace"].return_value = "detected-ns" cluster_config = ManagedClusterConfig() @@ -714,9 +543,8 @@ def test_rayjob_namespace_propagation_to_cluster_config(mocker): assert rayjob.namespace == "detected-ns" -def test_rayjob_error_handling_invalid_cluster_config(mocker): +def test_rayjob_error_handling_invalid_cluster_config(auto_mock_setup): """Test error handling with invalid cluster configuration.""" - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") with pytest.raises(ValueError): RayJob( @@ -725,13 +553,8 @@ def test_rayjob_error_handling_invalid_cluster_config(mocker): ) -def test_rayjob_constructor_parameter_validation(mocker): +def test_rayjob_constructor_parameter_validation(auto_mock_setup): """Test constructor parameter validation.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - # Test with valid parameters rayjob = RayJob( job_name="test-job", 
entrypoint="python script.py", @@ -751,9 +574,8 @@ def test_rayjob_constructor_parameter_validation(mocker): assert rayjob.active_deadline_seconds == 600 -def test_build_ray_cluster_spec_function(mocker): +def test_build_ray_cluster_spec_function(): """Test the build_ray_cluster_spec method directly.""" - # Create a test cluster config cluster_config = ManagedClusterConfig( num_workers=2, head_cpu_requests="500m", @@ -762,23 +584,16 @@ def test_build_ray_cluster_spec_function(mocker): worker_memory_requests="256Mi", ) - # Build the spec using the method on the cluster config spec = cluster_config.build_ray_cluster_spec("test-cluster") - - # Verify basic structure assert "rayVersion" in spec - assert "enableInTreeAutoscaling" in spec assert "headGroupSpec" in spec assert "workerGroupSpecs" in spec - # Verify head group spec head_spec = spec["headGroupSpec"] assert head_spec["serviceType"] == "ClusterIP" assert head_spec["enableIngress"] is False assert "rayStartParams" in head_spec assert "template" in head_spec - - # Verify worker group spec worker_specs = spec["workerGroupSpecs"] assert len(worker_specs) == 1 worker_spec = worker_specs[0] @@ -788,24 +603,19 @@ def test_build_ray_cluster_spec_function(mocker): assert worker_spec["groupName"] == "worker-group-test-cluster" -def test_build_ray_cluster_spec_with_accelerators(mocker): +def test_build_ray_cluster_spec_with_accelerators(): """Test build_ray_cluster_spec with GPU accelerators.""" - # Create a test cluster config with GPU accelerators cluster_config = ManagedClusterConfig( head_accelerators={"nvidia.com/gpu": 1}, worker_accelerators={"nvidia.com/gpu": 2}, ) - # Build the spec using the method on the cluster config spec = cluster_config.build_ray_cluster_spec("test-cluster") - - # Verify head group has GPU parameters head_spec = spec["headGroupSpec"] head_params = head_spec["rayStartParams"] assert "num-gpus" in head_params assert head_params["num-gpus"] == "1" - # Verify worker group has GPU parameters 
worker_specs = spec["workerGroupSpecs"] worker_spec = worker_specs[0] worker_params = worker_spec["rayStartParams"] @@ -813,43 +623,32 @@ def test_build_ray_cluster_spec_with_accelerators(mocker): assert worker_params["num-gpus"] == "2" -def test_build_ray_cluster_spec_with_custom_volumes(mocker): +def test_build_ray_cluster_spec_with_custom_volumes(): """Test build_ray_cluster_spec with custom volumes and volume mounts.""" - # Create custom volumes and volume mounts custom_volume = V1Volume(name="custom-data", empty_dir={}) custom_volume_mount = V1VolumeMount(name="custom-data", mount_path="/data") - - # Create a test cluster config with custom volumes cluster_config = ManagedClusterConfig( volumes=[custom_volume], volume_mounts=[custom_volume_mount], ) - # Build the spec using the method on the cluster config spec = cluster_config.build_ray_cluster_spec("test-cluster") - - # Verify custom volumes are included head_spec = spec["headGroupSpec"] - head_pod_spec = head_spec["template"].spec # Access the spec attribute - # Note: We can't easily check DEFAULT_VOLUMES length since they're now part of the class + head_pod_spec = head_spec["template"].spec assert len(head_pod_spec.volumes) > 0 - # Verify custom volume mounts are included - head_container = head_pod_spec.containers[0] # Access the containers attribute - # Note: We can't easily check DEFAULT_VOLUME_MOUNTS length since they're now part of the class + head_container = head_pod_spec.containers[0] assert len(head_container.volume_mounts) > 0 -def test_build_ray_cluster_spec_with_environment_variables(mocker): +def test_build_ray_cluster_spec_with_environment_variables(): """Test build_ray_cluster_spec with environment variables.""" - # Create a test cluster config with environment variables cluster_config = ManagedClusterConfig( envs={"CUDA_VISIBLE_DEVICES": "0", "RAY_DISABLE_IMPORT_WARNING": "1"}, ) spec = cluster_config.build_ray_cluster_spec("test-cluster") - # Verify environment variables are included in 
head container head_spec = spec["headGroupSpec"] head_pod_spec = head_spec["template"].spec head_container = head_pod_spec.containers[0] @@ -857,8 +656,6 @@ def test_build_ray_cluster_spec_with_environment_variables(mocker): env_vars = {env.name: env.value for env in head_container.env} assert env_vars["CUDA_VISIBLE_DEVICES"] == "0" assert env_vars["RAY_DISABLE_IMPORT_WARNING"] == "1" - - # Verify environment variables are included in worker container worker_specs = spec["workerGroupSpecs"] worker_spec = worker_specs[0] worker_pod_spec = worker_spec["template"].spec @@ -870,9 +667,8 @@ def test_build_ray_cluster_spec_with_environment_variables(mocker): assert worker_env_vars["RAY_DISABLE_IMPORT_WARNING"] == "1" -def test_build_ray_cluster_spec_with_tolerations(mocker): +def test_build_ray_cluster_spec_with_tolerations(): """Test build_ray_cluster_spec with tolerations.""" - # Create test tolerations head_toleration = V1Toleration( key="node-role.kubernetes.io/master", operator="Exists", effect="NoSchedule" ) @@ -880,42 +676,36 @@ def test_build_ray_cluster_spec_with_tolerations(mocker): key="nvidia.com/gpu", operator="Exists", effect="NoSchedule" ) - # Create a test cluster config with tolerations cluster_config = ManagedClusterConfig( head_tolerations=[head_toleration], worker_tolerations=[worker_toleration], ) spec = cluster_config.build_ray_cluster_spec("test-cluster") - - # Verify head tolerations head_spec = spec["headGroupSpec"] - head_pod_spec = head_spec["template"].spec # Access the spec attribute + head_pod_spec = head_spec["template"].spec assert hasattr(head_pod_spec, "tolerations") assert len(head_pod_spec.tolerations) == 1 assert head_pod_spec.tolerations[0].key == "node-role.kubernetes.io/master" - # Verify worker tolerations worker_specs = spec["workerGroupSpecs"] worker_spec = worker_specs[0] - worker_pod_spec = worker_spec["template"].spec # Access the spec attribute + worker_pod_spec = worker_spec["template"].spec assert hasattr(worker_pod_spec, 
"tolerations") assert len(worker_pod_spec.tolerations) == 1 assert worker_pod_spec.tolerations[0].key == "nvidia.com/gpu" -def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): +def test_build_ray_cluster_spec_with_image_pull_secrets(): """Test build_ray_cluster_spec with image pull secrets.""" - # Create a test cluster config with image pull secrets cluster_config = ManagedClusterConfig( image_pull_secrets=["my-registry-secret", "another-secret"] ) spec = cluster_config.build_ray_cluster_spec("test-cluster") - # Verify image pull secrets are included in head pod head_spec = spec["headGroupSpec"] - head_pod_spec = head_spec["template"].spec # Access the spec attribute + head_pod_spec = head_spec["template"].spec assert hasattr(head_pod_spec, "image_pull_secrets") head_secrets = head_pod_spec.image_pull_secrets @@ -923,7 +713,6 @@ def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): assert head_secrets[0].name == "my-registry-secret" assert head_secrets[1].name == "another-secret" - # Verify image pull secrets are included in worker pod worker_specs = spec["workerGroupSpecs"] worker_spec = worker_specs[0] worker_pod_spec = worker_spec["template"].spec @@ -935,58 +724,12 @@ def test_build_ray_cluster_spec_with_image_pull_secrets(mocker): assert worker_secrets[1].name == "another-secret" -def test_rayjob_user_override_shutdown_behavior(mocker): - """Test that user can override the auto-detected shutdown behavior.""" - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - - # Test 1: User overrides shutdown to True even when using existing cluster - rayjob_existing_override = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_name="existing-cluster", - shutdown_after_job_finishes=True, # User override - namespace="test-namespace", # Explicitly specify namespace - ) - - assert 
rayjob_existing_override.shutdown_after_job_finishes is True - - # Test 2: User overrides shutdown to False even when creating new cluster - cluster_config = ManagedClusterConfig() - - rayjob_new_override = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_config=cluster_config, - shutdown_after_job_finishes=False, # User override - namespace="test-namespace", # Explicitly specify namespace - ) - - assert rayjob_new_override.shutdown_after_job_finishes is False - - # Test 3: User override takes precedence over auto-detection - rayjob_override_priority = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_config=cluster_config, - shutdown_after_job_finishes=True, # Should override auto-detection - namespace="test-namespace", # Explicitly specify namespace - ) - - assert rayjob_override_priority.shutdown_after_job_finishes is True - - class TestRayVersionValidation: """Test Ray version validation in RayJob.""" - def test_submit_with_cluster_config_compatible_image_passes(self, mocker): + def test_submit_with_cluster_config_compatible_image_passes(self, auto_mock_setup): """Test that submission passes with compatible cluster_config image.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = True cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") @@ -998,17 +741,12 @@ def test_submit_with_cluster_config_compatible_image_passes(self, mocker): entrypoint="python script.py", ) - # Should not raise any validation errors result = rayjob.submit() assert result == "test-job" - def test_submit_with_cluster_config_incompatible_image_fails(self, mocker): + def 
test_submit_with_cluster_config_incompatible_image_fails(self, auto_mock_setup): """Test that submission fails with incompatible cluster_config image.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + # cluster_config = ManagedClusterConfig(image="ray:2.8.0") # Different version @@ -1019,19 +757,14 @@ def test_submit_with_cluster_config_incompatible_image_fails(self, mocker): entrypoint="python script.py", ) - # Should raise ValueError for version mismatch with pytest.raises( ValueError, match="Cluster config image: Ray version mismatch detected" ): rayjob.submit() - def test_validate_ray_version_compatibility_method(self, mocker): + def test_validate_ray_version_compatibility_method(self, auto_mock_setup): """Test the _validate_ray_version_compatibility method directly.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + # rayjob = RayJob( job_name="test-job", @@ -1040,34 +773,24 @@ def test_validate_ray_version_compatibility_method(self, mocker): entrypoint="python script.py", ) - # Test with no cluster_config (should not raise) - rayjob._validate_ray_version_compatibility() # Should not raise - - # Test with compatible cluster_config version + rayjob._validate_ray_version_compatibility() rayjob._cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") - rayjob._validate_ray_version_compatibility() # Should not raise - - # Test with incompatible cluster_config version + rayjob._validate_ray_version_compatibility() rayjob._cluster_config = ManagedClusterConfig(image="ray:2.8.0") with 
pytest.raises( ValueError, match="Cluster config image: Ray version mismatch detected" ): rayjob._validate_ray_version_compatibility() - # Test with unknown cluster_config version (should warn but not fail) rayjob._cluster_config = ManagedClusterConfig(image="custom-image:latest") with pytest.warns( UserWarning, match="Cluster config image: Cannot determine Ray version" ): rayjob._validate_ray_version_compatibility() - def test_validate_cluster_config_image_method(self, mocker): + def test_validate_cluster_config_image_method(self, auto_mock_setup): """Test the _validate_cluster_config_image method directly.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + # rayjob = RayJob( job_name="test-job", @@ -1076,34 +799,23 @@ def test_validate_cluster_config_image_method(self, mocker): entrypoint="python script.py", ) - # Test with no image (should not raise) - rayjob._validate_cluster_config_image() # Should not raise - - # Test with compatible image + rayjob._validate_cluster_config_image() rayjob._cluster_config.image = f"ray:{RAY_VERSION}" - rayjob._validate_cluster_config_image() # Should not raise - - # Test with incompatible image + rayjob._validate_cluster_config_image() rayjob._cluster_config.image = "ray:2.8.0" with pytest.raises( ValueError, match="Cluster config image: Ray version mismatch detected" ): rayjob._validate_cluster_config_image() - # Test with unknown image (should warn but not fail) rayjob._cluster_config.image = "custom-image:latest" with pytest.warns( UserWarning, match="Cluster config image: Cannot determine Ray version" ): rayjob._validate_cluster_config_image() - def test_validate_cluster_config_image_edge_cases(self, mocker): + def test_validate_cluster_config_image_edge_cases(self, auto_mock_setup): """Test 
edge cases in _validate_cluster_config_image method.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance rayjob = RayJob( job_name="test-job", @@ -1112,31 +824,22 @@ def test_validate_cluster_config_image_edge_cases(self, mocker): entrypoint="python script.py", ) - # Test with None image (should not raise) rayjob._cluster_config.image = None - rayjob._validate_cluster_config_image() # Should not raise - - # Test with empty string image (should not raise) + rayjob._validate_cluster_config_image() rayjob._cluster_config.image = "" - rayjob._validate_cluster_config_image() # Should not raise - - # Test with non-string image (should log warning and skip) + rayjob._validate_cluster_config_image() rayjob._cluster_config.image = 123 - rayjob._validate_cluster_config_image() # Should log warning and not raise + rayjob._validate_cluster_config_image() - # Test with cluster config that has no image attribute class MockClusterConfig: pass rayjob._cluster_config = MockClusterConfig() - rayjob._validate_cluster_config_image() # Should not raise + rayjob._validate_cluster_config_image() -def test_extract_script_files_from_entrypoint_single_script(mocker, tmp_path): +def test_extract_script_files_from_entrypoint_single_script(auto_mock_setup, tmp_path): """Test extracting a single script file from entrypoint.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") # Create a test script test_script = tmp_path / "test_script.py" @@ -1164,11 +867,8 @@ def test_extract_script_files_from_entrypoint_single_script(mocker, tmp_path): os.chdir(original_cwd) -def test_extract_script_files_with_dependencies(mocker, tmp_path): +def 
test_extract_script_files_with_dependencies(auto_mock_setup, tmp_path): """Test extracting script files with local dependencies.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") # Create main script and dependency main_script = tmp_path / "main.py" @@ -1223,7 +923,6 @@ def calculate(x): assert "helper.py" in scripts assert "utils.py" in scripts - # Verify content assert "import helper" in scripts["main.py"] assert "def do_something" in scripts["helper.py"] assert "def calculate" in scripts["utils.py"] @@ -1232,11 +931,8 @@ def calculate(x): os.chdir(original_cwd) -def test_extract_script_files_no_local_scripts(mocker): +def test_extract_script_files_no_local_scripts(auto_mock_setup): """Test entrypoint with no local script files.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") rayjob = RayJob( job_name="test-job", @@ -1250,11 +946,8 @@ def test_extract_script_files_no_local_scripts(mocker): assert scripts is None -def test_extract_script_files_nonexistent_script(mocker): +def test_extract_script_files_nonexistent_script(auto_mock_setup): """Test entrypoint referencing non-existent script.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") rayjob = RayJob( job_name="test-job", @@ -1309,7 +1002,6 @@ def test_add_script_volumes(): config.add_script_volumes(configmap_name="test-scripts") - # Should have added one volume and one mount assert len(config.volumes) == 1 assert len(config.volume_mounts) == 1 @@ -1331,21 +1023,13 @@ def test_add_script_volumes_duplicate_prevention(): config.add_script_volumes(configmap_name="test-scripts") config.add_script_volumes(configmap_name="test-scripts") - # Should still have only 
one of each assert len(config.volumes) == 1 assert len(config.volume_mounts) == 1 -def test_create_configmap_from_spec(mocker): +def test_create_configmap_from_spec(auto_mock_setup): """Test creating ConfigMap via Kubernetes API.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_api_instance = MagicMock() - mock_k8s_api.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + mock_api_instance = auto_mock_setup["k8s_api"] rayjob = RayJob( job_name="test-job", @@ -1367,16 +1051,9 @@ def test_create_configmap_from_spec(mocker): mock_api_instance.create_namespaced_config_map.assert_called_once() -def test_create_configmap_already_exists(mocker): +def test_create_configmap_already_exists(auto_mock_setup): """Test creating ConfigMap when it already exists (409 conflict).""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_api_instance = MagicMock() - mock_k8s_api.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + mock_api_instance = auto_mock_setup["k8s_api"] mock_api_instance.create_namespaced_config_map.side_effect = ApiException( status=409 @@ -1403,19 +1080,9 @@ def test_create_configmap_already_exists(mocker): mock_api_instance.replace_namespaced_config_map.assert_called_once() -def test_create_configmap_with_owner_reference_basic(mocker, caplog): +def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, caplog): """Test creating ConfigMap with owner reference from valid RayJob result.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Mock Kubernetes API - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_api_instance = MagicMock() - mock_k8s_api.return_value = mock_api_instance - - # Mock get_api_client - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + mock_api_instance = auto_mock_setup["k8s_api"] # Mock client.V1ObjectMeta and V1ConfigMap mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") @@ -1465,21 +1132,15 @@ def test_create_configmap_with_owner_reference_basic(mocker, caplog): in caplog.text ) - # Verify owner_references was set on metadata assert mock_metadata_instance.owner_references is not None mock_api_instance.create_namespaced_config_map.assert_called_once() -def test_create_configmap_without_owner_reference_no_uid(mocker, caplog): +def test_create_configmap_without_owner_reference_no_uid( + mocker, auto_mock_setup, caplog +): """Test creating ConfigMap without owner reference when RayJob has no UID.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_api_instance = MagicMock() - mock_k8s_api.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + mock_api_instance = auto_mock_setup["k8s_api"] mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") mock_metadata_instance = MagicMock() @@ -1523,19 +1184,9 @@ def test_create_configmap_without_owner_reference_no_uid(mocker, caplog): mock_api_instance.create_namespaced_config_map.assert_called_once() -def test_create_configmap_with_invalid_rayjob_result(mocker, caplog): +def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): """Test creating ConfigMap with None or invalid rayjob_result.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Mock Kubernetes API - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_api_instance = MagicMock() - mock_k8s_api.return_value = mock_api_instance - - # Mock get_api_client - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + mock_api_instance = auto_mock_setup["k8s_api"] rayjob = RayJob( job_name="test-job", @@ -1567,10 +1218,9 @@ def test_create_configmap_with_invalid_rayjob_result(mocker, caplog): assert "No valid RayJob result with UID found" in caplog.text -def test_handle_script_volumes_for_new_cluster(mocker, tmp_path): +def test_handle_script_volumes_for_new_cluster(mocker, auto_mock_setup, tmp_path): """Test handling script volumes for new cluster creation.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + # auto_mock_setup handles kubernetes and API mocking mock_create = mocker.patch.object(RayJob, "_create_configmap_from_spec") mock_create.return_value = "test-job-scripts" @@ -1603,10 +1253,8 @@ def test_handle_script_volumes_for_new_cluster(mocker, tmp_path): os.chdir(original_cwd) -def test_ast_parsing_import_detection(mocker, tmp_path): +def test_ast_parsing_import_detection(auto_mock_setup, tmp_path): """Test AST parsing correctly detects import statements.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") main_script = tmp_path / "main.py" main_script.write_text( @@ -1656,13 +1304,11 @@ def func2(): pass os.chdir(original_cwd) -def test_script_handling_timing_after_rayjob_submission(mocker, tmp_path): +def test_script_handling_timing_after_rayjob_submission( + mocker, auto_mock_setup, tmp_path +): """Test that script handling happens after RayJob is submitted (not before).""" - mocker.patch("kubernetes.config.load_kube_config") - - mock_api_class = 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + mock_api_instance = auto_mock_setup["rayjob_api"] submit_result = { "metadata": { @@ -1677,7 +1323,7 @@ def test_script_handling_timing_after_rayjob_submission(mocker, tmp_path): RayJob, "_handle_script_volumes_for_new_cluster" ) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + # RayClusterApi is already mocked by auto_mock_setup test_script = tmp_path / "test.py" test_script.write_text("print('test')") @@ -1721,22 +1367,12 @@ def track_handle_scripts(*args, **kwargs): mock_handle_new.assert_called_with({"test.py": "print('test')"}, submit_result) -def test_rayjob_submit_with_scripts_new_cluster(mocker, tmp_path): +def test_rayjob_submit_with_scripts_new_cluster(auto_mock_setup, tmp_path): """Test RayJob submission with script detection for new cluster.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = True - # Mock ConfigMap creation - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_k8s_instance = MagicMock() - mock_k8s_api.return_value = mock_k8s_instance - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") + mock_k8s_instance = auto_mock_setup["k8s_api"] # Create test script test_script = tmp_path / "test.py" @@ -1760,24 +1396,18 @@ def test_rayjob_submit_with_scripts_new_cluster(mocker, tmp_path): assert result == "test-job" - # Verify ConfigMap was created mock_k8s_instance.create_namespaced_config_map.assert_called_once() - # Verify volumes were added assert len(cluster_config.volumes) == 1 assert len(cluster_config.volume_mounts) == 1 - - # Verify 
entrypoint was updated assert f"{MOUNT_PATH}/test.py" in rayjob.entrypoint finally: os.chdir(original_cwd) -def test_process_script_and_imports_io_error(mocker, tmp_path): +def test_process_script_and_imports_io_error(mocker, auto_mock_setup, tmp_path): """Test _process_script_and_imports handles IO errors gracefully.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") rayjob = RayJob( job_name="test-job", @@ -1793,19 +1423,13 @@ def test_process_script_and_imports_io_error(mocker, tmp_path): mocker.patch("os.path.isfile", return_value=True) mocker.patch("builtins.open", side_effect=IOError("Permission denied")) - # Should handle the error gracefully and not crash rayjob._process_script_and_imports("test.py", scripts, MOUNT_PATH, processed_files) - - # Should add to processed_files but not to scripts (due to error) assert "test.py" in processed_files assert len(scripts) == 0 -def test_process_script_and_imports_container_path_skip(mocker): +def test_process_script_and_imports_container_path_skip(auto_mock_setup): """Test that scripts already in container paths are skipped.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1821,16 +1445,12 @@ def test_process_script_and_imports_container_path_skip(mocker): f"{MOUNT_PATH}/test.py", scripts, MOUNT_PATH, processed_files ) - # Should skip processing assert len(scripts) == 0 assert len(processed_files) == 0 -def test_process_script_and_imports_already_processed(mocker, tmp_path): +def test_process_script_and_imports_already_processed(auto_mock_setup, tmp_path): """Test that already processed scripts are skipped (infinite loop prevention).""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - rayjob = 
RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1841,22 +1461,18 @@ def test_process_script_and_imports_already_processed(mocker, tmp_path): scripts = {} processed_files = {"test.py"} # Already processed - # Should return early without processing rayjob._process_script_and_imports("test.py", scripts, MOUNT_PATH, processed_files) assert len(scripts) == 0 assert processed_files == {"test.py"} -def test_submit_with_scripts_owner_reference_integration(mocker, tmp_path, caplog): +def test_submit_with_scripts_owner_reference_integration( + mocker, auto_mock_setup, tmp_path, caplog +): """Integration test for submit() with local scripts to verify end-to-end owner reference flow.""" - # Mock kubernetes config loading - mocker.patch("kubernetes.config.load_kube_config") - - # Mock the RayjobApi - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_k8s_instance = auto_mock_setup["k8s_api"] # RayJob submission returns result with UID submit_result = { @@ -1868,12 +1484,6 @@ def test_submit_with_scripts_owner_reference_integration(mocker, tmp_path, caplo } mock_api_instance.submit_job.return_value = submit_result - # Mock Kubernetes ConfigMap API - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_k8s_instance = MagicMock() - mock_k8s_api.return_value = mock_k8s_instance - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") - # Capture the ConfigMap that gets created created_configmap = None @@ -1910,10 +1520,7 @@ def capture_configmap(namespace, body): assert result == "test-job" - # Verify RayJob was submitted first mock_api_instance.submit_job.assert_called_once() - - # Verify ConfigMap was created with owner reference mock_k8s_instance.create_namespaced_config_map.assert_called_once() assert created_configmap is not None @@ -1941,11 +1548,8 @@ def 
capture_configmap(namespace, body): == "rayjob-scripts" ) - # Verify scripts were included assert "main.py" in created_configmap.data assert "helper.py" in created_configmap.data - - # Verify log message assert ( "Adding owner reference to ConfigMap 'test-job-scripts' with RayJob UID: unique-rayjob-uid-12345" in caplog.text @@ -1955,11 +1559,8 @@ def capture_configmap(namespace, body): os.chdir(original_cwd) -def test_find_local_imports_syntax_error(mocker): +def test_find_local_imports_syntax_error(mocker, auto_mock_setup): """Test _find_local_imports handles syntax errors gracefully.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1972,29 +1573,19 @@ def test_find_local_imports_syntax_error(mocker): mock_callback = mocker.Mock() - # Should handle syntax error gracefully rayjob._find_local_imports(invalid_script_content, "test.py", mock_callback) - - # Callback should not be called due to syntax error mock_callback.assert_not_called() -def test_create_configmap_api_error_non_409(mocker): +def test_create_configmap_api_error_non_409(auto_mock_setup): """Test _create_configmap_from_spec handles non-409 API errors.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Mock Kubernetes API with 500 error - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_api_instance = mocker.Mock() - mock_k8s_api.return_value = mock_api_instance + mock_api_instance = auto_mock_setup["k8s_api"] + # Configure to raise 500 error mock_api_instance.create_namespaced_config_map.side_effect = ApiException( status=500 ) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") - rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -2009,23 +1600,15 @@ def test_create_configmap_api_error_non_409(mocker): 
"data": {"test.py": "print('test')"}, } - # Should raise RuntimeError for non-409 API errors with pytest.raises(RuntimeError, match="Failed to create ConfigMap"): rayjob._create_configmap_from_spec(configmap_spec) -def test_update_existing_cluster_get_cluster_error(mocker): +def test_update_existing_cluster_get_cluster_error(mocker, auto_mock_setup): """Test _update_existing_cluster_for_scripts handles get cluster errors.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_rayjob_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Mock RayClusterApi with error - mock_cluster_api_class = mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi" - ) - mock_cluster_api_instance = mocker.Mock() - mock_cluster_api_class.return_value = mock_cluster_api_instance + mock_cluster_api_instance = auto_mock_setup["cluster_api"] + # Configure it to raise an error mock_cluster_api_instance.get_ray_cluster.side_effect = ApiException(status=404) config_builder = ManagedClusterConfig() @@ -2037,22 +1620,13 @@ def test_update_existing_cluster_get_cluster_error(mocker): namespace="test-namespace", ) - # Should raise RuntimeError when getting cluster fails with pytest.raises(RuntimeError, match="Failed to get RayCluster"): rayjob._update_existing_cluster_for_scripts("test-scripts", config_builder) -def test_update_existing_cluster_patch_error(mocker): +def test_update_existing_cluster_patch_error(mocker, auto_mock_setup): """Test _update_existing_cluster_for_scripts handles patch errors.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_rayjob_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - - # Mock RayClusterApi - mock_cluster_api_class = mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi" - ) - mock_cluster_api_instance = mocker.Mock() - mock_cluster_api_class.return_value = mock_cluster_api_instance + mock_cluster_api_instance = auto_mock_setup["cluster_api"] # Mock successful get but failed patch 
mock_cluster_api_instance.get_ray_cluster.return_value = { @@ -2083,16 +1657,12 @@ def test_update_existing_cluster_patch_error(mocker): namespace="test-namespace", ) - # Should raise RuntimeError when patching fails with pytest.raises(RuntimeError, match="Failed to update RayCluster"): rayjob._update_existing_cluster_for_scripts("test-scripts", config_builder) -def test_extract_script_files_empty_entrypoint(mocker): +def test_extract_script_files_empty_entrypoint(auto_mock_setup): """Test script extraction with empty entrypoint.""" - mocker.patch("kubernetes.config.load_kube_config") - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -2116,10 +1686,7 @@ def test_add_script_volumes_existing_volume_skip(): ) config.volumes.append(existing_volume) - # Should skip adding duplicate volume config.add_script_volumes(configmap_name="new-scripts") - - # Should still have only one volume assert len(config.volumes) == 1 assert len(config.volume_mounts) == 0 # Mount not added due to volume skip @@ -2132,23 +1699,14 @@ def test_add_script_volumes_existing_mount_skip(): existing_mount = V1VolumeMount(name="ray-job-scripts", mount_path="/existing/path") config.volume_mounts.append(existing_mount) - # Should skip adding duplicate mount config.add_script_volumes(configmap_name="new-scripts") - - # Should still have only one mount and no volume added assert len(config.volumes) == 0 # Volume not added due to mount skip assert len(config.volume_mounts) == 1 -def test_rayjob_stop_success(mocker, caplog): +def test_rayjob_stop_success(auto_mock_setup, caplog): """Test successful RayJob stop operation.""" - mocker.patch("kubernetes.config.load_kube_config") - - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + 
mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.suspend_job.return_value = { "metadata": {"name": "test-rayjob"}, @@ -2175,15 +1733,9 @@ def test_rayjob_stop_success(mocker, caplog): assert "Successfully stopped the RayJob test-rayjob" in caplog.text -def test_rayjob_stop_failure(mocker): +def test_rayjob_stop_failure(auto_mock_setup): """Test RayJob stop operation when API call fails.""" - mocker.patch("kubernetes.config.load_kube_config") - - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.suspend_job.return_value = None @@ -2202,15 +1754,9 @@ def test_rayjob_stop_failure(mocker): ) -def test_rayjob_resubmit_success(mocker): +def test_rayjob_resubmit_success(auto_mock_setup): """Test successful RayJob resubmit operation.""" - mocker.patch("kubernetes.config.load_kube_config") - - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.resubmit_job.return_value = { "metadata": {"name": "test-rayjob"}, @@ -2233,15 +1779,9 @@ def test_rayjob_resubmit_success(mocker): ) -def test_rayjob_resubmit_failure(mocker): +def test_rayjob_resubmit_failure(auto_mock_setup): """Test RayJob resubmit operation when API call fails.""" - mocker.patch("kubernetes.config.load_kube_config") - - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_api_instance = auto_mock_setup["rayjob_api"] 
mock_api_instance.resubmit_job.return_value = None @@ -2260,18 +1800,9 @@ def test_rayjob_resubmit_failure(mocker): ) -def test_rayjob_delete_success(mocker): +def test_rayjob_delete_success(auto_mock_setup): """Test successful RayJob deletion.""" - # Mock the API - mocker.patch("kubernetes.config.load_kube_config") - mock_api_instance = mocker.MagicMock() - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) + mock_api_instance = auto_mock_setup["rayjob_api"] rayjob = RayJob( job_name="test-rayjob", @@ -2289,17 +1820,9 @@ def test_rayjob_delete_success(mocker): ) -def test_rayjob_delete_failure(mocker): +def test_rayjob_delete_failure(auto_mock_setup): """Test failed RayJob deletion.""" - mock_api_instance = mocker.MagicMock() - mocker.patch("kubernetes.config.load_kube_config") - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) + mock_api_instance = auto_mock_setup["rayjob_api"] rayjob = RayJob( job_name="test-rayjob", @@ -2317,13 +1840,8 @@ def test_rayjob_delete_failure(mocker): ) -def test_rayjob_init_both_none_error(mocker): +def test_rayjob_init_both_none_error(auto_mock_setup): """Test RayJob initialization error when both cluster_name and cluster_config are None.""" - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) - with pytest.raises( ValueError, match="Configuration Error: You must provide either 'cluster_name' .* or 'cluster_config'", @@ -2336,13 +1854,8 @@ def test_rayjob_init_both_none_error(mocker): ) -def test_rayjob_init_missing_cluster_name_with_no_config(mocker): +def test_rayjob_init_missing_cluster_name_with_no_config(auto_mock_setup): """Test RayJob initialization 
error when cluster_name is None without cluster_config.""" - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) - with pytest.raises( ValueError, match="Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'", @@ -2353,7 +1866,7 @@ def test_rayjob_init_missing_cluster_name_with_no_config(mocker): rayjob.runtime_env = None rayjob.ttl_seconds_after_finished = 0 rayjob.active_deadline_seconds = None - rayjob.shutdown_after_job_finishes = False + rayjob.shutdown_after_job_finishes = True rayjob.namespace = "test-namespace" rayjob._cluster_name = None rayjob._cluster_config = None @@ -2363,26 +1876,11 @@ def test_rayjob_init_missing_cluster_name_with_no_config(mocker): ) -def test_handle_script_volumes_for_existing_cluster_direct_call(mocker): +def test_handle_script_volumes_for_existing_cluster_direct_call(auto_mock_setup): """Test _handle_script_volumes_for_existing_cluster method directly.""" - # Mock APIs - mock_api_instance = mocker.MagicMock() - mock_cluster_api = mocker.MagicMock() - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi", return_value=mock_cluster_api - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) - - # Mock the Kubernetes API for ConfigMap creation - mock_k8s_api = mocker.MagicMock() - mocker.patch("kubernetes.client.CoreV1Api", return_value=mock_k8s_api) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client", return_value=None) + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_cluster_api = auto_mock_setup["cluster_api"] + mock_k8s_api = auto_mock_setup["k8s_api"] # Mock existing cluster mock_cluster = { @@ -2423,24 +1921,11 @@ def test_handle_script_volumes_for_existing_cluster_direct_call(mocker): ) -def 
test_handle_script_volumes_for_existing_cluster_no_volumes_init(mocker): +def test_handle_script_volumes_for_existing_cluster_no_volumes_init(auto_mock_setup): """Test _handle_script_volumes_for_existing_cluster when volumes/mounts don't exist initially.""" - mock_api_instance = mocker.MagicMock() - mock_cluster_api = mocker.MagicMock() - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi", return_value=mock_cluster_api - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) - - mock_k8s_api = mocker.MagicMock() - mocker.patch("kubernetes.client.CoreV1Api", return_value=mock_k8s_api) - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client", return_value=None) + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_cluster_api = auto_mock_setup["cluster_api"] + mock_k8s_api = auto_mock_setup["k8s_api"] # Mock existing cluster WITHOUT volumes/volumeMounts (to test initialization) mock_cluster = { @@ -2482,20 +1967,10 @@ def test_handle_script_volumes_for_existing_cluster_no_volumes_init(mocker): assert len(worker_spec["containers"][0]["volumeMounts"]) == 1 -def test_update_existing_cluster_for_scripts_api_errors(mocker): +def test_update_existing_cluster_for_scripts_api_errors(mocker, auto_mock_setup): """Test _update_existing_cluster_for_scripts error handling.""" - mock_api_instance = mocker.MagicMock() - mock_cluster_api = mocker.MagicMock() - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayjobApi", return_value=mock_api_instance - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi", return_value=mock_cluster_api - ) - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_cluster_api = auto_mock_setup["cluster_api"] # Mock config builder 
mock_config_builder = mocker.MagicMock() @@ -2529,3 +2004,193 @@ def test_update_existing_cluster_for_scripts_api_errors(mocker): rayjob._update_existing_cluster_for_scripts( "test-configmap", mock_config_builder ) + + +def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog): + """Test RayJob falls back to 'default' queue when no default queue exists.""" + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", + return_value=None, + ) + + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + cluster_config = ManagedClusterConfig() + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + ) + + with caplog.at_level("WARNING"): + rayjob.submit() + + # Verify the submitted job has the fallback label + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert submitted_job["metadata"]["labels"]["kueue.x-k8s.io/queue-name"] == "default" + + # Verify warning was logged + assert "No default Kueue LocalQueue found" in caplog.text + + +def test_rayjob_kueue_explicit_local_queue(auto_mock_setup): + """Test RayJob uses explicitly specified local queue.""" + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + cluster_config = ManagedClusterConfig() + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + local_queue="custom-queue", + ) + + rayjob.submit() + + # Verify the submitted job has the explicit queue label + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert ( + submitted_job["metadata"]["labels"]["kueue.x-k8s.io/queue-name"] + == "custom-queue" + ) + + +def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup): + """Test RayJob doesn't add Kueue label for 
existing clusters.""" + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + # Using existing cluster (no cluster_config) + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python script.py", + ) + + rayjob.submit() + + # Verify no Kueue label was added + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert "kueue.x-k8s.io/queue-name" not in submitted_job["metadata"]["labels"] + + +def test_rayjob_with_ttl_and_deadline(auto_mock_setup): + """Test RayJob with TTL and active deadline seconds.""" + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + cluster_config = ManagedClusterConfig() + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + ttl_seconds_after_finished=300, + active_deadline_seconds=600, + ) + + rayjob.submit() + + # Verify TTL and deadline were set + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert submitted_job["spec"]["ttlSecondsAfterFinished"] == 300 + assert submitted_job["spec"]["activeDeadlineSeconds"] == 600 + + +def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): + """Test RayJob sets shutdownAfterJobFinishes correctly.""" + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + # Test with managed cluster (should shutdown) + cluster_config = ManagedClusterConfig() + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + ) + + rayjob.submit() + + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert submitted_job["spec"]["shutdownAfterJobFinishes"] is True + + # Test with existing cluster (should not 
shutdown) + rayjob2 = RayJob( + job_name="test-job2", + cluster_name="existing-cluster", + entrypoint="python script.py", + ) + + rayjob2.submit() + + call_args2 = mock_api_instance.submit_job.call_args + submitted_job2 = call_args2.kwargs["job"] + assert submitted_job2["spec"]["shutdownAfterJobFinishes"] is False + + +def test_rayjob_stop_delete_resubmit_logging(auto_mock_setup, caplog): + """Test logging for stop, delete, and resubmit operations.""" + mock_api_instance = auto_mock_setup["rayjob_api"] + + # Test stop with logging + mock_api_instance.suspend_job.return_value = { + "metadata": {"name": "test-rayjob"}, + "spec": {"suspend": True}, + } + + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python script.py", + ) + + with caplog.at_level("INFO"): + result = rayjob.stop() + + assert result is True + assert "Successfully stopped the RayJob test-rayjob" in caplog.text + + # Test delete with logging + caplog.clear() + mock_api_instance.delete_job.return_value = True + + with caplog.at_level("INFO"): + result = rayjob.delete() + + assert result is True + assert "Successfully deleted the RayJob test-rayjob" in caplog.text + + # Test resubmit with logging + caplog.clear() + mock_api_instance.resubmit_job.return_value = { + "metadata": {"name": "test-rayjob"}, + "spec": {"suspend": False}, + } + + with caplog.at_level("INFO"): + result = rayjob.resubmit() + + assert result is True + assert "Successfully resubmitted the RayJob test-rayjob" in caplog.text + + +def test_rayjob_initialization_logging(auto_mock_setup, caplog): + """Test RayJob initialization logging.""" + with caplog.at_level("INFO"): + cluster_config = ManagedClusterConfig() + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python script.py", + ) + + assert "Creating new cluster: test-job-cluster" in caplog.text + assert "Initialized RayJob: test-job in namespace: test-namespace" in 
caplog.text diff --git a/src/codeflare_sdk/ray/rayjobs/test_status.py b/src/codeflare_sdk/ray/rayjobs/test_status.py index f3ed7ef8..2f2b9957 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_status.py +++ b/src/codeflare_sdk/ray/rayjobs/test_status.py @@ -302,3 +302,72 @@ def test_rayjob_status_print_job_found(mocker): assert status == CodeflareRayJobStatus.RUNNING assert ready == False + + +def test_rayjob_status_all_deployment_states(mocker): + """Test RayJob status method with all deployment states.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_api_instance = mock_api_class.return_value + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Test mapping of deployment statuses to CodeflareRayJobStatus + test_cases = [ + # (deployment_status_str, expected CodeflareRayJobStatus, expected ready) + ("Complete", CodeflareRayJobStatus.COMPLETE, True), + ("Running", CodeflareRayJobStatus.RUNNING, False), + ("Failed", CodeflareRayJobStatus.FAILED, False), + ("Suspended", CodeflareRayJobStatus.SUSPENDED, False), + ] + + for deployment_status_str, expected_status, expected_ready in test_cases: + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": deployment_status_str, + "startTime": "2025-07-28T11:37:07Z", + "failed": 0, + "succeeded": 0, + "rayClusterName": "test-cluster", + } + status, ready = rayjob.status(print_to_console=False) + assert status == expected_status, f"Failed for {deployment_status_str}" + assert ( + ready == expected_ready + ), f"Failed ready check for {deployment_status_str}" + + +def test_rayjob_status_with_end_time(mocker): + """Test RayJob status with end time field.""" + mocker.patch("kubernetes.config.load_kube_config") + mock_api_class = 
mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_api_instance = mock_api_class.return_value + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-ns", + entrypoint="python test.py", + ) + + # Test with end time field + mock_api_instance.get_job_status.return_value = { + "jobId": "test-job-abc123", + "jobDeploymentStatus": "Complete", + "startTime": "2025-07-28T11:37:07Z", + "endTime": "2025-07-28T11:47:07Z", + "failed": 0, + "succeeded": 1, + "rayClusterName": "test-cluster", + } + + status, ready = rayjob.status(print_to_console=False) + assert status == CodeflareRayJobStatus.COMPLETE + assert ready == True diff --git a/tests/e2e/cluster_apply_kind_test.py b/tests/e2e/cluster_apply_kind_test.py index 398bf73b..e023e92d 100644 --- a/tests/e2e/cluster_apply_kind_test.py +++ b/tests/e2e/cluster_apply_kind_test.py @@ -1,6 +1,8 @@ from codeflare_sdk import Cluster, ClusterConfiguration import pytest +import time from kubernetes import client +from codeflare_sdk.common.utils import constants from support import ( initialize_kubernetes_client, @@ -38,6 +40,7 @@ def test_cluster_apply(self): worker_cpu_limits="1", worker_memory_requests="1Gi", worker_memory_limits="2Gi", + image=f"rayproject/ray:{constants.RAY_VERSION}", write_to_file=True, verify_tls=False, ) @@ -47,9 +50,9 @@ def test_cluster_apply(self): cluster.apply() # Wait for the cluster to be ready - cluster.wait_ready() - status = cluster.status() - assert status["ready"], f"Cluster {cluster_name} is not ready: {status}" + cluster.wait_ready(dashboard_check=False) + status, ready = cluster.status() + assert ready, f"Cluster {cluster_name} is not ready: {status}" # Verify the cluster is created ray_cluster = get_ray_cluster(cluster_name, namespace) @@ -58,7 +61,7 @@ def test_cluster_apply(self): ray_cluster["spec"]["workerGroupSpecs"][0]["replicas"] == 1 ), "Initial worker count does not match" 
- # Update configuration with 3 workers + # Update configuration with 2 workers updated_config = ClusterConfiguration( name=cluster_name, namespace=namespace, @@ -71,6 +74,7 @@ def test_cluster_apply(self): worker_cpu_limits="1", worker_memory_requests="1Gi", worker_memory_limits="2Gi", + image=f"rayproject/ray:{constants.RAY_VERSION}", write_to_file=True, verify_tls=False, ) @@ -79,12 +83,15 @@ def test_cluster_apply(self): cluster.config = updated_config cluster.apply() + # Give Kubernetes a moment to process the update + time.sleep(5) + # Wait for the updated cluster to be ready - cluster.wait_ready() - updated_status = cluster.status() - assert updated_status[ - "ready" - ], f"Cluster {cluster_name} is not ready after update: {updated_status}" + cluster.wait_ready(dashboard_check=False) + updated_status, updated_ready = cluster.status() + assert ( + updated_ready + ), f"Cluster {cluster_name} is not ready after update: {updated_status}" # Verify the cluster is updated updated_ray_cluster = get_ray_cluster(cluster_name, namespace) @@ -94,63 +101,19 @@ def test_cluster_apply(self): # Clean up cluster.down() - ray_cluster = get_ray_cluster(cluster_name, namespace) - assert ray_cluster is None, "Cluster was not deleted successfully" - def test_apply_invalid_update(self): - self.setup_method() - create_namespace(self) + # Wait for deletion to complete (finalizers may delay deletion) + max_wait = 30 # seconds + wait_interval = 2 + elapsed = 0 - cluster_name = "test-cluster-apply-invalid" - namespace = self.namespace + while elapsed < max_wait: + ray_cluster = get_ray_cluster(cluster_name, namespace) + if ray_cluster is None: + break + time.sleep(wait_interval) + elapsed += wait_interval - # Initial configuration - initial_config = ClusterConfiguration( - name=cluster_name, - namespace=namespace, - num_workers=1, - head_cpu_requests="500m", - head_cpu_limits="1", - head_memory_requests="1Gi", - head_memory_limits="2Gi", - worker_cpu_requests="500m", - 
worker_cpu_limits="1", - worker_memory_requests="1Gi", - worker_memory_limits="2Gi", - write_to_file=True, - verify_tls=False, - ) - - # Create the cluster - cluster = Cluster(initial_config) - cluster.apply() - - # Wait for the cluster to be ready - cluster.wait_ready() - status = cluster.status() - assert status["ready"], f"Cluster {cluster_name} is not ready: {status}" - - # Update with an invalid configuration (e.g., immutable field change) - invalid_config = ClusterConfiguration( - name=cluster_name, - namespace=namespace, - num_workers=2, - head_cpu_requests="1", - head_cpu_limits="2", # Changing CPU limits (immutable) - head_memory_requests="1Gi", - head_memory_limits="2Gi", - worker_cpu_requests="500m", - worker_cpu_limits="1", - worker_memory_requests="1Gi", - worker_memory_limits="2Gi", - write_to_file=True, - verify_tls=False, - ) - - # Try to apply the invalid configuration and expect failure - cluster.config = invalid_config - with pytest.raises(RuntimeError, match="Immutable fields detected"): - cluster.apply() - - # Clean up - cluster.down() + assert ( + ray_cluster is None + ), f"Cluster was not deleted successfully after {max_wait}s" diff --git a/tests/e2e/rayjob/existing_cluster_oauth_test.py b/tests/e2e/rayjob/existing_cluster_oauth_test.py deleted file mode 100644 index 5face339..00000000 --- a/tests/e2e/rayjob/existing_cluster_oauth_test.py +++ /dev/null @@ -1,139 +0,0 @@ -import pytest -import sys -import os -from time import sleep - -# Add the parent directory to the path to import support -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from support import * - -from codeflare_sdk import ( - Cluster, - ClusterConfiguration, - TokenAuthentication, -) -from codeflare_sdk.ray.rayjobs import RayJob -from codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus - -# This test creates a Ray Cluster and then submits a RayJob against the existing cluster on OpenShift - - -@pytest.mark.openshift -class 
TestRayJobExistingClusterOauth: - def setup_method(self): - initialize_kubernetes_client(self) - - def teardown_method(self): - delete_namespace(self) - delete_kueue_resources(self) - - def test_rayjob_against_existing_cluster_oauth(self): - self.setup_method() - create_namespace(self) - create_kueue_resources(self) - self.run_rayjob_against_existing_cluster_oauth() - - def run_rayjob_against_existing_cluster_oauth(self): - ray_image = get_ray_image() - - auth = TokenAuthentication( - token=run_oc_command(["whoami", "--show-token=true"]), - server=run_oc_command(["whoami", "--show-server=true"]), - skip_tls=True, - ) - auth.login() - - cluster_name = "existing-cluster" - - cluster = Cluster( - ClusterConfiguration( - name=cluster_name, - namespace=self.namespace, - num_workers=1, - head_cpu_requests="500m", - head_cpu_limits="500m", - worker_cpu_requests=1, - worker_cpu_limits=1, - worker_memory_requests=1, - worker_memory_limits=4, - image=ray_image, - write_to_file=True, - verify_tls=False, - ) - ) - - cluster.apply() - cluster.status() - cluster.wait_ready() - cluster.status() - cluster.details() - - print(f"Ray cluster '{cluster_name}' is ready!") - - job_name = "existing-cluster-rayjob" - - rayjob = RayJob( - job_name=job_name, - cluster_name=cluster_name, - namespace=self.namespace, - entrypoint="python -c \"import ray; ray.init(); print('Hello from RayJob!'); print(f'Ray version: {ray.__version__}'); import time; time.sleep(30); print('RayJob completed successfully!')\"", - runtime_env={ - "pip": ["torch", "pytorch-lightning", "torchmetrics", "torchvision"], - "env_vars": get_setup_env_variables(ACCELERATOR="cpu"), - }, - shutdown_after_job_finishes=False, - ) - - # Submit the job - print( - f"Submitting RayJob '{job_name}' against existing cluster '{cluster_name}'" - ) - submission_result = rayjob.submit() - assert ( - submission_result == job_name - ), f"Job submission failed, expected {job_name}, got {submission_result}" - print(f"Successfully submitted 
RayJob '{job_name}'!") - - # Monitor the job status until completion - self.monitor_rayjob_completion(rayjob) - - # Cleanup - manually tear down the cluster since job won't do it - print("🧹 Cleaning up Ray cluster") - cluster.down() - - def monitor_rayjob_completion(self, rayjob: RayJob, timeout: int = 900): - """ - Monitor a RayJob until it completes or fails. - Args: - rayjob: The RayJob instance to monitor - timeout: Maximum time to wait in seconds (default: 5 minutes) - """ - print(f"Monitoring RayJob '{rayjob.name}' status...") - - elapsed_time = 0 - check_interval = 10 # Check every 10 seconds - - while elapsed_time < timeout: - status, ready = rayjob.status(print_to_console=True) - - # Check if job has completed (either successfully or failed) - if status == CodeflareRayJobStatus.COMPLETE: - print(f"RayJob '{rayjob.name}' completed successfully!") - return - elif status == CodeflareRayJobStatus.FAILED: - raise AssertionError(f"RayJob '{rayjob.name}' failed!") - elif status == CodeflareRayJobStatus.RUNNING: - print(f"RayJob '{rayjob.name}' is still running...") - elif status == CodeflareRayJobStatus.UNKNOWN: - print(f"RayJob '{rayjob.name}' status is unknown") - - # Wait before next check - sleep(check_interval) - elapsed_time += check_interval - - # If we reach here, the job has timed out - final_status, _ = rayjob.status(print_to_console=True) - raise TimeoutError( - f"RayJob '{rayjob.name}' did not complete within {timeout} seconds. 
" - f"Final status: {final_status}" - ) diff --git a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py b/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py deleted file mode 100644 index 7db71441..00000000 --- a/tests/e2e/rayjob/lifecycled_cluster_oauth_test.py +++ /dev/null @@ -1,148 +0,0 @@ -import pytest -import sys -import os -from time import sleep - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from support import * - -from codeflare_sdk import RayJob, ManagedClusterConfig -from codeflare_sdk.ray.rayjobs.status import ( - CodeflareRayJobStatus, - RayJobDeploymentStatus, -) -import kubernetes.client.rest -from python_client.kuberay_job_api import RayjobApi -from python_client.kuberay_cluster_api import RayClusterApi - - -@pytest.mark.openshift -class TestRayJobLifecycledClusterOauth: - """Test RayJob with auto-created cluster lifecycle management on OpenShift.""" - - def setup_method(self): - initialize_kubernetes_client(self) - - def teardown_method(self): - delete_namespace(self) - - def test_rayjob_with_lifecycled_cluster_oauth(self): - """ - Test RayJob submission with embedded cluster configuration, including: - 1. Job submission with auto-cluster creation - 2. Job suspension (stop) and verification - 3. Job resumption (resubmit) and verification - 4. Job completion monitoring - 5. 
Automatic cluster cleanup after job deletion - """ - self.setup_method() - create_namespace(self) - ray_image = get_ray_image() - self.job_api = RayjobApi() - job_name = "lifecycled-job" - - cluster_config = ManagedClusterConfig( - head_cpu_requests="500m", - head_cpu_limits="500m", - head_memory_requests=1, - head_memory_limits=4, - num_workers=1, - worker_cpu_requests="500m", - worker_cpu_limits="500m", - worker_memory_requests=1, - worker_memory_limits=4, - image=ray_image, - ) - - rayjob = RayJob( - job_name=job_name, - namespace=self.namespace, - cluster_config=cluster_config, - entrypoint="python -c \"import ray; ray.init(); print('RayJob completed successfully')\"", - runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, - shutdown_after_job_finishes=True, - ) - - try: - # 1. Submit and wait for job to reach running state - assert rayjob.submit() == job_name - assert self.job_api.wait_until_job_running( - name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=300 - ), "Job did not reach running state" - - # 2. Stop (suspend) the job and - assert rayjob.stop(), "Job stop failed" - job_cr = self.job_api.get_job( - name=rayjob.name, k8s_namespace=rayjob.namespace - ) - assert job_cr["spec"]["suspend"] is True, "Job suspend not set to true" - - assert self._wait_for_job_status( - rayjob, "Suspended", timeout=30 - ), "Job did not reach Suspended state" - - # 3. Test Job Resubmission - assert rayjob.resubmit(), "Job resubmit failed" - job_cr = self.job_api.get_job( - name=rayjob.name, k8s_namespace=rayjob.namespace - ) - assert job_cr["spec"]["suspend"] is False, "Job suspend not set to false" - - assert self.job_api.wait_until_job_finished( - name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=300 - ), "Job did not complete" - - finally: - # 4. 
Delete the job and cleanup - assert rayjob.delete() - self.verify_cluster_cleanup(rayjob) - - def _wait_for_job_status( - self, - rayjob: RayJob, - expected_status: str, - timeout: int = 30, - ) -> bool: - """Wait for a job to reach a specific deployment status.""" - elapsed_time = 0 - check_interval = 2 - - while elapsed_time < timeout: - status = self.job_api.get_job_status( - name=rayjob.name, k8s_namespace=rayjob.namespace - ) - if status and status.get("jobDeploymentStatus") == expected_status: - return True - - sleep(check_interval) - elapsed_time += check_interval - - return False - - def verify_cluster_cleanup(self, rayjob: RayJob, timeout: int = 60): - """Verify that the cluster created by the RayJob has been cleaned up.""" - elapsed_time = 0 - check_interval = 5 - cluster_api = RayClusterApi() - - while elapsed_time < timeout: - try: - cluster_info = cluster_api.get_ray_cluster( - name=rayjob.cluster_name, k8s_namespace=rayjob.namespace - ) - # Cluster doesn't exist - if cluster_info is None: - return - - sleep(check_interval) - elapsed_time += check_interval - - except kubernetes.client.rest.ApiException as e: - if e.status == 404: - return - else: - raise e - - raise TimeoutError( - f"Cluster '{rayjob.cluster_name}' was not cleaned up within {timeout} seconds" - ) diff --git a/tests/e2e/rayjob/ray_version_validation_oauth_test.py b/tests/e2e/rayjob/ray_version_validation_oauth_test.py index 68c69aee..794d739a 100644 --- a/tests/e2e/rayjob/ray_version_validation_oauth_test.py +++ b/tests/e2e/rayjob/ray_version_validation_oauth_test.py @@ -7,15 +7,11 @@ from support import * from codeflare_sdk import ( - TokenAuthentication, RayJob, ManagedClusterConfig, ) -# This test validates Ray version compatibility checking for RayJob with cluster lifecycling scenarios - -@pytest.mark.openshift class TestRayJobRayVersionValidationOauth: def setup_method(self): initialize_kubernetes_client(self) @@ -50,12 +46,6 @@ def 
test_rayjob_lifecycled_cluster_incompatible_ray_version_oauth(self): def run_rayjob_lifecycled_cluster_incompatible_version(self): """Test Ray version validation with cluster lifecycling using incompatible image.""" - auth = TokenAuthentication( - token=run_oc_command(["whoami", "--show-token=true"]), - server=run_oc_command(["whoami", "--show-server=true"]), - skip_tls=True, - ) - auth.login() job_name = "incompatible-lifecycle-rayjob" @@ -76,7 +66,6 @@ def run_rayjob_lifecycled_cluster_incompatible_version(self): cluster_config=cluster_config, namespace=self.namespace, entrypoint="python -c 'print(\"This should not run due to version mismatch\")'", - shutdown_after_job_finishes=True, ttl_seconds_after_finished=30, ) @@ -101,12 +90,6 @@ def test_rayjob_lifecycled_cluster_unknown_ray_version_oauth(self): def run_rayjob_lifecycled_cluster_unknown_version(self): """Test Ray version validation with unknown image (should warn but not fail).""" - auth = TokenAuthentication( - token=run_oc_command(["whoami", "--show-token=true"]), - server=run_oc_command(["whoami", "--show-server=true"]), - skip_tls=True, - ) - auth.login() job_name = "unknown-version-rayjob" @@ -125,7 +108,6 @@ def run_rayjob_lifecycled_cluster_unknown_version(self): cluster_config=cluster_config, namespace=self.namespace, entrypoint="python -c 'print(\"Testing unknown Ray version scenario\")'", - shutdown_after_job_finishes=True, ttl_seconds_after_finished=30, ) diff --git a/tests/e2e/rayjob/rayjob_existing_cluster_test.py b/tests/e2e/rayjob/rayjob_existing_cluster_test.py new file mode 100644 index 00000000..b62ea1ef --- /dev/null +++ b/tests/e2e/rayjob/rayjob_existing_cluster_test.py @@ -0,0 +1,111 @@ +import pytest +import sys +import os +from time import sleep + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from support import * + +from codeflare_sdk import ( + Cluster, + ClusterConfiguration, +) +from codeflare_sdk import RayJob, TokenAuthentication +from 
codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus +from python_client.kuberay_job_api import RayjobApi + + +class TestRayJobExistingCluster: + """Test RayJob against existing Kueue-managed clusters.""" + + def setup_method(self): + initialize_kubernetes_client(self) + + def teardown_method(self): + delete_namespace(self) + delete_kueue_resources(self) + + def test_existing_kueue_cluster(self): + """Test RayJob against Kueue-managed RayCluster.""" + self.setup_method() + create_namespace(self) + create_kueue_resources(self) + + cluster_name = "kueue-cluster" + + if is_openshift(): + auth = TokenAuthentication( + token=run_oc_command(["whoami", "--show-token=true"]), + server=run_oc_command(["whoami", "--show-server=true"]), + skip_tls=True, + ) + auth.login() + + cluster = Cluster( + ClusterConfiguration( + name=cluster_name, + namespace=self.namespace, + num_workers=1, + head_cpu_requests="500m", + head_cpu_limits="500m", + worker_cpu_requests=1, + worker_cpu_limits=1, + worker_memory_requests=1, + worker_memory_limits=4, + image=get_ray_image(), + local_queue=self.local_queues[0], + write_to_file=True, + verify_tls=False, + ) + ) + + cluster.apply() + sleep(20) + + # RayJob with explicit local_queue + rayjob_explicit = RayJob( + job_name="job-explicit-queue", + cluster_name=cluster_name, + namespace=self.namespace, + entrypoint="python -c \"import ray; ray.init(); print('Job with explicit queue')\"", + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + local_queue=self.local_queues[0], + ) + + # RayJob using default queue + rayjob_default = RayJob( + job_name="job-default-queue", + cluster_name=cluster_name, + namespace=self.namespace, + entrypoint="python -c \"import ray; ray.init(); print('Job with default queue')\"", + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + ) + + try: + # Test RayJob with explicit queue + assert rayjob_explicit.submit() == "job-explicit-queue" + 
self._wait_completion(rayjob_explicit) + + # Test RayJob with default queue + assert rayjob_default.submit() == "job-default-queue" + self._wait_completion(rayjob_default) + finally: + rayjob_explicit.delete() + rayjob_default.delete() + cluster.down() + + def _wait_completion(self, rayjob: RayJob, timeout: int = 600): + """Wait for RayJob completion.""" + elapsed = 0 + interval = 10 + + while elapsed < timeout: + status, _ = rayjob.status(print_to_console=False) + if status == CodeflareRayJobStatus.COMPLETE: + return + elif status == CodeflareRayJobStatus.FAILED: + raise AssertionError(f"RayJob '{rayjob.name}' failed") + sleep(interval) + elapsed += interval + + raise TimeoutError(f"RayJob '{rayjob.name}' timeout after {timeout}s") diff --git a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py new file mode 100644 index 00000000..51c72df6 --- /dev/null +++ b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py @@ -0,0 +1,163 @@ +import pytest +import sys +import os +from time import sleep + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from support import * + +from codeflare_sdk import RayJob, ManagedClusterConfig +from python_client.kuberay_job_api import RayjobApi +from python_client.kuberay_cluster_api import RayClusterApi + + +class TestRayJobLifecycledCluster: + """Test RayJob with auto-created cluster lifecycle management.""" + + def setup_method(self): + initialize_kubernetes_client(self) + + def teardown_method(self): + delete_namespace(self) + delete_kueue_resources(self) + + def test_lifecycled_kueue_managed(self): + """Test RayJob with Kueue-managed lifecycled cluster.""" + self.setup_method() + create_namespace(self) + create_kueue_resources(self) + + self.job_api = RayjobApi() + cluster_api = RayClusterApi() + job_name = "kueue-lifecycled" + + # Get platform-appropriate resource configurations + resources = get_platform_appropriate_resources() + + cluster_config = 
ManagedClusterConfig( + head_cpu_requests=resources["head_cpu_requests"], + head_cpu_limits=resources["head_cpu_limits"], + head_memory_requests=resources["head_memory_requests"], + head_memory_limits=resources["head_memory_limits"], + num_workers=1, + worker_cpu_requests=resources["worker_cpu_requests"], + worker_cpu_limits=resources["worker_cpu_limits"], + worker_memory_requests=resources["worker_memory_requests"], + worker_memory_limits=resources["worker_memory_limits"], + ) + + rayjob = RayJob( + job_name=job_name, + namespace=self.namespace, + cluster_config=cluster_config, + entrypoint="python -c \"import ray; ray.init(); print('Kueue job done')\"", + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + local_queue=self.local_queues[0], + ) + + try: + assert rayjob.submit() == job_name + assert self.job_api.wait_until_job_running( + name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=600 + ) + + assert self.job_api.wait_until_job_finished( + name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=300 + ) + finally: + try: + rayjob.delete() + except Exception: + pass # Job might already be deleted + verify_rayjob_cluster_cleanup(cluster_api, rayjob.name, rayjob.namespace) + + def test_lifecycled_kueue_resource_queueing(self): + """Test Kueue resource queueing with lifecycled clusters.""" + self.setup_method() + create_namespace(self) + create_limited_kueue_resources(self) + + self.job_api = RayjobApi() + cluster_api = RayClusterApi() + + # Get platform-appropriate resource configurations + resources = get_platform_appropriate_resources() + + cluster_config = ManagedClusterConfig( + head_cpu_requests=resources["head_cpu_requests"], + head_cpu_limits=resources["head_cpu_limits"], + head_memory_requests=resources["head_memory_requests"], + head_memory_limits=resources["head_memory_limits"], + num_workers=0, + ) + + job1 = None + job2 = None + try: + job1 = RayJob( + job_name="holder", + namespace=self.namespace, + 
cluster_config=cluster_config, + entrypoint='python -c "import ray; import time; ray.init(); time.sleep(15)"', + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + local_queue=self.local_queues[0], + ) + assert job1.submit() == "holder" + assert self.job_api.wait_until_job_running( + name=job1.name, k8s_namespace=job1.namespace, timeout=60 + ) + + job2 = RayJob( + job_name="waiter", + namespace=self.namespace, + cluster_config=cluster_config, + entrypoint='python -c "import ray; ray.init()"', + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + local_queue=self.local_queues[0], + ) + assert job2.submit() == "waiter" + + # Wait for Kueue to process the job + sleep(5) + job2_cr = self.job_api.get_job(name=job2.name, k8s_namespace=job2.namespace) + + # For RayJobs with managed clusters, check if Kueue is holding resources + job2_status = job2_cr.get("status", {}) + ray_cluster_name = job2_status.get("rayClusterName", "") + + # If RayCluster is not created yet, it means Kueue is holding the job + if not ray_cluster_name: + # This is the expected behavior + job_is_queued = True + else: + # Check RayCluster resources - if all are 0, it's queued + ray_cluster_status = job2_status.get("rayClusterStatus", {}) + desired_cpu = ray_cluster_status.get("desiredCPU", "0") + desired_memory = ray_cluster_status.get("desiredMemory", "0") + + # Kueue creates the RayCluster but with 0 resources when queued + job_is_queued = desired_cpu == "0" and desired_memory == "0" + + assert job_is_queued, "Job2 should be queued by Kueue while Job1 is running" + + assert self.job_api.wait_until_job_finished( + name=job1.name, k8s_namespace=job1.namespace, timeout=60 + ) + + assert wait_for_kueue_admission( + self, self.job_api, job2.name, job2.namespace, timeout=30 + ) + + assert self.job_api.wait_until_job_finished( + name=job2.name, k8s_namespace=job2.namespace, timeout=60 + ) + finally: + for job in [job1, job2]: + if job: + try: + job.delete() + 
verify_rayjob_cluster_cleanup( + cluster_api, job.name, job.namespace + ) + except: + pass diff --git a/tests/e2e/support.py b/tests/e2e/support.py index fe9261a2..85b3dd35 100644 --- a/tests/e2e/support.py +++ b/tests/e2e/support.py @@ -2,6 +2,7 @@ import random import string import subprocess +from time import sleep from codeflare_sdk import get_cluster from kubernetes import client, config from codeflare_sdk.common.kubernetes_cluster.kube_api_helpers import ( @@ -27,11 +28,82 @@ def get_ray_cluster(cluster_name, namespace): raise +def is_openshift(): + """Detect if running on OpenShift by checking for OpenShift-specific API resources.""" + try: + api = client.ApiClient() + discovery = client.ApisApi(api) + # Check for OpenShift-specific API group + groups = discovery.get_api_versions().groups + for group in groups: + if group.name == "image.openshift.io": + return True + return False + except Exception: + # If we can't determine, assume it's not OpenShift + return False + + def get_ray_image(): - return os.getenv( - "RAY_IMAGE", - get_ray_image_for_python_version(warn_on_unsupported=False), - ) + """ + Get appropriate Ray image based on platform (OpenShift vs Kind/vanilla K8s). + + The tests marked with @pytest.mark.openshift can run on both OpenShift and Kind clusters + with Kueue installed. This function automatically selects the appropriate image: + - OpenShift: Uses the CUDA runtime image (quay.io/modh/ray:...) + - Kind/K8s: Uses the standard Ray image (rayproject/ray:VERSION) + + You can override this behavior by setting the RAY_IMAGE environment variable. 
+ """ + # Allow explicit override via environment variable + if "RAY_IMAGE" in os.environ: + return os.environ["RAY_IMAGE"] + + # Auto-detect platform and return appropriate image + if is_openshift(): + return get_ray_image_for_python_version() + else: + # Use standard Ray image for Kind/vanilla K8s + return f"rayproject/ray:{constants.RAY_VERSION}" + + +def get_platform_appropriate_resources(): + """ + Get appropriate resource configurations based on platform. + + OpenShift with MODH images requires more memory than Kind with standard Ray images. + + Returns: + dict: Resource configurations with keys: + - head_cpu_requests, head_cpu_limits + - head_memory_requests, head_memory_limits + - worker_cpu_requests, worker_cpu_limits + - worker_memory_requests, worker_memory_limits + """ + if is_openshift(): + # MODH runtime images require more memory + return { + "head_cpu_requests": "1", + "head_cpu_limits": "1.5", + "head_memory_requests": 7, + "head_memory_limits": 8, + "worker_cpu_requests": "1", + "worker_cpu_limits": "1", + "worker_memory_requests": 5, + "worker_memory_limits": 6, + } + else: + # Standard Ray images require less memory + return { + "head_cpu_requests": "1", + "head_cpu_limits": "1.5", + "head_memory_requests": 7, + "head_memory_limits": 8, + "worker_cpu_requests": "1", + "worker_cpu_limits": "1", + "worker_memory_requests": 2, + "worker_memory_limits": 3, + } def get_setup_env_variables(**kwargs): @@ -147,6 +219,17 @@ def run_oc_command(args): return None +def run_kubectl_command(args): + try: + result = subprocess.run( + ["kubectl"] + args, capture_output=True, text=True, check=True + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + print(f"Error executing 'kubectl {' '.join(args)}': {e}") + return None + + def create_cluster_queue(self, cluster_queue, flavor): cluster_queue_json = { "apiVersion": "kueue.x-k8s.io/v1beta1", @@ -161,9 +244,9 @@ def create_cluster_queue(self, cluster_queue, flavor): { "name": flavor, 
"resources": [ - {"name": "cpu", "nominalQuota": 9}, - {"name": "memory", "nominalQuota": "36Gi"}, - {"name": "nvidia.com/gpu", "nominalQuota": 1}, + {"name": "cpu", "nominalQuota": 20}, + {"name": "memory", "nominalQuota": "80Gi"}, + {"name": "nvidia.com/gpu", "nominalQuota": 2}, ], }, ], @@ -301,7 +384,6 @@ def create_kueue_resources( def delete_kueue_resources(self): - # Delete if given cluster-queue exists for cq in self.cluster_queues: try: self.custom_api.delete_cluster_custom_object( @@ -409,3 +491,234 @@ def assert_get_cluster_and_jobsubmit( assert job_list[0].submission_id == submission_id cluster.down() + + +def wait_for_kueue_admission(self, job_api, job_name, namespace, timeout=120): + print(f"Waiting for Kueue admission of job '{job_name}'...") + elapsed_time = 0 + check_interval = 5 + + while elapsed_time < timeout: + try: + job_cr = job_api.get_job(name=job_name, k8s_namespace=namespace) + + # Check if the job is no longer suspended + is_suspended = job_cr.get("spec", {}).get("suspend", False) + + if not is_suspended: + print(f"✓ Job '{job_name}' admitted by Kueue (no longer suspended)") + return True + + # Debug: Check workload status every 10 seconds + if elapsed_time % 10 == 0: + workload = get_kueue_workload_for_job(self, job_name, namespace) + if workload: + conditions = workload.get("status", {}).get("conditions", []) + print(f" DEBUG: Workload conditions for '{job_name}':") + for condition in conditions: + print( + f" - {condition.get('type')}: {condition.get('status')} - {condition.get('reason', '')} - {condition.get('message', '')}" + ) + + # Optional: Check status conditions for more details + conditions = job_cr.get("status", {}).get("conditions", []) + for condition in conditions: + if ( + condition.get("type") == "Suspended" + and condition.get("status") == "False" + ): + print( + f"✓ Job '{job_name}' admitted by Kueue (Suspended=False condition)" + ) + return True + + except Exception as e: + print(f"Error checking job status: {e}") + + 
sleep(check_interval) + elapsed_time += check_interval + + print(f"✗ Timeout waiting for Kueue admission of job '{job_name}'") + return False + + +def create_limited_kueue_resources(self): + print("Creating limited Kueue resources for preemption testing...") + + # Create a resource flavor with default (no special labels/tolerations) + resource_flavor = f"limited-flavor-{random_choice()}" + create_resource_flavor( + self, resource_flavor, default=True, with_labels=False, with_tolerations=False + ) + self.resource_flavors = [resource_flavor] + + # Create a cluster queue with very limited resources + # Adjust quota based on platform - OpenShift needs more memory + if is_openshift(): + # MODH images need more memory, so higher quota but still limited to allow only 1 job + cpu_quota = 3 + memory_quota = "15Gi" # One job needs ~8Gi head, allow some buffer + else: + # Standard Ray images - one job needs ~8G head + 500m submitter + cpu_quota = 3 + memory_quota = "10Gi" # Enough for one job (8G head + submitter), but not two + + cluster_queue_name = f"limited-cq-{random_choice()}" + cluster_queue_json = { + "apiVersion": "kueue.x-k8s.io/v1beta1", + "kind": "ClusterQueue", + "metadata": {"name": cluster_queue_name}, + "spec": { + "namespaceSelector": {}, + "resourceGroups": [ + { + "coveredResources": ["cpu", "memory"], + "flavors": [ + { + "name": resource_flavor, + "resources": [ + { + "name": "cpu", + "nominalQuota": cpu_quota, + }, + { + "name": "memory", + "nominalQuota": memory_quota, + }, + ], + } + ], + } + ], + }, + } + + try: + self.custom_api.create_cluster_custom_object( + group="kueue.x-k8s.io", + plural="clusterqueues", + version="v1beta1", + body=cluster_queue_json, + ) + print(f"✓ Created limited ClusterQueue: {cluster_queue_name}") + except Exception as e: + print(f"Error creating limited ClusterQueue: {e}") + raise + + self.cluster_queues = [cluster_queue_name] + + # Create a local queue + local_queue_name = f"limited-lq-{random_choice()}" + 
create_local_queue(self, cluster_queue_name, local_queue_name, is_default=True) + self.local_queues = [local_queue_name] + + print("✓ Limited Kueue resources created successfully") + + +def get_kueue_workload_for_job(self, job_name, namespace): + try: + # List all workloads in the namespace + workloads = self.custom_api.list_namespaced_custom_object( + group="kueue.x-k8s.io", + version="v1beta1", + plural="workloads", + namespace=namespace, + ) + + # Find workload with matching RayJob owner reference + for workload in workloads.get("items", []): + owner_refs = workload.get("metadata", {}).get("ownerReferences", []) + + for owner_ref in owner_refs: + if ( + owner_ref.get("kind") == "RayJob" + and owner_ref.get("name") == job_name + ): + workload_name = workload.get("metadata", {}).get("name") + print( + f"✓ Found Kueue workload '{workload_name}' for RayJob '{job_name}'" + ) + return workload + + print(f"✗ No Kueue workload found for RayJob '{job_name}'") + return None + + except Exception as e: + print(f"Error getting Kueue workload for job '{job_name}': {e}") + return None + + +def wait_for_job_status( + job_api, rayjob_name: str, namespace: str, expected_status: str, timeout: int = 30 +) -> bool: + """ + Wait for a RayJob to reach a specific deployment status. 
+ + Args: + job_api: RayjobApi instance + rayjob_name: Name of the RayJob + namespace: Namespace of the RayJob + expected_status: Expected jobDeploymentStatus value + timeout: Maximum time to wait in seconds + + Returns: + bool: True if status reached, False if timeout + """ + elapsed_time = 0 + check_interval = 2 + + while elapsed_time < timeout: + status = job_api.get_job_status(name=rayjob_name, k8s_namespace=namespace) + if status and status.get("jobDeploymentStatus") == expected_status: + return True + + sleep(check_interval) + elapsed_time += check_interval + + return False + + +def verify_rayjob_cluster_cleanup( + cluster_api, rayjob_name: str, namespace: str, timeout: int = 60 +): + """ + Verify that the RayCluster created by a RayJob has been cleaned up. + Handles KubeRay's automatic suffix addition to cluster names. + + Args: + cluster_api: RayClusterApi instance + rayjob_name: Name of the RayJob + namespace: Namespace to check + timeout: Maximum time to wait in seconds + + Raises: + TimeoutError: If cluster is not cleaned up within timeout + """ + elapsed_time = 0 + check_interval = 5 + + while elapsed_time < timeout: + # List all RayClusters in the namespace + clusters = cluster_api.list_ray_clusters( + k8s_namespace=namespace, async_req=False + ) + + # Check if any cluster exists that starts with our job name + found = False + for cluster in clusters.get("items", []): + cluster_name = cluster.get("metadata", {}).get("name", "") + # KubeRay creates clusters with pattern: {job_name}-raycluster-{suffix} + if cluster_name.startswith(f"{rayjob_name}-raycluster"): + found = True + break + + if not found: + # No cluster found, cleanup successful + return + + sleep(check_interval) + elapsed_time += check_interval + + raise TimeoutError( + f"RayCluster for job '{rayjob_name}' was not cleaned up within {timeout} seconds" + ) diff --git a/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml b/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml 
index 3d710bfc..fe07e331 100644 --- a/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml +++ b/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml @@ -39,7 +39,6 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - dashboard-port: '8265' num-gpus: '1' resources: '"{\"TPU\": 2}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/kueue/aw_kueue.yaml b/tests/test_cluster_yamls/kueue/aw_kueue.yaml index 7f72d25b..92e5078d 100644 --- a/tests/test_cluster_yamls/kueue/aw_kueue.yaml +++ b/tests/test_cluster_yamls/kueue/aw_kueue.yaml @@ -33,7 +33,6 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml index 7a5a62ba..04331aed 100644 --- a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml +++ b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml @@ -33,7 +33,6 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/ray/default-appwrapper.yaml b/tests/test_cluster_yamls/ray/default-appwrapper.yaml index 734f3d33..1041f3b5 100644 --- a/tests/test_cluster_yamls/ray/default-appwrapper.yaml +++ b/tests/test_cluster_yamls/ray/default-appwrapper.yaml @@ -31,7 +31,6 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - dashboard-port: '8265' num-gpus: '0' resources: '"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/ray/default-ray-cluster.yaml b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml index cc5f2ada..213b22cf 100644 --- a/tests/test_cluster_yamls/ray/default-ray-cluster.yaml +++ b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml @@ -23,7 +23,6 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - dashboard-port: '8265' num-gpus: '0' resources: 
'"{}"' serviceType: ClusterIP diff --git a/tests/test_cluster_yamls/ray/unit-test-all-params.yaml b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml index 213a082a..7c7d82d6 100644 --- a/tests/test_cluster_yamls/ray/unit-test-all-params.yaml +++ b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml @@ -30,7 +30,6 @@ spec: rayStartParams: block: 'true' dashboard-host: 0.0.0.0 - dashboard-port: '8265' num-gpus: '1' resources: '"{\"TPU\": 2}"' serviceType: ClusterIP From 8b34fdcd57014ce231fadaa8b95e9220f9175240 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Mon, 22 Sep 2025 18:49:02 +0100 Subject: [PATCH 23/33] RHOAIENG-34085: Fix ConfigMap mount --- src/codeflare_sdk/ray/rayjobs/config.py | 1 + src/codeflare_sdk/ray/rayjobs/rayjob.py | 88 +++++++-------- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 40 ++++--- .../rayjob/rayjob_lifecycled_cluster_test.py | 104 ++++++++++++++++-- 4 files changed, 159 insertions(+), 74 deletions(-) diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index 02ced875..d4b8c213 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -247,6 +247,7 @@ def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]: """ ray_cluster_spec = { "rayVersion": RAY_VERSION, + "enableInTreeAutoscaling": False, # Required for Kueue-managed jobs "headGroupSpec": self._build_head_group_spec(), "workerGroupSpecs": [self._build_worker_group_spec(cluster_name)], } diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 228f9bb0..a6ec1ade 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -149,6 +149,14 @@ def submit(self) -> str: self._validate_ray_version_compatibility() + # Extract scripts to check if we need ConfigMaps + scripts = self._extract_script_files_from_entrypoint() + + # Pre-declare ConfigMap in cluster config for new clusters + if scripts and 
self._cluster_config: + configmap_name = f"{self.name}-scripts" + self._cluster_config.add_script_volumes(configmap_name, MOUNT_PATH) + rayjob_cr = self._build_rayjob_cr() logger.info(f"Submitting RayJob {self.name} to Kuberay operator") @@ -157,20 +165,41 @@ def submit(self) -> str: if result: logger.info(f"Successfully submitted RayJob {self.name}") - # Handle script files after RayJob creation so we can set owner reference - if self._cluster_config is not None: - scripts = self._extract_script_files_from_entrypoint() - if scripts: - self._handle_script_volumes_for_new_cluster(scripts, result) - elif self._cluster_name: - scripts = self._extract_script_files_from_entrypoint() - if scripts: - self._handle_script_volumes_for_existing_cluster(scripts, result) + # Create ConfigMap with owner reference after RayJob exists + if scripts: + self._create_script_configmap(scripts, result) return self.name else: raise RuntimeError(f"Failed to submit RayJob {self.name}") + def _create_script_configmap( + self, scripts: Dict[str, str], rayjob_result: Dict[str, Any] + ): + """ + Create ConfigMap with owner reference for script files. + + For new clusters: ConfigMap volume was pre-declared, just create it. + For existing clusters: Create ConfigMap and patch the cluster. 
+ """ + # Get a config builder for utility methods + config_builder = ( + self._cluster_config if self._cluster_config else ManagedClusterConfig() + ) + + # Validate and build ConfigMap spec + config_builder.validate_configmap_size(scripts) + configmap_spec = config_builder.build_script_configmap_spec( + job_name=self.name, namespace=self.namespace, scripts=scripts + ) + + # Create ConfigMap with owner reference + configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) + + # For existing clusters, update the cluster with volumes + if self._cluster_name and not self._cluster_config: + self._update_existing_cluster_for_scripts(configmap_name, config_builder) + def stop(self): """ Suspend the Ray job. @@ -488,47 +517,6 @@ def _find_local_imports( except (SyntaxError, ValueError) as e: logger.debug(f"Could not parse imports from {script_path}: {e}") - def _handle_script_volumes_for_new_cluster( - self, scripts: Dict[str, str], rayjob_result: Dict[str, Any] = None - ): - """Handle script volumes for new clusters (uses ManagedClusterConfig).""" - # Validate ConfigMap size before creation - self._cluster_config.validate_configmap_size(scripts) - - # Build ConfigMap spec using config.py - configmap_spec = self._cluster_config.build_script_configmap_spec( - job_name=self.name, namespace=self.namespace, scripts=scripts - ) - - # Create ConfigMap via Kubernetes API with owner reference - configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) - - # Add volumes to cluster config (config.py handles spec building) - self._cluster_config.add_script_volumes( - configmap_name=configmap_name, mount_path=MOUNT_PATH - ) - - def _handle_script_volumes_for_existing_cluster( - self, scripts: Dict[str, str], rayjob_result: Dict[str, Any] = None - ): - """Handle script volumes for existing clusters (updates RayCluster CR).""" - # Create config builder for utility methods - config_builder = ManagedClusterConfig() - - # Validate ConfigMap 
size before creation - config_builder.validate_configmap_size(scripts) - - # Build ConfigMap spec using config.py - configmap_spec = config_builder.build_script_configmap_spec( - job_name=self.name, namespace=self.namespace, scripts=scripts - ) - - # Create ConfigMap via Kubernetes API with owner reference - configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) - - # Update existing RayCluster - self._update_existing_cluster_for_scripts(configmap_name, config_builder) - def _create_configmap_from_spec( self, configmap_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None ) -> str: diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 829265d6..971f4342 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -586,6 +586,8 @@ def test_build_ray_cluster_spec_function(): spec = cluster_config.build_ray_cluster_spec("test-cluster") assert "rayVersion" in spec + assert "enableInTreeAutoscaling" in spec + assert spec["enableInTreeAutoscaling"] is False # Required for Kueue assert "headGroupSpec" in spec assert "workerGroupSpecs" in spec @@ -1304,11 +1306,13 @@ def func2(): pass os.chdir(original_cwd) -def test_script_handling_timing_after_rayjob_submission( - mocker, auto_mock_setup, tmp_path -): - """Test that script handling happens after RayJob is submitted (not before).""" - mock_api_instance = auto_mock_setup["rayjob_api"] +def test_script_handling_kubernetes_best_practice_flow(mocker, tmp_path): + """Test the Kubernetes best practice flow: pre-declare volume, submit, create ConfigMap.""" + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance submit_result = { "metadata": { @@ -1319,9 +1323,8 @@ def test_script_handling_timing_after_rayjob_submission( } 
mock_api_instance.submit_job.return_value = submit_result - mock_handle_new = mocker.patch.object( - RayJob, "_handle_script_volumes_for_new_cluster" - ) + mock_create_cm = mocker.patch.object(RayJob, "_create_script_configmap") + mock_add_volumes = mocker.patch.object(ManagedClusterConfig, "add_script_volumes") # RayClusterApi is already mocked by auto_mock_setup @@ -1330,17 +1333,22 @@ def test_script_handling_timing_after_rayjob_submission( call_order = [] + def track_add_volumes(*args, **kwargs): + call_order.append("add_volumes") + # Should be called with ConfigMap name + assert args[0] == "test-job-scripts" + def track_submit(*args, **kwargs): call_order.append("submit_job") return submit_result - def track_handle_scripts(*args, **kwargs): - call_order.append("handle_scripts") - assert len(args) >= 2 + def track_create_cm(*args, **kwargs): + call_order.append("create_configmap") assert args[1] == submit_result # rayjob_result should be second arg + mock_add_volumes.side_effect = track_add_volumes mock_api_instance.submit_job.side_effect = track_submit - mock_handle_new.side_effect = track_handle_scripts + mock_create_cm.side_effect = track_create_cm original_cwd = os.getcwd() try: @@ -1359,12 +1367,14 @@ def track_handle_scripts(*args, **kwargs): finally: os.chdir(original_cwd) - assert call_order == ["submit_job", "handle_scripts"] + # Verify the order: add volumes → submit → create ConfigMap + assert call_order == ["add_volumes", "submit_job", "create_configmap"] + mock_add_volumes.assert_called_once() mock_api_instance.submit_job.assert_called_once() - mock_handle_new.assert_called_once() + mock_create_cm.assert_called_once() - mock_handle_new.assert_called_with({"test.py": "print('test')"}, submit_result) + mock_create_cm.assert_called_with({"test.py": "print('test')"}, submit_result) def test_rayjob_submit_with_scripts_new_cluster(auto_mock_setup, tmp_path): diff --git a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py 
b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py index 51c72df6..3f9bbf03 100644 --- a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py @@ -2,11 +2,14 @@ import sys import os from time import sleep +import tempfile sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from support import * from codeflare_sdk import RayJob, ManagedClusterConfig + +from kubernetes import client from python_client.kuberay_job_api import RayjobApi from python_client.kuberay_cluster_api import RayClusterApi @@ -22,7 +25,7 @@ def teardown_method(self): delete_kueue_resources(self) def test_lifecycled_kueue_managed(self): - """Test RayJob with Kueue-managed lifecycled cluster.""" + """Test RayJob with Kueue-managed lifecycled cluster with ConfigMap validation.""" self.setup_method() create_namespace(self) create_kueue_resources(self) @@ -46,17 +49,36 @@ def test_lifecycled_kueue_managed(self): worker_memory_limits=resources["worker_memory_limits"], ) - rayjob = RayJob( - job_name=job_name, - namespace=self.namespace, - cluster_config=cluster_config, - entrypoint="python -c \"import ray; ray.init(); print('Kueue job done')\"", - runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, - local_queue=self.local_queues[0], - ) + # Create a temporary script file to test ConfigMap functionality + with tempfile.NamedTemporaryFile( + mode="w", suffix=".py", delete=False, dir=os.getcwd() + ) as script_file: + script_file.write( + """ + import ray + ray.init() + print('Kueue job with ConfigMap done') + ray.shutdown() + """ + ) + script_file.flush() + script_filename = os.path.basename(script_file.name) try: + rayjob = RayJob( + job_name=job_name, + namespace=self.namespace, + cluster_config=cluster_config, + entrypoint=f"python {script_filename}", + runtime_env={"env_vars": get_setup_env_variables(ACCELERATOR="cpu")}, + local_queue=self.local_queues[0], + ) + assert rayjob.submit() == job_name + + # Verify 
ConfigMap was created with owner reference + self.verify_configmap_with_owner_reference(rayjob) + assert self.job_api.wait_until_job_running( name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=600 ) @@ -70,6 +92,12 @@ def test_lifecycled_kueue_managed(self): except Exception: pass # Job might already be deleted verify_rayjob_cluster_cleanup(cluster_api, rayjob.name, rayjob.namespace) + # Clean up the temporary script file + if "script_filename" in locals(): + try: + os.remove(script_filename) + except: + pass def test_lifecycled_kueue_resource_queueing(self): """Test Kueue resource queueing with lifecycled clusters.""" @@ -161,3 +189,61 @@ def test_lifecycled_kueue_resource_queueing(self): ) except: pass + + def verify_configmap_with_owner_reference(self, rayjob: RayJob): + """Verify that the ConfigMap was created with proper owner reference to the RayJob.""" + v1 = client.CoreV1Api() + configmap_name = f"{rayjob.name}-scripts" + + try: + # Get the ConfigMap + configmap = v1.read_namespaced_config_map( + name=configmap_name, namespace=rayjob.namespace + ) + + # Verify ConfigMap exists + assert configmap is not None, f"ConfigMap {configmap_name} not found" + + # Verify it contains the script + assert configmap.data is not None, "ConfigMap has no data" + assert len(configmap.data) > 0, "ConfigMap data is empty" + + # Verify owner reference + assert ( + configmap.metadata.owner_references is not None + ), "ConfigMap has no owner references" + assert ( + len(configmap.metadata.owner_references) > 0 + ), "ConfigMap owner references list is empty" + + owner_ref = configmap.metadata.owner_references[0] + assert ( + owner_ref.api_version == "ray.io/v1" + ), f"Wrong API version: {owner_ref.api_version}" + assert owner_ref.kind == "RayJob", f"Wrong kind: {owner_ref.kind}" + assert owner_ref.name == rayjob.name, f"Wrong owner name: {owner_ref.name}" + assert ( + owner_ref.controller is True + ), "Owner reference controller not set to true" + assert ( + 
owner_ref.block_owner_deletion is True + ), "Owner reference blockOwnerDeletion not set to true" + + # Verify labels + assert configmap.metadata.labels.get("ray.io/job-name") == rayjob.name + assert ( + configmap.metadata.labels.get("app.kubernetes.io/managed-by") + == "codeflare-sdk" + ) + assert ( + configmap.metadata.labels.get("app.kubernetes.io/component") + == "rayjob-scripts" + ) + + print(f"✓ ConfigMap {configmap_name} verified with proper owner reference") + + except client.rest.ApiException as e: + if e.status == 404: + raise AssertionError(f"ConfigMap {configmap_name} not found") + else: + raise e From bf690a91740473a0038d700ea8b355baa2fed237 Mon Sep 17 00:00:00 2001 From: Pat O'Connor Date: Tue, 30 Sep 2025 11:42:48 +0100 Subject: [PATCH 24/33] task(RHOAIENG-33283): Elegantly handle runtime_env Signed-off-by: Pat O'Connor --- .github/workflows/rayjob_e2e_tests.yaml | 6 +- src/codeflare_sdk/common/utils/constants.py | 2 +- src/codeflare_sdk/common/utils/test_utils.py | 209 +++ src/codeflare_sdk/ray/rayjobs/config.py | 62 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 550 ++++++-- src/codeflare_sdk/ray/rayjobs/test_config.py | 152 ++- src/codeflare_sdk/ray/rayjobs/test_rayjob.py | 1213 +++++++++-------- .../rayjob/rayjob_existing_cluster_test.py | 6 +- .../rayjob/rayjob_lifecycled_cluster_test.py | 4 +- 9 files changed, 1436 insertions(+), 768 deletions(-) create mode 100644 src/codeflare_sdk/common/utils/test_utils.py diff --git a/.github/workflows/rayjob_e2e_tests.yaml b/.github/workflows/rayjob_e2e_tests.yaml index c4856fd3..b5ba067f 100644 --- a/.github/workflows/rayjob_e2e_tests.yaml +++ b/.github/workflows/rayjob_e2e_tests.yaml @@ -115,6 +115,10 @@ jobs: kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods 
--user=sdk-user + kubectl create clusterrole configmap-manager --verb=get,list,create,delete,update,patch --resource=configmaps + kubectl create clusterrolebinding sdk-user-configmap-manager --clusterrole=configmap-manager --user=sdk-user + kubectl create clusterrole workload-reader --verb=get,list,watch --resource=workloads + kubectl create clusterrolebinding sdk-user-workload-reader --clusterrole=workload-reader --user=sdk-user kubectl config use-context sdk-user - name: Run RayJob E2E tests @@ -126,7 +130,7 @@ jobs: pip install poetry poetry install --with test,docs echo "Running RayJob e2e tests..." - poetry run pytest -v -s ./tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1 + poetry run pytest -v -s ./tests/e2e/rayjob/ > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1 - name: Switch to kind-cluster context to print logs if: always() && steps.deploy.outcome == 'success' diff --git a/src/codeflare_sdk/common/utils/constants.py b/src/codeflare_sdk/common/utils/constants.py index 7e6147f6..00559d2e 100644 --- a/src/codeflare_sdk/common/utils/constants.py +++ b/src/codeflare_sdk/common/utils/constants.py @@ -12,4 +12,4 @@ "3.11": CUDA_PY311_RUNTIME_IMAGE, "3.12": CUDA_PY312_RUNTIME_IMAGE, } -MOUNT_PATH = "/home/ray/scripts" +MOUNT_PATH = "/home/ray/files" diff --git a/src/codeflare_sdk/common/utils/test_utils.py b/src/codeflare_sdk/common/utils/test_utils.py new file mode 100644 index 00000000..d330bc4d --- /dev/null +++ b/src/codeflare_sdk/common/utils/test_utils.py @@ -0,0 +1,209 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Tests for common/utils/utils.py +""" + +import pytest +from collections import namedtuple +from codeflare_sdk.common.utils.utils import ( + update_image, + get_ray_image_for_python_version, +) +from codeflare_sdk.common.utils.constants import ( + SUPPORTED_PYTHON_VERSIONS, + CUDA_PY311_RUNTIME_IMAGE, + CUDA_PY312_RUNTIME_IMAGE, +) + + +def test_update_image_with_empty_string_python_311(mocker): + """Test that update_image() with empty string returns default image for Python 3.11.""" + # Mock sys.version_info to simulate Python 3.11 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0)) + + # Test with empty image (should use default for Python 3.11) + image = update_image("") + assert image == CUDA_PY311_RUNTIME_IMAGE + assert image == SUPPORTED_PYTHON_VERSIONS["3.11"] + + +def test_update_image_with_empty_string_python_312(mocker): + """Test that update_image() with empty string returns default image for Python 3.12.""" + # Mock sys.version_info to simulate Python 3.12 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0)) + + # Test with empty image (should use default for Python 3.12) + image = update_image("") + assert image == CUDA_PY312_RUNTIME_IMAGE + assert image == SUPPORTED_PYTHON_VERSIONS["3.12"] + + +def test_update_image_with_none_python_311(mocker): + """Test that update_image() with None returns 
default image for Python 3.11.""" + # Mock sys.version_info to simulate Python 3.11 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0)) + + # Test with None image (should use default for Python 3.11) + image = update_image(None) + assert image == CUDA_PY311_RUNTIME_IMAGE + + +def test_update_image_with_none_python_312(mocker): + """Test that update_image() with None returns default image for Python 3.12.""" + # Mock sys.version_info to simulate Python 3.12 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0)) + + # Test with None image (should use default for Python 3.12) + image = update_image(None) + assert image == CUDA_PY312_RUNTIME_IMAGE + + +def test_update_image_with_unsupported_python_version(mocker): + """Test update_image() warning for unsupported Python versions.""" + # Mock sys.version_info to simulate Python 3.8 (unsupported) + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 8, 0, "final", 0)) + + # Mock warnings.warn to check if it gets called + warn_mock = mocker.patch("warnings.warn") + + # Call update_image with empty image + image = update_image("") + + # Assert that the warning was called with the expected message + warn_mock.assert_called_once() + assert "No default Ray image defined for 3.8" in warn_mock.call_args[0][0] + assert "3.11, 3.12" in warn_mock.call_args[0][0] + + # Assert that no image was set since the Python version is not supported + assert image is None + + +def test_update_image_with_provided_custom_image(): + """Test that providing a custom image bypasses auto-detection.""" + custom_image = "my-custom-ray:latest" + image = update_image(custom_image) + + # Should return the provided image 
unchanged + assert image == custom_image + + +def test_update_image_with_provided_image_empty_string(): + """Test update_image() with provided custom image as a non-empty string.""" + custom_image = "docker.io/rayproject/ray:2.40.0" + image = update_image(custom_image) + + # Should return the provided image unchanged + assert image == custom_image + + +def test_get_ray_image_for_python_version_explicit_311(): + """Test get_ray_image_for_python_version() with explicit Python 3.11.""" + image = get_ray_image_for_python_version("3.11") + assert image == CUDA_PY311_RUNTIME_IMAGE + + +def test_get_ray_image_for_python_version_explicit_312(): + """Test get_ray_image_for_python_version() with explicit Python 3.12.""" + image = get_ray_image_for_python_version("3.12") + assert image == CUDA_PY312_RUNTIME_IMAGE + + +def test_get_ray_image_for_python_version_auto_detect_311(mocker): + """Test get_ray_image_for_python_version() auto-detects Python 3.11.""" + # Mock sys.version_info to simulate Python 3.11 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0)) + + # Test with None (should auto-detect) + image = get_ray_image_for_python_version() + assert image == CUDA_PY311_RUNTIME_IMAGE + + +def test_get_ray_image_for_python_version_auto_detect_312(mocker): + """Test get_ray_image_for_python_version() auto-detects Python 3.12.""" + # Mock sys.version_info to simulate Python 3.12 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0)) + + # Test with None (should auto-detect) + image = get_ray_image_for_python_version() + assert image == CUDA_PY312_RUNTIME_IMAGE + + +def test_get_ray_image_for_python_version_unsupported_with_warning(mocker): + """Test get_ray_image_for_python_version() warns for unsupported versions.""" + warn_mock = 
mocker.patch("warnings.warn") + + # Test with unsupported version and warn_on_unsupported=True (default) + image = get_ray_image_for_python_version("3.9", warn_on_unsupported=True) + + # Should have warned + warn_mock.assert_called_once() + assert "No default Ray image defined for 3.9" in warn_mock.call_args[0][0] + + # Should return None + assert image is None + + +def test_get_ray_image_for_python_version_unsupported_without_warning(): + """Test get_ray_image_for_python_version() falls back to 3.12 without warning.""" + # Test with unsupported version and warn_on_unsupported=False + image = get_ray_image_for_python_version("3.10", warn_on_unsupported=False) + + # Should fall back to Python 3.12 image + assert image == CUDA_PY312_RUNTIME_IMAGE + + +def test_get_ray_image_for_python_version_unsupported_silent_fallback(): + """Test get_ray_image_for_python_version() silently falls back for old versions.""" + # Test with Python 3.8 and warn_on_unsupported=False + image = get_ray_image_for_python_version("3.8", warn_on_unsupported=False) + + # Should fall back to Python 3.12 image without warning + assert image == CUDA_PY312_RUNTIME_IMAGE + + +def test_get_ray_image_for_python_version_none_defaults_to_current(mocker): + """Test that passing None to get_ray_image_for_python_version() uses current Python.""" + # Mock sys.version_info to simulate Python 3.11 + VersionInfo = namedtuple( + "version_info", ["major", "minor", "micro", "releaselevel", "serial"] + ) + mocker.patch("sys.version_info", VersionInfo(3, 11, 5, "final", 0)) + + # Passing None should detect the mocked version + image = get_ray_image_for_python_version(None, warn_on_unsupported=True) + + assert image == CUDA_PY311_RUNTIME_IMAGE diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index d4b8c213..023d57f6 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -448,70 +448,70 @@ def _build_env_vars(self) -> list: 
"""Build environment variables list.""" return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()] - def add_script_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH): + def add_file_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH): """ - Add script volume and mount references to cluster configuration. + Add file volume and mount references to cluster configuration. Args: - configmap_name: Name of the ConfigMap containing scripts - mount_path: Where to mount scripts in containers (default: /home/ray/scripts) + configmap_name: Name of the ConfigMap containing files + mount_path: Where to mount files in containers (default: /home/ray/scripts) """ - # Check if script volume already exists - volume_name = "ray-job-scripts" + # Check if file volume already exists + volume_name = "ray-job-files" existing_volume = next( (v for v in self.volumes if getattr(v, "name", None) == volume_name), None ) if existing_volume: - logger.debug(f"Script volume '{volume_name}' already exists, skipping...") + logger.debug(f"File volume '{volume_name}' already exists, skipping...") return - # Check if script mount already exists + # Check if file mount already exists existing_mount = next( (m for m in self.volume_mounts if getattr(m, "name", None) == volume_name), None, ) if existing_mount: logger.debug( - f"Script volume mount '{volume_name}' already exists, skipping..." + f"File volume mount '{volume_name}' already exists, skipping..." 
) return - # Add script volume to cluster configuration - script_volume = V1Volume( + # Add file volume to cluster configuration + file_volume = V1Volume( name=volume_name, config_map=V1ConfigMapVolumeSource(name=configmap_name) ) - self.volumes.append(script_volume) + self.volumes.append(file_volume) - # Add script volume mount to cluster configuration - script_mount = V1VolumeMount(name=volume_name, mount_path=mount_path) - self.volume_mounts.append(script_mount) + # Add file volume mount to cluster configuration + file_mount = V1VolumeMount(name=volume_name, mount_path=mount_path) + self.volume_mounts.append(file_mount) logger.info( - f"Added script volume '{configmap_name}' to cluster config: mount_path={mount_path}" + f"Added file volume '{configmap_name}' to cluster config: mount_path={mount_path}" ) - def validate_configmap_size(self, scripts: Dict[str, str]) -> None: - total_size = sum(len(content.encode("utf-8")) for content in scripts.values()) + def validate_configmap_size(self, files: Dict[str, str]) -> None: + total_size = sum(len(content.encode("utf-8")) for content in files.values()) if total_size > 1024 * 1024: # 1MB raise ValueError( f"ConfigMap size exceeds 1MB limit. 
Total size: {total_size} bytes" ) - def build_script_configmap_spec( - self, job_name: str, namespace: str, scripts: Dict[str, str] + def build_file_configmap_spec( + self, job_name: str, namespace: str, files: Dict[str, str] ) -> Dict[str, Any]: """ - Build ConfigMap specification for scripts + Build ConfigMap specification for files Args: job_name: Name of the RayJob (used for ConfigMap naming) namespace: Kubernetes namespace - scripts: Dictionary of script_name -> script_content + files: Dictionary of file_name -> file_content Returns: Dict: ConfigMap specification ready for Kubernetes API """ - configmap_name = f"{job_name}-scripts" + configmap_name = f"{job_name}-files" return { "apiVersion": "v1", "kind": "ConfigMap", @@ -521,27 +521,27 @@ def build_script_configmap_spec( "labels": { "ray.io/job-name": job_name, "app.kubernetes.io/managed-by": "codeflare-sdk", - "app.kubernetes.io/component": "rayjob-scripts", + "app.kubernetes.io/component": "rayjob-files", }, }, - "data": scripts, + "data": files, } - def build_script_volume_specs( + def build_file_volume_specs( self, configmap_name: str, mount_path: str = MOUNT_PATH ) -> Tuple[Dict[str, Any], Dict[str, Any]]: """ - Build volume and mount specifications for scripts + Build volume and mount specifications for files Args: - configmap_name: Name of the ConfigMap containing scripts - mount_path: Where to mount scripts in containers + configmap_name: Name of the ConfigMap containing files + mount_path: Where to mount files in containers Returns: Tuple of (volume_spec, mount_spec) as dictionaries """ - volume_spec = {"name": "ray-job-scripts", "configMap": {"name": configmap_name}} + volume_spec = {"name": "ray-job-files", "configMap": {"name": configmap_name}} - mount_spec = {"name": "ray-job-scripts", "mountPath": mount_path} + mount_spec = {"name": "ray-job-files", "mountPath": mount_path} return volume_spec, mount_spec diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py 
b/src/codeflare_sdk/ray/rayjobs/rayjob.py index a6ec1ade..ed0411e7 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -21,10 +21,13 @@ import os import re import ast -from typing import Dict, Any, Optional, Tuple +import yaml +from typing import Dict, Any, Optional, Tuple, List from codeflare_sdk.common.kueue.kueue import get_default_kueue_name from codeflare_sdk.common.utils.constants import MOUNT_PATH from kubernetes import client + +from codeflare_sdk.common.utils.utils import get_ray_image_for_python_version from ...common.kubernetes_cluster.auth import get_api_client from python_client.kuberay_job_api import RayjobApi from python_client.kuberay_cluster_api import RayClusterApi @@ -43,6 +46,9 @@ logger = logging.getLogger(__name__) +# Regex pattern for finding Python files in entrypoint commands +PYTHON_FILE_PATTERN = r"(?:python\s+)?([./\w/]+\.py)" + class RayJob: """ @@ -149,13 +155,13 @@ def submit(self) -> str: self._validate_ray_version_compatibility() - # Extract scripts to check if we need ConfigMaps - scripts = self._extract_script_files_from_entrypoint() + # Extract files from entrypoint and runtime_env working_dir + files = self._extract_all_local_files() - # Pre-declare ConfigMap in cluster config for new clusters - if scripts and self._cluster_config: - configmap_name = f"{self.name}-scripts" - self._cluster_config.add_script_volumes(configmap_name, MOUNT_PATH) + # Create ConfigMap for files (will be mounted to submitter pod) + configmap_name = None + if files: + configmap_name = f"{self.name}-files" rayjob_cr = self._build_rayjob_cr() @@ -166,40 +172,31 @@ def submit(self) -> str: logger.info(f"Successfully submitted RayJob {self.name}") # Create ConfigMap with owner reference after RayJob exists - if scripts: - self._create_script_configmap(scripts, result) + if files: + self._create_file_configmap(files, result) return self.name else: raise RuntimeError(f"Failed to submit RayJob {self.name}") - 
def _create_script_configmap( - self, scripts: Dict[str, str], rayjob_result: Dict[str, Any] + def _create_file_configmap( + self, files: Dict[str, str], rayjob_result: Dict[str, Any] ): """ - Create ConfigMap with owner reference for script files. - - For new clusters: ConfigMap volume was pre-declared, just create it. - For existing clusters: Create ConfigMap and patch the cluster. + Create ConfigMap with owner reference for local files. """ - # Get a config builder for utility methods - config_builder = ( - self._cluster_config if self._cluster_config else ManagedClusterConfig() - ) + # Use a basic config builder for ConfigMap creation + config_builder = ManagedClusterConfig() # Validate and build ConfigMap spec - config_builder.validate_configmap_size(scripts) - configmap_spec = config_builder.build_script_configmap_spec( - job_name=self.name, namespace=self.namespace, scripts=scripts + config_builder.validate_configmap_size(files) + configmap_spec = config_builder.build_file_configmap_spec( + job_name=self.name, namespace=self.namespace, files=files ) # Create ConfigMap with owner reference configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) - # For existing clusters, update the cluster with volumes - if self._cluster_name and not self._cluster_config: - self._update_existing_cluster_for_scripts(configmap_name, config_builder) - def stop(self): """ Suspend the Ray job. @@ -224,13 +221,18 @@ def resubmit(self): def delete(self): """ Delete the Ray job. + Returns True if deleted successfully or if already deleted. """ deleted = self._api.delete_job(name=self.name, k8s_namespace=self.namespace) if deleted: logger.info(f"Successfully deleted the RayJob {self.name}") return True else: - raise RuntimeError(f"Failed to delete the RayJob {self.name}") + # The python client logs "rayjob custom resource already deleted" + # and returns False when the job doesn't exist. + # This is not an error - treat it as successful deletion. 
+ logger.info(f"RayJob {self.name} already deleted or does not exist") + return True def _build_rayjob_cr(self) -> Dict[str, Any]: """ @@ -280,9 +282,20 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: if self.active_deadline_seconds: rayjob_cr["spec"]["activeDeadlineSeconds"] = self.active_deadline_seconds - # Add runtime environment if specified - if self.runtime_env: - rayjob_cr["spec"]["runtimeEnvYAML"] = str(self.runtime_env) + # Extract files once and use for both runtime_env and submitter pod + files = self._extract_all_local_files() + + # Add runtime environment (can be inferred even if not explicitly specified) + processed_runtime_env = self._process_runtime_env(files) + if processed_runtime_env: + rayjob_cr["spec"]["runtimeEnvYAML"] = processed_runtime_env + + # Add submitterPodTemplate if we have files to mount + if files: + configmap_name = f"{self.name}-files" + rayjob_cr["spec"][ + "submitterPodTemplate" + ] = self._build_submitter_pod_template(files, configmap_name) # Configure cluster: either use existing or create new if self._cluster_config is not None: @@ -304,6 +317,62 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: return rayjob_cr + def _build_submitter_pod_template( + self, files: Dict[str, str], configmap_name: str + ) -> Dict[str, Any]: + """ + Build submitterPodTemplate with ConfigMap volume mount for local files. 
+ + Args: + files: Dict of file_name -> file_content + configmap_name: Name of the ConfigMap containing the files + + Returns: + submitterPodTemplate specification + """ + # Image has to be hard coded for the job submitter + image = get_ray_image_for_python_version() + if ( + self._cluster_config + and hasattr(self._cluster_config, "image") + and self._cluster_config.image + ): + image = self._cluster_config.image + + # Build ConfigMap items for each file + config_map_items = [] + for file_name in files.keys(): + config_map_items.append({"key": file_name, "path": file_name}) + + submitter_pod_template = { + "spec": { + "restartPolicy": "Never", + "containers": [ + { + "name": "ray-job-submitter", + "image": image, + "volumeMounts": [ + {"name": "ray-job-files", "mountPath": MOUNT_PATH} + ], + } + ], + "volumes": [ + { + "name": "ray-job-files", + "configMap": { + "name": configmap_name, + "items": config_map_items, + }, + } + ], + } + } + + logger.info( + f"Built submitterPodTemplate with {len(files)} files mounted at {MOUNT_PATH}, using image: {image}" + ) + return submitter_pod_template + def _validate_ray_version_compatibility(self): """ Validate Ray version compatibility for cluster_config image. @@ -410,112 +479,365 @@ def _map_to_codeflare_status( deployment_status, (CodeflareRayJobStatus.UNKNOWN, False) ) - def _extract_script_files_from_entrypoint(self) -> Optional[Dict[str, str]]: + def _extract_files_from_entrypoint(self) -> Optional[Dict[str, str]]: """ Extract local Python script files from entrypoint command, plus their dependencies. 
Returns: - Dict of {script_name: script_content} if local scripts found, None otherwise + Dict of {file_name: file_content} if local files found, None otherwise """ if not self.entrypoint: return None - scripts = {} + files = {} processed_files = set() # Avoid infinite loops # Look for Python file patterns in entrypoint (e.g., "python script.py", "python /path/to/script.py") - python_file_pattern = r"(?:python\s+)?([./\w/]+\.py)" - matches = re.findall(python_file_pattern, self.entrypoint) + matches = re.findall(PYTHON_FILE_PATTERN, self.entrypoint) - # Process main scripts from entrypoint files - for script_path in matches: - self._process_script_and_imports( - script_path, scripts, MOUNT_PATH, processed_files + # Process main files from entrypoint + for file_path in matches: + self._process_file_and_imports( + file_path, files, MOUNT_PATH, processed_files ) - # Update entrypoint paths to use mounted locations - for script_path in matches: - if script_path in [os.path.basename(s) for s in processed_files]: - old_path = script_path - new_path = f"{MOUNT_PATH}/{os.path.basename(script_path)}" - self.entrypoint = self.entrypoint.replace(old_path, new_path) - - return scripts if scripts else None + return files if files else None - def _process_script_and_imports( + def _process_file_and_imports( self, - script_path: str, - scripts: Dict[str, str], + file_path: str, + files: Dict[str, str], mount_path: str, processed_files: set, ): - """Recursively process a script and its local imports""" - if script_path in processed_files: + """ + Recursively process a file and its local imports + """ + if file_path in processed_files: return # Check if it's a local file (not already a container path) - if script_path.startswith("/home/ray/") or not os.path.isfile(script_path): + if file_path.startswith("/home/ray/") or not os.path.isfile(file_path): return - processed_files.add(script_path) + processed_files.add(file_path) try: - with open(script_path, "r") as f: - script_content 
= f.read() + with open(file_path, "r") as f: + file_content = f.read() - script_name = os.path.basename(script_path) - scripts[script_name] = script_content + file_name = os.path.basename(file_path) + files[file_name] = file_content logger.info( - f"Found local script: {script_path} -> will mount at {mount_path}/{script_name}" + f"Found local file: {file_path} -> will mount at {mount_path}/{file_name}" ) - # Parse imports in this script to find dependencies + # Parse imports in this file to find dependencies self._find_local_imports( - script_content, - script_path, - lambda path: self._process_script_and_imports( - path, scripts, mount_path, processed_files + file_content, + file_path, + lambda path: self._process_file_and_imports( + path, files, mount_path, processed_files ), ) except (IOError, OSError) as e: - logger.warning(f"Could not read script file {script_path}: {e}") + logger.warning(f"Could not read file {file_path}: {e}") - def _find_local_imports( - self, script_content: str, script_path: str, process_callback - ): + def _find_local_imports(self, file_content: str, file_path: str, process_callback): """ - Find local Python imports in script content and process them. + Find local Python imports in file content and process them. 
Args: - script_content: The content of the Python script - script_path: Path to the current script (for relative imports) + file_content: The content of the Python file + file_path: Path to the current file (for relative imports) process_callback: Function to call for each found local import """ try: # Parse the Python AST to find imports - tree = ast.parse(script_content) - script_dir = os.path.dirname(os.path.abspath(script_path)) + tree = ast.parse(file_content) + file_dir = os.path.dirname(os.path.abspath(file_path)) for node in ast.walk(tree): if isinstance(node, ast.Import): # Handle: import module_name for alias in node.names: - potential_file = os.path.join(script_dir, f"{alias.name}.py") + potential_file = os.path.join(file_dir, f"{alias.name}.py") if os.path.isfile(potential_file): process_callback(potential_file) elif isinstance(node, ast.ImportFrom): # Handle: from module_name import something if node.module: - potential_file = os.path.join(script_dir, f"{node.module}.py") + potential_file = os.path.join(file_dir, f"{node.module}.py") if os.path.isfile(potential_file): process_callback(potential_file) except (SyntaxError, ValueError) as e: - logger.debug(f"Could not parse imports from {script_path}: {e}") + logger.debug(f"Could not parse imports from {file_path}: {e}") + + def _extract_all_local_files(self) -> Optional[Dict[str, str]]: + """ + Extract all local files from both entrypoint and runtime_env working_dir. + + Note: If runtime_env has a remote working_dir, we don't extract local files + to avoid conflicts. The remote working_dir should contain all needed files. + + Returns: + Dict of {file_name: file_content} if local files found, None otherwise + """ + # If there's a remote working_dir, don't extract local files to avoid conflicts + if ( + self.runtime_env + and "working_dir" in self.runtime_env + and not os.path.isdir(self.runtime_env["working_dir"]) + ): + logger.info( + f"Remote working_dir detected: {self.runtime_env['working_dir']}. 
" + "Skipping local file extraction - all files should come from remote source." + ) + return None + + files = {} + processed_files = set() + + # Extract files from entrypoint (always check for local files in entrypoint) + entrypoint_files = self._extract_files_from_entrypoint() + if entrypoint_files: + files.update(entrypoint_files) + processed_files.update(entrypoint_files.keys()) + + # Extract files from runtime_env working_dir if it's a local directory + if ( + self.runtime_env + and "working_dir" in self.runtime_env + and os.path.isdir(self.runtime_env["working_dir"]) + ): + working_dir_files = self._extract_working_dir_files( + self.runtime_env["working_dir"], processed_files + ) + if working_dir_files: + files.update(working_dir_files) + + # If no working_dir specified in runtime_env, try to infer and extract files from inferred directory + elif not self.runtime_env or "working_dir" not in self.runtime_env: + inferred_working_dir = self._infer_working_dir_from_entrypoint() + if inferred_working_dir: + working_dir_files = self._extract_working_dir_files( + inferred_working_dir, processed_files + ) + if working_dir_files: + files.update(working_dir_files) + + return files if files else None + + def _extract_working_dir_files( + self, working_dir: str, processed_files: set + ) -> Dict[str, str]: + """ + Extract all Python files from working directory. 
+ + Args: + working_dir: Path to working directory + processed_files: Set of already processed file names to avoid duplicates + + Returns: + Dict of {file_name: file_content} + """ + files_dict = {} + + try: + for root, dirs, files in os.walk(working_dir): + for file in files: + if file.endswith(".py") and file not in processed_files: + file_path = os.path.join(root, file) + try: + with open(file_path, "r") as f: + content = f.read() + files_dict[file] = content + processed_files.add(file) + logger.info( + f"Added working directory file: {file_path} -> {MOUNT_PATH}/{file}" + ) + except (IOError, OSError) as e: + logger.warning(f"Could not read file {file_path}: {e}") + except (IOError, OSError) as e: + logger.warning(f"Could not scan working directory {working_dir}: {e}") + + return files_dict + + def _process_runtime_env( + self, files: Optional[Dict[str, str]] = None + ) -> Optional[str]: + """ + Process runtime_env field to handle env_vars, pip dependencies, and working_dir. + Can also infer working directory from entrypoint even if runtime_env is not provided. 
+ + Returns: + Processed runtime environment as YAML string, or None if no processing needed + """ + processed_env = {} + + # Handle env_vars + if self.runtime_env and "env_vars" in self.runtime_env: + processed_env["env_vars"] = self.runtime_env["env_vars"] + logger.info( + f"Added {len(self.runtime_env['env_vars'])} environment variables to runtime_env" + ) + + # Handle pip dependencies + if self.runtime_env and "pip" in self.runtime_env: + pip_deps = self._process_pip_dependencies(self.runtime_env["pip"]) + if pip_deps: + processed_env["pip"] = pip_deps + + # Handle working_dir - if it's a local path, set it to mount path + if self.runtime_env and "working_dir" in self.runtime_env: + working_dir = self.runtime_env["working_dir"] + if os.path.isdir(working_dir): + # Local working directory - will be mounted at MOUNT_PATH + processed_env["working_dir"] = MOUNT_PATH + logger.info( + f"Local working directory will be packaged and mounted at: {MOUNT_PATH}" + ) + self._adjust_entrypoint_for_mounted_files() + else: + # Remote URI (e.g., GitHub) - pass through as-is + processed_env["working_dir"] = working_dir + logger.info(f"Using remote working directory: {working_dir}") + + # If no working_dir specified but we have files, set working_dir to mount path + elif not self.runtime_env or "working_dir" not in self.runtime_env: + if files: + # Local files found - will be mounted at MOUNT_PATH + processed_env["working_dir"] = MOUNT_PATH + logger.info( + f"Local files will be packaged and mounted at: {MOUNT_PATH}" + ) + self._adjust_entrypoint_for_mounted_files() + + # Convert to YAML string if we have any processed environment + if processed_env: + return yaml.dump(processed_env, default_flow_style=False) + + return None + + def _process_pip_dependencies(self, pip_spec) -> Optional[List[str]]: + """ + Process pip dependencies from runtime_env. 
+ + Args: + pip_spec: Can be a list of packages, a string path to requirements.txt, or dict + + Returns: + List of pip dependencies + """ + if isinstance(pip_spec, list): + # Already a list of dependencies + logger.info(f"Using provided pip dependencies: {len(pip_spec)} packages") + return pip_spec + elif isinstance(pip_spec, str): + # Assume it's a path to requirements.txt + return self._parse_requirements_file(pip_spec) + elif isinstance(pip_spec, dict): + # Handle dict format (e.g., {"packages": [...], "pip_check": False}) + if "packages" in pip_spec: + logger.info( + f"Using pip dependencies from dict: {len(pip_spec['packages'])} packages" + ) + return pip_spec["packages"] + + logger.warning(f"Unsupported pip specification format: {type(pip_spec)}") + return None + + def _parse_requirements_file(self, requirements_path: str) -> Optional[List[str]]: + """ + Parse a requirements.txt file and return list of dependencies. + + Args: + requirements_path: Path to requirements.txt file + + Returns: + List of pip dependencies + """ + if not os.path.isfile(requirements_path): + logger.warning(f"Requirements file not found: {requirements_path}") + return None + + try: + with open(requirements_path, "r") as f: + lines = f.readlines() + + # Parse requirements, filtering out comments and empty lines + requirements = [] + for line in lines: + line = line.strip() + if line and not line.startswith("#"): + requirements.append(line) + + logger.info( + f"Parsed {len(requirements)} dependencies from {requirements_path}" + ) + return requirements + + except (IOError, OSError) as e: + logger.warning(f"Could not read requirements file {requirements_path}: {e}") + return None + + def _infer_working_dir_from_entrypoint(self) -> Optional[str]: + """ + Infer working directory from entrypoint path when it contains directory components. + Only useful for entrypoints with paths like 'python src/script.py'. 
+ + Returns: + Inferred working directory path, or None if just simple filenames + """ + if not self.entrypoint: + return None + + # Look for Python file patterns in entrypoint + matches = re.findall(PYTHON_FILE_PATTERN, self.entrypoint) + + for script_path in matches: + # Only infer working directory if the path has directory components + if "/" in script_path or "\\" in script_path: + if os.path.isfile(script_path): + working_dir = os.path.dirname(os.path.abspath(script_path)) + logger.info( + f"Inferred working directory from entrypoint: {working_dir}" + ) + return working_dir + else: + # File doesn't exist locally, but path has directory components + working_dir = os.path.dirname(os.path.abspath(script_path)) + logger.info( + f"Inferred working directory from entrypoint path: {working_dir}" + ) + return working_dir + + # For simple filenames like "script.py" we don't need to infer the working directory + return None + + def _adjust_entrypoint_for_mounted_files(self): + """ + Adjust the entrypoint command to use just filenames since files are mounted at MOUNT_PATH. 
+ """ + if not self.entrypoint: + return + + # Look for Python file patterns in entrypoint + matches = re.findall(PYTHON_FILE_PATTERN, self.entrypoint) + + for script_path in matches: + if os.path.isfile(script_path): + # Use just the filename since files will be mounted at MOUNT_PATH + filename = os.path.basename(script_path) + self.entrypoint = self.entrypoint.replace(script_path, filename) + logger.info( + f"Adjusted entrypoint for mounted files: {script_path} -> {filename}" + ) def _create_configmap_from_spec( self, configmap_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None @@ -572,7 +894,7 @@ def _create_configmap_from_spec( namespace=self.namespace, body=configmap ) logger.info( - f"Created ConfigMap '{configmap_name}' with {len(configmap_spec['data'])} scripts" + f"Created ConfigMap '{configmap_name}' with {len(configmap_spec['data'])} files" ) except client.ApiException as e: if e.status == 409: # Already exists @@ -586,77 +908,3 @@ def _create_configmap_from_spec( ) return configmap_name - - # Note: This only works once the pods have been restarted as the configmaps won't be picked up until then :/ - def _update_existing_cluster_for_scripts( - self, configmap_name: str, config_builder: ManagedClusterConfig - ): - """ - Update existing RayCluster to add script volumes and mounts. 
- - Args: - configmap_name: Name of the ConfigMap containing scripts - config_builder: ManagedClusterConfig instance for building specs - """ - - try: - ray_cluster = self._cluster_api.get_ray_cluster( - name=self.cluster_name, - k8s_namespace=self.namespace, - ) - except client.ApiException as e: - raise RuntimeError(f"Failed to get RayCluster '{self.cluster_name}': {e}") - - # Build script volume and mount specifications using config.py - script_volume, script_mount = config_builder.build_script_volume_specs( - configmap_name=configmap_name, mount_path=MOUNT_PATH - ) - - # Helper function to check for duplicate volumes/mounts - def volume_exists(volumes_list, volume_name): - return any(v.get("name") == volume_name for v in volumes_list) - - def mount_exists(mounts_list, mount_name): - return any(m.get("name") == mount_name for m in mounts_list) - - # Add volumes and mounts to head group - head_spec = ray_cluster["spec"]["headGroupSpec"]["template"]["spec"] - if "volumes" not in head_spec: - head_spec["volumes"] = [] - if not volume_exists(head_spec["volumes"], script_volume["name"]): - head_spec["volumes"].append(script_volume) - - head_container = head_spec["containers"][0] # Ray head container - if "volumeMounts" not in head_container: - head_container["volumeMounts"] = [] - if not mount_exists(head_container["volumeMounts"], script_mount["name"]): - head_container["volumeMounts"].append(script_mount) - - # Add volumes and mounts to worker groups - for worker_group in ray_cluster["spec"]["workerGroupSpecs"]: - worker_spec = worker_group["template"]["spec"] - if "volumes" not in worker_spec: - worker_spec["volumes"] = [] - if not volume_exists(worker_spec["volumes"], script_volume["name"]): - worker_spec["volumes"].append(script_volume) - - worker_container = worker_spec["containers"][0] # Ray worker container - if "volumeMounts" not in worker_container: - worker_container["volumeMounts"] = [] - if not mount_exists(worker_container["volumeMounts"], 
script_mount["name"]): - worker_container["volumeMounts"].append(script_mount) - - # Update the RayCluster - try: - self._cluster_api.patch_ray_cluster( - name=self.cluster_name, - ray_patch=ray_cluster, - k8s_namespace=self.namespace, - ) - logger.info( - f"Updated RayCluster '{self.cluster_name}' with script volumes from ConfigMap '{configmap_name}'" - ) - except client.ApiException as e: - raise RuntimeError( - f"Failed to update RayCluster '{self.cluster_name}': {e}" - ) diff --git a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test_config.py index d19864ba..4f538763 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_config.py +++ b/src/codeflare_sdk/ray/rayjobs/test_config.py @@ -4,6 +4,8 @@ import pytest from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig, DEFAULT_ACCELERATORS +from kubernetes.client import V1VolumeMount +from kubernetes.client import V1Volume, V1ConfigMapVolumeSource def test_accelerator_configs_defaults_to_default_accelerators(): @@ -167,63 +169,177 @@ def test_ray_usage_stats_with_other_user_envs(): assert len(config.envs) == 3 -def test_add_script_volumes_existing_volume_early_return(): - """Test add_script_volumes early return when volume already exists.""" - from kubernetes.client import V1Volume, V1ConfigMapVolumeSource +def test_add_file_volumes_existing_volume_early_return(): + """Test add_file_volumes early return when volume already exists.""" config = ManagedClusterConfig() # Pre-add a volume with same name existing_volume = V1Volume( - name="ray-job-scripts", - config_map=V1ConfigMapVolumeSource(name="existing-scripts"), + name="ray-job-files", + config_map=V1ConfigMapVolumeSource(name="existing-files"), ) config.volumes.append(existing_volume) # Should return early and not add duplicate - config.add_script_volumes(configmap_name="new-scripts") + config.add_file_volumes(configmap_name="new-files") # Should still have only one volume, no mount added assert len(config.volumes) == 1 assert 
len(config.volume_mounts) == 0 -def test_add_script_volumes_existing_mount_early_return(): - """Test add_script_volumes early return when mount already exists.""" - from kubernetes.client import V1VolumeMount +def test_add_file_volumes_existing_mount_early_return(): + """Test add_file_volumes early return when mount already exists.""" config = ManagedClusterConfig() # Pre-add a mount with same name - existing_mount = V1VolumeMount(name="ray-job-scripts", mount_path="/existing/path") + existing_mount = V1VolumeMount(name="ray-job-files", mount_path="/existing/path") config.volume_mounts.append(existing_mount) # Should return early and not add duplicate - config.add_script_volumes(configmap_name="new-scripts") + config.add_file_volumes(configmap_name="new-files") # Should still have only one mount, no volume added assert len(config.volumes) == 0 assert len(config.volume_mounts) == 1 -def test_build_script_configmap_spec_labels(): - """Test that build_script_configmap_spec creates ConfigMap with correct labels.""" +def test_build_file_configmap_spec_labels(): + """Test that build_file_configmap_spec creates ConfigMap with correct labels.""" config = ManagedClusterConfig() job_name = "test-job" namespace = "test-namespace" - scripts = {"script.py": "print('hello')", "helper.py": "# helper code"} + files = {"test.py": "print('hello')", "helper.py": "# helper code"} - configmap_spec = config.build_script_configmap_spec(job_name, namespace, scripts) + configmap_spec = config.build_file_configmap_spec(job_name, namespace, files) assert configmap_spec["apiVersion"] == "v1" assert configmap_spec["kind"] == "ConfigMap" - assert configmap_spec["metadata"]["name"] == f"{job_name}-scripts" + assert configmap_spec["metadata"]["name"] == f"{job_name}-files" assert configmap_spec["metadata"]["namespace"] == namespace labels = configmap_spec["metadata"]["labels"] assert labels["ray.io/job-name"] == job_name assert labels["app.kubernetes.io/managed-by"] == "codeflare-sdk" - assert 
labels["app.kubernetes.io/component"] == "rayjob-scripts" + assert labels["app.kubernetes.io/component"] == "rayjob-files" - assert configmap_spec["data"] == scripts + assert configmap_spec["data"] == files + + +def test_managed_cluster_config_uses_update_image_for_head(mocker): + """Test that ManagedClusterConfig calls update_image() for head container.""" + # Mock update_image where it's used (in config module), not where it's defined + mock_update_image = mocker.patch( + "codeflare_sdk.ray.rayjobs.config.update_image", + return_value="mocked-image:latest", + ) + + config = ManagedClusterConfig(image="custom-image:v1") + + # Build cluster spec (which should call update_image) + spec = config.build_ray_cluster_spec("test-cluster") + + # Verify update_image was called for head container + assert mock_update_image.called + # Verify head container has the mocked image + head_container = spec["headGroupSpec"]["template"].spec.containers[0] + assert head_container.image == "mocked-image:latest" + + +def test_managed_cluster_config_uses_update_image_for_worker(mocker): + """Test that ManagedClusterConfig calls update_image() for worker container.""" + # Mock update_image where it's used (in config module), not where it's defined + mock_update_image = mocker.patch( + "codeflare_sdk.ray.rayjobs.config.update_image", + return_value="mocked-image:latest", + ) + + config = ManagedClusterConfig(image="custom-image:v1") + + # Build cluster spec (which should call update_image) + spec = config.build_ray_cluster_spec("test-cluster") + + # Verify update_image was called for worker container + assert mock_update_image.called + # Verify worker container has the mocked image + worker_container = spec["workerGroupSpecs"][0]["template"].spec.containers[0] + assert worker_container.image == "mocked-image:latest" + + +def test_managed_cluster_config_with_empty_image_uses_update_image(mocker): + """Test that empty image triggers update_image() to auto-detect.""" + # Mock update_image 
where it's used (in config module), not where it's defined + mock_update_image = mocker.patch( + "codeflare_sdk.ray.rayjobs.config.update_image", + return_value="auto-detected-image:py3.12", + ) + + config = ManagedClusterConfig(image="") + + # Build cluster spec + spec = config.build_ray_cluster_spec("test-cluster") + + # Verify update_image was called with empty string + mock_update_image.assert_called_with("") + + # Verify containers have the auto-detected image + head_container = spec["headGroupSpec"]["template"].spec.containers[0] + assert head_container.image == "auto-detected-image:py3.12" + + worker_container = spec["workerGroupSpecs"][0]["template"].spec.containers[0] + assert worker_container.image == "auto-detected-image:py3.12" + + +def test_build_ray_cluster_spec_has_enable_in_tree_autoscaling_false(): + """Test that build_ray_cluster_spec sets enableInTreeAutoscaling to False.""" + config = ManagedClusterConfig() + + spec = config.build_ray_cluster_spec("test-cluster") + + # Verify enableInTreeAutoscaling is set to False (required for Kueue) + assert "enableInTreeAutoscaling" in spec + assert spec["enableInTreeAutoscaling"] is False + + +def test_build_ray_cluster_spec_autoscaling_disabled_for_kueue(): + """Test that autoscaling is explicitly disabled for Kueue-managed jobs.""" + config = ManagedClusterConfig(num_workers=3) + + spec = config.build_ray_cluster_spec("kueue-cluster") + + # Verify enableInTreeAutoscaling is False + assert spec["enableInTreeAutoscaling"] is False + + # Verify worker replicas are fixed (min == max == replicas) + worker_spec = spec["workerGroupSpecs"][0] + assert worker_spec["replicas"] == 3 + assert worker_spec["minReplicas"] == 3 + assert worker_spec["maxReplicas"] == 3 + + +def test_managed_cluster_config_default_image_integration(): + """Test that ManagedClusterConfig works with default images (integration test).""" + # Create config without specifying an image (should auto-detect based on Python version) + config = 
ManagedClusterConfig() + + # Build cluster spec + spec = config.build_ray_cluster_spec("test-cluster") + + # Verify head container has an image (should be auto-detected) + head_container = spec["headGroupSpec"]["template"].spec.containers[0] + assert head_container.image is not None + assert len(head_container.image) > 0 + # Should be one of the supported images + from codeflare_sdk.common.utils.constants import ( + CUDA_PY311_RUNTIME_IMAGE, + CUDA_PY312_RUNTIME_IMAGE, + ) + + assert head_container.image in [CUDA_PY311_RUNTIME_IMAGE, CUDA_PY312_RUNTIME_IMAGE] + + # Verify worker container has the same image + worker_container = spec["workerGroupSpecs"][0]["template"].spec.containers[0] + assert worker_container.image == head_container.image diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py index 971f4342..15c53c8d 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test_rayjob.py @@ -32,7 +32,9 @@ # Global test setup that runs automatically for ALL tests @pytest.fixture(autouse=True) def auto_mock_setup(mocker): - """Automatically mock common dependencies for all tests.""" + """ + Automatically mock common dependencies for all tests. + """ mocker.patch("kubernetes.config.load_kube_config") # Always mock get_default_kueue_name to prevent K8s API calls @@ -70,7 +72,9 @@ def auto_mock_setup(mocker): def test_rayjob_submit_success(auto_mock_setup): - """Test successful RayJob submission.""" + """ + Test successful RayJob submission. 
+ """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit.return_value = {"metadata": {"name": "test-rayjob"}} @@ -97,11 +101,13 @@ def test_rayjob_submit_success(auto_mock_setup): assert job_cr["metadata"]["namespace"] == "test-namespace" assert job_cr["spec"]["entrypoint"] == "python -c 'print(\"hello world\")'" assert job_cr["spec"]["clusterSelector"]["ray.io/cluster"] == "test-ray-cluster" - assert job_cr["spec"]["runtimeEnvYAML"] == "{'pip': ['requests']}" + assert job_cr["spec"]["runtimeEnvYAML"] == "pip:\n- requests\n" def test_rayjob_submit_failure(auto_mock_setup): - """Test RayJob submission failure.""" + """ + Test RayJob submission failure. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = None @@ -110,7 +116,7 @@ def test_rayjob_submit_failure(auto_mock_setup): job_name="test-rayjob", cluster_name="test-ray-cluster", namespace="default", - entrypoint="python script.py", + entrypoint="python test.py", runtime_env={"pip": ["numpy"]}, ) @@ -119,7 +125,9 @@ def test_rayjob_submit_failure(auto_mock_setup): def test_rayjob_init_validation_both_provided(auto_mock_setup): - """Test that providing both cluster_name and cluster_config raises error.""" + """ + Test that providing both cluster_name and cluster_config raises error. + """ cluster_config = ClusterConfiguration(name="test-cluster", namespace="test") with pytest.raises( @@ -130,21 +138,25 @@ def test_rayjob_init_validation_both_provided(auto_mock_setup): job_name="test-job", cluster_name="existing-cluster", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", ) def test_rayjob_init_validation_neither_provided(auto_mock_setup): - """Test that providing neither cluster_name nor cluster_config raises error.""" + """ + Test that providing neither cluster_name nor cluster_config raises error. 
+ """ with pytest.raises( ValueError, match="❌ Configuration Error: You must provide either 'cluster_name'", ): - RayJob(job_name="test-job", entrypoint="python script.py") + RayJob(job_name="test-job", entrypoint="python test.py") def test_rayjob_init_with_cluster_config(auto_mock_setup): - """Test RayJob initialization with cluster configuration for auto-creation.""" + """ + Test RayJob initialization with cluster configuration for auto-creation. + """ cluster_config = ClusterConfiguration( name="auto-cluster", namespace="test-namespace", num_workers=2 ) @@ -152,7 +164,7 @@ def test_rayjob_init_with_cluster_config(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", namespace="test-namespace", ) @@ -163,7 +175,9 @@ def test_rayjob_init_with_cluster_config(auto_mock_setup): def test_rayjob_cluster_name_generation(auto_mock_setup): - """Test that cluster names are generated when config has empty name.""" + """ + Test that cluster names are generated when config has empty name. + """ cluster_config = ClusterConfiguration( name="", # Empty name should trigger generation namespace="test-namespace", @@ -173,7 +187,7 @@ def test_rayjob_cluster_name_generation(auto_mock_setup): rayjob = RayJob( job_name="my-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", namespace="test-namespace", ) @@ -181,7 +195,9 @@ def test_rayjob_cluster_name_generation(auto_mock_setup): def test_rayjob_cluster_config_namespace_none(auto_mock_setup): - """Test that cluster config namespace is set when None.""" + """ + Test that cluster config namespace is set when None. 
+ """ cluster_config = ClusterConfiguration( name="test-cluster", namespace=None, # This should be set to job namespace @@ -192,14 +208,16 @@ def test_rayjob_cluster_config_namespace_none(auto_mock_setup): job_name="test-job", cluster_config=cluster_config, namespace="job-namespace", - entrypoint="python script.py", + entrypoint="python test.py", ) assert rayjob.namespace == "job-namespace" def test_rayjob_with_active_deadline_seconds(auto_mock_setup): - """Test RayJob CR generation with active deadline seconds.""" + """ + Test RayJob CR generation with active deadline seconds. + """ rayjob = RayJob( job_name="test-job", cluster_name="test-cluster", @@ -214,11 +232,13 @@ def test_rayjob_with_active_deadline_seconds(auto_mock_setup): def test_build_ray_cluster_spec_no_config_error(auto_mock_setup): - """Test _build_ray_cluster_spec raises error when no cluster config.""" + """ + Test _build_ray_cluster_spec raises error when no cluster config. + """ rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint="python script.py", + entrypoint="python test.py", namespace="test-namespace", ) @@ -229,7 +249,9 @@ def test_build_ray_cluster_spec_no_config_error(auto_mock_setup): def test_build_ray_cluster_spec(mocker, auto_mock_setup): - """Test _build_ray_cluster_spec method.""" + """ + Test _build_ray_cluster_spec method. + """ mock_ray_cluster = { "apiVersion": "ray.io/v1", @@ -249,7 +271,7 @@ def test_build_ray_cluster_spec(mocker, auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", namespace="test-namespace", ) @@ -262,7 +284,9 @@ def test_build_ray_cluster_spec(mocker, auto_mock_setup): def test_build_rayjob_cr_with_existing_cluster(auto_mock_setup): - """Test _build_rayjob_cr method with existing cluster.""" + """ + Test _build_rayjob_cr method with existing cluster. 
+ """ rayjob = RayJob( job_name="test-job", @@ -287,7 +311,9 @@ def test_build_rayjob_cr_with_existing_cluster(auto_mock_setup): def test_build_rayjob_cr_with_auto_cluster(mocker, auto_mock_setup): - """Test _build_rayjob_cr method with auto-created cluster.""" + """ + Test _build_rayjob_cr method with auto-created cluster. + """ mock_ray_cluster = { "apiVersion": "ray.io/v1", "kind": "RayCluster", @@ -317,7 +343,9 @@ def test_build_rayjob_cr_with_auto_cluster(mocker, auto_mock_setup): def test_submit_validation_no_entrypoint(auto_mock_setup): - """Test that submit() raises error when entrypoint is None.""" + """ + Test that submit() raises error when entrypoint is None. + """ rayjob = RayJob( job_name="test-job", cluster_name="test-cluster", @@ -332,7 +360,9 @@ def test_submit_validation_no_entrypoint(auto_mock_setup): def test_submit_with_auto_cluster(mocker, auto_mock_setup): - """Test successful submission with auto-created cluster.""" + """ + Test successful submission with auto-created cluster. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_ray_cluster = { @@ -354,7 +384,7 @@ def test_submit_with_auto_cluster(mocker, auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", namespace="test-namespace", ) @@ -371,35 +401,41 @@ def test_submit_with_auto_cluster(mocker, auto_mock_setup): def test_namespace_auto_detection_success(auto_mock_setup): - """Test successful namespace auto-detection.""" + """ + Test successful namespace auto-detection. 
+ """ auto_mock_setup["get_current_namespace"].return_value = "detected-ns" rayjob = RayJob( - job_name="test-job", entrypoint="python script.py", cluster_name="test-cluster" + job_name="test-job", entrypoint="python test.py", cluster_name="test-cluster" ) assert rayjob.namespace == "detected-ns" def test_namespace_auto_detection_fallback(auto_mock_setup): - """Test that namespace auto-detection failure raises an error.""" + """ + Test that namespace auto-detection failure raises an error. + """ auto_mock_setup["get_current_namespace"].return_value = None with pytest.raises(ValueError, match="Could not auto-detect Kubernetes namespace"): RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name="test-cluster", ) def test_namespace_explicit_override(auto_mock_setup): - """Test that explicit namespace overrides auto-detection.""" + """ + Test that explicit namespace overrides auto-detection. + """ auto_mock_setup["get_current_namespace"].return_value = "detected-ns" rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name="test-cluster", namespace="explicit-ns", ) @@ -408,7 +444,9 @@ def test_namespace_explicit_override(auto_mock_setup): def test_rayjob_with_rayjob_cluster_config(auto_mock_setup): - """Test RayJob with the new ManagedClusterConfig.""" + """ + Test RayJob with the new ManagedClusterConfig. 
+ """ cluster_config = ManagedClusterConfig( num_workers=2, head_cpu_requests="500m", @@ -417,7 +455,7 @@ def test_rayjob_with_rayjob_cluster_config(auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_config=cluster_config, namespace="test-namespace", ) @@ -427,12 +465,14 @@ def test_rayjob_with_rayjob_cluster_config(auto_mock_setup): def test_rayjob_cluster_config_validation(auto_mock_setup): - """Test validation of ManagedClusterConfig parameters.""" + """ + Test validation of ManagedClusterConfig parameters. + """ cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_config=cluster_config, namespace="test-namespace", ) @@ -441,7 +481,9 @@ def test_rayjob_cluster_config_validation(auto_mock_setup): def test_rayjob_missing_entrypoint_validation(auto_mock_setup): - """Test that RayJob requires entrypoint for submission.""" + """ + Test that RayJob requires entrypoint for submission. + """ with pytest.raises( TypeError, match="missing 1 required positional argument: 'entrypoint'" ): @@ -452,7 +494,9 @@ def test_rayjob_missing_entrypoint_validation(auto_mock_setup): def test_build_ray_cluster_spec_integration(mocker, auto_mock_setup): - """Test integration with the new build_ray_cluster_spec method.""" + """ + Test integration with the new build_ray_cluster_spec method. 
+ """ cluster_config = ManagedClusterConfig() mock_spec = {"spec": "test-spec"} mocker.patch.object( @@ -461,7 +505,7 @@ def test_build_ray_cluster_spec_integration(mocker, auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_config=cluster_config, namespace="test-namespace", ) @@ -476,12 +520,39 @@ def test_build_ray_cluster_spec_integration(mocker, auto_mock_setup): def test_rayjob_with_runtime_env(auto_mock_setup): - """Test RayJob with runtime environment configuration.""" + """ + Test RayJob with runtime environment configuration. + """ runtime_env = {"pip": ["numpy", "pandas"]} rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", + cluster_name="test-cluster", + runtime_env=runtime_env, + namespace="test-namespace", + ) + + assert rayjob.runtime_env == runtime_env + + rayjob_cr = rayjob._build_rayjob_cr() + assert rayjob_cr["spec"]["runtimeEnvYAML"] == "pip:\n- numpy\n- pandas\n" + + +def test_rayjob_with_remote_working_dir(auto_mock_setup): + """ + Test RayJob with remote working directory in runtime_env. + Should not extract local files and should pass through remote URL. 
+ """ + runtime_env = { + "working_dir": "https://github.com/org/repo/archive/refs/heads/main.zip", + "pip": ["numpy", "pandas"], + "env_vars": {"TEST_VAR": "test_value"}, + } + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", cluster_name="test-cluster", runtime_env=runtime_env, namespace="test-namespace", @@ -489,16 +560,38 @@ def test_rayjob_with_runtime_env(auto_mock_setup): assert rayjob.runtime_env == runtime_env + # Should not extract any local files due to remote working_dir + files = rayjob._extract_all_local_files() + assert files is None + rayjob_cr = rayjob._build_rayjob_cr() - assert rayjob_cr["spec"]["runtimeEnvYAML"] == str(runtime_env) + + # Should have runtimeEnvYAML with all fields + expected_runtime_env = ( + "env_vars:\n" + " TEST_VAR: test_value\n" + "pip:\n" + "- numpy\n" + "- pandas\n" + "working_dir: https://github.com/org/repo/archive/refs/heads/main.zip\n" + ) + assert rayjob_cr["spec"]["runtimeEnvYAML"] == expected_runtime_env + + # Should not have submitterPodTemplate since no local files + assert "submitterPodTemplate" not in rayjob_cr["spec"] + + # Entrypoint should be unchanged + assert rayjob_cr["spec"]["entrypoint"] == "python test.py" def test_rayjob_with_active_deadline_and_ttl(auto_mock_setup): - """Test RayJob with both active deadline and TTL settings.""" + """ + Test RayJob with both active deadline and TTL settings. + """ rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name="test-cluster", active_deadline_seconds=300, ttl_seconds_after_finished=600, @@ -514,13 +607,15 @@ def test_rayjob_with_active_deadline_and_ttl(auto_mock_setup): def test_rayjob_cluster_name_generation_with_config(auto_mock_setup): - """Test cluster name generation when using cluster_config.""" + """ + Test cluster name generation when using cluster_config. 
+ """ cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="my-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_config=cluster_config, namespace="test-namespace", # Explicitly specify namespace ) @@ -529,14 +624,16 @@ def test_rayjob_cluster_name_generation_with_config(auto_mock_setup): def test_rayjob_namespace_propagation_to_cluster_config(auto_mock_setup): - """Test that job namespace is propagated to cluster config when None.""" + """ + Test that job namespace is propagated to cluster config when None. + """ auto_mock_setup["get_current_namespace"].return_value = "detected-ns" cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_config=cluster_config, ) @@ -544,20 +641,24 @@ def test_rayjob_namespace_propagation_to_cluster_config(auto_mock_setup): def test_rayjob_error_handling_invalid_cluster_config(auto_mock_setup): - """Test error handling with invalid cluster configuration.""" + """ + Test error handling with invalid cluster configuration. + """ with pytest.raises(ValueError): RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", ) def test_rayjob_constructor_parameter_validation(auto_mock_setup): - """Test constructor parameter validation.""" + """ + Test constructor parameter validation. 
+ """ rayjob = RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name="test-cluster", namespace="test-ns", runtime_env={"pip": ["numpy"]}, @@ -566,7 +667,7 @@ def test_rayjob_constructor_parameter_validation(auto_mock_setup): ) assert rayjob.name == "test-job" - assert rayjob.entrypoint == "python script.py" + assert rayjob.entrypoint == "python test.py" assert rayjob.cluster_name == "test-cluster" assert rayjob.namespace == "test-ns" assert rayjob.runtime_env == {"pip": ["numpy"]} @@ -575,7 +676,9 @@ def test_rayjob_constructor_parameter_validation(auto_mock_setup): def test_build_ray_cluster_spec_function(): - """Test the build_ray_cluster_spec method directly.""" + """ + Test the build_ray_cluster_spec method directly. + """ cluster_config = ManagedClusterConfig( num_workers=2, head_cpu_requests="500m", @@ -606,7 +709,9 @@ def test_build_ray_cluster_spec_function(): def test_build_ray_cluster_spec_with_accelerators(): - """Test build_ray_cluster_spec with GPU accelerators.""" + """ + Test build_ray_cluster_spec with GPU accelerators. + """ cluster_config = ManagedClusterConfig( head_accelerators={"nvidia.com/gpu": 1}, worker_accelerators={"nvidia.com/gpu": 2}, @@ -626,7 +731,9 @@ def test_build_ray_cluster_spec_with_accelerators(): def test_build_ray_cluster_spec_with_custom_volumes(): - """Test build_ray_cluster_spec with custom volumes and volume mounts.""" + """ + Test build_ray_cluster_spec with custom volumes and volume mounts. + """ custom_volume = V1Volume(name="custom-data", empty_dir={}) custom_volume_mount = V1VolumeMount(name="custom-data", mount_path="/data") cluster_config = ManagedClusterConfig( @@ -644,7 +751,9 @@ def test_build_ray_cluster_spec_with_custom_volumes(): def test_build_ray_cluster_spec_with_environment_variables(): - """Test build_ray_cluster_spec with environment variables.""" + """ + Test build_ray_cluster_spec with environment variables. 
+ """ cluster_config = ManagedClusterConfig( envs={"CUDA_VISIBLE_DEVICES": "0", "RAY_DISABLE_IMPORT_WARNING": "1"}, ) @@ -670,7 +779,9 @@ def test_build_ray_cluster_spec_with_environment_variables(): def test_build_ray_cluster_spec_with_tolerations(): - """Test build_ray_cluster_spec with tolerations.""" + """ + Test build_ray_cluster_spec with tolerations. + """ head_toleration = V1Toleration( key="node-role.kubernetes.io/master", operator="Exists", effect="NoSchedule" ) @@ -699,7 +810,9 @@ def test_build_ray_cluster_spec_with_tolerations(): def test_build_ray_cluster_spec_with_image_pull_secrets(): - """Test build_ray_cluster_spec with image pull secrets.""" + """ + Test build_ray_cluster_spec with image pull secrets. + """ cluster_config = ManagedClusterConfig( image_pull_secrets=["my-registry-secret", "another-secret"] ) @@ -726,155 +839,169 @@ def test_build_ray_cluster_spec_with_image_pull_secrets(): assert worker_secrets[1].name == "another-secret" -class TestRayVersionValidation: - """Test Ray version validation in RayJob.""" +def test_submit_with_cluster_config_compatible_image_passes(auto_mock_setup): + """ + Test that submission passes with compatible cluster_config image. 
+ """ + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = True - def test_submit_with_cluster_config_compatible_image_passes(self, auto_mock_setup): - """Test that submission passes with compatible cluster_config image.""" - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_api_instance.submit_job.return_value = True + cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") - cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + namespace="test-namespace", + entrypoint="python test.py", + ) - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - namespace="test-namespace", - entrypoint="python script.py", - ) + result = rayjob.submit() + assert result == "test-job" - result = rayjob.submit() - assert result == "test-job" - def test_submit_with_cluster_config_incompatible_image_fails(self, auto_mock_setup): - """Test that submission fails with incompatible cluster_config image.""" - # +def test_submit_with_cluster_config_incompatible_image_fails(auto_mock_setup): + """ + Test that submission fails with incompatible cluster_config image. 
+ """ - cluster_config = ManagedClusterConfig(image="ray:2.8.0") # Different version + cluster_config = ManagedClusterConfig(image="ray:2.8.0") # Different version - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - namespace="test-namespace", - entrypoint="python script.py", - ) + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + namespace="test-namespace", + entrypoint="python test.py", + ) - with pytest.raises( - ValueError, match="Cluster config image: Ray version mismatch detected" - ): - rayjob.submit() + with pytest.raises( + ValueError, match="Cluster config image: Ray version mismatch detected" + ): + rayjob.submit() - def test_validate_ray_version_compatibility_method(self, auto_mock_setup): - """Test the _validate_ray_version_compatibility method directly.""" - # - rayjob = RayJob( - job_name="test-job", - cluster_name="test-cluster", - namespace="test-namespace", - entrypoint="python script.py", - ) +def test_validate_ray_version_compatibility_method(auto_mock_setup): + """ + Test the _validate_ray_version_compatibility method directly. 
+ """ + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python test.py", + ) + rayjob._validate_ray_version_compatibility() + rayjob._cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") + rayjob._validate_ray_version_compatibility() + rayjob._cluster_config = ManagedClusterConfig(image="ray:2.8.0") + with pytest.raises( + ValueError, match="Cluster config image: Ray version mismatch detected" + ): rayjob._validate_ray_version_compatibility() - rayjob._cluster_config = ManagedClusterConfig(image=f"ray:{RAY_VERSION}") + + rayjob._cluster_config = ManagedClusterConfig(image="custom-image:latest") + with pytest.warns( + UserWarning, match="Cluster config image: Cannot determine Ray version" + ): rayjob._validate_ray_version_compatibility() - rayjob._cluster_config = ManagedClusterConfig(image="ray:2.8.0") - with pytest.raises( - ValueError, match="Cluster config image: Ray version mismatch detected" - ): - rayjob._validate_ray_version_compatibility() - - rayjob._cluster_config = ManagedClusterConfig(image="custom-image:latest") - with pytest.warns( - UserWarning, match="Cluster config image: Cannot determine Ray version" - ): - rayjob._validate_ray_version_compatibility() - - def test_validate_cluster_config_image_method(self, auto_mock_setup): - """Test the _validate_cluster_config_image method directly.""" - # - rayjob = RayJob( - job_name="test-job", - cluster_config=ManagedClusterConfig(), - namespace="test-namespace", - entrypoint="python script.py", - ) +def test_validate_cluster_config_image_method(auto_mock_setup): + """ + Test the _validate_cluster_config_image method directly. 
+ """ + + rayjob = RayJob( + job_name="test-job", + cluster_config=ManagedClusterConfig(), + namespace="test-namespace", + entrypoint="python test.py", + ) + + rayjob._validate_cluster_config_image() + rayjob._cluster_config.image = f"ray:{RAY_VERSION}" + rayjob._validate_cluster_config_image() + rayjob._cluster_config.image = "ray:2.8.0" + with pytest.raises( + ValueError, match="Cluster config image: Ray version mismatch detected" + ): rayjob._validate_cluster_config_image() - rayjob._cluster_config.image = f"ray:{RAY_VERSION}" + + rayjob._cluster_config.image = "custom-image:latest" + with pytest.warns( + UserWarning, match="Cluster config image: Cannot determine Ray version" + ): rayjob._validate_cluster_config_image() - rayjob._cluster_config.image = "ray:2.8.0" - with pytest.raises( - ValueError, match="Cluster config image: Ray version mismatch detected" - ): - rayjob._validate_cluster_config_image() - rayjob._cluster_config.image = "custom-image:latest" - with pytest.warns( - UserWarning, match="Cluster config image: Cannot determine Ray version" - ): - rayjob._validate_cluster_config_image() - def test_validate_cluster_config_image_edge_cases(self, auto_mock_setup): - """Test edge cases in _validate_cluster_config_image method.""" +def test_validate_cluster_config_image_edge_cases(auto_mock_setup): + """ + Test edge cases in _validate_cluster_config_image method. 
+ """ - rayjob = RayJob( - job_name="test-job", - cluster_config=ManagedClusterConfig(), - namespace="test-namespace", - entrypoint="python script.py", - ) + rayjob = RayJob( + job_name="test-job", + cluster_config=ManagedClusterConfig(), + namespace="test-namespace", + entrypoint="python test.py", + ) - rayjob._cluster_config.image = None - rayjob._validate_cluster_config_image() - rayjob._cluster_config.image = "" - rayjob._validate_cluster_config_image() - rayjob._cluster_config.image = 123 - rayjob._validate_cluster_config_image() + rayjob._cluster_config.image = None + rayjob._validate_cluster_config_image() + rayjob._cluster_config.image = "" + rayjob._validate_cluster_config_image() + rayjob._cluster_config.image = 123 + rayjob._validate_cluster_config_image() - class MockClusterConfig: - pass + class MockClusterConfig: + pass - rayjob._cluster_config = MockClusterConfig() - rayjob._validate_cluster_config_image() + rayjob._cluster_config = MockClusterConfig() + rayjob._validate_cluster_config_image() -def test_extract_script_files_from_entrypoint_single_script(auto_mock_setup, tmp_path): - """Test extracting a single script file from entrypoint.""" +def test_extract_files_from_entrypoint_single_file(auto_mock_setup, tmp_path): + """ + Test extracting a single file from entrypoint. 
+ """ - # Create a test script - test_script = tmp_path / "test_script.py" - test_script.write_text("print('Hello World!')") + # Create a test file + test_file = tmp_path / "test_file.py" + test_file.write_text("print('Hello World!')") # Change to temp directory for test original_cwd = os.getcwd() os.chdir(tmp_path) try: + # Use a path that would need adjustment + entrypoint_with_path = f"python ./{test_file.name}" rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint=f"python {test_script.name}", + entrypoint=entrypoint_with_path, namespace="test-namespace", ) - scripts = rayjob._extract_script_files_from_entrypoint() + files = rayjob._extract_files_from_entrypoint() - assert scripts is not None - assert test_script.name in scripts - assert scripts[test_script.name] == "print('Hello World!')" - assert f"{MOUNT_PATH}/{test_script.name}" in rayjob.entrypoint + assert files is not None + assert test_file.name in files + assert files[test_file.name] == "print('Hello World!')" + assert entrypoint_with_path == rayjob.entrypoint finally: os.chdir(original_cwd) -def test_extract_script_files_with_dependencies(auto_mock_setup, tmp_path): - """Test extracting script files with local dependencies.""" +def test_extract_files_with_dependencies(auto_mock_setup, tmp_path): + """ + Test extracting files with local dependencies. 
+ """ - # Create main script and dependency - main_script = tmp_path / "main.py" - main_script.write_text( + # Create main file and dependency + main_file = tmp_path / "main.py" + main_file.write_text( """ import helper from utils import calculate @@ -889,16 +1016,16 @@ def main(): """ ) - helper_script = tmp_path / "helper.py" - helper_script.write_text( + helper_file = tmp_path / "helper.py" + helper_file.write_text( """ def do_something(): print("Doing something...") """ ) - utils_script = tmp_path / "utils.py" - utils_script.write_text( + utils_file = tmp_path / "utils.py" + utils_file.write_text( """ def calculate(x): return x * 2 @@ -917,24 +1044,26 @@ def calculate(x): namespace="test-namespace", ) - scripts = rayjob._extract_script_files_from_entrypoint() + files = rayjob._extract_files_from_entrypoint() - assert scripts is not None - assert len(scripts) == 3 - assert "main.py" in scripts - assert "helper.py" in scripts - assert "utils.py" in scripts + assert files is not None + assert len(files) == 3 + assert "main.py" in files + assert "helper.py" in files + assert "utils.py" in files - assert "import helper" in scripts["main.py"] - assert "def do_something" in scripts["helper.py"] - assert "def calculate" in scripts["utils.py"] + assert "import helper" in files["main.py"] + assert "def do_something" in files["helper.py"] + assert "def calculate" in files["utils.py"] finally: os.chdir(original_cwd) -def test_extract_script_files_no_local_scripts(auto_mock_setup): - """Test entrypoint with no local script files.""" +def test_extract_files_no_local_files(auto_mock_setup): + """ + Test entrypoint with no local files. 
+ """ rayjob = RayJob( job_name="test-job", @@ -943,13 +1072,15 @@ def test_extract_script_files_no_local_scripts(auto_mock_setup): namespace="test-namespace", ) - scripts = rayjob._extract_script_files_from_entrypoint() + files = rayjob._extract_files_from_entrypoint() - assert scripts is None + assert files is None -def test_extract_script_files_nonexistent_script(auto_mock_setup): - """Test entrypoint referencing non-existent script.""" +def test_extract_files_nonexistent_file(auto_mock_setup): + """ + Test entrypoint referencing non-existent file. + """ rayjob = RayJob( job_name="test-job", @@ -958,51 +1089,57 @@ def test_extract_script_files_nonexistent_script(auto_mock_setup): namespace="test-namespace", ) - scripts = rayjob._extract_script_files_from_entrypoint() + files = rayjob._extract_files_from_entrypoint() - assert scripts is None + assert files is None -def test_build_script_configmap_spec(): - """Test building ConfigMap specification for scripts.""" +def test_build_file_configmap_spec(): + """ + Test building ConfigMap specification for files. + """ config = ManagedClusterConfig() - scripts = {"main.py": "print('main')", "helper.py": "print('helper')"} + files = {"main.py": "print('main')", "helper.py": "print('helper')"} - spec = config.build_script_configmap_spec( - job_name="test-job", namespace="test-namespace", scripts=scripts + spec = config.build_file_configmap_spec( + job_name="test-job", namespace="test-namespace", files=files ) assert spec["apiVersion"] == "v1" assert spec["kind"] == "ConfigMap" - assert spec["metadata"]["name"] == "test-job-scripts" + assert spec["metadata"]["name"] == "test-job-files" assert spec["metadata"]["namespace"] == "test-namespace" - assert spec["data"] == scripts + assert spec["data"] == files -def test_build_script_volume_specs(): - """Test building volume and mount specifications for scripts.""" +def test_build_file_volume_specs(): + """ + Test building volume and mount specifications for files. 
+ """ config = ManagedClusterConfig() - volume_spec, mount_spec = config.build_script_volume_specs( - configmap_name="test-scripts", mount_path="/custom/path" + volume_spec, mount_spec = config.build_file_volume_specs( + configmap_name="test-files", mount_path="/custom/path" ) - assert volume_spec["name"] == "ray-job-scripts" - assert volume_spec["configMap"]["name"] == "test-scripts" + assert volume_spec["name"] == "ray-job-files" + assert volume_spec["configMap"]["name"] == "test-files" - assert mount_spec["name"] == "ray-job-scripts" + assert mount_spec["name"] == "ray-job-files" assert mount_spec["mountPath"] == "/custom/path" -def test_add_script_volumes(): - """Test adding script volumes to cluster configuration.""" +def test_add_file_volumes(): + """ + Test adding file volumes to cluster configuration. + """ config = ManagedClusterConfig() # Initially no volumes assert len(config.volumes) == 0 assert len(config.volume_mounts) == 0 - config.add_script_volumes(configmap_name="test-scripts") + config.add_file_volumes(configmap_name="test-files") assert len(config.volumes) == 1 assert len(config.volume_mounts) == 1 @@ -1010,27 +1147,31 @@ def test_add_script_volumes(): volume = config.volumes[0] mount = config.volume_mounts[0] - assert volume.name == "ray-job-scripts" - assert volume.config_map.name == "test-scripts" + assert volume.name == "ray-job-files" + assert volume.config_map.name == "test-files" - assert mount.name == "ray-job-scripts" + assert mount.name == "ray-job-files" assert mount.mount_path == MOUNT_PATH -def test_add_script_volumes_duplicate_prevention(): - """Test that adding script volumes twice doesn't create duplicates.""" +def test_add_file_volumes_duplicate_prevention(): + """ + Test that adding file volumes twice doesn't create duplicates. 
+ """ config = ManagedClusterConfig() # Add volumes twice - config.add_script_volumes(configmap_name="test-scripts") - config.add_script_volumes(configmap_name="test-scripts") + config.add_file_volumes(configmap_name="test-files") + config.add_file_volumes(configmap_name="test-files") assert len(config.volumes) == 1 assert len(config.volume_mounts) == 1 def test_create_configmap_from_spec(auto_mock_setup): - """Test creating ConfigMap via Kubernetes API.""" + """ + Test creating ConfigMap via Kubernetes API. + """ mock_api_instance = auto_mock_setup["k8s_api"] rayjob = RayJob( @@ -1043,18 +1184,20 @@ def test_create_configmap_from_spec(auto_mock_setup): configmap_spec = { "apiVersion": "v1", "kind": "ConfigMap", - "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } result = rayjob._create_configmap_from_spec(configmap_spec) - assert result == "test-scripts" + assert result == "test-files" mock_api_instance.create_namespaced_config_map.assert_called_once() def test_create_configmap_already_exists(auto_mock_setup): - """Test creating ConfigMap when it already exists (409 conflict).""" + """ + Test creating ConfigMap when it already exists (409 conflict). 
+ """ mock_api_instance = auto_mock_setup["k8s_api"] mock_api_instance.create_namespaced_config_map.side_effect = ApiException( @@ -1071,19 +1214,21 @@ def test_create_configmap_already_exists(auto_mock_setup): configmap_spec = { "apiVersion": "v1", "kind": "ConfigMap", - "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } result = rayjob._create_configmap_from_spec(configmap_spec) - assert result == "test-scripts" + assert result == "test-files" mock_api_instance.create_namespaced_config_map.assert_called_once() mock_api_instance.replace_namespaced_config_map.assert_called_once() def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, caplog): - """Test creating ConfigMap with owner reference from valid RayJob result.""" + """ + Test creating ConfigMap with owner reference from valid RayJob result. + """ mock_api_instance = auto_mock_setup["k8s_api"] # Mock client.V1ObjectMeta and V1ConfigMap @@ -1102,12 +1247,12 @@ def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, ca "apiVersion": "v1", "kind": "ConfigMap", "metadata": { - "name": "test-scripts", + "name": "test-files", "namespace": "test-namespace", "labels": { "ray.io/job-name": "test-job", "app.kubernetes.io/managed-by": "codeflare-sdk", - "app.kubernetes.io/component": "rayjob-scripts", + "app.kubernetes.io/component": "rayjob-files", }, }, "data": {"test.py": "print('test')"}, @@ -1125,12 +1270,12 @@ def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, ca with caplog.at_level("INFO"): result = rayjob._create_configmap_from_spec(configmap_spec, rayjob_result) - assert result == "test-scripts" + assert result == "test-files" # Verify owner reference was set expected_owner_ref = mocker.ANY # We'll check via the logs assert ( - "Adding owner reference to ConfigMap 'test-scripts' with RayJob UID: 
a4dd4c5a-ab61-411d-b4d1-4abb5177422a" + "Adding owner reference to ConfigMap 'test-files' with RayJob UID: a4dd4c5a-ab61-411d-b4d1-4abb5177422a" in caplog.text ) @@ -1141,7 +1286,9 @@ def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, ca def test_create_configmap_without_owner_reference_no_uid( mocker, auto_mock_setup, caplog ): - """Test creating ConfigMap without owner reference when RayJob has no UID.""" + """ + Test creating ConfigMap without owner reference when RayJob has no UID. + """ mock_api_instance = auto_mock_setup["k8s_api"] mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") @@ -1158,7 +1305,7 @@ def test_create_configmap_without_owner_reference_no_uid( configmap_spec = { "apiVersion": "v1", "kind": "ConfigMap", - "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } @@ -1174,11 +1321,11 @@ def test_create_configmap_without_owner_reference_no_uid( with caplog.at_level("WARNING"): result = rayjob._create_configmap_from_spec(configmap_spec, rayjob_result) - assert result == "test-scripts" + assert result == "test-files" # Verify warning was logged and no owner reference was set assert ( - "No valid RayJob result with UID found, ConfigMap 'test-scripts' will not have owner reference" + "No valid RayJob result with UID found, ConfigMap 'test-files' will not have owner reference" in caplog.text ) @@ -1187,7 +1334,9 @@ def test_create_configmap_without_owner_reference_no_uid( def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): - """Test creating ConfigMap with None or invalid rayjob_result.""" + """ + Test creating ConfigMap with None or invalid rayjob_result. 
+ """ mock_api_instance = auto_mock_setup["k8s_api"] rayjob = RayJob( @@ -1200,7 +1349,7 @@ def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): configmap_spec = { "apiVersion": "v1", "kind": "ConfigMap", - "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } @@ -1208,7 +1357,7 @@ def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): with caplog.at_level("WARNING"): result = rayjob._create_configmap_from_spec(configmap_spec, None) - assert result == "test-scripts" + assert result == "test-files" assert "No valid RayJob result with UID found" in caplog.text # Test with string instead of dict @@ -1216,50 +1365,17 @@ def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): with caplog.at_level("WARNING"): result = rayjob._create_configmap_from_spec(configmap_spec, "not-a-dict") - assert result == "test-scripts" + assert result == "test-files" assert "No valid RayJob result with UID found" in caplog.text -def test_handle_script_volumes_for_new_cluster(mocker, auto_mock_setup, tmp_path): - """Test handling script volumes for new cluster creation.""" - # auto_mock_setup handles kubernetes and API mocking - - mock_create = mocker.patch.object(RayJob, "_create_configmap_from_spec") - mock_create.return_value = "test-job-scripts" - - test_script = tmp_path / "test.py" - test_script.write_text("print('test')") - - cluster_config = ManagedClusterConfig() - - original_cwd = os.getcwd() - os.chdir(tmp_path) - - try: - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - entrypoint="python test.py", - namespace="test-namespace", - ) - - scripts = {"test.py": "print('test')"} - rayjob._handle_script_volumes_for_new_cluster(scripts) - - mock_create.assert_called_once() - - assert len(cluster_config.volumes) == 1 - assert len(cluster_config.volume_mounts) == 1 - - 
finally: - os.chdir(original_cwd) - - def test_ast_parsing_import_detection(auto_mock_setup, tmp_path): - """Test AST parsing correctly detects import statements.""" + """ + Test AST parsing correctly detects import statements. + """ - main_script = tmp_path / "main.py" - main_script.write_text( + main_file = tmp_path / "main.py" + main_file.write_text( """# Different import patterns import helper from utils import func1, func2 @@ -1269,18 +1385,18 @@ def test_ast_parsing_import_detection(auto_mock_setup, tmp_path): """ ) - helper_script = tmp_path / "helper.py" - helper_script.write_text("def helper_func(): pass") + helper_file = tmp_path / "helper.py" + helper_file.write_text("def helper_func(): pass") - utils_script = tmp_path / "utils.py" - utils_script.write_text( + utils_file = tmp_path / "utils.py" + utils_file.write_text( """def func1(): pass def func2(): pass """ ) - local_module_script = tmp_path / "local_module.py" - local_module_script.write_text("class MyClass: pass") + local_module_file = tmp_path / "local_module.py" + local_module_file.write_text("class MyClass: pass") original_cwd = os.getcwd() os.chdir(tmp_path) @@ -1293,21 +1409,23 @@ def func2(): pass namespace="test-namespace", ) - scripts = rayjob._extract_script_files_from_entrypoint() + files = rayjob._extract_files_from_entrypoint() - assert scripts is not None - assert len(scripts) == 4 # main + 3 dependencies - assert "main.py" in scripts - assert "helper.py" in scripts - assert "utils.py" in scripts - assert "local_module.py" in scripts + assert files is not None + assert len(files) == 4 # main + 3 dependencies + assert "main.py" in files + assert "helper.py" in files + assert "utils.py" in files + assert "local_module.py" in files finally: os.chdir(original_cwd) -def test_script_handling_kubernetes_best_practice_flow(mocker, tmp_path): - """Test the Kubernetes best practice flow: pre-declare volume, submit, create ConfigMap.""" +def 
test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): + """ + Test the Kubernetes best practice flow: pre-declare volume, submit, create ConfigMap. + """ mocker.patch("kubernetes.config.load_kube_config") mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") @@ -1323,20 +1441,20 @@ def test_script_handling_kubernetes_best_practice_flow(mocker, tmp_path): } mock_api_instance.submit_job.return_value = submit_result - mock_create_cm = mocker.patch.object(RayJob, "_create_script_configmap") - mock_add_volumes = mocker.patch.object(ManagedClusterConfig, "add_script_volumes") + mock_create_cm = mocker.patch.object(RayJob, "_create_file_configmap") + mock_add_volumes = mocker.patch.object(ManagedClusterConfig, "add_file_volumes") # RayClusterApi is already mocked by auto_mock_setup - test_script = tmp_path / "test.py" - test_script.write_text("print('test')") + test_file = tmp_path / "test.py" + test_file.write_text("print('test')") call_order = [] def track_add_volumes(*args, **kwargs): call_order.append("add_volumes") # Should be called with ConfigMap name - assert args[0] == "test-job-scripts" + assert args[0] == "test-job-files" def track_submit(*args, **kwargs): call_order.append("submit_job") @@ -1367,26 +1485,27 @@ def track_create_cm(*args, **kwargs): finally: os.chdir(original_cwd) - # Verify the order: add volumes → submit → create ConfigMap - assert call_order == ["add_volumes", "submit_job", "create_configmap"] + # Verify the order: submit → create ConfigMap + assert call_order == ["submit_job", "create_configmap"] - mock_add_volumes.assert_called_once() mock_api_instance.submit_job.assert_called_once() mock_create_cm.assert_called_once() mock_create_cm.assert_called_with({"test.py": "print('test')"}, submit_result) -def test_rayjob_submit_with_scripts_new_cluster(auto_mock_setup, tmp_path): - """Test RayJob submission with script detection for new cluster.""" +def test_rayjob_submit_with_files_new_cluster(auto_mock_setup, 
tmp_path): + """ + Test RayJob submission with file detection for new cluster. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = True mock_k8s_instance = auto_mock_setup["k8s_api"] - # Create test script - test_script = tmp_path / "test.py" - test_script.write_text("print('Hello from script!')") + # Create test file + test_file = tmp_path / "test.py" + test_file.write_text("print('Hello from the test file!')") cluster_config = ManagedClusterConfig() @@ -1401,23 +1520,26 @@ def test_rayjob_submit_with_scripts_new_cluster(auto_mock_setup, tmp_path): namespace="test-namespace", ) - # Submit should detect scripts and handle them + # Submit should detect files and handle them result = rayjob.submit() assert result == "test-job" mock_k8s_instance.create_namespaced_config_map.assert_called_once() - assert len(cluster_config.volumes) == 1 - assert len(cluster_config.volume_mounts) == 1 - assert f"{MOUNT_PATH}/test.py" in rayjob.entrypoint + assert len(cluster_config.volumes) == 0 + assert len(cluster_config.volume_mounts) == 0 + # Entrypoint should be adjusted to use just the filename + assert rayjob.entrypoint == "python test.py" finally: os.chdir(original_cwd) -def test_process_script_and_imports_io_error(mocker, auto_mock_setup, tmp_path): - """Test _process_script_and_imports handles IO errors gracefully.""" +def test_process_file_and_imports_io_error(mocker, auto_mock_setup, tmp_path): + """ + Test _process_file_and_imports handles IO errors gracefully. 
+ """ rayjob = RayJob( job_name="test-job", @@ -1426,20 +1548,22 @@ def test_process_script_and_imports_io_error(mocker, auto_mock_setup, tmp_path): namespace="test-namespace", ) - scripts = {} + files = {} processed_files = set() # Mock os.path.isfile to return True but open() to raise IOError mocker.patch("os.path.isfile", return_value=True) mocker.patch("builtins.open", side_effect=IOError("Permission denied")) - rayjob._process_script_and_imports("test.py", scripts, MOUNT_PATH, processed_files) + rayjob._process_file_and_imports("test.py", files, MOUNT_PATH, processed_files) assert "test.py" in processed_files - assert len(scripts) == 0 + assert len(files) == 0 -def test_process_script_and_imports_container_path_skip(auto_mock_setup): - """Test that scripts already in container paths are skipped.""" +def test_process_file_and_imports_container_path_skip(auto_mock_setup): + """ + Test that files already in container paths are skipped. + """ rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1447,20 +1571,22 @@ def test_process_script_and_imports_container_path_skip(auto_mock_setup): namespace="test-namespace", ) - scripts = {} + files = {} processed_files = set() - # Test script path already in container - rayjob._process_script_and_imports( - f"{MOUNT_PATH}/test.py", scripts, MOUNT_PATH, processed_files + # Test file path already in container + rayjob._process_file_and_imports( + f"{MOUNT_PATH}/test.py", files, MOUNT_PATH, processed_files ) - assert len(scripts) == 0 + assert len(files) == 0 assert len(processed_files) == 0 -def test_process_script_and_imports_already_processed(auto_mock_setup, tmp_path): - """Test that already processed scripts are skipped (infinite loop prevention).""" +def test_process_file_and_imports_already_processed(auto_mock_setup, tmp_path): + """ + Test that already processed files are skipped (infinite loop prevention). 
+ """ rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1468,19 +1594,21 @@ def test_process_script_and_imports_already_processed(auto_mock_setup, tmp_path) namespace="test-namespace", ) - scripts = {} + files = {} processed_files = {"test.py"} # Already processed - rayjob._process_script_and_imports("test.py", scripts, MOUNT_PATH, processed_files) + rayjob._process_file_and_imports("test.py", files, MOUNT_PATH, processed_files) - assert len(scripts) == 0 + assert len(files) == 0 assert processed_files == {"test.py"} -def test_submit_with_scripts_owner_reference_integration( +def test_submit_with_files_owner_reference_integration( mocker, auto_mock_setup, tmp_path, caplog ): - """Integration test for submit() with local scripts to verify end-to-end owner reference flow.""" + """ + Integration test for submit() with local files to verify end-to-end owner reference flow. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_k8s_instance = auto_mock_setup["k8s_api"] @@ -1504,14 +1632,14 @@ def capture_configmap(namespace, body): mock_k8s_instance.create_namespaced_config_map.side_effect = capture_configmap - # Create test scripts - test_script = tmp_path / "main.py" - test_script.write_text("import helper\nprint('main')") + # Create test files + test_file = tmp_path / "main.py" + test_file.write_text("import helper\nprint('main')") - helper_script = tmp_path / "helper.py" - helper_script.write_text("def help(): print('helper')") + helper_file = tmp_path / "helper.py" + helper_file.write_text("def help(): print('helper')") - # Change to temp directory for script detection + # Change to temp directory for file detection original_cwd = os.getcwd() try: os.chdir(tmp_path) @@ -1555,13 +1683,13 @@ def capture_configmap(namespace, body): ) assert ( created_configmap.metadata.labels["app.kubernetes.io/component"] - == "rayjob-scripts" + == "rayjob-files" ) assert "main.py" in created_configmap.data assert "helper.py" in created_configmap.data 
assert ( - "Adding owner reference to ConfigMap 'test-job-scripts' with RayJob UID: unique-rayjob-uid-12345" + "Adding owner reference to ConfigMap 'test-job-files' with RayJob UID: unique-rayjob-uid-12345" in caplog.text ) @@ -1570,7 +1698,9 @@ def capture_configmap(namespace, body): def test_find_local_imports_syntax_error(mocker, auto_mock_setup): - """Test _find_local_imports handles syntax errors gracefully.""" + """ + Test _find_local_imports handles syntax errors gracefully. + """ rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1579,16 +1709,18 @@ def test_find_local_imports_syntax_error(mocker, auto_mock_setup): ) # Invalid Python syntax - invalid_script_content = "import helper\ndef invalid_syntax(" + invalid_file_content = "import helper\ndef invalid_syntax(" mock_callback = mocker.Mock() - rayjob._find_local_imports(invalid_script_content, "test.py", mock_callback) + rayjob._find_local_imports(invalid_file_content, "test.py", mock_callback) mock_callback.assert_not_called() def test_create_configmap_api_error_non_409(auto_mock_setup): - """Test _create_configmap_from_spec handles non-409 API errors.""" + """ + Test _create_configmap_from_spec handles non-409 API errors. 
+ """ mock_api_instance = auto_mock_setup["k8s_api"] # Configure to raise 500 error @@ -1606,7 +1738,7 @@ def test_create_configmap_api_error_non_409(auto_mock_setup): configmap_spec = { "apiVersion": "v1", "kind": "ConfigMap", - "metadata": {"name": "test-scripts", "namespace": "test-namespace"}, + "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } @@ -1614,65 +1746,10 @@ def test_create_configmap_api_error_non_409(auto_mock_setup): rayjob._create_configmap_from_spec(configmap_spec) -def test_update_existing_cluster_get_cluster_error(mocker, auto_mock_setup): - """Test _update_existing_cluster_for_scripts handles get cluster errors.""" - mock_cluster_api_instance = auto_mock_setup["cluster_api"] - - # Configure it to raise an error - mock_cluster_api_instance.get_ray_cluster.side_effect = ApiException(status=404) - - config_builder = ManagedClusterConfig() - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - with pytest.raises(RuntimeError, match="Failed to get RayCluster"): - rayjob._update_existing_cluster_for_scripts("test-scripts", config_builder) - - -def test_update_existing_cluster_patch_error(mocker, auto_mock_setup): - """Test _update_existing_cluster_for_scripts handles patch errors.""" - mock_cluster_api_instance = auto_mock_setup["cluster_api"] - - # Mock successful get but failed patch - mock_cluster_api_instance.get_ray_cluster.return_value = { - "spec": { - "headGroupSpec": { - "template": { - "spec": {"volumes": [], "containers": [{"volumeMounts": []}]} - } - }, - "workerGroupSpecs": [ - { - "template": { - "spec": {"volumes": [], "containers": [{"volumeMounts": []}]} - } - } - ], - } - } - - mock_cluster_api_instance.patch_ray_cluster.side_effect = ApiException(status=500) - - config_builder = ManagedClusterConfig() - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - 
entrypoint="python test.py", - namespace="test-namespace", - ) - - with pytest.raises(RuntimeError, match="Failed to update RayCluster"): - rayjob._update_existing_cluster_for_scripts("test-scripts", config_builder) - - -def test_extract_script_files_empty_entrypoint(auto_mock_setup): - """Test script extraction with empty entrypoint.""" +def test_extract_files_empty_entrypoint(auto_mock_setup): + """ + Test file extraction with empty entrypoint. + """ rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", @@ -1680,42 +1757,48 @@ def test_extract_script_files_empty_entrypoint(auto_mock_setup): namespace="test-namespace", ) - scripts = rayjob._extract_script_files_from_entrypoint() + files = rayjob._extract_files_from_entrypoint() - assert scripts is None + assert files is None -def test_add_script_volumes_existing_volume_skip(): - """Test add_script_volumes skips when volume already exists (missing coverage).""" +def test_add_file_volumes_existing_volume_skip(): + """ + Test add_file_volumes skips when volume already exists (missing coverage). + """ config = ManagedClusterConfig() # Pre-add a volume with same name existing_volume = V1Volume( - name="ray-job-scripts", - config_map=V1ConfigMapVolumeSource(name="existing-scripts"), + name="ray-job-files", + config_map=V1ConfigMapVolumeSource(name="existing-files"), ) config.volumes.append(existing_volume) - config.add_script_volumes(configmap_name="new-scripts") + config.add_file_volumes(configmap_name="new-files") assert len(config.volumes) == 1 assert len(config.volume_mounts) == 0 # Mount not added due to volume skip -def test_add_script_volumes_existing_mount_skip(): - """Test add_script_volumes skips when mount already exists (missing coverage).""" +def test_add_file_volumes_existing_mount_skip(): + """ + Test add_file_volumes skips when mount already exists (missing coverage). 
+ """ config = ManagedClusterConfig() # Pre-add a mount with same name - existing_mount = V1VolumeMount(name="ray-job-scripts", mount_path="/existing/path") + existing_mount = V1VolumeMount(name="ray-job-files", mount_path="/existing/path") config.volume_mounts.append(existing_mount) - config.add_script_volumes(configmap_name="new-scripts") + config.add_file_volumes(configmap_name="new-files") assert len(config.volumes) == 0 # Volume not added due to mount skip assert len(config.volume_mounts) == 1 def test_rayjob_stop_success(auto_mock_setup, caplog): - """Test successful RayJob stop operation.""" + """ + Test successful RayJob stop operation. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.suspend_job.return_value = { @@ -1727,7 +1810,7 @@ def test_rayjob_stop_success(auto_mock_setup, caplog): job_name="test-rayjob", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python script.py", + entrypoint="python test.py", ) with caplog.at_level("INFO"): @@ -1744,7 +1827,9 @@ def test_rayjob_stop_success(auto_mock_setup, caplog): def test_rayjob_stop_failure(auto_mock_setup): - """Test RayJob stop operation when API call fails.""" + """ + Test RayJob stop operation when API call fails. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.suspend_job.return_value = None @@ -1753,7 +1838,7 @@ def test_rayjob_stop_failure(auto_mock_setup): job_name="test-rayjob", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python script.py", + entrypoint="python test.py", ) with pytest.raises(RuntimeError, match="Failed to stop the RayJob test-rayjob"): @@ -1765,7 +1850,9 @@ def test_rayjob_stop_failure(auto_mock_setup): def test_rayjob_resubmit_success(auto_mock_setup): - """Test successful RayJob resubmit operation.""" + """ + Test successful RayJob resubmit operation. 
+ """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.resubmit_job.return_value = { @@ -1777,7 +1864,7 @@ def test_rayjob_resubmit_success(auto_mock_setup): job_name="test-rayjob", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python script.py", + entrypoint="python test.py", ) result = rayjob.resubmit() @@ -1790,7 +1877,9 @@ def test_rayjob_resubmit_success(auto_mock_setup): def test_rayjob_resubmit_failure(auto_mock_setup): - """Test RayJob resubmit operation when API call fails.""" + """ + Test RayJob resubmit operation when API call fails. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.resubmit_job.return_value = None @@ -1799,7 +1888,7 @@ def test_rayjob_resubmit_failure(auto_mock_setup): job_name="test-rayjob", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python script.py", + entrypoint="python test.py", ) with pytest.raises(RuntimeError, match="Failed to resubmit the RayJob test-rayjob"): @@ -1811,12 +1900,14 @@ def test_rayjob_resubmit_failure(auto_mock_setup): def test_rayjob_delete_success(auto_mock_setup): - """Test successful RayJob deletion.""" + """ + Test successful RayJob deletion. + """ mock_api_instance = auto_mock_setup["rayjob_api"] rayjob = RayJob( job_name="test-rayjob", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name="test-cluster", ) @@ -1830,20 +1921,27 @@ def test_rayjob_delete_success(auto_mock_setup): ) -def test_rayjob_delete_failure(auto_mock_setup): - """Test failed RayJob deletion.""" +def test_rayjob_delete_already_deleted(auto_mock_setup, caplog): + """ + Test RayJob deletion when already deleted (should succeed gracefully). 
+ """ mock_api_instance = auto_mock_setup["rayjob_api"] rayjob = RayJob( job_name="test-rayjob", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name="test-cluster", ) + # Python client returns False when job doesn't exist/already deleted mock_api_instance.delete_job.return_value = False - with pytest.raises(RuntimeError, match="Failed to delete the RayJob test-rayjob"): - rayjob.delete() + with caplog.at_level("INFO"): + result = rayjob.delete() + + # Should succeed (not raise error) when already deleted + assert result is True + assert "already deleted or does not exist" in caplog.text mock_api_instance.delete_job.assert_called_once_with( name="test-rayjob", k8s_namespace="test-namespace" @@ -1851,28 +1949,32 @@ def test_rayjob_delete_failure(auto_mock_setup): def test_rayjob_init_both_none_error(auto_mock_setup): - """Test RayJob initialization error when both cluster_name and cluster_config are None.""" + """ + Test RayJob initialization error when both cluster_name and cluster_config are None. + """ with pytest.raises( ValueError, match="Configuration Error: You must provide either 'cluster_name' .* or 'cluster_config'", ): RayJob( job_name="test-job", - entrypoint="python script.py", + entrypoint="python test.py", cluster_name=None, cluster_config=None, ) def test_rayjob_init_missing_cluster_name_with_no_config(auto_mock_setup): - """Test RayJob initialization error when cluster_name is None without cluster_config.""" + """ + Test RayJob initialization error when cluster_name is None without cluster_config. 
+ """ with pytest.raises( ValueError, match="Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'", ): rayjob = RayJob.__new__(RayJob) rayjob.name = "test-job" - rayjob.entrypoint = "python script.py" + rayjob.entrypoint = "python test.py" rayjob.runtime_env = None rayjob.ttl_seconds_after_finished = 0 rayjob.active_deadline_seconds = None @@ -1886,138 +1988,10 @@ def test_rayjob_init_missing_cluster_name_with_no_config(auto_mock_setup): ) -def test_handle_script_volumes_for_existing_cluster_direct_call(auto_mock_setup): - """Test _handle_script_volumes_for_existing_cluster method directly.""" - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_cluster_api = auto_mock_setup["cluster_api"] - mock_k8s_api = auto_mock_setup["k8s_api"] - - # Mock existing cluster - mock_cluster = { - "spec": { - "headGroupSpec": { - "template": { - "spec": {"containers": [{"volumeMounts": []}], "volumes": []} - } - }, - "workerGroupSpecs": [ - { - "template": { - "spec": {"containers": [{"volumeMounts": []}], "volumes": []} - } - } - ], - } - } - mock_cluster_api.get_ray_cluster.return_value = mock_cluster - - rayjob = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_name="existing-cluster", - ) - - scripts = {"test_script.py": "print('Hello World')"} - rayjob._handle_script_volumes_for_existing_cluster( - scripts, {"metadata": {"uid": "test-uid"}} - ) - - mock_k8s_api.create_namespaced_config_map.assert_called_once() - created_configmap = mock_k8s_api.create_namespaced_config_map.call_args[1]["body"] - assert "test_script.py" in created_configmap.data - - mock_cluster_api.patch_ray_cluster.assert_called_once_with( - name="existing-cluster", ray_patch=mock_cluster, k8s_namespace="test-namespace" - ) - - -def test_handle_script_volumes_for_existing_cluster_no_volumes_init(auto_mock_setup): - """Test _handle_script_volumes_for_existing_cluster when volumes/mounts don't exist initially.""" - mock_api_instance = 
auto_mock_setup["rayjob_api"] - mock_cluster_api = auto_mock_setup["cluster_api"] - mock_k8s_api = auto_mock_setup["k8s_api"] - - # Mock existing cluster WITHOUT volumes/volumeMounts (to test initialization) - mock_cluster = { - "spec": { - "headGroupSpec": {"template": {"spec": {"containers": [{}]}}}, - "workerGroupSpecs": [{"template": {"spec": {"containers": [{}]}}}], - } - } - mock_cluster_api.get_ray_cluster.return_value = mock_cluster - - # Create RayJob with existing cluster - rayjob = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_name="existing-cluster", - ) - - # Call the method directly with test scripts - scripts = {"test_script.py": "print('Hello World')"} - rayjob._handle_script_volumes_for_existing_cluster( - scripts, {"metadata": {"uid": "test-uid"}} - ) - - # Verify volumes and volumeMounts were initialized - patched_cluster = mock_cluster_api.patch_ray_cluster.call_args[1]["ray_patch"] - - # Check head group - head_spec = patched_cluster["spec"]["headGroupSpec"]["template"]["spec"] - assert "volumes" in head_spec - assert len(head_spec["volumes"]) == 1 - assert "volumeMounts" in head_spec["containers"][0] - assert len(head_spec["containers"][0]["volumeMounts"]) == 1 - - # Check worker group - worker_spec = patched_cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"] - assert "volumes" in worker_spec - assert len(worker_spec["volumes"]) == 1 - assert "volumeMounts" in worker_spec["containers"][0] - assert len(worker_spec["containers"][0]["volumeMounts"]) == 1 - - -def test_update_existing_cluster_for_scripts_api_errors(mocker, auto_mock_setup): - """Test _update_existing_cluster_for_scripts error handling.""" - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_cluster_api = auto_mock_setup["cluster_api"] - - # Mock config builder - mock_config_builder = mocker.MagicMock() - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.ManagedClusterConfig", - return_value=mock_config_builder, - ) - - # Set up config 
builder to return valid specs - mock_config_builder.build_script_volume_specs.return_value = ( - {"name": "script-volume", "configMap": {"name": "test-configmap"}}, - {"name": "script-volume", "mountPath": "/home/ray/scripts"}, - ) - - # Mock cluster API to raise error - mock_cluster_api.get_ray_cluster.side_effect = ApiException( - status=404, reason="Not Found" - ) - - # Create RayJob - rayjob = RayJob( - job_name="test-job", - entrypoint="python script.py", - cluster_name="existing-cluster", - ) - - # Call the method directly - with pytest.raises( - RuntimeError, match="Failed to get RayCluster 'existing-cluster'" - ): - rayjob._update_existing_cluster_for_scripts( - "test-configmap", mock_config_builder - ) - - def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog): - """Test RayJob falls back to 'default' queue when no default queue exists.""" + """ + Test RayJob falls back to 'default' queue when no default queue exists. + """ mocker.patch( "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", return_value=None, @@ -2030,7 +2004,7 @@ def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", ) with caplog.at_level("WARNING"): @@ -2046,7 +2020,9 @@ def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog): def test_rayjob_kueue_explicit_local_queue(auto_mock_setup): - """Test RayJob uses explicitly specified local queue.""" + """ + Test RayJob uses explicitly specified local queue. 
+ """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} @@ -2054,7 +2030,7 @@ def test_rayjob_kueue_explicit_local_queue(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", local_queue="custom-queue", ) @@ -2070,7 +2046,9 @@ def test_rayjob_kueue_explicit_local_queue(auto_mock_setup): def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup): - """Test RayJob doesn't add Kueue label for existing clusters.""" + """ + Test RayJob doesn't add Kueue label for existing clusters. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} @@ -2078,7 +2056,7 @@ def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint="python script.py", + entrypoint="python test.py", ) rayjob.submit() @@ -2090,7 +2068,9 @@ def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup): def test_rayjob_with_ttl_and_deadline(auto_mock_setup): - """Test RayJob with TTL and active deadline seconds.""" + """ + Test RayJob with TTL and active deadline seconds. + """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} @@ -2098,7 +2078,7 @@ def test_rayjob_with_ttl_and_deadline(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", ttl_seconds_after_finished=300, active_deadline_seconds=600, ) @@ -2113,7 +2093,9 @@ def test_rayjob_with_ttl_and_deadline(auto_mock_setup): def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): - """Test RayJob sets shutdownAfterJobFinishes correctly.""" + """ + Test RayJob sets shutdownAfterJobFinishes correctly. 
+ """ mock_api_instance = auto_mock_setup["rayjob_api"] mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} @@ -2122,7 +2104,7 @@ def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", ) rayjob.submit() @@ -2135,7 +2117,7 @@ def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): rayjob2 = RayJob( job_name="test-job2", cluster_name="existing-cluster", - entrypoint="python script.py", + entrypoint="python test.py", ) rayjob2.submit() @@ -2146,7 +2128,9 @@ def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): def test_rayjob_stop_delete_resubmit_logging(auto_mock_setup, caplog): - """Test logging for stop, delete, and resubmit operations.""" + """ + Test logging for stop, delete, and resubmit operations. + """ mock_api_instance = auto_mock_setup["rayjob_api"] # Test stop with logging @@ -2159,7 +2143,7 @@ def test_rayjob_stop_delete_resubmit_logging(auto_mock_setup, caplog): job_name="test-rayjob", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python script.py", + entrypoint="python test.py", ) with caplog.at_level("INFO"): @@ -2193,14 +2177,117 @@ def test_rayjob_stop_delete_resubmit_logging(auto_mock_setup, caplog): def test_rayjob_initialization_logging(auto_mock_setup, caplog): - """Test RayJob initialization logging.""" + """ + Test RayJob initialization logging. 
+ """ with caplog.at_level("INFO"): cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python script.py", + entrypoint="python test.py", ) assert "Creating new cluster: test-job-cluster" in caplog.text assert "Initialized RayJob: test-job in namespace: test-namespace" in caplog.text + + +def test_build_submitter_pod_template_uses_default_image(auto_mock_setup, mocker): + """ + Test that _build_submitter_pod_template() uses get_ray_image_for_python_version() for default image. + """ + # Mock get_ray_image_for_python_version to verify it's called + mock_get_image = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_ray_image_for_python_version", + return_value="auto-detected-image:py3.12", + ) + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + files = {"test.py": "print('hello')"} + configmap_name = "test-files" + + # Call _build_submitter_pod_template + submitter_template = rayjob._build_submitter_pod_template(files, configmap_name) + + # Verify get_ray_image_for_python_version was called + mock_get_image.assert_called_once() + + # Verify the submitter pod uses the auto-detected image + assert ( + submitter_template["spec"]["containers"][0]["image"] + == "auto-detected-image:py3.12" + ) + + +def test_build_submitter_pod_template_uses_cluster_config_image( + auto_mock_setup, mocker +): + """ + Test that _build_submitter_pod_template() uses cluster_config image when provided. 
+ """ + # Mock get_ray_image_for_python_version (should be called but overridden) + mock_get_image = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_ray_image_for_python_version", + return_value="auto-detected-image:py3.12", + ) + + cluster_config = ManagedClusterConfig(image="custom-cluster-image:v1") + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python test.py", + namespace="test-namespace", + ) + + files = {"test.py": "print('hello')"} + configmap_name = "test-files" + + # Call _build_submitter_pod_template + submitter_template = rayjob._build_submitter_pod_template(files, configmap_name) + + # Verify get_ray_image_for_python_version was called + mock_get_image.assert_called_once() + + # Verify the submitter pod uses the cluster config image (overrides default) + assert ( + submitter_template["spec"]["containers"][0]["image"] + == "custom-cluster-image:v1" + ) + + +def test_build_submitter_pod_template_with_files(auto_mock_setup): + """ + Test that _build_submitter_pod_template() correctly builds ConfigMap items for files. 
+ """ + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + files = {"main.py": "print('main')", "helper.py": "print('helper')"} + configmap_name = "test-files" + + # Call _build_submitter_pod_template + submitter_template = rayjob._build_submitter_pod_template(files, configmap_name) + + # Verify ConfigMap items are created for each file + config_map_items = submitter_template["spec"]["volumes"][0]["configMap"]["items"] + assert len(config_map_items) == 2 + + # Verify each file has a ConfigMap item + file_names = [item["key"] for item in config_map_items] + assert "main.py" in file_names + assert "helper.py" in file_names + + # Verify paths match keys + for item in config_map_items: + assert item["key"] == item["path"] diff --git a/tests/e2e/rayjob/rayjob_existing_cluster_test.py b/tests/e2e/rayjob/rayjob_existing_cluster_test.py index b62ea1ef..e4865d9c 100644 --- a/tests/e2e/rayjob/rayjob_existing_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_existing_cluster_test.py @@ -60,7 +60,11 @@ def test_existing_kueue_cluster(self): ) cluster.apply() - sleep(20) + + # Wait for cluster to be ready (with Kueue admission) + print(f"Waiting for cluster '{cluster_name}' to be ready...") + cluster.wait_ready(timeout=300, dashboard_check=False) + print(f"✓ Cluster '{cluster_name}' is ready") # RayJob with explicit local_queue rayjob_explicit = RayJob( diff --git a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py index 3f9bbf03..821cb1c7 100644 --- a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py @@ -193,7 +193,7 @@ def test_lifecycled_kueue_resource_queueing(self): def verify_configmap_with_owner_reference(self, rayjob: RayJob): """Verify that the ConfigMap was created with proper owner reference to the RayJob.""" v1 = client.CoreV1Api() - configmap_name = 
f"{rayjob.name}-scripts" + configmap_name = f"{rayjob.name}-files" try: # Get the ConfigMap @@ -237,7 +237,7 @@ def verify_configmap_with_owner_reference(self, rayjob: RayJob): ) assert ( configmap.metadata.labels.get("app.kubernetes.io/component") - == "rayjob-scripts" + == "rayjob-files" ) print(f"✓ ConfigMap {configmap_name} verified with proper owner reference") From 8aa5564a8b68edad064a2d1960192d8a6d863869 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Thu, 9 Oct 2025 11:04:24 +0100 Subject: [PATCH 25/33] task(RHOAIENG-33283): Zip working directory Co-authored-by: Pat O'Connor --- poetry.lock | 6 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 650 ++---- src/codeflare_sdk/ray/rayjobs/runtime_env.py | 373 ++++ .../ray/rayjobs/test/conftest.py | 45 + .../ray/rayjobs/{ => test}/test_config.py | 0 .../rayjobs/{ => test}/test_pretty_print.py | 0 .../ray/rayjobs/{ => test}/test_rayjob.py | 1738 ++++++----------- .../ray/rayjobs/test/test_runtime_env.py | 935 +++++++++ .../ray/rayjobs/{ => test}/test_status.py | 0 .../rayjob/rayjob_existing_cluster_test.py | 20 +- 10 files changed, 2162 insertions(+), 1605 deletions(-) create mode 100644 src/codeflare_sdk/ray/rayjobs/runtime_env.py create mode 100644 src/codeflare_sdk/ray/rayjobs/test/conftest.py rename src/codeflare_sdk/ray/rayjobs/{ => test}/test_config.py (100%) rename src/codeflare_sdk/ray/rayjobs/{ => test}/test_pretty_print.py (100%) rename src/codeflare_sdk/ray/rayjobs/{ => test}/test_rayjob.py (61%) create mode 100644 src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py rename src/codeflare_sdk/ray/rayjobs/{ => test}/test_status.py (100%) diff --git a/poetry.lock b/poetry.lock index aa315f21..88ceb3cb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4508,14 +4508,14 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.31.2" +version = "20.35.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = 
"virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, - {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, + {file = "virtualenv-20.35.1-py3-none-any.whl", hash = "sha256:1d9d93cd01d35b785476e2fa7af711a98d40d227a078941695bbae394f8737e2"}, + {file = "virtualenv-20.35.1.tar.gz", hash = "sha256:041dac43b6899858a91838b616599e80000e545dee01a21172a6a46746472cb2"}, ] [package.dependencies] diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index ed0411e7..7ab9a0f5 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -17,21 +17,24 @@ """ import logging -import warnings import os import re -import ast -import yaml -from typing import Dict, Any, Optional, Tuple, List +import warnings +from typing import Dict, Any, Optional, Tuple, Union + +from ray.runtime_env import RuntimeEnv from codeflare_sdk.common.kueue.kueue import get_default_kueue_name from codeflare_sdk.common.utils.constants import MOUNT_PATH -from kubernetes import client from codeflare_sdk.common.utils.utils import get_ray_image_for_python_version -from ...common.kubernetes_cluster.auth import get_api_client from python_client.kuberay_job_api import RayjobApi from python_client.kuberay_cluster_api import RayClusterApi from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig +from codeflare_sdk.ray.rayjobs.runtime_env import ( + create_file_configmap, + extract_all_local_files, + process_runtime_env, +) from ...common.utils import get_current_namespace from ...common.utils.validation import validate_ray_version_compatibility @@ -46,9 +49,6 @@ logger = logging.getLogger(__name__) -# Regex pattern for finding Python files in entrypoint commands -PYTHON_FILE_PATTERN = r"(?:python\s+)?([./\w/]+\.py)" - class RayJob: """ @@ -65,7 +65,7 @@ def __init__( cluster_name: Optional[str] = 
None, cluster_config: Optional[ManagedClusterConfig] = None, namespace: Optional[str] = None, - runtime_env: Optional[Dict[str, Any]] = None, + runtime_env: Optional[Union[RuntimeEnv, Dict[str, Any]]] = None, ttl_seconds_after_finished: int = 0, active_deadline_seconds: Optional[int] = None, local_queue: Optional[str] = None, @@ -79,7 +79,10 @@ def __init__( cluster_name: The name of an existing Ray cluster (optional if cluster_config provided) cluster_config: Configuration for creating a new cluster (optional if cluster_name provided) namespace: The Kubernetes namespace (auto-detected if not specified) - runtime_env: Ray runtime environment configuration (optional) + runtime_env: Ray runtime environment configuration. Can be: + - RuntimeEnv object from ray.runtime_env + - Dict with keys like 'working_dir', 'pip', 'env_vars', etc. + Example: {"working_dir": "./my-scripts", "pip": ["requests"]} ttl_seconds_after_finished: Seconds to wait before cleanup after job finishes (default: 0) active_deadline_seconds: Maximum time the job can run before being terminated (optional) local_queue: The Kueue LocalQueue to submit the job to (optional) @@ -111,7 +114,13 @@ def __init__( self.name = job_name self.entrypoint = entrypoint - self.runtime_env = runtime_env + + # Convert dict to RuntimeEnv if needed for user convenience + if isinstance(runtime_env, dict): + self.runtime_env = RuntimeEnv(**runtime_env) + else: + self.runtime_env = runtime_env + self.ttl_seconds_after_finished = ttl_seconds_after_finished self.active_deadline_seconds = active_deadline_seconds self.local_queue = local_queue @@ -153,10 +162,12 @@ def submit(self) -> str: if not self.entrypoint: raise ValueError("Entrypoint must be provided to submit a RayJob") + # Validate configuration before submitting self._validate_ray_version_compatibility() + self._validate_working_dir_entrypoint() # Extract files from entrypoint and runtime_env working_dir - files = self._extract_all_local_files() + files = 
extract_all_local_files(self) # Create ConfigMap for files (will be mounted to submitter pod) configmap_name = None @@ -173,30 +184,12 @@ def submit(self) -> str: # Create ConfigMap with owner reference after RayJob exists if files: - self._create_file_configmap(files, result) + create_file_configmap(self, files, result) return self.name else: raise RuntimeError(f"Failed to submit RayJob {self.name}") - def _create_file_configmap( - self, files: Dict[str, str], rayjob_result: Dict[str, Any] - ): - """ - Create ConfigMap with owner reference for local files. - """ - # Use a basic config builder for ConfigMap creation - config_builder = ManagedClusterConfig() - - # Validate and build ConfigMap spec - config_builder.validate_configmap_size(files) - configmap_spec = config_builder.build_file_configmap_spec( - job_name=self.name, namespace=self.namespace, files=files - ) - - # Create ConfigMap with owner reference - configmap_name = self._create_configmap_from_spec(configmap_spec, rayjob_result) - def stop(self): """ Suspend the Ray job. 
@@ -252,6 +245,9 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: }, } + # Extract files once and use for both runtime_env and submitter pod + files = extract_all_local_files(self) + labels = {} # If cluster_config is provided, use the local_queue from the cluster_config if self._cluster_config is not None: @@ -282,11 +278,8 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: if self.active_deadline_seconds: rayjob_cr["spec"]["activeDeadlineSeconds"] = self.active_deadline_seconds - # Extract files once and use for both runtime_env and submitter pod - files = self._extract_all_local_files() - # Add runtime environment (can be inferred even if not explicitly specified) - processed_runtime_env = self._process_runtime_env(files) + processed_runtime_env = process_runtime_env(self, files) if processed_runtime_env: rayjob_cr["spec"]["runtimeEnvYAML"] = processed_runtime_env @@ -323,6 +316,9 @@ def _build_submitter_pod_template( """ Build submitterPodTemplate with ConfigMap volume mount for local files. + If files contain working_dir.zip, an init container will unzip it before + the main submitter container runs. 
+ Args: files: Dict of file_name -> file_content configmap_name: Name of the ConfigMap containing the files @@ -330,6 +326,8 @@ def _build_submitter_pod_template( Returns: submitterPodTemplate specification """ + from codeflare_sdk.ray.rayjobs.runtime_env import UNZIP_PATH + # Image has to be hard coded for the job submitter image = get_ray_image_for_python_version() if ( @@ -341,8 +339,31 @@ def _build_submitter_pod_template( # Build ConfigMap items for each file config_map_items = [] + entrypoint_path = files.get( + "__entrypoint_path__" + ) # Metadata for single file case + for file_name in files.keys(): - config_map_items.append({"key": file_name, "path": file_name}) + if file_name == "__entrypoint_path__": + continue # Skip metadata key + + # For single file case, use the preserved path structure + if entrypoint_path: + config_map_items.append({"key": file_name, "path": entrypoint_path}) + else: + config_map_items.append({"key": file_name, "path": file_name}) + + # Check if we need to unzip working_dir + has_working_dir_zip = "working_dir.zip" in files + + # Base volume mounts for main container + volume_mounts = [{"name": "ray-job-files", "mountPath": MOUNT_PATH}] + + # If we have a zip file, we need shared volume for unzipped content + if has_working_dir_zip: + volume_mounts.append( + {"name": "unzipped-working-dir", "mountPath": UNZIP_PATH} + ) submitter_pod_template = { "spec": { @@ -351,9 +372,7 @@ def _build_submitter_pod_template( { "name": "ray-job-submitter", "image": image, - "volumeMounts": [ - {"name": "ray-job-files", "mountPath": MOUNT_PATH} - ], + "volumeMounts": volume_mounts, } ], "volumes": [ @@ -368,6 +387,33 @@ def _build_submitter_pod_template( } } + # Add init container and volume for unzipping if needed + if has_working_dir_zip: + # Add emptyDir volume for unzipped content + submitter_pod_template["spec"]["volumes"].append( + {"name": "unzipped-working-dir", "emptyDir": {}} + ) + + # Add init container to unzip before KubeRay's submitter 
runs + submitter_pod_template["spec"]["initContainers"] = [ + { + "name": "unzip-working-dir", + "image": image, + "command": ["/bin/sh", "-c"], + "args": [ + # Decode base64 zip, save to temp file, extract, cleanup + f"mkdir -p {UNZIP_PATH} && " + f"python3 -m base64 -d {MOUNT_PATH}/working_dir.zip > /tmp/working_dir.zip && " + f"python3 -m zipfile -e /tmp/working_dir.zip {UNZIP_PATH}/ && " + f"rm /tmp/working_dir.zip && " + f"echo 'Successfully unzipped working_dir to {UNZIP_PATH}' && " + f"ls -la {UNZIP_PATH}" + ], + "volumeMounts": volume_mounts, + } + ] + logger.info(f"Added init container to unzip working_dir to {UNZIP_PATH}") + logger.info( f"Built submitterPodTemplate with {len(files)} files mounted at {MOUNT_PATH}, using image: {image}" ) @@ -409,6 +455,100 @@ def _validate_cluster_config_image(self): elif is_warning: warnings.warn(f"Cluster config image: {message}") + def _validate_working_dir_entrypoint(self): + """ + Validate entrypoint file configuration. + + Checks: + 1. Entrypoint doesn't redundantly reference working_dir + 2. Local files exist before submission + + Raises ValueError if validation fails. + """ + # Skip validation for inline commands (python -c, etc.) 
+ if re.search(r"\s+-c\s+", self.entrypoint): + return + + # Match Python file references only + file_pattern = r"(?:python\d?\s+)?([./\w/-]+\.py)" + matches = re.findall(file_pattern, self.entrypoint) + + if not matches: + return + + entrypoint_path = matches[0] + + # Get working_dir from runtime_env + runtime_env_dict = None + working_dir = None + + if self.runtime_env: + runtime_env_dict = ( + self.runtime_env.to_dict() + if hasattr(self.runtime_env, "to_dict") + else self.runtime_env + ) + if runtime_env_dict and "working_dir" in runtime_env_dict: + working_dir = runtime_env_dict["working_dir"] + + # Skip all validation for remote working_dir + if working_dir and not os.path.isdir(working_dir): + return + + # Case 1: Local working_dir - check redundancy and file existence + if working_dir: + normalized_working_dir = os.path.normpath(working_dir) + normalized_entrypoint = os.path.normpath(entrypoint_path) + + # Check for redundant directory reference + if normalized_entrypoint.startswith(normalized_working_dir + os.sep): + relative_to_working_dir = os.path.relpath( + normalized_entrypoint, normalized_working_dir + ) + working_dir_basename = os.path.basename(normalized_working_dir) + redundant_nested_path = os.path.join( + normalized_working_dir, + working_dir_basename, + relative_to_working_dir, + ) + + if not os.path.exists(redundant_nested_path): + raise ValueError( + f"❌ Working directory conflict detected:\n" + f" working_dir: '{working_dir}'\n" + f" entrypoint references: '{entrypoint_path}'\n" + f"\n" + f"This will fail because the entrypoint runs from within working_dir.\n" + f"It would look for: '{redundant_nested_path}' (which doesn't exist)\n" + f"\n" + f"Fix: Remove the directory prefix from your entrypoint:\n" + f' entrypoint = "python {relative_to_working_dir}"' + ) + + # Check file exists within working_dir + if not normalized_entrypoint.startswith(normalized_working_dir + os.sep): + # Use normalized_working_dir (absolute path) for proper file 
existence check + full_entrypoint_path = os.path.join( + normalized_working_dir, entrypoint_path + ) + if not os.path.isfile(full_entrypoint_path): + raise ValueError( + f"❌ Entrypoint file not found:\n" + f" Looking for: '{full_entrypoint_path}'\n" + f" (working_dir: '{working_dir}', entrypoint file: '{entrypoint_path}')\n" + f"\n" + f"Please ensure the file exists at the expected location." + ) + + # Case 2: No working_dir - validate local file exists + else: + if not os.path.isfile(entrypoint_path): + raise ValueError( + f"❌ Entrypoint file not found: '{entrypoint_path}'\n" + f"\n" + f"Please ensure the file exists at the specified path." + ) + def status( self, print_to_console: bool = True ) -> Tuple[CodeflareRayJobStatus, bool]: @@ -478,433 +618,3 @@ def _map_to_codeflare_status( return status_mapping.get( deployment_status, (CodeflareRayJobStatus.UNKNOWN, False) ) - - def _extract_files_from_entrypoint(self) -> Optional[Dict[str, str]]: - """ - Extract local Python script files from entrypoint command, plus their dependencies. 
- - Returns: - Dict of {file_name: file_content} if local files found, None otherwise - """ - if not self.entrypoint: - return None - - files = {} - processed_files = set() # Avoid infinite loops - - # Look for Python file patterns in entrypoint (e.g., "python script.py", "python /path/to/script.py") - matches = re.findall(PYTHON_FILE_PATTERN, self.entrypoint) - - # Process main files from entrypoint - for file_path in matches: - self._process_file_and_imports( - file_path, files, MOUNT_PATH, processed_files - ) - - return files if files else None - - def _process_file_and_imports( - self, - file_path: str, - files: Dict[str, str], - mount_path: str, - processed_files: set, - ): - """ - Recursively process a file and its local imports - """ - if file_path in processed_files: - return - - # Check if it's a local file (not already a container path) - if file_path.startswith("/home/ray/") or not os.path.isfile(file_path): - return - - processed_files.add(file_path) - - try: - with open(file_path, "r") as f: - file_content = f.read() - - file_name = os.path.basename(file_path) - files[file_name] = file_content - - logger.info( - f"Found local file: {file_path} -> will mount at {mount_path}/{file_name}" - ) - - # Parse imports in this file to find dependencies - self._find_local_imports( - file_content, - file_path, - lambda path: self._process_file_and_imports( - path, files, mount_path, processed_files - ), - ) - - except (IOError, OSError) as e: - logger.warning(f"Could not read file {file_path}: {e}") - - def _find_local_imports(self, file_content: str, file_path: str, process_callback): - """ - Find local Python imports in file content and process them. 
- - Args: - file_content: The content of the Python file - file_path: Path to the current file (for relative imports) - process_callback: Function to call for each found local import - """ - - try: - # Parse the Python AST to find imports - tree = ast.parse(file_content) - file_dir = os.path.dirname(os.path.abspath(file_path)) - - for node in ast.walk(tree): - if isinstance(node, ast.Import): - # Handle: import module_name - for alias in node.names: - potential_file = os.path.join(file_dir, f"{alias.name}.py") - if os.path.isfile(potential_file): - process_callback(potential_file) - - elif isinstance(node, ast.ImportFrom): - # Handle: from module_name import something - if node.module: - potential_file = os.path.join(file_dir, f"{node.module}.py") - if os.path.isfile(potential_file): - process_callback(potential_file) - - except (SyntaxError, ValueError) as e: - logger.debug(f"Could not parse imports from {file_path}: {e}") - - def _extract_all_local_files(self) -> Optional[Dict[str, str]]: - """ - Extract all local files from both entrypoint and runtime_env working_dir. - - Note: If runtime_env has a remote working_dir, we don't extract local files - to avoid conflicts. The remote working_dir should contain all needed files. - - Returns: - Dict of {file_name: file_content} if local files found, None otherwise - """ - # If there's a remote working_dir, don't extract local files to avoid conflicts - if ( - self.runtime_env - and "working_dir" in self.runtime_env - and not os.path.isdir(self.runtime_env["working_dir"]) - ): - logger.info( - f"Remote working_dir detected: {self.runtime_env['working_dir']}. " - "Skipping local file extraction - all files should come from remote source." 
- ) - return None - - files = {} - processed_files = set() - - # Extract files from entrypoint (always check for local files in entrypoint) - entrypoint_files = self._extract_files_from_entrypoint() - if entrypoint_files: - files.update(entrypoint_files) - processed_files.update(entrypoint_files.keys()) - - # Extract files from runtime_env working_dir if it's a local directory - if ( - self.runtime_env - and "working_dir" in self.runtime_env - and os.path.isdir(self.runtime_env["working_dir"]) - ): - working_dir_files = self._extract_working_dir_files( - self.runtime_env["working_dir"], processed_files - ) - if working_dir_files: - files.update(working_dir_files) - - # If no working_dir specified in runtime_env, try to infer and extract files from inferred directory - elif not self.runtime_env or "working_dir" not in self.runtime_env: - inferred_working_dir = self._infer_working_dir_from_entrypoint() - if inferred_working_dir: - working_dir_files = self._extract_working_dir_files( - inferred_working_dir, processed_files - ) - if working_dir_files: - files.update(working_dir_files) - - return files if files else None - - def _extract_working_dir_files( - self, working_dir: str, processed_files: set - ) -> Dict[str, str]: - """ - Extract all Python files from working directory. 
- - Args: - working_dir: Path to working directory - processed_files: Set of already processed file names to avoid duplicates - - Returns: - Dict of {file_name: file_content} - """ - files_dict = {} - - try: - for root, dirs, files in os.walk(working_dir): - for file in files: - if file.endswith(".py") and file not in processed_files: - file_path = os.path.join(root, file) - try: - with open(file_path, "r") as f: - content = f.read() - files_dict[file] = content - processed_files.add(file) - logger.info( - f"Added working directory file: {file_path} -> {MOUNT_PATH}/{file}" - ) - except (IOError, OSError) as e: - logger.warning(f"Could not read file {file_path}: {e}") - except (IOError, OSError) as e: - logger.warning(f"Could not scan working directory {working_dir}: {e}") - - return files_dict - - def _process_runtime_env( - self, files: Optional[Dict[str, str]] = None - ) -> Optional[str]: - """ - Process runtime_env field to handle env_vars, pip dependencies, and working_dir. - Can also infer working directory from entrypoint even if runtime_env is not provided. 
- - Returns: - Processed runtime environment as YAML string, or None if no processing needed - """ - processed_env = {} - - # Handle env_vars - if self.runtime_env and "env_vars" in self.runtime_env: - processed_env["env_vars"] = self.runtime_env["env_vars"] - logger.info( - f"Added {len(self.runtime_env['env_vars'])} environment variables to runtime_env" - ) - - # Handle pip dependencies - if self.runtime_env and "pip" in self.runtime_env: - pip_deps = self._process_pip_dependencies(self.runtime_env["pip"]) - if pip_deps: - processed_env["pip"] = pip_deps - - # Handle working_dir - if it's a local path, set it to mount path - if self.runtime_env and "working_dir" in self.runtime_env: - working_dir = self.runtime_env["working_dir"] - if os.path.isdir(working_dir): - # Local working directory - will be mounted at MOUNT_PATH - processed_env["working_dir"] = MOUNT_PATH - logger.info( - f"Local working directory will be packaged and mounted at: {MOUNT_PATH}" - ) - self._adjust_entrypoint_for_mounted_files() - else: - # Remote URI (e.g., GitHub) - pass through as-is - processed_env["working_dir"] = working_dir - logger.info(f"Using remote working directory: {working_dir}") - - # If no working_dir specified but we have files, set working_dir to mount path - elif not self.runtime_env or "working_dir" not in self.runtime_env: - if files: - # Local files found - will be mounted at MOUNT_PATH - processed_env["working_dir"] = MOUNT_PATH - logger.info( - f"Local files will be packaged and mounted at: {MOUNT_PATH}" - ) - self._adjust_entrypoint_for_mounted_files() - - # Convert to YAML string if we have any processed environment - if processed_env: - return yaml.dump(processed_env, default_flow_style=False) - - return None - - def _process_pip_dependencies(self, pip_spec) -> Optional[List[str]]: - """ - Process pip dependencies from runtime_env. 
- - Args: - pip_spec: Can be a list of packages, a string path to requirements.txt, or dict - - Returns: - List of pip dependencies - """ - if isinstance(pip_spec, list): - # Already a list of dependencies - logger.info(f"Using provided pip dependencies: {len(pip_spec)} packages") - return pip_spec - elif isinstance(pip_spec, str): - # Assume it's a path to requirements.txt - return self._parse_requirements_file(pip_spec) - elif isinstance(pip_spec, dict): - # Handle dict format (e.g., {"packages": [...], "pip_check": False}) - if "packages" in pip_spec: - logger.info( - f"Using pip dependencies from dict: {len(pip_spec['packages'])} packages" - ) - return pip_spec["packages"] - - logger.warning(f"Unsupported pip specification format: {type(pip_spec)}") - return None - - def _parse_requirements_file(self, requirements_path: str) -> Optional[List[str]]: - """ - Parse a requirements.txt file and return list of dependencies. - - Args: - requirements_path: Path to requirements.txt file - - Returns: - List of pip dependencies - """ - if not os.path.isfile(requirements_path): - logger.warning(f"Requirements file not found: {requirements_path}") - return None - - try: - with open(requirements_path, "r") as f: - lines = f.readlines() - - # Parse requirements, filtering out comments and empty lines - requirements = [] - for line in lines: - line = line.strip() - if line and not line.startswith("#"): - requirements.append(line) - - logger.info( - f"Parsed {len(requirements)} dependencies from {requirements_path}" - ) - return requirements - - except (IOError, OSError) as e: - logger.warning(f"Could not read requirements file {requirements_path}: {e}") - return None - - def _infer_working_dir_from_entrypoint(self) -> Optional[str]: - """ - Infer working directory from entrypoint path when it contains directory components. - Only useful for entrypoints with paths like 'python src/script.py'. 
- - Returns: - Inferred working directory path, or None if just simple filenames - """ - if not self.entrypoint: - return None - - # Look for Python file patterns in entrypoint - matches = re.findall(PYTHON_FILE_PATTERN, self.entrypoint) - - for script_path in matches: - # Only infer working directory if the path has directory components - if "/" in script_path or "\\" in script_path: - if os.path.isfile(script_path): - working_dir = os.path.dirname(os.path.abspath(script_path)) - logger.info( - f"Inferred working directory from entrypoint: {working_dir}" - ) - return working_dir - else: - # File doesn't exist locally, but path has directory components - working_dir = os.path.dirname(os.path.abspath(script_path)) - logger.info( - f"Inferred working directory from entrypoint path: {working_dir}" - ) - return working_dir - - # For simple filenames like "script.py" we don't need to infer the working directory - return None - - def _adjust_entrypoint_for_mounted_files(self): - """ - Adjust the entrypoint command to use just filenames since files are mounted at MOUNT_PATH. - """ - if not self.entrypoint: - return - - # Look for Python file patterns in entrypoint - matches = re.findall(PYTHON_FILE_PATTERN, self.entrypoint) - - for script_path in matches: - if os.path.isfile(script_path): - # Use just the filename since files will be mounted at MOUNT_PATH - filename = os.path.basename(script_path) - self.entrypoint = self.entrypoint.replace(script_path, filename) - logger.info( - f"Adjusted entrypoint for mounted files: {script_path} -> {filename}" - ) - - def _create_configmap_from_spec( - self, configmap_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None - ) -> str: - """ - Create ConfigMap from specification via Kubernetes API. 
- - Args: - configmap_spec: ConfigMap specification dictionary - rayjob_result: The result from RayJob creation containing UID - - Returns: - str: Name of the created ConfigMap - """ - - configmap_name = configmap_spec["metadata"]["name"] - - metadata = client.V1ObjectMeta(**configmap_spec["metadata"]) - - # Add owner reference if we have the RayJob result - if ( - rayjob_result - and isinstance(rayjob_result, dict) - and rayjob_result.get("metadata", {}).get("uid") - ): - logger.info( - f"Adding owner reference to ConfigMap '{configmap_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" - ) - metadata.owner_references = [ - client.V1OwnerReference( - api_version="ray.io/v1", - kind="RayJob", - name=self.name, - uid=rayjob_result["metadata"]["uid"], - controller=True, - block_owner_deletion=True, - ) - ] - else: - logger.warning( - f"No valid RayJob result with UID found, ConfigMap '{configmap_name}' will not have owner reference. Result: {rayjob_result}" - ) - - # Convert dict spec to V1ConfigMap - configmap = client.V1ConfigMap( - metadata=metadata, - data=configmap_spec["data"], - ) - - # Create ConfigMap via Kubernetes API - k8s_api = client.CoreV1Api(get_api_client()) - try: - k8s_api.create_namespaced_config_map( - namespace=self.namespace, body=configmap - ) - logger.info( - f"Created ConfigMap '{configmap_name}' with {len(configmap_spec['data'])} files" - ) - except client.ApiException as e: - if e.status == 409: # Already exists - logger.info(f"ConfigMap '{configmap_name}' already exists, updating...") - k8s_api.replace_namespaced_config_map( - name=configmap_name, namespace=self.namespace, body=configmap - ) - else: - raise RuntimeError( - f"Failed to create ConfigMap '{configmap_name}': {e}" - ) - - return configmap_name diff --git a/src/codeflare_sdk/ray/rayjobs/runtime_env.py b/src/codeflare_sdk/ray/rayjobs/runtime_env.py new file mode 100644 index 00000000..0b9a71b2 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/runtime_env.py @@ -0,0 
+1,373 @@ +from __future__ import annotations # Postpone evaluation of annotations + +import logging +import os +import re +import yaml +import zipfile +import base64 +import io +from typing import Dict, Any, Optional, List, TYPE_CHECKING +from codeflare_sdk.common.utils.constants import MOUNT_PATH +from kubernetes import client +from ray.runtime_env import RuntimeEnv + +from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig +from ...common.kubernetes_cluster.auth import get_api_client + +# Use TYPE_CHECKING to avoid circular import at runtime +if TYPE_CHECKING: + from codeflare_sdk.ray.rayjobs.rayjob import RayJob + +logger = logging.getLogger(__name__) + +# Regex pattern for finding Python files in entrypoint commands +# Matches paths like: test.py, ./test.py, dir/test.py, my-dir/test.py +PYTHON_FILE_PATTERN = r"(?:python\s+)?([./\w/-]+\.py)" + +# Path where working_dir will be unzipped on submitter pod +UNZIP_PATH = "/tmp/rayjob-working-dir" + + +def _normalize_runtime_env( + runtime_env: Optional[RuntimeEnv], +) -> Optional[Dict[str, Any]]: + if runtime_env is None: + return None + return runtime_env.to_dict() + + +def extract_all_local_files(job: RayJob) -> Optional[Dict[str, str]]: + """ + Prepare local files for ConfigMap upload. 
+ + - If runtime_env has local working_dir: zip entire directory into single file + - If single entrypoint file (no working_dir): extract that file + - If remote working_dir URL: return None (pass through to Ray) + + Returns: + Dict with either: + - {"working_dir.zip": } for zipped directories + - {"script.py": } for single files + - None for remote working_dir or no files + """ + # Convert RuntimeEnv to dict for processing + runtime_env_dict = _normalize_runtime_env(job.runtime_env) + + # If there's a remote working_dir, don't extract local files + if ( + runtime_env_dict + and "working_dir" in runtime_env_dict + and not os.path.isdir(runtime_env_dict["working_dir"]) + ): + logger.info( + f"Remote working_dir detected: {runtime_env_dict['working_dir']}. " + "Skipping local file extraction - using remote source." + ) + return None + + # If there's a local working_dir, zip it + if ( + runtime_env_dict + and "working_dir" in runtime_env_dict + and os.path.isdir(runtime_env_dict["working_dir"]) + ): + working_dir = runtime_env_dict["working_dir"] + logger.info(f"Zipping local working_dir: {working_dir}") + zip_data = _zip_directory(working_dir) + if zip_data: + # Encode zip as base64 for ConfigMap storage + zip_base64 = base64.b64encode(zip_data).decode("utf-8") + return {"working_dir.zip": zip_base64} + + # If no working_dir, check for single entrypoint file + entrypoint_file = _extract_single_entrypoint_file(job) + if entrypoint_file: + return entrypoint_file + + return None + + +def _zip_directory(directory_path: str) -> Optional[bytes]: + """ + Zip entire directory preserving structure. 
+ + Args: + directory_path: Path to directory to zip + + Returns: + Bytes of zip file, or None on error + """ + try: + # Create in-memory zip file + zip_buffer = io.BytesIO() + + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf: + # Walk through directory and add all files + for root, dirs, files in os.walk(directory_path): + for file in files: + file_path = os.path.join(root, file) + # Calculate relative path from directory_path + arcname = os.path.relpath(file_path, directory_path) + zipf.write(file_path, arcname) + logger.debug(f"Added to zip: {arcname}") + + zip_data = zip_buffer.getvalue() + logger.info( + f"Successfully zipped directory: {directory_path} ({len(zip_data)} bytes)" + ) + return zip_data + + except (IOError, OSError) as e: + logger.error(f"Failed to zip directory {directory_path}: {e}") + return None + + +def _extract_single_entrypoint_file(job: RayJob) -> Optional[Dict[str, str]]: + """ + Extract single Python file from entrypoint if no working_dir specified. + + Returns a dict with metadata about the file path structure so we can + preserve it when mounting via ConfigMap. 
+ + Args: + job: RayJob instance + + Returns: + Dict with special format: {"__entrypoint_path__": path, "filename": content} + This allows us to preserve directory structure when mounting + """ + if not job.entrypoint: + return None + + # Look for Python file in entrypoint + matches = re.findall(PYTHON_FILE_PATTERN, job.entrypoint) + + for file_path in matches: + # Check if it's a local file + if os.path.isfile(file_path): + try: + with open(file_path, "r") as f: + content = f.read() + + # Use basename as key (ConfigMap keys can't have slashes) + # But store the full path for later use in ConfigMap item.path + filename = os.path.basename(file_path) + relative_path = file_path.lstrip("./") + + logger.info(f"Extracted single entrypoint file: {file_path}") + + # Return special format with metadata + return {"__entrypoint_path__": relative_path, filename: content} + + except (IOError, OSError) as e: + logger.warning(f"Could not read entrypoint file {file_path}: {e}") + + return None + + +def process_runtime_env( + job: RayJob, files: Optional[Dict[str, str]] = None +) -> Optional[str]: + """ + Process runtime_env field to handle env_vars, pip dependencies, and working_dir. 
+ + Returns: + Processed runtime environment as YAML string, or None if no processing needed + """ + # Convert RuntimeEnv to dict for processing + runtime_env_dict = _normalize_runtime_env(job.runtime_env) + + processed_env = {} + + # Handle env_vars + if runtime_env_dict and "env_vars" in runtime_env_dict: + processed_env["env_vars"] = runtime_env_dict["env_vars"] + logger.info( + f"Added {len(runtime_env_dict['env_vars'])} environment variables to runtime_env" + ) + + # Handle pip dependencies + if runtime_env_dict and "pip" in runtime_env_dict: + pip_deps = process_pip_dependencies(job, runtime_env_dict["pip"]) + if pip_deps: + processed_env["pip"] = pip_deps + + # Handle working_dir + if runtime_env_dict and "working_dir" in runtime_env_dict: + working_dir = runtime_env_dict["working_dir"] + if os.path.isdir(working_dir): + # Local working directory - will be zipped and unzipped to UNZIP_PATH by submitter pod + processed_env["working_dir"] = UNZIP_PATH + logger.info( + f"Local working_dir will be zipped, mounted, and unzipped to: {UNZIP_PATH}" + ) + else: + # Remote URI (e.g., GitHub, S3) - pass through as-is + processed_env["working_dir"] = working_dir + logger.info(f"Using remote working_dir: {working_dir}") + + # If no working_dir specified but we have files (single file case) + elif not runtime_env_dict or "working_dir" not in runtime_env_dict: + if files and "working_dir.zip" not in files: + # Single file case - mount at MOUNT_PATH + processed_env["working_dir"] = MOUNT_PATH + logger.info(f"Single file will be mounted at: {MOUNT_PATH}") + + # Convert to YAML string if we have any processed environment + if processed_env: + return yaml.dump(processed_env, default_flow_style=False) + + return None + + +def process_pip_dependencies(job: RayJob, pip_spec) -> Optional[List[str]]: + """ + Process pip dependencies from runtime_env. 
+ + Args: + pip_spec: Can be a list of packages, a string path to requirements.txt, or dict + + Returns: + List of pip dependencies + """ + if isinstance(pip_spec, list): + # Already a list of dependencies + logger.info(f"Using provided pip dependencies: {len(pip_spec)} packages") + return pip_spec + elif isinstance(pip_spec, str): + # Assume it's a path to requirements.txt + return parse_requirements_file(pip_spec) + elif isinstance(pip_spec, dict): + # Handle dict format (e.g., {"packages": [...], "pip_check": False}) + if "packages" in pip_spec: + logger.info( + f"Using pip dependencies from dict: {len(pip_spec['packages'])} packages" + ) + return pip_spec["packages"] + + logger.warning(f"Unsupported pip specification format: {type(pip_spec)}") + return None + + +def parse_requirements_file(requirements_path: str) -> Optional[List[str]]: + """ + Parse a requirements.txt file and return list of dependencies. + + Args: + requirements_path: Path to requirements.txt file + + Returns: + List of pip dependencies + """ + if not os.path.isfile(requirements_path): + logger.warning(f"Requirements file not found: {requirements_path}") + return None + + try: + with open(requirements_path, "r") as f: + lines = f.readlines() + + # Parse requirements, filtering out comments and empty lines + requirements = [] + for line in lines: + line = line.strip() + if line and not line.startswith("#"): + requirements.append(line) + + logger.info(f"Parsed {len(requirements)} dependencies from {requirements_path}") + return requirements + + except (IOError, OSError) as e: + logger.warning(f"Could not read requirements file {requirements_path}: {e}") + return None + + +def create_configmap_from_spec( + job: RayJob, configmap_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None +) -> str: + """ + Create ConfigMap from specification via Kubernetes API. 
+ + Args: + configmap_spec: ConfigMap specification dictionary + rayjob_result: The result from RayJob creation containing UID + + Returns: + str: Name of the created ConfigMap + """ + + configmap_name = configmap_spec["metadata"]["name"] + + metadata = client.V1ObjectMeta(**configmap_spec["metadata"]) + + # Add owner reference if we have the RayJob result + if ( + rayjob_result + and isinstance(rayjob_result, dict) + and rayjob_result.get("metadata", {}).get("uid") + ): + logger.info( + f"Adding owner reference to ConfigMap '{configmap_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" + ) + metadata.owner_references = [ + client.V1OwnerReference( + api_version="ray.io/v1", + kind="RayJob", + name=job.name, + uid=rayjob_result["metadata"]["uid"], + controller=True, + block_owner_deletion=True, + ) + ] + else: + logger.warning( + f"No valid RayJob result with UID found, ConfigMap '{configmap_name}' will not have owner reference. Result: {rayjob_result}" + ) + + # Convert dict spec to V1ConfigMap + configmap = client.V1ConfigMap( + metadata=metadata, + data=configmap_spec["data"], + ) + + # Create ConfigMap via Kubernetes API + k8s_api = client.CoreV1Api(get_api_client()) + try: + k8s_api.create_namespaced_config_map(namespace=job.namespace, body=configmap) + logger.info( + f"Created ConfigMap '{configmap_name}' with {len(configmap_spec['data'])} files" + ) + except client.ApiException as e: + if e.status == 409: # Already exists + logger.info(f"ConfigMap '{configmap_name}' already exists, updating...") + k8s_api.replace_namespaced_config_map( + name=configmap_name, namespace=job.namespace, body=configmap + ) + else: + raise RuntimeError(f"Failed to create ConfigMap '{configmap_name}': {e}") + + return configmap_name + + +def create_file_configmap( + job: RayJob, files: Dict[str, str], rayjob_result: Dict[str, Any] +): + """ + Create ConfigMap with owner reference for local files. 
+ """ + # Use a basic config builder for ConfigMap creation + config_builder = ManagedClusterConfig() + + # Filter out metadata keys (like __entrypoint_path__) from ConfigMap data + configmap_files = {k: v for k, v in files.items() if not k.startswith("__")} + + # Validate and build ConfigMap spec + config_builder.validate_configmap_size(configmap_files) + configmap_spec = config_builder.build_file_configmap_spec( + job_name=job.name, namespace=job.namespace, files=configmap_files + ) + + # Create ConfigMap with owner reference + # TODO Error handling + create_configmap_from_spec(job, configmap_spec, rayjob_result) diff --git a/src/codeflare_sdk/ray/rayjobs/test/conftest.py b/src/codeflare_sdk/ray/rayjobs/test/conftest.py new file mode 100644 index 00000000..bad195a7 --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/test/conftest.py @@ -0,0 +1,45 @@ +"""Shared pytest fixtures for rayjobs tests.""" + +import pytest +from unittest.mock import MagicMock + + +# Global test setup that runs automatically for ALL tests +@pytest.fixture(autouse=True) +def auto_mock_setup(mocker): + """Automatically mock common dependencies for all tests.""" + mocker.patch("kubernetes.config.load_kube_config") + + # Always mock get_default_kueue_name to prevent K8s API calls + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", + return_value="default-queue", + ) + + mock_get_ns = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", + return_value="test-namespace", + ) + + mock_rayjob_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_rayjob_instance = MagicMock() + mock_rayjob_api.return_value = mock_rayjob_instance + + mock_cluster_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") + mock_cluster_instance = MagicMock() + mock_cluster_api.return_value = mock_cluster_instance + + mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") + mock_k8s_instance = MagicMock() + mock_k8s_api.return_value = 
mock_k8s_instance + + # Mock get_api_client in runtime_env module where it's actually used + mocker.patch("codeflare_sdk.ray.rayjobs.runtime_env.get_api_client") + + # Return the mocked instances so tests can configure them as needed + return { + "rayjob_api": mock_rayjob_instance, + "cluster_api": mock_cluster_instance, + "k8s_api": mock_k8s_instance, + "get_current_namespace": mock_get_ns, + } diff --git a/src/codeflare_sdk/ray/rayjobs/test_config.py b/src/codeflare_sdk/ray/rayjobs/test/test_config.py similarity index 100% rename from src/codeflare_sdk/ray/rayjobs/test_config.py rename to src/codeflare_sdk/ray/rayjobs/test/test_config.py diff --git a/src/codeflare_sdk/ray/rayjobs/test_pretty_print.py b/src/codeflare_sdk/ray/rayjobs/test/test_pretty_print.py similarity index 100% rename from src/codeflare_sdk/ray/rayjobs/test_pretty_print.py rename to src/codeflare_sdk/ray/rayjobs/test/test_pretty_print.py diff --git a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py similarity index 61% rename from src/codeflare_sdk/ray/rayjobs/test_rayjob.py rename to src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py index 15c53c8d..bfdaaabd 100644 --- a/src/codeflare_sdk/ray/rayjobs/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py @@ -13,62 +13,14 @@ # limitations under the License. 
import pytest -import os -from unittest.mock import MagicMock, patch -from codeflare_sdk.common.utils.constants import MOUNT_PATH, RAY_VERSION +from unittest.mock import MagicMock +from codeflare_sdk.common.utils.constants import RAY_VERSION +from ray.runtime_env import RuntimeEnv from codeflare_sdk.ray.rayjobs.rayjob import RayJob from codeflare_sdk.ray.cluster.config import ClusterConfiguration from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig -from kubernetes.client import ( - V1Volume, - V1VolumeMount, - V1Toleration, - V1ConfigMapVolumeSource, - ApiException, -) - - -# Global test setup that runs automatically for ALL tests -@pytest.fixture(autouse=True) -def auto_mock_setup(mocker): - """ - Automatically mock common dependencies for all tests. - """ - mocker.patch("kubernetes.config.load_kube_config") - - # Always mock get_default_kueue_name to prevent K8s API calls - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", - return_value="default-queue", - ) - - mock_get_ns = mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_current_namespace", - return_value="test-namespace", - ) - - mock_rayjob_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_rayjob_instance = MagicMock() - mock_rayjob_api.return_value = mock_rayjob_instance - - mock_cluster_api = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayClusterApi") - mock_cluster_instance = MagicMock() - mock_cluster_api.return_value = mock_cluster_instance - - mock_k8s_api = mocker.patch("kubernetes.client.CoreV1Api") - mock_k8s_instance = MagicMock() - mock_k8s_api.return_value = mock_k8s_instance - - mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.get_api_client") - - # Return the mocked instances so tests can configure them as needed - return { - "rayjob_api": mock_rayjob_instance, - "cluster_api": mock_cluster_instance, - "k8s_api": mock_k8s_instance, - "get_current_namespace": mock_get_ns, - } +from kubernetes.client import V1Volume, 
V1VolumeMount, V1Toleration def test_rayjob_submit_success(auto_mock_setup): @@ -84,7 +36,7 @@ def test_rayjob_submit_success(auto_mock_setup): cluster_name="test-ray-cluster", namespace="test-namespace", entrypoint="python -c 'print(\"hello world\")'", - runtime_env={"pip": ["requests"]}, + runtime_env=RuntimeEnv(pip=["requests"]), ) job_id = rayjob.submit() @@ -116,8 +68,8 @@ def test_rayjob_submit_failure(auto_mock_setup): job_name="test-rayjob", cluster_name="test-ray-cluster", namespace="default", - entrypoint="python test.py", - runtime_env={"pip": ["numpy"]}, + entrypoint="python -c 'print()'", + runtime_env=RuntimeEnv(pip=["numpy"]), ) with pytest.raises(RuntimeError, match="Failed to submit RayJob test-rayjob"): @@ -138,7 +90,7 @@ def test_rayjob_init_validation_both_provided(auto_mock_setup): job_name="test-job", cluster_name="existing-cluster", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) @@ -164,7 +116,7 @@ def test_rayjob_init_with_cluster_config(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -187,7 +139,7 @@ def test_rayjob_cluster_name_generation(auto_mock_setup): rayjob = RayJob( job_name="my-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -208,7 +160,7 @@ def test_rayjob_cluster_config_namespace_none(auto_mock_setup): job_name="test-job", cluster_config=cluster_config, namespace="job-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) assert rayjob.namespace == "job-namespace" @@ -238,7 +190,7 @@ def test_build_ray_cluster_spec_no_config_error(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -271,7 
+223,7 @@ def test_build_ray_cluster_spec(mocker, auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -384,7 +336,7 @@ def test_submit_with_auto_cluster(mocker, auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -422,7 +374,7 @@ def test_namespace_auto_detection_fallback(auto_mock_setup): with pytest.raises(ValueError, match="Could not auto-detect Kubernetes namespace"): RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_name="test-cluster", ) @@ -435,7 +387,7 @@ def test_namespace_explicit_override(auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_name="test-cluster", namespace="explicit-ns", ) @@ -455,7 +407,7 @@ def test_rayjob_with_rayjob_cluster_config(auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_config=cluster_config, namespace="test-namespace", ) @@ -472,7 +424,7 @@ def test_rayjob_cluster_config_validation(auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_config=cluster_config, namespace="test-namespace", ) @@ -505,7 +457,7 @@ def test_build_ray_cluster_spec_integration(mocker, auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_config=cluster_config, namespace="test-namespace", ) @@ -523,11 +475,11 @@ def test_rayjob_with_runtime_env(auto_mock_setup): """ Test RayJob with runtime environment configuration. 
""" - runtime_env = {"pip": ["numpy", "pandas"]} + runtime_env = RuntimeEnv(pip=["numpy", "pandas"]) rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_name="test-cluster", runtime_env=runtime_env, namespace="test-namespace", @@ -539,49 +491,36 @@ def test_rayjob_with_runtime_env(auto_mock_setup): assert rayjob_cr["spec"]["runtimeEnvYAML"] == "pip:\n- numpy\n- pandas\n" -def test_rayjob_with_remote_working_dir(auto_mock_setup): +def test_rayjob_with_runtime_env_dict(auto_mock_setup): """ - Test RayJob with remote working directory in runtime_env. - Should not extract local files and should pass through remote URL. + Test RayJob with runtime environment as dict (user convenience). + Users can pass a dict instead of having to import RuntimeEnv. """ - runtime_env = { - "working_dir": "https://github.com/org/repo/archive/refs/heads/main.zip", + # User can pass dict instead of RuntimeEnv object + runtime_env_dict = { "pip": ["numpy", "pandas"], "env_vars": {"TEST_VAR": "test_value"}, } rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_name="test-cluster", - runtime_env=runtime_env, + runtime_env=runtime_env_dict, namespace="test-namespace", ) - assert rayjob.runtime_env == runtime_env - - # Should not extract any local files due to remote working_dir - files = rayjob._extract_all_local_files() - assert files is None + # Should be converted to RuntimeEnv internally + assert isinstance(rayjob.runtime_env, RuntimeEnv) + assert rayjob.runtime_env["env_vars"] == {"TEST_VAR": "test_value"} + # Verify it generates proper YAML output rayjob_cr = rayjob._build_rayjob_cr() - - # Should have runtimeEnvYAML with all fields - expected_runtime_env = ( - "env_vars:\n" - " TEST_VAR: test_value\n" - "pip:\n" - "- numpy\n" - "- pandas\n" - "working_dir: https://github.com/org/repo/archive/refs/heads/main.zip\n" - ) - assert rayjob_cr["spec"]["runtimeEnvYAML"] == 
expected_runtime_env - - # Should not have submitterPodTemplate since no local files - assert "submitterPodTemplate" not in rayjob_cr["spec"] - - # Entrypoint should be unchanged - assert rayjob_cr["spec"]["entrypoint"] == "python test.py" + assert "runtimeEnvYAML" in rayjob_cr["spec"] + runtime_yaml = rayjob_cr["spec"]["runtimeEnvYAML"] + assert "pip:" in runtime_yaml or "pip_packages:" in runtime_yaml + assert "env_vars:" in runtime_yaml + assert "TEST_VAR" in runtime_yaml def test_rayjob_with_active_deadline_and_ttl(auto_mock_setup): @@ -591,7 +530,7 @@ def test_rayjob_with_active_deadline_and_ttl(auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_name="test-cluster", active_deadline_seconds=300, ttl_seconds_after_finished=600, @@ -615,7 +554,7 @@ def test_rayjob_cluster_name_generation_with_config(auto_mock_setup): rayjob = RayJob( job_name="my-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_config=cluster_config, namespace="test-namespace", # Explicitly specify namespace ) @@ -633,7 +572,7 @@ def test_rayjob_namespace_propagation_to_cluster_config(auto_mock_setup): rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_config=cluster_config, ) @@ -648,7 +587,7 @@ def test_rayjob_error_handling_invalid_cluster_config(auto_mock_setup): with pytest.raises(ValueError): RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) @@ -658,19 +597,24 @@ def test_rayjob_constructor_parameter_validation(auto_mock_setup): """ rayjob = RayJob( job_name="test-job", - entrypoint="python test.py", + entrypoint="python -c 'print()'", cluster_name="test-cluster", namespace="test-ns", - runtime_env={"pip": ["numpy"]}, + runtime_env=RuntimeEnv(pip=["numpy"]), ttl_seconds_after_finished=300, active_deadline_seconds=600, ) assert rayjob.name == "test-job" - assert 
rayjob.entrypoint == "python test.py" + assert rayjob.entrypoint == "python -c 'print()'" assert rayjob.cluster_name == "test-cluster" assert rayjob.namespace == "test-ns" - assert rayjob.runtime_env == {"pip": ["numpy"]} + # Check that runtime_env is a RuntimeEnv object and contains pip dependencies + assert isinstance(rayjob.runtime_env, RuntimeEnv) + runtime_env_dict = rayjob.runtime_env.to_dict() + assert "pip" in runtime_env_dict + # Ray transforms pip to dict format with 'packages' key + assert runtime_env_dict["pip"]["packages"] == ["numpy"] assert rayjob.ttl_seconds_after_finished == 300 assert rayjob.active_deadline_seconds == 600 @@ -852,7 +796,7 @@ def test_submit_with_cluster_config_compatible_image_passes(auto_mock_setup): job_name="test-job", cluster_config=cluster_config, namespace="test-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) result = rayjob.submit() @@ -870,7 +814,7 @@ def test_submit_with_cluster_config_incompatible_image_fails(auto_mock_setup): job_name="test-job", cluster_config=cluster_config, namespace="test-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) with pytest.raises( @@ -888,7 +832,7 @@ def test_validate_ray_version_compatibility_method(auto_mock_setup): job_name="test-job", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) rayjob._validate_ray_version_compatibility() @@ -916,7 +860,7 @@ def test_validate_cluster_config_image_method(auto_mock_setup): job_name="test-job", cluster_config=ManagedClusterConfig(), namespace="test-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) rayjob._validate_cluster_config_image() @@ -944,7 +888,7 @@ def test_validate_cluster_config_image_edge_cases(auto_mock_setup): job_name="test-job", cluster_config=ManagedClusterConfig(), namespace="test-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) 
rayjob._cluster_config.image = None @@ -961,1110 +905,276 @@ class MockClusterConfig: rayjob._validate_cluster_config_image() -def test_extract_files_from_entrypoint_single_file(auto_mock_setup, tmp_path): +def test_rayjob_stop_success(auto_mock_setup, caplog): """ - Test extracting a single file from entrypoint. + Test successful RayJob stop operation. """ + mock_api_instance = auto_mock_setup["rayjob_api"] - # Create a test file - test_file = tmp_path / "test_file.py" - test_file.write_text("print('Hello World!')") + mock_api_instance.suspend_job.return_value = { + "metadata": {"name": "test-rayjob"}, + "spec": {"suspend": True}, + } - # Change to temp directory for test - original_cwd = os.getcwd() - os.chdir(tmp_path) + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python -c 'print()'", + ) - try: - # Use a path that would need adjustment - entrypoint_with_path = f"python ./{test_file.name}" - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint=entrypoint_with_path, - namespace="test-namespace", - ) + with caplog.at_level("INFO"): + result = rayjob.stop() - files = rayjob._extract_files_from_entrypoint() + assert result is True - assert files is not None - assert test_file.name in files - assert files[test_file.name] == "print('Hello World!')" - assert entrypoint_with_path == rayjob.entrypoint - finally: - os.chdir(original_cwd) + mock_api_instance.suspend_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) + + # Verify success message was logged + assert "Successfully stopped the RayJob test-rayjob" in caplog.text -def test_extract_files_with_dependencies(auto_mock_setup, tmp_path): +def test_rayjob_stop_failure(auto_mock_setup): """ - Test extracting files with local dependencies. + Test RayJob stop operation when API call fails. 
""" + mock_api_instance = auto_mock_setup["rayjob_api"] - # Create main file and dependency - main_file = tmp_path / "main.py" - main_file.write_text( - """ -import helper -from utils import calculate - -def main(): - helper.do_something() - result = calculate(42) - print(f"Result: {result}") + mock_api_instance.suspend_job.return_value = None -if __name__ == "__main__": - main() -""" + rayjob = RayJob( + job_name="test-rayjob", + cluster_name="test-cluster", + namespace="test-namespace", + entrypoint="python -c 'print()'", ) - helper_file = tmp_path / "helper.py" - helper_file.write_text( - """ -def do_something(): - print("Doing something...") -""" - ) + with pytest.raises(RuntimeError, match="Failed to stop the RayJob test-rayjob"): + rayjob.stop() - utils_file = tmp_path / "utils.py" - utils_file.write_text( - """ -def calculate(x): - return x * 2 -""" + mock_api_instance.suspend_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" ) - # Change to temp directory for test - original_cwd = os.getcwd() - os.chdir(tmp_path) - - try: - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python main.py", - namespace="test-namespace", - ) - - files = rayjob._extract_files_from_entrypoint() - - assert files is not None - assert len(files) == 3 - assert "main.py" in files - assert "helper.py" in files - assert "utils.py" in files - - assert "import helper" in files["main.py"] - assert "def do_something" in files["helper.py"] - assert "def calculate" in files["utils.py"] - - finally: - os.chdir(original_cwd) - -def test_extract_files_no_local_files(auto_mock_setup): +def test_rayjob_resubmit_success(auto_mock_setup): """ - Test entrypoint with no local files. + Test successful RayJob resubmit operation. 
""" + mock_api_instance = auto_mock_setup["rayjob_api"] + + mock_api_instance.resubmit_job.return_value = { + "metadata": {"name": "test-rayjob"}, + "spec": {"suspend": False}, + } rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python -c 'print(\"hello world\")'", + job_name="test-rayjob", + cluster_name="test-cluster", namespace="test-namespace", + entrypoint="python -c 'print()'", ) - files = rayjob._extract_files_from_entrypoint() + result = rayjob.resubmit() + + assert result is True - assert files is None + mock_api_instance.resubmit_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) -def test_extract_files_nonexistent_file(auto_mock_setup): +def test_rayjob_resubmit_failure(auto_mock_setup): """ - Test entrypoint referencing non-existent file. + Test RayJob resubmit operation when API call fails. """ + mock_api_instance = auto_mock_setup["rayjob_api"] + + mock_api_instance.resubmit_job.return_value = None rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python nonexistent.py", + job_name="test-rayjob", + cluster_name="test-cluster", namespace="test-namespace", + entrypoint="python -c 'print()'", ) - files = rayjob._extract_files_from_entrypoint() + with pytest.raises(RuntimeError, match="Failed to resubmit the RayJob test-rayjob"): + rayjob.resubmit() - assert files is None + mock_api_instance.resubmit_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) -def test_build_file_configmap_spec(): +def test_rayjob_delete_success(auto_mock_setup): """ - Test building ConfigMap specification for files. + Test successful RayJob deletion. 
""" - config = ManagedClusterConfig() - files = {"main.py": "print('main')", "helper.py": "print('helper')"} + mock_api_instance = auto_mock_setup["rayjob_api"] - spec = config.build_file_configmap_spec( - job_name="test-job", namespace="test-namespace", files=files + rayjob = RayJob( + job_name="test-rayjob", + entrypoint="python -c 'print()'", + cluster_name="test-cluster", ) - assert spec["apiVersion"] == "v1" - assert spec["kind"] == "ConfigMap" - assert spec["metadata"]["name"] == "test-job-files" - assert spec["metadata"]["namespace"] == "test-namespace" - assert spec["data"] == files - + mock_api_instance.delete_job.return_value = True -def test_build_file_volume_specs(): - """ - Test building volume and mount specifications for files. - """ - config = ManagedClusterConfig() + result = rayjob.delete() - volume_spec, mount_spec = config.build_file_volume_specs( - configmap_name="test-files", mount_path="/custom/path" + assert result is True + mock_api_instance.delete_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" ) - assert volume_spec["name"] == "ray-job-files" - assert volume_spec["configMap"]["name"] == "test-files" - - assert mount_spec["name"] == "ray-job-files" - assert mount_spec["mountPath"] == "/custom/path" - -def test_add_file_volumes(): +def test_rayjob_delete_already_deleted(auto_mock_setup, caplog): """ - Test adding file volumes to cluster configuration. + Test RayJob deletion when already deleted (should succeed gracefully). 
""" - config = ManagedClusterConfig() - - # Initially no volumes - assert len(config.volumes) == 0 - assert len(config.volume_mounts) == 0 + mock_api_instance = auto_mock_setup["rayjob_api"] - config.add_file_volumes(configmap_name="test-files") + rayjob = RayJob( + job_name="test-rayjob", + entrypoint="python -c 'print()'", + cluster_name="test-cluster", + ) - assert len(config.volumes) == 1 - assert len(config.volume_mounts) == 1 + # Python client returns False when job doesn't exist/already deleted + mock_api_instance.delete_job.return_value = False - volume = config.volumes[0] - mount = config.volume_mounts[0] + with caplog.at_level("INFO"): + result = rayjob.delete() - assert volume.name == "ray-job-files" - assert volume.config_map.name == "test-files" + # Should succeed (not raise error) when already deleted + assert result is True + assert "already deleted or does not exist" in caplog.text - assert mount.name == "ray-job-files" - assert mount.mount_path == MOUNT_PATH + mock_api_instance.delete_job.assert_called_once_with( + name="test-rayjob", k8s_namespace="test-namespace" + ) -def test_add_file_volumes_duplicate_prevention(): +def test_rayjob_init_both_none_error(auto_mock_setup): """ - Test that adding file volumes twice doesn't create duplicates. + Test RayJob initialization error when both cluster_name and cluster_config are None. 
""" - config = ManagedClusterConfig() - - # Add volumes twice - config.add_file_volumes(configmap_name="test-files") - config.add_file_volumes(configmap_name="test-files") - - assert len(config.volumes) == 1 - assert len(config.volume_mounts) == 1 + with pytest.raises( + ValueError, + match="Configuration Error: You must provide either 'cluster_name' .* or 'cluster_config'", + ): + RayJob( + job_name="test-job", + entrypoint="python -c 'print()'", + cluster_name=None, + cluster_config=None, + ) -def test_create_configmap_from_spec(auto_mock_setup): +def test_rayjob_init_missing_cluster_name_with_no_config(auto_mock_setup): """ - Test creating ConfigMap via Kubernetes API. + Test RayJob initialization error when cluster_name is None without cluster_config. """ - mock_api_instance = auto_mock_setup["k8s_api"] - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - configmap_spec = { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } - - result = rayjob._create_configmap_from_spec(configmap_spec) - - assert result == "test-files" - mock_api_instance.create_namespaced_config_map.assert_called_once() + with pytest.raises( + ValueError, + match="Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'", + ): + rayjob = RayJob.__new__(RayJob) + rayjob.name = "test-job" + rayjob.entrypoint = "python test.py" + rayjob.runtime_env = None + rayjob.ttl_seconds_after_finished = 0 + rayjob.active_deadline_seconds = None + rayjob.shutdown_after_job_finishes = True + rayjob.namespace = "test-namespace" + rayjob._cluster_name = None + rayjob._cluster_config = None + if rayjob._cluster_config is None and rayjob._cluster_name is None: + raise ValueError( + "❌ Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'" + ) -def 
test_create_configmap_already_exists(auto_mock_setup): +def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog): """ - Test creating ConfigMap when it already exists (409 conflict). + Test RayJob falls back to 'default' queue when no default queue exists. """ - mock_api_instance = auto_mock_setup["k8s_api"] - - mock_api_instance.create_namespaced_config_map.side_effect = ApiException( - status=409 + mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", + return_value=None, ) + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", + cluster_config=cluster_config, + entrypoint="python -c 'print()'", ) - configmap_spec = { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } + with caplog.at_level("WARNING"): + rayjob.submit() - result = rayjob._create_configmap_from_spec(configmap_spec) + # Verify the submitted job has the fallback label + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert submitted_job["metadata"]["labels"]["kueue.x-k8s.io/queue-name"] == "default" - assert result == "test-files" - mock_api_instance.create_namespaced_config_map.assert_called_once() - mock_api_instance.replace_namespaced_config_map.assert_called_once() + # Verify warning was logged + assert "No default Kueue LocalQueue found" in caplog.text -def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, caplog): +def test_rayjob_kueue_explicit_local_queue(auto_mock_setup): """ - Test creating ConfigMap with owner reference from valid RayJob result. + Test RayJob uses explicitly specified local queue. 
""" - mock_api_instance = auto_mock_setup["k8s_api"] - - # Mock client.V1ObjectMeta and V1ConfigMap - mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") - mock_metadata_instance = MagicMock() - mock_v1_metadata.return_value = mock_metadata_instance + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + cluster_config = ManagedClusterConfig() rayjob = RayJob( job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", + cluster_config=cluster_config, + entrypoint="python -c 'print()'", + local_queue="custom-queue", ) - configmap_spec = { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": { - "name": "test-files", - "namespace": "test-namespace", - "labels": { - "ray.io/job-name": "test-job", - "app.kubernetes.io/managed-by": "codeflare-sdk", - "app.kubernetes.io/component": "rayjob-files", - }, - }, - "data": {"test.py": "print('test')"}, - } - - # Valid RayJob result with UID - rayjob_result = { - "metadata": { - "name": "test-job", - "namespace": "test-namespace", - "uid": "a4dd4c5a-ab61-411d-b4d1-4abb5177422a", - } - } - - with caplog.at_level("INFO"): - result = rayjob._create_configmap_from_spec(configmap_spec, rayjob_result) - - assert result == "test-files" + rayjob.submit() - # Verify owner reference was set - expected_owner_ref = mocker.ANY # We'll check via the logs + # Verify the submitted job has the explicit queue label + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] assert ( - "Adding owner reference to ConfigMap 'test-files' with RayJob UID: a4dd4c5a-ab61-411d-b4d1-4abb5177422a" - in caplog.text + submitted_job["metadata"]["labels"]["kueue.x-k8s.io/queue-name"] + == "custom-queue" ) - assert mock_metadata_instance.owner_references is not None - mock_api_instance.create_namespaced_config_map.assert_called_once() - -def 
test_create_configmap_without_owner_reference_no_uid( - mocker, auto_mock_setup, caplog -): +def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup): """ - Test creating ConfigMap without owner reference when RayJob has no UID. + Test RayJob doesn't add Kueue label for existing clusters. """ - mock_api_instance = auto_mock_setup["k8s_api"] - - mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") - mock_metadata_instance = MagicMock() - mock_v1_metadata.return_value = mock_metadata_instance + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + # Using existing cluster (no cluster_config) rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", + entrypoint="python -c 'print()'", ) - configmap_spec = { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } - - # RayJob result without UID - rayjob_result = { - "metadata": { - "name": "test-job", - "namespace": "test-namespace", - # No UID field - } - } + rayjob.submit() - with caplog.at_level("WARNING"): - result = rayjob._create_configmap_from_spec(configmap_spec, rayjob_result) - - assert result == "test-files" - - # Verify warning was logged and no owner reference was set - assert ( - "No valid RayJob result with UID found, ConfigMap 'test-files' will not have owner reference" - in caplog.text - ) - - # The important part is that the warning was logged, indicating no owner reference was set - mock_api_instance.create_namespaced_config_map.assert_called_once() - - -def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): - """ - Test creating ConfigMap with None or invalid rayjob_result. 
- """ - mock_api_instance = auto_mock_setup["k8s_api"] - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - configmap_spec = { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } - - # Test with None - with caplog.at_level("WARNING"): - result = rayjob._create_configmap_from_spec(configmap_spec, None) - - assert result == "test-files" - assert "No valid RayJob result with UID found" in caplog.text - - # Test with string instead of dict - caplog.clear() - with caplog.at_level("WARNING"): - result = rayjob._create_configmap_from_spec(configmap_spec, "not-a-dict") - - assert result == "test-files" - assert "No valid RayJob result with UID found" in caplog.text - - -def test_ast_parsing_import_detection(auto_mock_setup, tmp_path): - """ - Test AST parsing correctly detects import statements. - """ - - main_file = tmp_path / "main.py" - main_file.write_text( - """# Different import patterns -import helper -from utils import func1, func2 -from local_module import MyClass -import os # Standard library - should be ignored -import non_existent # Non-local - should be ignored -""" - ) - - helper_file = tmp_path / "helper.py" - helper_file.write_text("def helper_func(): pass") - - utils_file = tmp_path / "utils.py" - utils_file.write_text( - """def func1(): pass -def func2(): pass -""" - ) - - local_module_file = tmp_path / "local_module.py" - local_module_file.write_text("class MyClass: pass") - - original_cwd = os.getcwd() - os.chdir(tmp_path) - - try: - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python main.py", - namespace="test-namespace", - ) - - files = rayjob._extract_files_from_entrypoint() - - assert files is not None - assert len(files) == 4 # main + 3 dependencies - assert "main.py" in files - assert "helper.py" in files 
- assert "utils.py" in files - assert "local_module.py" in files - - finally: - os.chdir(original_cwd) - - -def test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): - """ - Test the Kubernetes best practice flow: pre-declare volume, submit, create ConfigMap. - """ - mocker.patch("kubernetes.config.load_kube_config") - - mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") - mock_api_instance = MagicMock() - mock_api_class.return_value = mock_api_instance - - submit_result = { - "metadata": { - "name": "test-job", - "namespace": "test-namespace", - "uid": "test-uid-12345", - } - } - mock_api_instance.submit_job.return_value = submit_result - - mock_create_cm = mocker.patch.object(RayJob, "_create_file_configmap") - mock_add_volumes = mocker.patch.object(ManagedClusterConfig, "add_file_volumes") - - # RayClusterApi is already mocked by auto_mock_setup - - test_file = tmp_path / "test.py" - test_file.write_text("print('test')") - - call_order = [] - - def track_add_volumes(*args, **kwargs): - call_order.append("add_volumes") - # Should be called with ConfigMap name - assert args[0] == "test-job-files" - - def track_submit(*args, **kwargs): - call_order.append("submit_job") - return submit_result - - def track_create_cm(*args, **kwargs): - call_order.append("create_configmap") - assert args[1] == submit_result # rayjob_result should be second arg - - mock_add_volumes.side_effect = track_add_volumes - mock_api_instance.submit_job.side_effect = track_submit - mock_create_cm.side_effect = track_create_cm - - original_cwd = os.getcwd() - try: - os.chdir(tmp_path) - - cluster_config = ManagedClusterConfig() - - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - entrypoint="python test.py", - namespace="test-namespace", - ) - - rayjob.submit() - finally: - os.chdir(original_cwd) - - # Verify the order: submit → create ConfigMap - assert call_order == ["submit_job", "create_configmap"] - - 
mock_api_instance.submit_job.assert_called_once() - mock_create_cm.assert_called_once() - - mock_create_cm.assert_called_with({"test.py": "print('test')"}, submit_result) - - -def test_rayjob_submit_with_files_new_cluster(auto_mock_setup, tmp_path): - """ - Test RayJob submission with file detection for new cluster. - """ - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_api_instance.submit_job.return_value = True - - mock_k8s_instance = auto_mock_setup["k8s_api"] - - # Create test file - test_file = tmp_path / "test.py" - test_file.write_text("print('Hello from the test file!')") - - cluster_config = ManagedClusterConfig() - - original_cwd = os.getcwd() - os.chdir(tmp_path) - - try: - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - entrypoint="python test.py", - namespace="test-namespace", - ) - - # Submit should detect files and handle them - result = rayjob.submit() - - assert result == "test-job" - - mock_k8s_instance.create_namespaced_config_map.assert_called_once() - - assert len(cluster_config.volumes) == 0 - assert len(cluster_config.volume_mounts) == 0 - # Entrypoint should be adjusted to use just the filename - assert rayjob.entrypoint == "python test.py" - - finally: - os.chdir(original_cwd) - - -def test_process_file_and_imports_io_error(mocker, auto_mock_setup, tmp_path): - """ - Test _process_file_and_imports handles IO errors gracefully. 
- """ - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - files = {} - processed_files = set() - - # Mock os.path.isfile to return True but open() to raise IOError - mocker.patch("os.path.isfile", return_value=True) - mocker.patch("builtins.open", side_effect=IOError("Permission denied")) - - rayjob._process_file_and_imports("test.py", files, MOUNT_PATH, processed_files) - assert "test.py" in processed_files - assert len(files) == 0 - - -def test_process_file_and_imports_container_path_skip(auto_mock_setup): - """ - Test that files already in container paths are skipped. - """ - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - files = {} - processed_files = set() - - # Test file path already in container - rayjob._process_file_and_imports( - f"{MOUNT_PATH}/test.py", files, MOUNT_PATH, processed_files - ) - - assert len(files) == 0 - assert len(processed_files) == 0 - - -def test_process_file_and_imports_already_processed(auto_mock_setup, tmp_path): - """ - Test that already processed files are skipped (infinite loop prevention). - """ - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - files = {} - processed_files = {"test.py"} # Already processed - - rayjob._process_file_and_imports("test.py", files, MOUNT_PATH, processed_files) - - assert len(files) == 0 - assert processed_files == {"test.py"} - - -def test_submit_with_files_owner_reference_integration( - mocker, auto_mock_setup, tmp_path, caplog -): - """ - Integration test for submit() with local files to verify end-to-end owner reference flow. 
- """ - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_k8s_instance = auto_mock_setup["k8s_api"] - - # RayJob submission returns result with UID - submit_result = { - "metadata": { - "name": "test-job", - "namespace": "test-namespace", - "uid": "unique-rayjob-uid-12345", - } - } - mock_api_instance.submit_job.return_value = submit_result - - # Capture the ConfigMap that gets created - created_configmap = None - - def capture_configmap(namespace, body): - nonlocal created_configmap - created_configmap = body - return body - - mock_k8s_instance.create_namespaced_config_map.side_effect = capture_configmap - - # Create test files - test_file = tmp_path / "main.py" - test_file.write_text("import helper\nprint('main')") - - helper_file = tmp_path / "helper.py" - helper_file.write_text("def help(): print('helper')") - - # Change to temp directory for file detection - original_cwd = os.getcwd() - try: - os.chdir(tmp_path) - - cluster_config = ManagedClusterConfig() - - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - entrypoint="python main.py", - namespace="test-namespace", - ) - - with caplog.at_level("INFO"): - result = rayjob.submit() - - assert result == "test-job" - - mock_api_instance.submit_job.assert_called_once() - mock_k8s_instance.create_namespaced_config_map.assert_called_once() - assert created_configmap is not None - - # Verify owner reference was set correctly - assert hasattr(created_configmap.metadata, "owner_references") - assert created_configmap.metadata.owner_references is not None - assert len(created_configmap.metadata.owner_references) == 1 - - owner_ref = created_configmap.metadata.owner_references[0] - assert owner_ref.api_version == "ray.io/v1" - assert owner_ref.kind == "RayJob" - assert owner_ref.name == "test-job" - assert owner_ref.uid == "unique-rayjob-uid-12345" - assert owner_ref.controller is True - assert owner_ref.block_owner_deletion is True - - # Verify labels were set - assert 
created_configmap.metadata.labels["ray.io/job-name"] == "test-job" - assert ( - created_configmap.metadata.labels["app.kubernetes.io/managed-by"] - == "codeflare-sdk" - ) - assert ( - created_configmap.metadata.labels["app.kubernetes.io/component"] - == "rayjob-files" - ) - - assert "main.py" in created_configmap.data - assert "helper.py" in created_configmap.data - assert ( - "Adding owner reference to ConfigMap 'test-job-files' with RayJob UID: unique-rayjob-uid-12345" - in caplog.text - ) - - finally: - os.chdir(original_cwd) - - -def test_find_local_imports_syntax_error(mocker, auto_mock_setup): - """ - Test _find_local_imports handles syntax errors gracefully. - """ - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - # Invalid Python syntax - invalid_file_content = "import helper\ndef invalid_syntax(" - - mock_callback = mocker.Mock() - - rayjob._find_local_imports(invalid_file_content, "test.py", mock_callback) - mock_callback.assert_not_called() - - -def test_create_configmap_api_error_non_409(auto_mock_setup): - """ - Test _create_configmap_from_spec handles non-409 API errors. - """ - mock_api_instance = auto_mock_setup["k8s_api"] - - # Configure to raise 500 error - mock_api_instance.create_namespaced_config_map.side_effect = ApiException( - status=500 - ) - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - configmap_spec = { - "apiVersion": "v1", - "kind": "ConfigMap", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } - - with pytest.raises(RuntimeError, match="Failed to create ConfigMap"): - rayjob._create_configmap_from_spec(configmap_spec) - - -def test_extract_files_empty_entrypoint(auto_mock_setup): - """ - Test file extraction with empty entrypoint. 
- """ - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="", # Empty entrypoint - namespace="test-namespace", - ) - - files = rayjob._extract_files_from_entrypoint() - - assert files is None - - -def test_add_file_volumes_existing_volume_skip(): - """ - Test add_file_volumes skips when volume already exists (missing coverage). - """ - config = ManagedClusterConfig() - - # Pre-add a volume with same name - existing_volume = V1Volume( - name="ray-job-files", - config_map=V1ConfigMapVolumeSource(name="existing-files"), - ) - config.volumes.append(existing_volume) - - config.add_file_volumes(configmap_name="new-files") - assert len(config.volumes) == 1 - assert len(config.volume_mounts) == 0 # Mount not added due to volume skip - - -def test_add_file_volumes_existing_mount_skip(): - """ - Test add_file_volumes skips when mount already exists (missing coverage). - """ - config = ManagedClusterConfig() - - # Pre-add a mount with same name - existing_mount = V1VolumeMount(name="ray-job-files", mount_path="/existing/path") - config.volume_mounts.append(existing_mount) - - config.add_file_volumes(configmap_name="new-files") - assert len(config.volumes) == 0 # Volume not added due to mount skip - assert len(config.volume_mounts) == 1 - - -def test_rayjob_stop_success(auto_mock_setup, caplog): - """ - Test successful RayJob stop operation. 
- """ - mock_api_instance = auto_mock_setup["rayjob_api"] - - mock_api_instance.suspend_job.return_value = { - "metadata": {"name": "test-rayjob"}, - "spec": {"suspend": True}, - } - - rayjob = RayJob( - job_name="test-rayjob", - cluster_name="test-cluster", - namespace="test-namespace", - entrypoint="python test.py", - ) - - with caplog.at_level("INFO"): - result = rayjob.stop() - - assert result is True - - mock_api_instance.suspend_job.assert_called_once_with( - name="test-rayjob", k8s_namespace="test-namespace" - ) - - # Verify success message was logged - assert "Successfully stopped the RayJob test-rayjob" in caplog.text - - -def test_rayjob_stop_failure(auto_mock_setup): - """ - Test RayJob stop operation when API call fails. - """ - mock_api_instance = auto_mock_setup["rayjob_api"] - - mock_api_instance.suspend_job.return_value = None - - rayjob = RayJob( - job_name="test-rayjob", - cluster_name="test-cluster", - namespace="test-namespace", - entrypoint="python test.py", - ) - - with pytest.raises(RuntimeError, match="Failed to stop the RayJob test-rayjob"): - rayjob.stop() - - mock_api_instance.suspend_job.assert_called_once_with( - name="test-rayjob", k8s_namespace="test-namespace" - ) - - -def test_rayjob_resubmit_success(auto_mock_setup): - """ - Test successful RayJob resubmit operation. - """ - mock_api_instance = auto_mock_setup["rayjob_api"] - - mock_api_instance.resubmit_job.return_value = { - "metadata": {"name": "test-rayjob"}, - "spec": {"suspend": False}, - } - - rayjob = RayJob( - job_name="test-rayjob", - cluster_name="test-cluster", - namespace="test-namespace", - entrypoint="python test.py", - ) - - result = rayjob.resubmit() - - assert result is True - - mock_api_instance.resubmit_job.assert_called_once_with( - name="test-rayjob", k8s_namespace="test-namespace" - ) - - -def test_rayjob_resubmit_failure(auto_mock_setup): - """ - Test RayJob resubmit operation when API call fails. 
- """ - mock_api_instance = auto_mock_setup["rayjob_api"] - - mock_api_instance.resubmit_job.return_value = None - - rayjob = RayJob( - job_name="test-rayjob", - cluster_name="test-cluster", - namespace="test-namespace", - entrypoint="python test.py", - ) - - with pytest.raises(RuntimeError, match="Failed to resubmit the RayJob test-rayjob"): - rayjob.resubmit() - - mock_api_instance.resubmit_job.assert_called_once_with( - name="test-rayjob", k8s_namespace="test-namespace" - ) - - -def test_rayjob_delete_success(auto_mock_setup): - """ - Test successful RayJob deletion. - """ - mock_api_instance = auto_mock_setup["rayjob_api"] - - rayjob = RayJob( - job_name="test-rayjob", - entrypoint="python test.py", - cluster_name="test-cluster", - ) - - mock_api_instance.delete_job.return_value = True - - result = rayjob.delete() - - assert result is True - mock_api_instance.delete_job.assert_called_once_with( - name="test-rayjob", k8s_namespace="test-namespace" - ) - - -def test_rayjob_delete_already_deleted(auto_mock_setup, caplog): - """ - Test RayJob deletion when already deleted (should succeed gracefully). - """ - mock_api_instance = auto_mock_setup["rayjob_api"] - - rayjob = RayJob( - job_name="test-rayjob", - entrypoint="python test.py", - cluster_name="test-cluster", - ) - - # Python client returns False when job doesn't exist/already deleted - mock_api_instance.delete_job.return_value = False - - with caplog.at_level("INFO"): - result = rayjob.delete() - - # Should succeed (not raise error) when already deleted - assert result is True - assert "already deleted or does not exist" in caplog.text - - mock_api_instance.delete_job.assert_called_once_with( - name="test-rayjob", k8s_namespace="test-namespace" - ) - - -def test_rayjob_init_both_none_error(auto_mock_setup): - """ - Test RayJob initialization error when both cluster_name and cluster_config are None. 
- """ - with pytest.raises( - ValueError, - match="Configuration Error: You must provide either 'cluster_name' .* or 'cluster_config'", - ): - RayJob( - job_name="test-job", - entrypoint="python test.py", - cluster_name=None, - cluster_config=None, - ) - - -def test_rayjob_init_missing_cluster_name_with_no_config(auto_mock_setup): - """ - Test RayJob initialization error when cluster_name is None without cluster_config. - """ - with pytest.raises( - ValueError, - match="Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'", - ): - rayjob = RayJob.__new__(RayJob) - rayjob.name = "test-job" - rayjob.entrypoint = "python test.py" - rayjob.runtime_env = None - rayjob.ttl_seconds_after_finished = 0 - rayjob.active_deadline_seconds = None - rayjob.shutdown_after_job_finishes = True - rayjob.namespace = "test-namespace" - rayjob._cluster_name = None - rayjob._cluster_config = None - if rayjob._cluster_config is None and rayjob._cluster_name is None: - raise ValueError( - "❌ Configuration Error: a 'cluster_name' is required when not providing 'cluster_config'" - ) - - -def test_rayjob_kueue_label_no_default_queue(auto_mock_setup, mocker, caplog): - """ - Test RayJob falls back to 'default' queue when no default queue exists. 
- """ - mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.get_default_kueue_name", - return_value=None, - ) - - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} - - cluster_config = ManagedClusterConfig() - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - entrypoint="python test.py", - ) - - with caplog.at_level("WARNING"): - rayjob.submit() - - # Verify the submitted job has the fallback label - call_args = mock_api_instance.submit_job.call_args - submitted_job = call_args.kwargs["job"] - assert submitted_job["metadata"]["labels"]["kueue.x-k8s.io/queue-name"] == "default" - - # Verify warning was logged - assert "No default Kueue LocalQueue found" in caplog.text - - -def test_rayjob_kueue_explicit_local_queue(auto_mock_setup): - """ - Test RayJob uses explicitly specified local queue. - """ - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} - - cluster_config = ManagedClusterConfig() - rayjob = RayJob( - job_name="test-job", - cluster_config=cluster_config, - entrypoint="python test.py", - local_queue="custom-queue", - ) - - rayjob.submit() - - # Verify the submitted job has the explicit queue label - call_args = mock_api_instance.submit_job.call_args - submitted_job = call_args.kwargs["job"] - assert ( - submitted_job["metadata"]["labels"]["kueue.x-k8s.io/queue-name"] - == "custom-queue" - ) - - -def test_rayjob_no_kueue_label_for_existing_cluster(auto_mock_setup): - """ - Test RayJob doesn't add Kueue label for existing clusters. 
- """ - mock_api_instance = auto_mock_setup["rayjob_api"] - mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} - - # Using existing cluster (no cluster_config) - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - ) - - rayjob.submit() - - # Verify no Kueue label was added - call_args = mock_api_instance.submit_job.call_args - submitted_job = call_args.kwargs["job"] - assert "kueue.x-k8s.io/queue-name" not in submitted_job["metadata"]["labels"] + # Verify no Kueue label was added + call_args = mock_api_instance.submit_job.call_args + submitted_job = call_args.kwargs["job"] + assert "kueue.x-k8s.io/queue-name" not in submitted_job["metadata"]["labels"] def test_rayjob_with_ttl_and_deadline(auto_mock_setup): @@ -2078,7 +1188,7 @@ def test_rayjob_with_ttl_and_deadline(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", ttl_seconds_after_finished=300, active_deadline_seconds=600, ) @@ -2104,7 +1214,7 @@ def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) rayjob.submit() @@ -2117,7 +1227,7 @@ def test_rayjob_shutdown_after_job_finishes(auto_mock_setup): rayjob2 = RayJob( job_name="test-job2", cluster_name="existing-cluster", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) rayjob2.submit() @@ -2143,7 +1253,7 @@ def test_rayjob_stop_delete_resubmit_logging(auto_mock_setup, caplog): job_name="test-rayjob", cluster_name="test-cluster", namespace="test-namespace", - entrypoint="python test.py", + entrypoint="python -c 'print()'", ) with caplog.at_level("INFO"): @@ -2185,7 +1295,7 @@ def test_rayjob_initialization_logging(auto_mock_setup, caplog): rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - 
entrypoint="python test.py", + entrypoint="python -c 'print()'", ) assert "Creating new cluster: test-job-cluster" in caplog.text @@ -2205,7 +1315,7 @@ def test_build_submitter_pod_template_uses_default_image(auto_mock_setup, mocker rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -2242,7 +1352,7 @@ def test_build_submitter_pod_template_uses_cluster_config_image( rayjob = RayJob( job_name="test-job", cluster_config=cluster_config, - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -2269,7 +1379,7 @@ def test_build_submitter_pod_template_with_files(auto_mock_setup): rayjob = RayJob( job_name="test-job", cluster_name="existing-cluster", - entrypoint="python test.py", + entrypoint="python -c 'print()'", namespace="test-namespace", ) @@ -2291,3 +1401,383 @@ def test_build_submitter_pod_template_with_files(auto_mock_setup): # Verify paths match keys for item in config_map_items: assert item["key"] == item["path"] + + +def test_validate_working_dir_entrypoint_no_runtime_env(auto_mock_setup, tmp_path): + """ + Test validation checks file exists even when no runtime_env is specified. + """ + # Create the script file + script_file = tmp_path / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {script_file}", + namespace="test-namespace", + ) + + # Should not raise exception (file exists) + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_no_working_dir(auto_mock_setup, tmp_path): + """ + Test validation checks file when runtime_env has no working_dir. 
+ """ + # Create the script file + script_file = tmp_path / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {script_file}", + namespace="test-namespace", + runtime_env=RuntimeEnv(pip=["numpy"]), + ) + + # Should not raise exception (file exists) + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_remote_working_dir(auto_mock_setup): + """ + Test validation skips ALL checks for remote working_dir. + """ + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python nonexistent_script.py", # File doesn't exist, but should be ignored + namespace="test-namespace", + runtime_env=RuntimeEnv( + working_dir="https://github.com/user/repo/archive/main.zip" + ), + ) + + # Should not raise any exception (remote working_dir skips all validation) + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_no_python_file(auto_mock_setup): + """ + Test validation passes when entrypoint has no Python file reference. + """ + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="echo 'hello world'", # No Python file + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir="."), + ) + + # Should not raise any exception + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_no_redundancy(auto_mock_setup, tmp_path): + """ + Test validation passes when entrypoint doesn't reference working_dir. 
+ """ + # Create test directory and file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python script.py", # No directory prefix + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + # Should not raise any exception + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_redundant_reference_error( + auto_mock_setup, tmp_path +): + """ + Test validation raises error when entrypoint redundantly references working_dir. + """ + # Create test directory and file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {test_dir}/script.py", # Redundant reference + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + # Should raise ValueError with helpful message + with pytest.raises(ValueError, match="Working directory conflict detected"): + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_with_dot_slash(auto_mock_setup, tmp_path): + """ + Test validation handles paths with ./ prefix correctly. 
+ """ + # Create test directory and file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + # Change to temp directory so relative paths work + import os + + original_cwd = os.getcwd() + try: + os.chdir(tmp_path) + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python ./testdir/script.py", # With ./ prefix + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir="./testdir"), # With ./ prefix + ) + + # Should raise ValueError (redundant reference) + with pytest.raises(ValueError, match="Working directory conflict detected"): + rayjob._validate_working_dir_entrypoint() + finally: + os.chdir(original_cwd) + + +def test_validate_working_dir_entrypoint_submit_integration(auto_mock_setup, tmp_path): + """ + Test that validation is called during submit() and blocks submission. + """ + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}} + + # Create test directory and file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {test_dir}/script.py", # Redundant reference + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + # Should raise ValueError during submit() before API call + with pytest.raises(ValueError, match="Working directory conflict detected"): + rayjob.submit() + + # Verify submit_job was never called (validation blocked it) + mock_api_instance.submit_job.assert_not_called() + + +def test_validate_working_dir_entrypoint_error_message_format( + auto_mock_setup, tmp_path +): + """ + Test that error message contains helpful information. 
+ """ + # Create test directory and file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {test_dir}/script.py", + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + try: + rayjob._validate_working_dir_entrypoint() + assert False, "Should have raised ValueError" + except ValueError as e: + error_msg = str(e) + # Verify error message contains key information + assert "Working directory conflict detected" in error_msg + assert "working_dir:" in error_msg + assert "entrypoint references:" in error_msg + assert "Fix: Remove the directory prefix" in error_msg + assert "python script.py" in error_msg # Suggested fix + + +def test_validate_working_dir_entrypoint_subdirectory_valid(auto_mock_setup, tmp_path): + """ + Test validation passes when entrypoint references subdirectory within working_dir. + """ + # Create test directory structure: testdir/subdir/script.py + test_dir = tmp_path / "testdir" + test_dir.mkdir() + sub_dir = test_dir / "subdir" + sub_dir.mkdir() + script_file = sub_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python subdir/script.py", # Correct: relative to working_dir + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + # Should not raise any exception + rayjob._validate_working_dir_entrypoint() + + +def test_validate_working_dir_entrypoint_runtime_env_as_dict(auto_mock_setup, tmp_path): + """ + Test validation works when runtime_env is passed as dict (not RuntimeEnv object). 
+ """ + # Create test directory and file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {test_dir}/script.py", # Redundant reference + namespace="test-namespace", + runtime_env={"working_dir": str(test_dir)}, # Dict instead of RuntimeEnv + ) + + # Should raise ValueError even with dict runtime_env + with pytest.raises(ValueError, match="Working directory conflict detected"): + rayjob._validate_working_dir_entrypoint() + + +def test_validate_file_exists_with_working_dir(auto_mock_setup, tmp_path): + """ + Test validation checks that entrypoint file exists within working_dir. + """ + # Create working directory but NOT the script file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python script.py", # File doesn't exist + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + # Should raise ValueError about missing file + with pytest.raises(ValueError, match="Entrypoint file not found"): + rayjob._validate_working_dir_entrypoint() + + +def test_validate_file_exists_without_working_dir(auto_mock_setup, tmp_path): + """ + Test validation checks that entrypoint file exists when no working_dir and using ./ prefix. 
+ """ + # Don't create the script file + script_path = "./missing_script.py" + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint=f"python {script_path}", # File doesn't exist (local path with ./) + namespace="test-namespace", + ) + + # Should raise ValueError about missing file + with pytest.raises(ValueError, match="Entrypoint file not found"): + rayjob._validate_working_dir_entrypoint() + + +def test_validate_existing_file_with_working_dir_passes(auto_mock_setup, tmp_path): + """ + Test validation passes when file exists in working_dir. + """ + # Create working directory AND the script file + test_dir = tmp_path / "testdir" + test_dir.mkdir() + script_file = test_dir / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python script.py", # File exists + namespace="test-namespace", + runtime_env=RuntimeEnv(working_dir=str(test_dir)), + ) + + # Should not raise any exception + rayjob._validate_working_dir_entrypoint() + + +def test_validate_inline_python_command_skipped(auto_mock_setup): + """ + Test validation skips inline Python commands (no file reference). + """ + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python -c 'print(\"hello world\")'", # No file reference + namespace="test-namespace", + ) + + # Should not raise any exception (no file to validate) + rayjob._validate_working_dir_entrypoint() + + +def test_validate_simple_filename_without_working_dir_missing(auto_mock_setup): + """ + Test validation checks simple filenames without working_dir. 
+ """ + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python script.py", # File doesn't exist locally + namespace="test-namespace", + ) + + # Should raise ValueError (file will be extracted from local, so must exist) + with pytest.raises(ValueError, match="Entrypoint file not found"): + rayjob._validate_working_dir_entrypoint() + + +def test_validate_simple_filename_without_working_dir_exists(auto_mock_setup, tmp_path): + """ + Test validation passes when simple filename exists locally without working_dir. + """ + import os + + original_cwd = os.getcwd() + try: + os.chdir(tmp_path) + # Create the script file in current directory + script_file = tmp_path / "script.py" + script_file.write_text("print('hello')") + + rayjob = RayJob( + job_name="test-job", + cluster_name="test-cluster", + entrypoint="python script.py", # Simple filename exists locally + namespace="test-namespace", + ) + + # Should not raise exception (file exists) + rayjob._validate_working_dir_entrypoint() + finally: + os.chdir(original_cwd) diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py new file mode 100644 index 00000000..7114ca4c --- /dev/null +++ b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py @@ -0,0 +1,935 @@ +# Copyright 2025 IBM, Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pytest +import os +from unittest.mock import MagicMock, patch +from codeflare_sdk.common.utils.constants import MOUNT_PATH, RAY_VERSION +from ray.runtime_env import RuntimeEnv + +from codeflare_sdk.ray.rayjobs.rayjob import RayJob +from codeflare_sdk.ray.cluster.config import ClusterConfiguration +from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig +from kubernetes.client import ( + V1Volume, + V1VolumeMount, + V1Toleration, + V1ConfigMapVolumeSource, + ApiException, +) + +from codeflare_sdk.ray.rayjobs.runtime_env import ( + create_configmap_from_spec, + extract_all_local_files, +) + + +def test_rayjob_with_remote_working_dir(auto_mock_setup): + """ + Test RayJob with remote working directory in runtime_env. + Should not extract local files and should pass through remote URL. + """ + runtime_env = RuntimeEnv( + working_dir="https://github.com/org/repo/archive/refs/heads/main.zip", + pip=["numpy", "pandas"], + env_vars={"TEST_VAR": "test_value"}, + ) + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + cluster_name="test-cluster", + runtime_env=runtime_env, + namespace="test-namespace", + ) + + assert rayjob.runtime_env == runtime_env + + # Should not extract any local files due to remote working_dir + files = extract_all_local_files(rayjob) + assert files is None + + rayjob_cr = rayjob._build_rayjob_cr() + + # Should have runtimeEnvYAML with all fields + expected_runtime_env = ( + "env_vars:\n" + " TEST_VAR: test_value\n" + "pip:\n" + "- numpy\n" + "- pandas\n" + "working_dir: https://github.com/org/repo/archive/refs/heads/main.zip\n" + ) + assert rayjob_cr["spec"]["runtimeEnvYAML"] == expected_runtime_env + + # Should not have submitterPodTemplate since no local files + assert "submitterPodTemplate" not in rayjob_cr["spec"] + + # Entrypoint should be unchanged + assert rayjob_cr["spec"]["entrypoint"] == "python test.py" + + +def test_build_file_configmap_spec(): + """ + Test building ConfigMap specification for 
files. + """ + config = ManagedClusterConfig() + files = {"main.py": "print('main')", "helper.py": "print('helper')"} + + spec = config.build_file_configmap_spec( + job_name="test-job", namespace="test-namespace", files=files + ) + + assert spec["apiVersion"] == "v1" + assert spec["kind"] == "ConfigMap" + assert spec["metadata"]["name"] == "test-job-files" + assert spec["metadata"]["namespace"] == "test-namespace" + assert spec["data"] == files + + +def test_build_file_volume_specs(): + """ + Test building volume and mount specifications for files. + """ + config = ManagedClusterConfig() + + volume_spec, mount_spec = config.build_file_volume_specs( + configmap_name="test-files", mount_path="/custom/path" + ) + + assert volume_spec["name"] == "ray-job-files" + assert volume_spec["configMap"]["name"] == "test-files" + + assert mount_spec["name"] == "ray-job-files" + assert mount_spec["mountPath"] == "/custom/path" + + +def test_add_file_volumes(): + """ + Test adding file volumes to cluster configuration. + """ + config = ManagedClusterConfig() + + # Initially no volumes + assert len(config.volumes) == 0 + assert len(config.volume_mounts) == 0 + + config.add_file_volumes(configmap_name="test-files") + + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 1 + + volume = config.volumes[0] + mount = config.volume_mounts[0] + + assert volume.name == "ray-job-files" + assert volume.config_map.name == "test-files" + + assert mount.name == "ray-job-files" + assert mount.mount_path == MOUNT_PATH + + +def test_add_file_volumes_duplicate_prevention(): + """ + Test that adding file volumes twice doesn't create duplicates. 
+ """ + config = ManagedClusterConfig() + + # Add volumes twice + config.add_file_volumes(configmap_name="test-files") + config.add_file_volumes(configmap_name="test-files") + + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 1 + + +def test_create_configmap_from_spec(auto_mock_setup): + """ + Test creating ConfigMap via Kubernetes API. + """ + mock_api_instance = auto_mock_setup["k8s_api"] + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-files", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + result = create_configmap_from_spec(rayjob, configmap_spec) + + assert result == "test-files" + mock_api_instance.create_namespaced_config_map.assert_called_once() + + +def test_create_configmap_already_exists(auto_mock_setup): + """ + Test creating ConfigMap when it already exists (409 conflict). + """ + mock_api_instance = auto_mock_setup["k8s_api"] + + mock_api_instance.create_namespaced_config_map.side_effect = ApiException( + status=409 + ) + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-files", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + result = create_configmap_from_spec(rayjob, configmap_spec) + + assert result == "test-files" + mock_api_instance.create_namespaced_config_map.assert_called_once() + mock_api_instance.replace_namespaced_config_map.assert_called_once() + + +def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, caplog): + """ + Test creating ConfigMap with owner reference from valid RayJob result. 
+ """ + mock_api_instance = auto_mock_setup["k8s_api"] + + # Mock client.V1ObjectMeta and V1ConfigMap + mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") + mock_metadata_instance = MagicMock() + mock_v1_metadata.return_value = mock_metadata_instance + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": { + "name": "test-files", + "namespace": "test-namespace", + "labels": { + "ray.io/job-name": "test-job", + "app.kubernetes.io/managed-by": "codeflare-sdk", + "app.kubernetes.io/component": "rayjob-files", + }, + }, + "data": {"test.py": "print('test')"}, + } + + # Valid RayJob result with UID + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "a4dd4c5a-ab61-411d-b4d1-4abb5177422a", + } + } + + with caplog.at_level("INFO"): + result = create_configmap_from_spec(rayjob, configmap_spec, rayjob_result) + + assert result == "test-files" + + # Verify owner reference was set + expected_owner_ref = mocker.ANY # We'll check via the logs + assert ( + "Adding owner reference to ConfigMap 'test-files' with RayJob UID: a4dd4c5a-ab61-411d-b4d1-4abb5177422a" + in caplog.text + ) + + assert mock_metadata_instance.owner_references is not None + mock_api_instance.create_namespaced_config_map.assert_called_once() + + +def test_create_configmap_without_owner_reference_no_uid( + mocker, auto_mock_setup, caplog +): + """ + Test creating ConfigMap without owner reference when RayJob has no UID. 
+ """ + mock_api_instance = auto_mock_setup["k8s_api"] + + mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") + mock_metadata_instance = MagicMock() + mock_v1_metadata.return_value = mock_metadata_instance + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-files", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + # RayJob result without UID + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + # No UID field + } + } + + with caplog.at_level("WARNING"): + result = create_configmap_from_spec(rayjob, configmap_spec, rayjob_result) + + assert result == "test-files" + + # Verify warning was logged and no owner reference was set + assert ( + "No valid RayJob result with UID found, ConfigMap 'test-files' will not have owner reference" + in caplog.text + ) + + # The important part is that the warning was logged, indicating no owner reference was set + mock_api_instance.create_namespaced_config_map.assert_called_once() + + +def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): + """ + Test creating ConfigMap with None or invalid rayjob_result. 
+ """ + mock_api_instance = auto_mock_setup["k8s_api"] + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-files", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + # Test with None + with caplog.at_level("WARNING"): + result = create_configmap_from_spec(rayjob, configmap_spec, None) + + assert result == "test-files" + assert "No valid RayJob result with UID found" in caplog.text + + # Test with string instead of dict + caplog.clear() + with caplog.at_level("WARNING"): + result = create_configmap_from_spec(rayjob, configmap_spec, "not-a-dict") + + assert result == "test-files" + assert "No valid RayJob result with UID found" in caplog.text + + +def test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): + """ + Test the Kubernetes best practice flow: pre-declare volume, submit, create ConfigMap. 
+ """ + mocker.patch("kubernetes.config.load_kube_config") + + mock_api_class = mocker.patch("codeflare_sdk.ray.rayjobs.rayjob.RayjobApi") + mock_api_instance = MagicMock() + mock_api_class.return_value = mock_api_instance + + submit_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-12345", + } + } + mock_api_instance.submit_job.return_value = submit_result + + # Mock create_file_configmap where it's used (imported into rayjob module) + mock_create_cm = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.create_file_configmap" + ) + mock_add_volumes = mocker.patch.object(ManagedClusterConfig, "add_file_volumes") + + # RayClusterApi is already mocked by auto_mock_setup + + test_file = tmp_path / "test.py" + test_file.write_text("print('test')") + + call_order = [] + + def track_add_volumes(*args, **kwargs): + call_order.append("add_volumes") + # Should be called with ConfigMap name + assert args[0] == "test-job-files" + + def track_submit(*args, **kwargs): + call_order.append("submit_job") + return submit_result + + def track_create_cm(*args, **kwargs): + call_order.append("create_configmap") + # Args should be: (job, files, rayjob_result) + assert len(args) >= 3, f"Expected 3 args, got {len(args)}: {args}" + assert args[2] == submit_result # rayjob_result should be third arg + + mock_add_volumes.side_effect = track_add_volumes + mock_api_instance.submit_job.side_effect = track_submit + mock_create_cm.side_effect = track_create_cm + + original_cwd = os.getcwd() + try: + os.chdir(tmp_path) + + cluster_config = ManagedClusterConfig() + + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python test.py", + namespace="test-namespace", + ) + + rayjob.submit() + finally: + os.chdir(original_cwd) + + # Verify the order: submit → create ConfigMap + assert call_order == ["submit_job", "create_configmap"] + + mock_api_instance.submit_job.assert_called_once() + 
mock_create_cm.assert_called_once() + + # Verify create_file_configmap was called with: (job, files, rayjob_result) + # Files dict includes metadata key __entrypoint_path__ for single file case + call_args = mock_create_cm.call_args[0] + assert call_args[0] == rayjob + assert call_args[2] == submit_result + # Check that the actual file content is present + assert "test.py" in call_args[1] + assert call_args[1]["test.py"] == "print('test')" + + +def test_rayjob_submit_with_files_new_cluster(auto_mock_setup, tmp_path): + """ + Test RayJob submission with file detection for new cluster. + """ + mock_api_instance = auto_mock_setup["rayjob_api"] + mock_api_instance.submit_job.return_value = True + + mock_k8s_instance = auto_mock_setup["k8s_api"] + + # Create test file + test_file = tmp_path / "test.py" + test_file.write_text("print('Hello from the test file!')") + + cluster_config = ManagedClusterConfig() + + original_cwd = os.getcwd() + os.chdir(tmp_path) + + try: + rayjob = RayJob( + job_name="test-job", + cluster_config=cluster_config, + entrypoint="python test.py", + namespace="test-namespace", + ) + + # Submit should detect files and handle them + result = rayjob.submit() + + assert result == "test-job" + + mock_k8s_instance.create_namespaced_config_map.assert_called_once() + + assert len(cluster_config.volumes) == 0 + assert len(cluster_config.volume_mounts) == 0 + # Entrypoint should be adjusted to use just the filename + assert rayjob.entrypoint == "python test.py" + + finally: + os.chdir(original_cwd) + + +def test_create_configmap_api_error_non_409(auto_mock_setup): + """ + Test _create_configmap_from_spec handles non-409 API errors. 
+ """ + mock_api_instance = auto_mock_setup["k8s_api"] + + # Configure to raise 500 error + mock_api_instance.create_namespaced_config_map.side_effect = ApiException( + status=500 + ) + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + configmap_spec = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "test-files", "namespace": "test-namespace"}, + "data": {"test.py": "print('test')"}, + } + + with pytest.raises(RuntimeError, match="Failed to create ConfigMap"): + create_configmap_from_spec(rayjob, configmap_spec) + + +def test_add_file_volumes_existing_volume_skip(): + """ + Test add_file_volumes skips when volume already exists (missing coverage). + """ + config = ManagedClusterConfig() + + # Pre-add a volume with same name + existing_volume = V1Volume( + name="ray-job-files", + config_map=V1ConfigMapVolumeSource(name="existing-files"), + ) + config.volumes.append(existing_volume) + + config.add_file_volumes(configmap_name="new-files") + assert len(config.volumes) == 1 + assert len(config.volume_mounts) == 0 # Mount not added due to volume skip + + +def test_add_file_volumes_existing_mount_skip(): + """ + Test add_file_volumes skips when mount already exists (missing coverage). + """ + config = ManagedClusterConfig() + + # Pre-add a mount with same name + existing_mount = V1VolumeMount(name="ray-job-files", mount_path="/existing/path") + config.volume_mounts.append(existing_mount) + + config.add_file_volumes(configmap_name="new-files") + assert len(config.volumes) == 0 # Volume not added due to mount skip + assert len(config.volume_mounts) == 1 + + +def test_zip_directory_functionality(tmp_path): + """ + Test _zip_directory with real directories and files. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory + + # Create test directory structure + test_dir = tmp_path / "working_dir" + test_dir.mkdir() + + # Create some test files + (test_dir / "main.py").write_text("print('main script')") + (test_dir / "utils.py").write_text("def helper(): pass") + + # Create subdirectory with file + sub_dir = test_dir / "subdir" + sub_dir.mkdir() + (sub_dir / "nested.py").write_text("print('nested file')") + + # Test zipping + zip_data = _zip_directory(str(test_dir)) + + assert zip_data is not None + assert len(zip_data) > 0 + assert isinstance(zip_data, bytes) + + +def test_zip_directory_error_handling(): + """ + Test _zip_directory error handling for IO errors during zipping. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory + + # Mock os.walk to raise an OSError + with patch("os.walk", side_effect=OSError("Permission denied")): + zip_data = _zip_directory("/some/path") + assert zip_data is None + + +def test_extract_all_local_files_with_working_dir(tmp_path): + """ + Test extract_all_local_files with local working directory. 
+ """ + # Create test working directory + working_dir = tmp_path / "working_dir" + working_dir.mkdir() + (working_dir / "script.py").write_text("print('working dir script')") + + runtime_env = RuntimeEnv(working_dir=str(working_dir)) + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + runtime_env=runtime_env, + namespace="test-namespace", + cluster_name="test-cluster", + ) + + files = extract_all_local_files(rayjob) + + assert files is not None + assert "working_dir.zip" in files + assert isinstance(files["working_dir.zip"], str) # base64 encoded + + # Verify it's valid base64 + import base64 + + try: + decoded = base64.b64decode(files["working_dir.zip"]) + assert len(decoded) > 0 + except Exception: + pytest.fail("Invalid base64 encoding") + + +def test_extract_single_entrypoint_file_error_handling(tmp_path): + """ + Test _extract_single_entrypoint_file with file read errors. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import _extract_single_entrypoint_file + + # Create a file that exists but make it unreadable + test_file = tmp_path / "unreadable.py" + test_file.write_text("print('test')") + + rayjob = RayJob( + job_name="test-job", + entrypoint=f"python {test_file}", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + # Mock open to raise IOError + with patch("builtins.open", side_effect=IOError("Permission denied")): + result = _extract_single_entrypoint_file(rayjob) + assert result is None + + +def test_extract_single_entrypoint_file_no_match(): + """ + Test _extract_single_entrypoint_file with no Python file matches. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import _extract_single_entrypoint_file + + rayjob = RayJob( + job_name="test-job", + entrypoint="echo 'no python files here'", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + result = _extract_single_entrypoint_file(rayjob) + assert result is None + + +def test_parse_requirements_file_valid(tmp_path): + """ + Test parse_requirements_file with valid requirements.txt. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import parse_requirements_file + + # Create test requirements file + req_file = tmp_path / "requirements.txt" + req_file.write_text( + """# This is a comment +numpy==1.21.0 +pandas>=1.3.0 + +# Another comment +scikit-learn +""" + ) + + result = parse_requirements_file(str(req_file)) + + assert result is not None + assert len(result) == 3 + assert "numpy==1.21.0" in result + assert "pandas>=1.3.0" in result + assert "scikit-learn" in result + # Comments and empty lines should be filtered out + assert "# This is a comment" not in result + + +def test_parse_requirements_file_missing(): + """ + Test parse_requirements_file with non-existent file. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import parse_requirements_file + + result = parse_requirements_file("/non/existent/requirements.txt") + assert result is None + + +def test_parse_requirements_file_read_error(tmp_path): + """ + Test parse_requirements_file with file read error. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import parse_requirements_file + + # Create a file + req_file = tmp_path / "requirements.txt" + req_file.write_text("numpy==1.21.0") + + # Mock open to raise IOError + with patch("builtins.open", side_effect=IOError("Permission denied")): + result = parse_requirements_file(str(req_file)) + assert result is None + + +def test_process_pip_dependencies_list(): + """ + Test process_pip_dependencies with list input. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_pip_dependencies + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + pip_list = ["numpy", "pandas", "scikit-learn"] + result = process_pip_dependencies(rayjob, pip_list) + + assert result == pip_list + + +def test_process_pip_dependencies_requirements_file(tmp_path): + """ + Test process_pip_dependencies with requirements.txt path. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_pip_dependencies + + # Create test requirements file + req_file = tmp_path / "requirements.txt" + req_file.write_text("numpy==1.21.0\npandas>=1.3.0") + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + result = process_pip_dependencies(rayjob, str(req_file)) + + assert result is not None + assert len(result) == 2 + assert "numpy==1.21.0" in result + assert "pandas>=1.3.0" in result + + +def test_process_pip_dependencies_dict_format(): + """ + Test process_pip_dependencies with dict format containing packages. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_pip_dependencies + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + pip_dict = {"packages": ["numpy", "pandas"], "pip_check": False} + result = process_pip_dependencies(rayjob, pip_dict) + + assert result == ["numpy", "pandas"] + + +def test_process_pip_dependencies_unsupported_format(): + """ + Test process_pip_dependencies with unsupported format. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_pip_dependencies + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + # Test with unsupported format (int) + result = process_pip_dependencies(rayjob, 12345) + assert result is None + + +def test_process_runtime_env_local_working_dir(tmp_path): + """ + Test process_runtime_env with local working directory. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_runtime_env, UNZIP_PATH + + # Create test working directory + working_dir = tmp_path / "working_dir" + working_dir.mkdir() + (working_dir / "script.py").write_text("print('test')") + + runtime_env = RuntimeEnv( + working_dir=str(working_dir), + env_vars={"TEST_VAR": "test_value"}, + ) + + rayjob = RayJob( + job_name="test-job", + entrypoint="python script.py", + runtime_env=runtime_env, + namespace="test-namespace", + cluster_name="test-cluster", + ) + + result = process_runtime_env(rayjob) + + assert result is not None + assert f"working_dir: {UNZIP_PATH}" in result + assert "env_vars:" in result + assert "TEST_VAR: test_value" in result + + +def test_process_runtime_env_single_file_case(tmp_path): + """ + Test process_runtime_env with single file case (no working_dir in runtime_env). + """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_runtime_env + from codeflare_sdk.common.utils.constants import MOUNT_PATH + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + # Files dict without working_dir.zip (single file case) + files = {"test.py": "print('test')"} + + result = process_runtime_env(rayjob, files) + + assert result is not None + assert f"working_dir: {MOUNT_PATH}" in result + + +def test_process_runtime_env_no_processing_needed(): + """ + Test process_runtime_env returns None when no processing needed. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import process_runtime_env + + rayjob = RayJob( + job_name="test-job", + entrypoint="python test.py", + namespace="test-namespace", + cluster_name="test-cluster", + ) + + # No runtime_env and no files + result = process_runtime_env(rayjob) + assert result is None + + +def test_normalize_runtime_env_with_none(): + """ + Test _normalize_runtime_env with None input. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import _normalize_runtime_env + + result = _normalize_runtime_env(None) + assert result is None + + +def test_extract_single_entrypoint_file_no_entrypoint(): + """ + Test _extract_single_entrypoint_file with no entrypoint. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import _extract_single_entrypoint_file + + rayjob = RayJob( + job_name="test-job", + entrypoint=None, # No entrypoint + namespace="test-namespace", + cluster_name="test-cluster", + ) + + result = _extract_single_entrypoint_file(rayjob) + assert result is None + + +def test_create_file_configmap_filters_metadata_keys(auto_mock_setup, tmp_path): + """ + Test create_file_configmap filters out metadata keys from files dict. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import create_file_configmap + + rayjob = RayJob( + job_name="test-job", + cluster_name="existing-cluster", + entrypoint="python test.py", + namespace="test-namespace", + ) + + # Files dict with metadata key that should be filtered out + files = { + "__entrypoint_path__": "some/path/test.py", # Should be filtered + "test.py": "print('test')", # Should remain + } + + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-12345", + } + } + + # This should not raise an error and should filter out metadata keys + create_file_configmap(rayjob, files, rayjob_result) + + # Verify the ConfigMap was created (mocked) + mock_api_instance = auto_mock_setup["k8s_api"] + mock_api_instance.create_namespaced_config_map.assert_called_once() + + # The call should have filtered data (only test.py, not __entrypoint_path__) + call_args = mock_api_instance.create_namespaced_config_map.call_args + configmap_data = call_args[1]["body"].data + assert "test.py" in configmap_data + assert "__entrypoint_path__" not in configmap_data diff --git a/src/codeflare_sdk/ray/rayjobs/test_status.py b/src/codeflare_sdk/ray/rayjobs/test/test_status.py similarity index 100% rename from src/codeflare_sdk/ray/rayjobs/test_status.py rename to src/codeflare_sdk/ray/rayjobs/test/test_status.py diff --git a/tests/e2e/rayjob/rayjob_existing_cluster_test.py b/tests/e2e/rayjob/rayjob_existing_cluster_test.py index e4865d9c..82858d28 100644 --- a/tests/e2e/rayjob/rayjob_existing_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_existing_cluster_test.py @@ -41,18 +41,22 @@ def test_existing_kueue_cluster(self): ) auth.login() + resources = get_platform_appropriate_resources() + cluster = Cluster( ClusterConfiguration( name=cluster_name, namespace=self.namespace, num_workers=1, - head_cpu_requests="500m", - head_cpu_limits="500m", - worker_cpu_requests=1, - worker_cpu_limits=1, - worker_memory_requests=1, - 
worker_memory_limits=4, - image=get_ray_image(), + head_cpu_requests=resources["head_cpu_requests"], + head_cpu_limits=resources["head_cpu_limits"], + head_memory_requests=resources["head_memory_requests"], + head_memory_limits=resources["head_memory_limits"], + worker_cpu_requests=resources["worker_cpu_requests"], + worker_cpu_limits=resources["worker_cpu_limits"], + worker_memory_requests=resources["worker_memory_requests"], + worker_memory_limits=resources["worker_memory_limits"], + image=constants.CUDA_PY312_RUNTIME_IMAGE, local_queue=self.local_queues[0], write_to_file=True, verify_tls=False, @@ -63,7 +67,7 @@ def test_existing_kueue_cluster(self): # Wait for cluster to be ready (with Kueue admission) print(f"Waiting for cluster '{cluster_name}' to be ready...") - cluster.wait_ready(timeout=300, dashboard_check=False) + cluster.wait_ready(timeout=600) print(f"✓ Cluster '{cluster_name}' is ready") # RayJob with explicit local_queue From 4de071c6b4bddb933a45a3675342fcc09decfcf8 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Tue, 14 Oct 2025 12:31:38 +0100 Subject: [PATCH 26/33] RHOAIENG-33283: Change ConfigMaps to Secrets and exclude .ipynb files in zipping --- .github/workflows/rayjob_e2e_tests.yaml | 4 +- src/codeflare_sdk/ray/rayjobs/config.py | 37 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 36 +- src/codeflare_sdk/ray/rayjobs/runtime_env.py | 111 ++++-- .../ray/rayjobs/test/test_config.py | 27 +- .../ray/rayjobs/test/test_rayjob.py | 26 +- .../ray/rayjobs/test/test_runtime_env.py | 331 +++++++++++++----- .../rayjob/rayjob_lifecycled_cluster_test.py | 50 +-- 8 files changed, 407 insertions(+), 215 deletions(-) diff --git a/.github/workflows/rayjob_e2e_tests.yaml b/.github/workflows/rayjob_e2e_tests.yaml index b5ba067f..ba0659c0 100644 --- a/.github/workflows/rayjob_e2e_tests.yaml +++ b/.github/workflows/rayjob_e2e_tests.yaml @@ -115,8 +115,8 @@ jobs: kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user 
kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user - kubectl create clusterrole configmap-manager --verb=get,list,create,delete,update,patch --resource=configmaps - kubectl create clusterrolebinding sdk-user-configmap-manager --clusterrole=configmap-manager --user=sdk-user + kubectl create clusterrole secret-manager --verb=get,list,create,delete,update,patch --resource=secrets + kubectl create clusterrolebinding sdk-user-secret-manager --clusterrole=secret-manager --user=sdk-user kubectl create clusterrole workload-reader --verb=get,list,watch --resource=workloads kubectl create clusterrolebinding sdk-user-workload-reader --clusterrole=workload-reader --user=sdk-user kubectl config use-context sdk-user diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index 023d57f6..c1fe0daa 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -24,6 +24,8 @@ from kubernetes.client import ( V1ConfigMapVolumeSource, V1KeyToPath, + V1LocalObjectReference, + V1SecretVolumeSource, V1Toleration, V1Volume, V1VolumeMount, @@ -415,8 +417,6 @@ def _build_pod_spec(self, container: V1Container, is_head: bool) -> V1PodSpec: # Add image pull secrets if specified if hasattr(self, "image_pull_secrets") and self.image_pull_secrets: - from kubernetes.client import V1LocalObjectReference - pod_spec.image_pull_secrets = [ V1LocalObjectReference(name=secret) for secret in self.image_pull_secrets @@ -448,12 +448,12 @@ def _build_env_vars(self) -> list: """Build environment variables list.""" return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()] - def add_file_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH): + def add_file_volumes(self, secret_name: str, mount_path: str = MOUNT_PATH): """ Add file volume and mount 
references to cluster configuration. Args: - configmap_name: Name of the ConfigMap containing files + secret_name: Name of the Secret containing files mount_path: Where to mount files in containers (default: /home/ray/scripts) """ # Check if file volume already exists @@ -478,7 +478,7 @@ def add_file_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH): # Add file volume to cluster configuration file_volume = V1Volume( - name=volume_name, config_map=V1ConfigMapVolumeSource(name=configmap_name) + name=volume_name, secret=V1SecretVolumeSource(secret_name=secret_name) ) self.volumes.append(file_volume) @@ -487,36 +487,37 @@ def add_file_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH): self.volume_mounts.append(file_mount) logger.info( - f"Added file volume '{configmap_name}' to cluster config: mount_path={mount_path}" + f"Added file volume '{secret_name}' to cluster config: mount_path={mount_path}" ) - def validate_configmap_size(self, files: Dict[str, str]) -> None: + def validate_secret_size(self, files: Dict[str, str]) -> None: total_size = sum(len(content.encode("utf-8")) for content in files.values()) if total_size > 1024 * 1024: # 1MB raise ValueError( - f"ConfigMap size exceeds 1MB limit. Total size: {total_size} bytes" + f"Secret size exceeds 1MB limit. 
Total size: {total_size} bytes" ) - def build_file_configmap_spec( + def build_file_secret_spec( self, job_name: str, namespace: str, files: Dict[str, str] ) -> Dict[str, Any]: """ - Build ConfigMap specification for files + Build Secret specification for files Args: - job_name: Name of the RayJob (used for ConfigMap naming) + job_name: Name of the RayJob (used for Secret naming) namespace: Kubernetes namespace files: Dictionary of file_name -> file_content Returns: - Dict: ConfigMap specification ready for Kubernetes API + Dict: Secret specification ready for Kubernetes API """ - configmap_name = f"{job_name}-files" + secret_name = f"{job_name}-files" return { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": { - "name": configmap_name, + "name": secret_name, "namespace": namespace, "labels": { "ray.io/job-name": job_name, @@ -528,19 +529,19 @@ def build_file_configmap_spec( } def build_file_volume_specs( - self, configmap_name: str, mount_path: str = MOUNT_PATH + self, secret_name: str, mount_path: str = MOUNT_PATH ) -> Tuple[Dict[str, Any], Dict[str, Any]]: """ Build volume and mount specifications for files Args: - configmap_name: Name of the ConfigMap containing files + secret_name: Name of the Secret containing files mount_path: Where to mount files in containers Returns: Tuple of (volume_spec, mount_spec) as dictionaries """ - volume_spec = {"name": "ray-job-files", "configMap": {"name": configmap_name}} + volume_spec = {"name": "ray-job-files", "secret": {"secretName": secret_name}} mount_spec = {"name": "ray-job-files", "mountPath": mount_path} diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 7ab9a0f5..76e396ee 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -31,7 +31,7 @@ from python_client.kuberay_cluster_api import RayClusterApi from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from 
codeflare_sdk.ray.rayjobs.runtime_env import ( - create_file_configmap, + create_file_secret, extract_all_local_files, process_runtime_env, ) @@ -169,10 +169,10 @@ def submit(self) -> str: # Extract files from entrypoint and runtime_env working_dir files = extract_all_local_files(self) - # Create ConfigMap for files (will be mounted to submitter pod) - configmap_name = None + # Create Secret for files (will be mounted to submitter pod) + secret_name = None if files: - configmap_name = f"{self.name}-files" + secret_name = f"{self.name}-files" rayjob_cr = self._build_rayjob_cr() @@ -182,9 +182,9 @@ def submit(self) -> str: if result: logger.info(f"Successfully submitted RayJob {self.name}") - # Create ConfigMap with owner reference after RayJob exists + # Create Secret with owner reference after RayJob exists if files: - create_file_configmap(self, files, result) + create_file_secret(self, files, result) return self.name else: @@ -285,10 +285,10 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: # Add submitterPodTemplate if we have files to mount if files: - configmap_name = f"{self.name}-files" + secret_name = f"{self.name}-files" rayjob_cr["spec"][ "submitterPodTemplate" - ] = self._build_submitter_pod_template(files, configmap_name) + ] = self._build_submitter_pod_template(files, secret_name) # Configure cluster: either use existing or create new if self._cluster_config is not None: @@ -311,17 +311,17 @@ def _build_rayjob_cr(self) -> Dict[str, Any]: return rayjob_cr def _build_submitter_pod_template( - self, files: Dict[str, str], configmap_name: str + self, files: Dict[str, str], secret_name: str ) -> Dict[str, Any]: """ - Build submitterPodTemplate with ConfigMap volume mount for local files. + Build submitterPodTemplate with Secret volume mount for local files. If files contain working_dir.zip, an init container will unzip it before the main submitter container runs. 
Args: files: Dict of file_name -> file_content - configmap_name: Name of the ConfigMap containing the files + secret_name: Name of the Secret containing the files Returns: submitterPodTemplate specification @@ -337,8 +337,8 @@ def _build_submitter_pod_template( ): image = self._cluster_config.image - # Build ConfigMap items for each file - config_map_items = [] + # Build Secret items for each file + secret_items = [] entrypoint_path = files.get( "__entrypoint_path__" ) # Metadata for single file case @@ -349,9 +349,9 @@ def _build_submitter_pod_template( # For single file case, use the preserved path structure if entrypoint_path: - config_map_items.append({"key": file_name, "path": entrypoint_path}) + secret_items.append({"key": file_name, "path": entrypoint_path}) else: - config_map_items.append({"key": file_name, "path": file_name}) + secret_items.append({"key": file_name, "path": file_name}) # Check if we need to unzip working_dir has_working_dir_zip = "working_dir.zip" in files @@ -378,9 +378,9 @@ def _build_submitter_pod_template( "volumes": [ { "name": "ray-job-files", - "configMap": { - "name": configmap_name, - "items": config_map_items, + "secret": { + "secretName": secret_name, + "items": secret_items, }, } ], diff --git a/src/codeflare_sdk/ray/rayjobs/runtime_env.py b/src/codeflare_sdk/ray/rayjobs/runtime_env.py index 0b9a71b2..d6d2230b 100644 --- a/src/codeflare_sdk/ray/rayjobs/runtime_env.py +++ b/src/codeflare_sdk/ray/rayjobs/runtime_env.py @@ -28,6 +28,24 @@ # Path where working_dir will be unzipped on submitter pod UNZIP_PATH = "/tmp/rayjob-working-dir" +# File pattern to exclude from working directory zips +# Jupyter notebooks can contain sensitive outputs, tokens, and large data +JUPYTER_NOTEBOOK_PATTERN = r"\.ipynb$" + + +def _should_exclude_file(file_path: str) -> bool: + """ + Check if file should be excluded from working directory zip. + Currently excludes Jupyter notebook files (.ipynb). 
+ + Args: + file_path: Relative file path within the working directory + + Returns: + True if file should be excluded, False otherwise + """ + return bool(re.search(JUPYTER_NOTEBOOK_PATTERN, file_path, re.IGNORECASE)) + def _normalize_runtime_env( runtime_env: Optional[RuntimeEnv], @@ -39,7 +57,7 @@ def _normalize_runtime_env( def extract_all_local_files(job: RayJob) -> Optional[Dict[str, str]]: """ - Prepare local files for ConfigMap upload. + Prepare local files for Secret upload. - If runtime_env has local working_dir: zip entire directory into single file - If single entrypoint file (no working_dir): extract that file @@ -76,7 +94,7 @@ def extract_all_local_files(job: RayJob) -> Optional[Dict[str, str]]: logger.info(f"Zipping local working_dir: {working_dir}") zip_data = _zip_directory(working_dir) if zip_data: - # Encode zip as base64 for ConfigMap storage + # Encode zip as base64 for Secret storage zip_base64 = base64.b64encode(zip_data).decode("utf-8") return {"working_dir.zip": zip_base64} @@ -90,7 +108,7 @@ def extract_all_local_files(job: RayJob) -> Optional[Dict[str, str]]: def _zip_directory(directory_path: str) -> Optional[bytes]: """ - Zip entire directory preserving structure. + Zip entire directory preserving structure, excluding Jupyter notebook files. 
Args: directory_path: Path to directory to zip @@ -101,6 +119,7 @@ def _zip_directory(directory_path: str) -> Optional[bytes]: try: # Create in-memory zip file zip_buffer = io.BytesIO() + excluded_count = 0 with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf: # Walk through directory and add all files @@ -109,13 +128,26 @@ def _zip_directory(directory_path: str) -> Optional[bytes]: file_path = os.path.join(root, file) # Calculate relative path from directory_path arcname = os.path.relpath(file_path, directory_path) + + # Check if file should be excluded + if _should_exclude_file(arcname): + excluded_count += 1 + logger.debug(f"Excluded from zip: {arcname}") + continue + zipf.write(file_path, arcname) logger.debug(f"Added to zip: {arcname}") zip_data = zip_buffer.getvalue() - logger.info( + + # Log summary with exclusion count + log_message = ( f"Successfully zipped directory: {directory_path} ({len(zip_data)} bytes)" ) + if excluded_count > 0: + log_message += f" - Excluded {excluded_count} Jupyter notebook files" + logger.info(log_message) + return zip_data except (IOError, OSError) as e: @@ -128,7 +160,7 @@ def _extract_single_entrypoint_file(job: RayJob) -> Optional[Dict[str, str]]: Extract single Python file from entrypoint if no working_dir specified. Returns a dict with metadata about the file path structure so we can - preserve it when mounting via ConfigMap. + preserve it when mounting via Secret. 
Args: job: RayJob instance @@ -150,8 +182,8 @@ def _extract_single_entrypoint_file(job: RayJob) -> Optional[Dict[str, str]]: with open(file_path, "r") as f: content = f.read() - # Use basename as key (ConfigMap keys can't have slashes) - # But store the full path for later use in ConfigMap item.path + # Use basename as key (Secret keys can't have slashes) + # But store the full path for later use in Secret item.path filename = os.path.basename(file_path) relative_path = file_path.lstrip("./") @@ -283,23 +315,23 @@ def parse_requirements_file(requirements_path: str) -> Optional[List[str]]: return None -def create_configmap_from_spec( - job: RayJob, configmap_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None +def create_secret_from_spec( + job: RayJob, secret_spec: Dict[str, Any], rayjob_result: Dict[str, Any] = None ) -> str: """ - Create ConfigMap from specification via Kubernetes API. + Create Secret from specification via Kubernetes API. Args: - configmap_spec: ConfigMap specification dictionary + secret_spec: Secret specification dictionary rayjob_result: The result from RayJob creation containing UID Returns: - str: Name of the created ConfigMap + str: Name of the created Secret """ - configmap_name = configmap_spec["metadata"]["name"] + secret_name = secret_spec["metadata"]["name"] - metadata = client.V1ObjectMeta(**configmap_spec["metadata"]) + metadata = client.V1ObjectMeta(**secret_spec["metadata"]) # Add owner reference if we have the RayJob result if ( @@ -308,7 +340,7 @@ def create_configmap_from_spec( and rayjob_result.get("metadata", {}).get("uid") ): logger.info( - f"Adding owner reference to ConfigMap '{configmap_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" + f"Adding owner reference to Secret '{secret_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" ) metadata.owner_references = [ client.V1OwnerReference( @@ -322,52 +354,55 @@ def create_configmap_from_spec( ] else: logger.warning( - f"No valid RayJob result with 
UID found, ConfigMap '{configmap_name}' will not have owner reference. Result: {rayjob_result}" + f"No valid RayJob result with UID found, Secret '{secret_name}' will not have owner reference. Result: {rayjob_result}" ) - # Convert dict spec to V1ConfigMap - configmap = client.V1ConfigMap( + # Convert dict spec to V1Secret + # Use stringData instead of data to avoid double base64 encoding + # Our zip files are already base64-encoded, so stringData will handle the final encoding + secret = client.V1Secret( metadata=metadata, - data=configmap_spec["data"], + type=secret_spec.get("type", "Opaque"), + string_data=secret_spec["data"], ) - # Create ConfigMap via Kubernetes API + # Create Secret via Kubernetes API k8s_api = client.CoreV1Api(get_api_client()) try: - k8s_api.create_namespaced_config_map(namespace=job.namespace, body=configmap) + k8s_api.create_namespaced_secret(namespace=job.namespace, body=secret) logger.info( - f"Created ConfigMap '{configmap_name}' with {len(configmap_spec['data'])} files" + f"Created Secret '{secret_name}' with {len(secret_spec['data'])} files" ) except client.ApiException as e: if e.status == 409: # Already exists - logger.info(f"ConfigMap '{configmap_name}' already exists, updating...") - k8s_api.replace_namespaced_config_map( - name=configmap_name, namespace=job.namespace, body=configmap + logger.info(f"Secret '{secret_name}' already exists, updating...") + k8s_api.replace_namespaced_secret( + name=secret_name, namespace=job.namespace, body=secret ) else: - raise RuntimeError(f"Failed to create ConfigMap '{configmap_name}': {e}") + raise RuntimeError(f"Failed to create Secret '{secret_name}': {e}") - return configmap_name + return secret_name -def create_file_configmap( +def create_file_secret( job: RayJob, files: Dict[str, str], rayjob_result: Dict[str, Any] ): """ - Create ConfigMap with owner reference for local files. + Create Secret with owner reference for local files. 
""" - # Use a basic config builder for ConfigMap creation + # Use a basic config builder for Secret creation config_builder = ManagedClusterConfig() - # Filter out metadata keys (like __entrypoint_path__) from ConfigMap data - configmap_files = {k: v for k, v in files.items() if not k.startswith("__")} + # Filter out metadata keys (like __entrypoint_path__) from Secret data + secret_files = {k: v for k, v in files.items() if not k.startswith("__")} - # Validate and build ConfigMap spec - config_builder.validate_configmap_size(configmap_files) - configmap_spec = config_builder.build_file_configmap_spec( - job_name=job.name, namespace=job.namespace, files=configmap_files + # Validate and build Secret spec + config_builder.validate_secret_size(secret_files) + secret_spec = config_builder.build_file_secret_spec( + job_name=job.name, namespace=job.namespace, files=secret_files ) - # Create ConfigMap with owner reference + # Create Secret with owner reference # TODO Error handling - create_configmap_from_spec(job, configmap_spec, rayjob_result) + create_secret_from_spec(job, secret_spec, rayjob_result) diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_config.py b/src/codeflare_sdk/ray/rayjobs/test/test_config.py index 4f538763..182ff90c 100644 --- a/src/codeflare_sdk/ray/rayjobs/test/test_config.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_config.py @@ -5,7 +5,7 @@ import pytest from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig, DEFAULT_ACCELERATORS from kubernetes.client import V1VolumeMount -from kubernetes.client import V1Volume, V1ConfigMapVolumeSource +from kubernetes.client import V1Volume, V1SecretVolumeSource def test_accelerator_configs_defaults_to_default_accelerators(): @@ -177,12 +177,12 @@ def test_add_file_volumes_existing_volume_early_return(): # Pre-add a volume with same name existing_volume = V1Volume( name="ray-job-files", - config_map=V1ConfigMapVolumeSource(name="existing-files"), + 
secret=V1SecretVolumeSource(secret_name="existing-files"), ) config.volumes.append(existing_volume) # Should return early and not add duplicate - config.add_file_volumes(configmap_name="new-files") + config.add_file_volumes(secret_name="new-files") # Should still have only one volume, no mount added assert len(config.volumes) == 1 @@ -199,34 +199,35 @@ def test_add_file_volumes_existing_mount_early_return(): config.volume_mounts.append(existing_mount) # Should return early and not add duplicate - config.add_file_volumes(configmap_name="new-files") + config.add_file_volumes(secret_name="new-files") # Should still have only one mount, no volume added assert len(config.volumes) == 0 assert len(config.volume_mounts) == 1 -def test_build_file_configmap_spec_labels(): - """Test that build_file_configmap_spec creates ConfigMap with correct labels.""" +def test_build_file_secret_spec_labels(): + """Test that build_file_secret_spec creates Secret with correct labels.""" config = ManagedClusterConfig() job_name = "test-job" namespace = "test-namespace" files = {"test.py": "print('hello')", "helper.py": "# helper code"} - configmap_spec = config.build_file_configmap_spec(job_name, namespace, files) + secret_spec = config.build_file_secret_spec(job_name, namespace, files) - assert configmap_spec["apiVersion"] == "v1" - assert configmap_spec["kind"] == "ConfigMap" - assert configmap_spec["metadata"]["name"] == f"{job_name}-files" - assert configmap_spec["metadata"]["namespace"] == namespace + assert secret_spec["apiVersion"] == "v1" + assert secret_spec["kind"] == "Secret" + assert secret_spec["type"] == "Opaque" + assert secret_spec["metadata"]["name"] == f"{job_name}-files" + assert secret_spec["metadata"]["namespace"] == namespace - labels = configmap_spec["metadata"]["labels"] + labels = secret_spec["metadata"]["labels"] assert labels["ray.io/job-name"] == job_name assert labels["app.kubernetes.io/managed-by"] == "codeflare-sdk" assert labels["app.kubernetes.io/component"] 
== "rayjob-files" - assert configmap_spec["data"] == files + assert secret_spec["data"] == files def test_managed_cluster_config_uses_update_image_for_head(mocker): diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py index bfdaaabd..928cc1f8 100644 --- a/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py @@ -1320,10 +1320,10 @@ def test_build_submitter_pod_template_uses_default_image(auto_mock_setup, mocker ) files = {"test.py": "print('hello')"} - configmap_name = "test-files" + secret_name = "test-files" # Call _build_submitter_pod_template - submitter_template = rayjob._build_submitter_pod_template(files, configmap_name) + submitter_template = rayjob._build_submitter_pod_template(files, secret_name) # Verify get_ray_image_for_python_version was called mock_get_image.assert_called_once() @@ -1357,10 +1357,10 @@ def test_build_submitter_pod_template_uses_cluster_config_image( ) files = {"test.py": "print('hello')"} - configmap_name = "test-files" + secret_name = "test-files" # Call _build_submitter_pod_template - submitter_template = rayjob._build_submitter_pod_template(files, configmap_name) + submitter_template = rayjob._build_submitter_pod_template(files, secret_name) # Verify get_ray_image_for_python_version was called mock_get_image.assert_called_once() @@ -1374,7 +1374,7 @@ def test_build_submitter_pod_template_uses_cluster_config_image( def test_build_submitter_pod_template_with_files(auto_mock_setup): """ - Test that _build_submitter_pod_template() correctly builds ConfigMap items for files. + Test that _build_submitter_pod_template() correctly builds Secret items for files. 
""" rayjob = RayJob( job_name="test-job", @@ -1384,22 +1384,22 @@ def test_build_submitter_pod_template_with_files(auto_mock_setup): ) files = {"main.py": "print('main')", "helper.py": "print('helper')"} - configmap_name = "test-files" + secret_name = "test-files" # Call _build_submitter_pod_template - submitter_template = rayjob._build_submitter_pod_template(files, configmap_name) + submitter_template = rayjob._build_submitter_pod_template(files, secret_name) - # Verify ConfigMap items are created for each file - config_map_items = submitter_template["spec"]["volumes"][0]["configMap"]["items"] - assert len(config_map_items) == 2 + # Verify Secret items are created for each file + secret_items = submitter_template["spec"]["volumes"][0]["secret"]["items"] + assert len(secret_items) == 2 - # Verify each file has a ConfigMap item - file_names = [item["key"] for item in config_map_items] + # Verify each file has a Secret item + file_names = [item["key"] for item in secret_items] assert "main.py" in file_names assert "helper.py" in file_names # Verify paths match keys - for item in config_map_items: + for item in secret_items: assert item["key"] == item["path"] diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py index 7114ca4c..7a4150e5 100644 --- a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py @@ -15,6 +15,7 @@ import pytest import os +import io from unittest.mock import MagicMock, patch from codeflare_sdk.common.utils.constants import MOUNT_PATH, RAY_VERSION from ray.runtime_env import RuntimeEnv @@ -25,13 +26,11 @@ from kubernetes.client import ( V1Volume, V1VolumeMount, - V1Toleration, - V1ConfigMapVolumeSource, ApiException, ) from codeflare_sdk.ray.rayjobs.runtime_env import ( - create_configmap_from_spec, + create_secret_from_spec, extract_all_local_files, ) @@ -81,19 +80,20 @@ def 
test_rayjob_with_remote_working_dir(auto_mock_setup): assert rayjob_cr["spec"]["entrypoint"] == "python test.py" -def test_build_file_configmap_spec(): +def test_build_file_secret_spec(): """ - Test building ConfigMap specification for files. + Test building Secret specification for files. """ config = ManagedClusterConfig() files = {"main.py": "print('main')", "helper.py": "print('helper')"} - spec = config.build_file_configmap_spec( + spec = config.build_file_secret_spec( job_name="test-job", namespace="test-namespace", files=files ) assert spec["apiVersion"] == "v1" - assert spec["kind"] == "ConfigMap" + assert spec["kind"] == "Secret" + assert spec["type"] == "Opaque" assert spec["metadata"]["name"] == "test-job-files" assert spec["metadata"]["namespace"] == "test-namespace" assert spec["data"] == files @@ -106,11 +106,11 @@ def test_build_file_volume_specs(): config = ManagedClusterConfig() volume_spec, mount_spec = config.build_file_volume_specs( - configmap_name="test-files", mount_path="/custom/path" + secret_name="test-files", mount_path="/custom/path" ) assert volume_spec["name"] == "ray-job-files" - assert volume_spec["configMap"]["name"] == "test-files" + assert volume_spec["secret"]["secretName"] == "test-files" assert mount_spec["name"] == "ray-job-files" assert mount_spec["mountPath"] == "/custom/path" @@ -126,7 +126,7 @@ def test_add_file_volumes(): assert len(config.volumes) == 0 assert len(config.volume_mounts) == 0 - config.add_file_volumes(configmap_name="test-files") + config.add_file_volumes(secret_name="test-files") assert len(config.volumes) == 1 assert len(config.volume_mounts) == 1 @@ -135,7 +135,7 @@ def test_add_file_volumes(): mount = config.volume_mounts[0] assert volume.name == "ray-job-files" - assert volume.config_map.name == "test-files" + assert volume.secret.secret_name == "test-files" assert mount.name == "ray-job-files" assert mount.mount_path == MOUNT_PATH @@ -148,16 +148,16 @@ def test_add_file_volumes_duplicate_prevention(): 
config = ManagedClusterConfig() # Add volumes twice - config.add_file_volumes(configmap_name="test-files") - config.add_file_volumes(configmap_name="test-files") + config.add_file_volumes(secret_name="test-files") + config.add_file_volumes(secret_name="test-files") assert len(config.volumes) == 1 assert len(config.volume_mounts) == 1 -def test_create_configmap_from_spec(auto_mock_setup): +def test_create_secret_from_spec(auto_mock_setup): """ - Test creating ConfigMap via Kubernetes API. + Test creating Secret via Kubernetes API. """ mock_api_instance = auto_mock_setup["k8s_api"] @@ -168,28 +168,27 @@ def test_create_configmap_from_spec(auto_mock_setup): namespace="test-namespace", ) - configmap_spec = { + secret_spec = { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } - result = create_configmap_from_spec(rayjob, configmap_spec) + result = create_secret_from_spec(rayjob, secret_spec) assert result == "test-files" - mock_api_instance.create_namespaced_config_map.assert_called_once() + mock_api_instance.create_namespaced_secret.assert_called_once() -def test_create_configmap_already_exists(auto_mock_setup): +def test_create_secret_already_exists(auto_mock_setup): """ - Test creating ConfigMap when it already exists (409 conflict). + Test creating Secret when it already exists (409 conflict). 
""" mock_api_instance = auto_mock_setup["k8s_api"] - mock_api_instance.create_namespaced_config_map.side_effect = ApiException( - status=409 - ) + mock_api_instance.create_namespaced_secret.side_effect = ApiException(status=409) rayjob = RayJob( job_name="test-job", @@ -198,27 +197,28 @@ def test_create_configmap_already_exists(auto_mock_setup): namespace="test-namespace", ) - configmap_spec = { + secret_spec = { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } - result = create_configmap_from_spec(rayjob, configmap_spec) + result = create_secret_from_spec(rayjob, secret_spec) assert result == "test-files" - mock_api_instance.create_namespaced_config_map.assert_called_once() - mock_api_instance.replace_namespaced_config_map.assert_called_once() + mock_api_instance.create_namespaced_secret.assert_called_once() + mock_api_instance.replace_namespaced_secret.assert_called_once() -def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, caplog): +def test_create_secret_with_owner_reference_basic(mocker, auto_mock_setup, caplog): """ - Test creating ConfigMap with owner reference from valid RayJob result. + Test creating Secret with owner reference from valid RayJob result. 
""" mock_api_instance = auto_mock_setup["k8s_api"] - # Mock client.V1ObjectMeta and V1ConfigMap + # Mock client.V1ObjectMeta and V1Secret mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") mock_metadata_instance = MagicMock() mock_v1_metadata.return_value = mock_metadata_instance @@ -230,9 +230,10 @@ def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, ca namespace="test-namespace", ) - configmap_spec = { + secret_spec = { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": { "name": "test-files", "namespace": "test-namespace", @@ -255,26 +256,24 @@ def test_create_configmap_with_owner_reference_basic(mocker, auto_mock_setup, ca } with caplog.at_level("INFO"): - result = create_configmap_from_spec(rayjob, configmap_spec, rayjob_result) + result = create_secret_from_spec(rayjob, secret_spec, rayjob_result) assert result == "test-files" # Verify owner reference was set expected_owner_ref = mocker.ANY # We'll check via the logs assert ( - "Adding owner reference to ConfigMap 'test-files' with RayJob UID: a4dd4c5a-ab61-411d-b4d1-4abb5177422a" + "Adding owner reference to Secret 'test-files' with RayJob UID: a4dd4c5a-ab61-411d-b4d1-4abb5177422a" in caplog.text ) assert mock_metadata_instance.owner_references is not None - mock_api_instance.create_namespaced_config_map.assert_called_once() + mock_api_instance.create_namespaced_secret.assert_called_once() -def test_create_configmap_without_owner_reference_no_uid( - mocker, auto_mock_setup, caplog -): +def test_create_secret_without_owner_reference_no_uid(mocker, auto_mock_setup, caplog): """ - Test creating ConfigMap without owner reference when RayJob has no UID. + Test creating Secret without owner reference when RayJob has no UID. 
""" mock_api_instance = auto_mock_setup["k8s_api"] @@ -289,9 +288,10 @@ def test_create_configmap_without_owner_reference_no_uid( namespace="test-namespace", ) - configmap_spec = { + secret_spec = { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } @@ -306,23 +306,23 @@ def test_create_configmap_without_owner_reference_no_uid( } with caplog.at_level("WARNING"): - result = create_configmap_from_spec(rayjob, configmap_spec, rayjob_result) + result = create_secret_from_spec(rayjob, secret_spec, rayjob_result) assert result == "test-files" # Verify warning was logged and no owner reference was set assert ( - "No valid RayJob result with UID found, ConfigMap 'test-files' will not have owner reference" + "No valid RayJob result with UID found, Secret 'test-files' will not have owner reference" in caplog.text ) # The important part is that the warning was logged, indicating no owner reference was set - mock_api_instance.create_namespaced_config_map.assert_called_once() + mock_api_instance.create_namespaced_secret.assert_called_once() -def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): +def test_create_secret_with_invalid_rayjob_result(auto_mock_setup, caplog): """ - Test creating ConfigMap with None or invalid rayjob_result. + Test creating Secret with None or invalid rayjob_result. 
""" mock_api_instance = auto_mock_setup["k8s_api"] @@ -333,16 +333,17 @@ def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): namespace="test-namespace", ) - configmap_spec = { + secret_spec = { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } # Test with None with caplog.at_level("WARNING"): - result = create_configmap_from_spec(rayjob, configmap_spec, None) + result = create_secret_from_spec(rayjob, secret_spec, None) assert result == "test-files" assert "No valid RayJob result with UID found" in caplog.text @@ -350,7 +351,7 @@ def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): # Test with string instead of dict caplog.clear() with caplog.at_level("WARNING"): - result = create_configmap_from_spec(rayjob, configmap_spec, "not-a-dict") + result = create_secret_from_spec(rayjob, secret_spec, "not-a-dict") assert result == "test-files" assert "No valid RayJob result with UID found" in caplog.text @@ -358,7 +359,7 @@ def test_create_configmap_with_invalid_rayjob_result(auto_mock_setup, caplog): def test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): """ - Test the Kubernetes best practice flow: pre-declare volume, submit, create ConfigMap. + Test the Kubernetes best practice flow: pre-declare volume, submit, create Secret. 
""" mocker.patch("kubernetes.config.load_kube_config") @@ -375,9 +376,9 @@ def test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): } mock_api_instance.submit_job.return_value = submit_result - # Mock create_file_configmap where it's used (imported into rayjob module) - mock_create_cm = mocker.patch( - "codeflare_sdk.ray.rayjobs.rayjob.create_file_configmap" + # Mock create_file_secret where it's used (imported into rayjob module) + mock_create_secret = mocker.patch( + "codeflare_sdk.ray.rayjobs.rayjob.create_file_secret" ) mock_add_volumes = mocker.patch.object(ManagedClusterConfig, "add_file_volumes") @@ -390,22 +391,22 @@ def test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): def track_add_volumes(*args, **kwargs): call_order.append("add_volumes") - # Should be called with ConfigMap name + # Should be called with Secret name assert args[0] == "test-job-files" def track_submit(*args, **kwargs): call_order.append("submit_job") return submit_result - def track_create_cm(*args, **kwargs): - call_order.append("create_configmap") + def track_create_secret(*args, **kwargs): + call_order.append("create_secret") # Args should be: (job, files, rayjob_result) assert len(args) >= 3, f"Expected 3 args, got {len(args)}: {args}" assert args[2] == submit_result # rayjob_result should be third arg mock_add_volumes.side_effect = track_add_volumes mock_api_instance.submit_job.side_effect = track_submit - mock_create_cm.side_effect = track_create_cm + mock_create_secret.side_effect = track_create_secret original_cwd = os.getcwd() try: @@ -424,15 +425,15 @@ def track_create_cm(*args, **kwargs): finally: os.chdir(original_cwd) - # Verify the order: submit → create ConfigMap - assert call_order == ["submit_job", "create_configmap"] + # Verify the order: submit → create Secret + assert call_order == ["submit_job", "create_secret"] mock_api_instance.submit_job.assert_called_once() - mock_create_cm.assert_called_once() + 
mock_create_secret.assert_called_once() - # Verify create_file_configmap was called with: (job, files, rayjob_result) + # Verify create_file_secret was called with: (job, files, rayjob_result) # Files dict includes metadata key __entrypoint_path__ for single file case - call_args = mock_create_cm.call_args[0] + call_args = mock_create_secret.call_args[0] assert call_args[0] == rayjob assert call_args[2] == submit_result # Check that the actual file content is present @@ -471,7 +472,7 @@ def test_rayjob_submit_with_files_new_cluster(auto_mock_setup, tmp_path): assert result == "test-job" - mock_k8s_instance.create_namespaced_config_map.assert_called_once() + mock_k8s_instance.create_namespaced_secret.assert_called_once() assert len(cluster_config.volumes) == 0 assert len(cluster_config.volume_mounts) == 0 @@ -482,16 +483,14 @@ def test_rayjob_submit_with_files_new_cluster(auto_mock_setup, tmp_path): os.chdir(original_cwd) -def test_create_configmap_api_error_non_409(auto_mock_setup): +def test_create_secret_api_error_non_409(auto_mock_setup): """ - Test _create_configmap_from_spec handles non-409 API errors. + Test create_secret_from_spec handles non-409 API errors. 
""" mock_api_instance = auto_mock_setup["k8s_api"] # Configure to raise 500 error - mock_api_instance.create_namespaced_config_map.side_effect = ApiException( - status=500 - ) + mock_api_instance.create_namespaced_secret.side_effect = ApiException(status=500) rayjob = RayJob( job_name="test-job", @@ -500,31 +499,34 @@ def test_create_configmap_api_error_non_409(auto_mock_setup): namespace="test-namespace", ) - configmap_spec = { + secret_spec = { "apiVersion": "v1", - "kind": "ConfigMap", + "kind": "Secret", + "type": "Opaque", "metadata": {"name": "test-files", "namespace": "test-namespace"}, "data": {"test.py": "print('test')"}, } - with pytest.raises(RuntimeError, match="Failed to create ConfigMap"): - create_configmap_from_spec(rayjob, configmap_spec) + with pytest.raises(RuntimeError, match="Failed to create Secret"): + create_secret_from_spec(rayjob, secret_spec) def test_add_file_volumes_existing_volume_skip(): """ Test add_file_volumes skips when volume already exists (missing coverage). 
""" + from kubernetes.client import V1SecretVolumeSource + config = ManagedClusterConfig() # Pre-add a volume with same name existing_volume = V1Volume( name="ray-job-files", - config_map=V1ConfigMapVolumeSource(name="existing-files"), + secret=V1SecretVolumeSource(secret_name="existing-files"), ) config.volumes.append(existing_volume) - config.add_file_volumes(configmap_name="new-files") + config.add_file_volumes(secret_name="new-files") assert len(config.volumes) == 1 assert len(config.volume_mounts) == 0 # Mount not added due to volume skip @@ -539,7 +541,7 @@ def test_add_file_volumes_existing_mount_skip(): existing_mount = V1VolumeMount(name="ray-job-files", mount_path="/existing/path") config.volume_mounts.append(existing_mount) - config.add_file_volumes(configmap_name="new-files") + config.add_file_volumes(secret_name="new-files") assert len(config.volumes) == 0 # Volume not added due to mount skip assert len(config.volume_mounts) == 1 @@ -571,6 +573,102 @@ def test_zip_directory_functionality(tmp_path): assert isinstance(zip_data, bytes) +def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog): + """ + Test that Jupyter notebook files (.ipynb) are excluded from zip. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory + import zipfile + + # Create test directory with mixed file types + test_dir = tmp_path / "working_dir" + test_dir.mkdir() + + # Create Python files (should be included) + (test_dir / "main.py").write_text("print('main script')") + (test_dir / "utils.py").write_text("def helper(): pass") + + # Create Jupyter notebook files (should be excluded) + (test_dir / "analysis.ipynb").write_text('{"cells": [], "metadata": {}}') + (test_dir / "experiment.IPYNB").write_text( + '{"cells": [], "metadata": {}}' + ) # Test case insensitive + + # Create subdirectory with mixed files + sub_dir = test_dir / "notebooks" + sub_dir.mkdir() + (sub_dir / "data_exploration.ipynb").write_text('{"cells": [], "metadata": {}}') + (sub_dir / "helper.py").write_text("print('nested file')") + + # Test zipping + with caplog.at_level("INFO"): + zip_data = _zip_directory(str(test_dir)) + + assert zip_data is not None + assert len(zip_data) > 0 + + # Verify log message includes exclusion count + assert "Excluded 3 Jupyter notebook files" in caplog.text + + # Verify excluded files are not in the zip + zip_buffer = io.BytesIO(zip_data) + with zipfile.ZipFile(zip_buffer, "r") as zipf: + zip_contents = zipf.namelist() + + # Python files should be present + assert "main.py" in zip_contents + assert "utils.py" in zip_contents + assert "notebooks/helper.py" in zip_contents + + # Jupyter notebooks should be excluded + assert "analysis.ipynb" not in zip_contents + assert "experiment.IPYNB" not in zip_contents + assert "notebooks/data_exploration.ipynb" not in zip_contents + + +def test_zip_directory_no_exclusions_when_no_notebooks(tmp_path, caplog): + """ + Test that no exclusion message is logged when no notebook files exist. 
+ """ + from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory + + # Create test directory with only Python files + test_dir = tmp_path / "working_dir" + test_dir.mkdir() + (test_dir / "main.py").write_text("print('main script')") + (test_dir / "utils.py").write_text("def helper(): pass") + + # Test zipping + with caplog.at_level("INFO"): + zip_data = _zip_directory(str(test_dir)) + + assert zip_data is not None + + # Verify log message does NOT mention exclusions + assert "Excluded" not in caplog.text + assert "Jupyter notebook files" not in caplog.text + + +def test_should_exclude_file_function(): + """ + Test the _should_exclude_file helper function directly. + """ + from codeflare_sdk.ray.rayjobs.runtime_env import _should_exclude_file + + # Should exclude .ipynb files (case insensitive) + assert _should_exclude_file("notebook.ipynb") is True + assert _should_exclude_file("analysis.IPYNB") is True + assert _should_exclude_file("data/exploration.ipynb") is True + assert _should_exclude_file("subdir/nested.Ipynb") is True + + # Should NOT exclude other files + assert _should_exclude_file("script.py") is False + assert _should_exclude_file("data.json") is False + assert _should_exclude_file("requirements.txt") is False + assert _should_exclude_file("README.md") is False + assert _should_exclude_file("model.pkl") is False + + def test_zip_directory_error_handling(): """ Test _zip_directory error handling for IO errors during zipping. @@ -618,6 +716,63 @@ def test_extract_all_local_files_with_working_dir(tmp_path): pytest.fail("Invalid base64 encoding") +def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog): + """ + Test that extract_all_local_files excludes Jupyter notebooks when zipping working directory. 
+ """ + import zipfile + import base64 + + # Create test working directory with mixed files + working_dir = tmp_path / "working_dir" + working_dir.mkdir() + + # Python files that should be included + (working_dir / "main.py").write_text("print('main script')") + (working_dir / "helper.py").write_text("def helper_function(): pass") + + # Jupyter notebooks that should be excluded + (working_dir / "analysis.ipynb").write_text( + '{"cells": [{"cell_type": "code", "source": ["print(\'hello\')"]}]}' + ) + (working_dir / "data.ipynb").write_text('{"cells": [], "metadata": {}}') + + runtime_env = RuntimeEnv(working_dir=str(working_dir)) + + rayjob = RayJob( + job_name="test-job", + entrypoint="python main.py", + runtime_env=runtime_env, + namespace="test-namespace", + cluster_name="test-cluster", + ) + + # This should zip the directory and exclude notebooks + with caplog.at_level("INFO"): + files = extract_all_local_files(rayjob) + + assert files is not None + assert "working_dir.zip" in files + + # Verify exclusion was logged + assert "Excluded 2 Jupyter notebook files" in caplog.text + + # Decode and verify zip contents + zip_data = base64.b64decode(files["working_dir.zip"]) + zip_buffer = io.BytesIO(zip_data) + + with zipfile.ZipFile(zip_buffer, "r") as zipf: + zip_contents = zipf.namelist() + + # Python files should be present + assert "main.py" in zip_contents + assert "helper.py" in zip_contents + + # Jupyter notebooks should be excluded + assert "analysis.ipynb" not in zip_contents + assert "data.ipynb" not in zip_contents + + def test_extract_single_entrypoint_file_error_handling(tmp_path): """ Test _extract_single_entrypoint_file with file read errors. 
@@ -894,11 +1049,11 @@ def test_extract_single_entrypoint_file_no_entrypoint(): assert result is None -def test_create_file_configmap_filters_metadata_keys(auto_mock_setup, tmp_path): +def test_create_file_secret_filters_metadata_keys(auto_mock_setup, tmp_path): """ - Test create_file_configmap filters out metadata keys from files dict. + Test create_file_secret filters out metadata keys from files dict. """ - from codeflare_sdk.ray.rayjobs.runtime_env import create_file_configmap + from codeflare_sdk.ray.rayjobs.runtime_env import create_file_secret rayjob = RayJob( job_name="test-job", @@ -922,14 +1077,14 @@ def test_create_file_configmap_filters_metadata_keys(auto_mock_setup, tmp_path): } # This should not raise an error and should filter out metadata keys - create_file_configmap(rayjob, files, rayjob_result) + create_file_secret(rayjob, files, rayjob_result) - # Verify the ConfigMap was created (mocked) + # Verify the Secret was created (mocked) mock_api_instance = auto_mock_setup["k8s_api"] - mock_api_instance.create_namespaced_config_map.assert_called_once() + mock_api_instance.create_namespaced_secret.assert_called_once() # The call should have filtered data (only test.py, not __entrypoint_path__) - call_args = mock_api_instance.create_namespaced_config_map.call_args - configmap_data = call_args[1]["body"].data - assert "test.py" in configmap_data - assert "__entrypoint_path__" not in configmap_data + call_args = mock_api_instance.create_namespaced_secret.call_args + secret_data = call_args[1]["body"].string_data # Changed from data to string_data + assert "test.py" in secret_data + assert "__entrypoint_path__" not in secret_data diff --git a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py index 821cb1c7..10390011 100644 --- a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py @@ -25,7 +25,7 @@ def teardown_method(self): 
delete_kueue_resources(self) def test_lifecycled_kueue_managed(self): - """Test RayJob with Kueue-managed lifecycled cluster with ConfigMap validation.""" + """Test RayJob with Kueue-managed lifecycled cluster with Secret validation.""" self.setup_method() create_namespace(self) create_kueue_resources(self) @@ -49,7 +49,7 @@ def test_lifecycled_kueue_managed(self): worker_memory_limits=resources["worker_memory_limits"], ) - # Create a temporary script file to test ConfigMap functionality + # Create a temporary script file to test Secret functionality with tempfile.NamedTemporaryFile( mode="w", suffix=".py", delete=False, dir=os.getcwd() ) as script_file: @@ -57,7 +57,7 @@ def test_lifecycled_kueue_managed(self): """ import ray ray.init() - print('Kueue job with ConfigMap done') + print('Kueue job with Secret done') ray.shutdown() """ ) @@ -76,8 +76,8 @@ def test_lifecycled_kueue_managed(self): assert rayjob.submit() == job_name - # Verify ConfigMap was created with owner reference - self.verify_configmap_with_owner_reference(rayjob) + # Verify Secret was created with owner reference + self.verify_secret_with_owner_reference(rayjob) assert self.job_api.wait_until_job_running( name=rayjob.name, k8s_namespace=rayjob.namespace, timeout=600 @@ -190,33 +190,33 @@ def test_lifecycled_kueue_resource_queueing(self): except: pass - def verify_configmap_with_owner_reference(self, rayjob: RayJob): - """Verify that the ConfigMap was created with proper owner reference to the RayJob.""" + def verify_secret_with_owner_reference(self, rayjob: RayJob): + """Verify that the Secret was created with proper owner reference to the RayJob.""" v1 = client.CoreV1Api() - configmap_name = f"{rayjob.name}-files" + secret_name = f"{rayjob.name}-files" try: - # Get the ConfigMap - configmap = v1.read_namespaced_config_map( - name=configmap_name, namespace=rayjob.namespace + # Get the Secret + secret = v1.read_namespaced_secret( + name=secret_name, namespace=rayjob.namespace ) - # Verify 
ConfigMap exists - assert configmap is not None, f"ConfigMap {configmap_name} not found" + # Verify Secret exists + assert secret is not None, f"Secret {secret_name} not found" # Verify it contains the script - assert configmap.data is not None, "ConfigMap has no data" - assert len(configmap.data) > 0, "ConfigMap data is empty" + assert secret.data is not None, "Secret has no data" + assert len(secret.data) > 0, "Secret data is empty" # Verify owner reference assert ( - configmap.metadata.owner_references is not None - ), "ConfigMap has no owner references" + secret.metadata.owner_references is not None + ), "Secret has no owner references" assert ( - len(configmap.metadata.owner_references) > 0 - ), "ConfigMap owner references list is empty" + len(secret.metadata.owner_references) > 0 + ), "Secret owner references list is empty" - owner_ref = configmap.metadata.owner_references[0] + owner_ref = secret.metadata.owner_references[0] assert ( owner_ref.api_version == "ray.io/v1" ), f"Wrong API version: {owner_ref.api_version}" @@ -230,20 +230,20 @@ def verify_configmap_with_owner_reference(self, rayjob: RayJob): ), "Owner reference blockOwnerDeletion not set to true" # Verify labels - assert configmap.metadata.labels.get("ray.io/job-name") == rayjob.name + assert secret.metadata.labels.get("ray.io/job-name") == rayjob.name assert ( - configmap.metadata.labels.get("app.kubernetes.io/managed-by") + secret.metadata.labels.get("app.kubernetes.io/managed-by") == "codeflare-sdk" ) assert ( - configmap.metadata.labels.get("app.kubernetes.io/component") + secret.metadata.labels.get("app.kubernetes.io/component") == "rayjob-files" ) - print(f"✓ ConfigMap {configmap_name} verified with proper owner reference") + print(f"✓ Secret {secret_name} verified with proper owner reference") except client.rest.ApiException as e: if e.status == 404: - raise AssertionError(f"ConfigMap {configmap_name} not found") + raise AssertionError(f"Secret {secret_name} not found") else: raise e From 
57992eedd9780b57c0dd92352ab97d92c737b6af Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 15 Oct 2025 14:34:58 +0100 Subject: [PATCH 27/33] RHOAIENG-33283: Exclude md files from files secret --- src/codeflare_sdk/ray/rayjobs/runtime_env.py | 15 ++++--- .../ray/rayjobs/test/test_runtime_env.py | 44 ++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/codeflare_sdk/ray/rayjobs/runtime_env.py b/src/codeflare_sdk/ray/rayjobs/runtime_env.py index d6d2230b..401bc89a 100644 --- a/src/codeflare_sdk/ray/rayjobs/runtime_env.py +++ b/src/codeflare_sdk/ray/rayjobs/runtime_env.py @@ -28,15 +28,17 @@ # Path where working_dir will be unzipped on submitter pod UNZIP_PATH = "/tmp/rayjob-working-dir" -# File pattern to exclude from working directory zips -# Jupyter notebooks can contain sensitive outputs, tokens, and large data +# Exclude Jupyter notebook and Markdown files from working directory zips JUPYTER_NOTEBOOK_PATTERN = r"\.ipynb$" +MARKDOWN_FILE_PATTERN = r"\.md$" def _should_exclude_file(file_path: str) -> bool: """ Check if file should be excluded from working directory zip. - Currently excludes Jupyter notebook files (.ipynb). 
+ Currently excludes: + - Jupyter notebook files (.ipynb) + - Markdown files (.md) Args: file_path: Relative file path within the working directory @@ -44,7 +46,10 @@ def _should_exclude_file(file_path: str) -> bool: Returns: True if file should be excluded, False otherwise """ - return bool(re.search(JUPYTER_NOTEBOOK_PATTERN, file_path, re.IGNORECASE)) + return bool( + re.search(JUPYTER_NOTEBOOK_PATTERN, file_path, re.IGNORECASE) + or re.search(MARKDOWN_FILE_PATTERN, file_path, re.IGNORECASE) + ) def _normalize_runtime_env( @@ -145,7 +150,7 @@ def _zip_directory(directory_path: str) -> Optional[bytes]: f"Successfully zipped directory: {directory_path} ({len(zip_data)} bytes)" ) if excluded_count > 0: - log_message += f" - Excluded {excluded_count} Jupyter notebook files" + log_message += f" - Excluded {excluded_count} file(s) (.ipynb, .md)" logger.info(log_message) return zip_data diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py index 7a4150e5..58edc091 100644 --- a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py @@ -575,7 +575,7 @@ def test_zip_directory_functionality(tmp_path): def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog): """ - Test that Jupyter notebook files (.ipynb) are excluded from zip. + Test that Jupyter notebook files (.ipynb) and markdown files (.md) are excluded from zip. 
""" from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory import zipfile @@ -594,11 +594,16 @@ def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog): '{"cells": [], "metadata": {}}' ) # Test case insensitive + # Create markdown files (should be excluded) + (test_dir / "README.md").write_text("# Project Documentation\n") + (test_dir / "CHANGELOG.MD").write_text("# Changes\n") # Test case insensitive + # Create subdirectory with mixed files sub_dir = test_dir / "notebooks" sub_dir.mkdir() (sub_dir / "data_exploration.ipynb").write_text('{"cells": [], "metadata": {}}') (sub_dir / "helper.py").write_text("print('nested file')") + (sub_dir / "guide.md").write_text("# Guide\n") # Test zipping with caplog.at_level("INFO"): @@ -607,8 +612,8 @@ def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog): assert zip_data is not None assert len(zip_data) > 0 - # Verify log message includes exclusion count - assert "Excluded 3 Jupyter notebook files" in caplog.text + # Verify log message includes exclusion count (3 ipynb + 3 md = 6 total) + assert "Excluded 6 file(s) (.ipynb, .md)" in caplog.text # Verify excluded files are not in the zip zip_buffer = io.BytesIO(zip_data) @@ -625,10 +630,15 @@ def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog): assert "experiment.IPYNB" not in zip_contents assert "notebooks/data_exploration.ipynb" not in zip_contents + # Markdown files should be excluded + assert "README.md" not in zip_contents + assert "CHANGELOG.MD" not in zip_contents + assert "notebooks/guide.md" not in zip_contents + def test_zip_directory_no_exclusions_when_no_notebooks(tmp_path, caplog): """ - Test that no exclusion message is logged when no notebook files exist. + Test that no exclusion message is logged when no notebook or markdown files exist. 
""" from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory @@ -646,7 +656,6 @@ def test_zip_directory_no_exclusions_when_no_notebooks(tmp_path, caplog): # Verify log message does NOT mention exclusions assert "Excluded" not in caplog.text - assert "Jupyter notebook files" not in caplog.text def test_should_exclude_file_function(): @@ -661,12 +670,19 @@ def test_should_exclude_file_function(): assert _should_exclude_file("data/exploration.ipynb") is True assert _should_exclude_file("subdir/nested.Ipynb") is True + # Should exclude .md files (case insensitive) + assert _should_exclude_file("README.md") is True + assert _should_exclude_file("CHANGELOG.MD") is True + assert _should_exclude_file("docs/guide.md") is True + assert _should_exclude_file("subdir/notes.Md") is True + # Should NOT exclude other files assert _should_exclude_file("script.py") is False assert _should_exclude_file("data.json") is False assert _should_exclude_file("requirements.txt") is False - assert _should_exclude_file("README.md") is False assert _should_exclude_file("model.pkl") is False + assert _should_exclude_file("markdown_parser.py") is False # Not .md + assert _should_exclude_file("test.html") is False def test_zip_directory_error_handling(): @@ -718,7 +734,7 @@ def test_extract_all_local_files_with_working_dir(tmp_path): def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog): """ - Test that extract_all_local_files excludes Jupyter notebooks when zipping working directory. + Test that extract_all_local_files excludes Jupyter notebooks and markdown files when zipping working directory. 
""" import zipfile import base64 @@ -737,6 +753,10 @@ def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog): ) (working_dir / "data.ipynb").write_text('{"cells": [], "metadata": {}}') + # Markdown files that should be excluded + (working_dir / "README.md").write_text("# Project Documentation\n") + (working_dir / "CHANGELOG.md").write_text("# Changes\n") + runtime_env = RuntimeEnv(working_dir=str(working_dir)) rayjob = RayJob( @@ -747,15 +767,15 @@ def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog): cluster_name="test-cluster", ) - # This should zip the directory and exclude notebooks + # This should zip the directory and exclude notebooks and markdown files with caplog.at_level("INFO"): files = extract_all_local_files(rayjob) assert files is not None assert "working_dir.zip" in files - # Verify exclusion was logged - assert "Excluded 2 Jupyter notebook files" in caplog.text + # Verify exclusion was logged (2 ipynb + 2 md = 4 total) + assert "Excluded 4 file(s) (.ipynb, .md)" in caplog.text # Decode and verify zip contents zip_data = base64.b64decode(files["working_dir.zip"]) @@ -772,6 +792,10 @@ def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog): assert "analysis.ipynb" not in zip_contents assert "data.ipynb" not in zip_contents + # Markdown files should be excluded + assert "README.md" not in zip_contents + assert "CHANGELOG.md" not in zip_contents + def test_extract_single_entrypoint_file_error_handling(tmp_path): """ From c7cf12ad3b216a1a517f34cb4579616520be5174 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 15 Oct 2025 15:20:39 +0100 Subject: [PATCH 28/33] RHOAIENG-33283: Error handling, remove obsolete volumes, housekeeping --- .../common/widgets/test_widgets.py | 2 +- src/codeflare_sdk/common/widgets/widgets.py | 2 - src/codeflare_sdk/ray/cluster/cluster.py | 2 +- src/codeflare_sdk/ray/rayjobs/config.py | 50 +------ src/codeflare_sdk/ray/rayjobs/rayjob.py | 5 - 
src/codeflare_sdk/ray/rayjobs/runtime_env.py | 40 +++--- .../ray/rayjobs/test/test_runtime_env.py | 127 +++++------------- 7 files changed, 57 insertions(+), 171 deletions(-) diff --git a/src/codeflare_sdk/common/widgets/test_widgets.py b/src/codeflare_sdk/common/widgets/test_widgets.py index 33beca5c..55be2b75 100644 --- a/src/codeflare_sdk/common/widgets/test_widgets.py +++ b/src/codeflare_sdk/common/widgets/test_widgets.py @@ -106,7 +106,7 @@ def test_view_clusters(mocker, capsys): # Prepare to run view_clusters when notebook environment is detected mocker.patch("codeflare_sdk.common.widgets.widgets.is_notebook", return_value=True) mock_get_current_namespace = mocker.patch( - "codeflare_sdk.common.utils.get_current_namespace", + "codeflare_sdk.common.widgets.widgets.get_current_namespace", return_value="default", ) namespace = mock_get_current_namespace.return_value diff --git a/src/codeflare_sdk/common/widgets/widgets.py b/src/codeflare_sdk/common/widgets/widgets.py index 91295fa9..c813fabc 100644 --- a/src/codeflare_sdk/common/widgets/widgets.py +++ b/src/codeflare_sdk/common/widgets/widgets.py @@ -353,8 +353,6 @@ def view_clusters(namespace: str = None): ) return # Exit function if not in Jupyter Notebook - from ...common.utils import get_current_namespace - if not namespace: namespace = get_current_namespace() diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index 9509a8be..cd188816 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -520,7 +520,7 @@ def cluster_dashboard_uri(self) -> str: protocol = "https" if route["spec"].get("tls") else "http" return f"{protocol}://{route['spec']['host']}" # No route found for this cluster - return "Dashboard not available yet, have you run cluster.up()?" + return "Dashboard not available yet, have you run cluster.apply()?" 
else: try: api_instance = client.NetworkingV1Api(get_api_client()) diff --git a/src/codeflare_sdk/ray/rayjobs/config.py b/src/codeflare_sdk/ray/rayjobs/config.py index c1fe0daa..5b724272 100644 --- a/src/codeflare_sdk/ray/rayjobs/config.py +++ b/src/codeflare_sdk/ray/rayjobs/config.py @@ -22,8 +22,6 @@ from dataclasses import dataclass, field, fields from typing import Dict, List, Optional, Union, get_args, get_origin, Any, Tuple from kubernetes.client import ( - V1ConfigMapVolumeSource, - V1KeyToPath, V1LocalObjectReference, V1SecretVolumeSource, V1Toleration, @@ -62,50 +60,6 @@ "huawei.com/Ascend310": "NPU", } -# Default volume mounts for CA certificates -DEFAULT_VOLUME_MOUNTS = [ - V1VolumeMount( - mount_path="/etc/pki/tls/certs/odh-trusted-ca-bundle.crt", - name="odh-trusted-ca-cert", - sub_path="odh-trusted-ca-bundle.crt", - ), - V1VolumeMount( - mount_path="/etc/ssl/certs/odh-trusted-ca-bundle.crt", - name="odh-trusted-ca-cert", - sub_path="odh-trusted-ca-bundle.crt", - ), - V1VolumeMount( - mount_path="/etc/pki/tls/certs/odh-ca-bundle.crt", - name="odh-ca-cert", - sub_path="odh-ca-bundle.crt", - ), - V1VolumeMount( - mount_path="/etc/ssl/certs/odh-ca-bundle.crt", - name="odh-ca-cert", - sub_path="odh-ca-bundle.crt", - ), -] - -# Default volumes for CA certificates -DEFAULT_VOLUMES = [ - V1Volume( - name="odh-trusted-ca-cert", - config_map=V1ConfigMapVolumeSource( - name="odh-trusted-ca-bundle", - items=[V1KeyToPath(key="ca-bundle.crt", path="odh-trusted-ca-bundle.crt")], - optional=True, - ), - ), - V1Volume( - name="odh-ca-cert", - config_map=V1ConfigMapVolumeSource( - name="odh-trusted-ca-bundle", - items=[V1KeyToPath(key="odh-ca-bundle.crt", path="odh-ca-bundle.crt")], - optional=True, - ), - ), -] - @dataclass class ManagedClusterConfig: @@ -426,7 +380,7 @@ def _build_pod_spec(self, container: V1Container, is_head: bool) -> V1PodSpec: def _generate_volume_mounts(self) -> list: """Generate volume mounts for the container.""" - volume_mounts = 
DEFAULT_VOLUME_MOUNTS.copy() + volume_mounts = [] # Add custom volume mounts if specified if hasattr(self, "volume_mounts") and self.volume_mounts: @@ -436,7 +390,7 @@ def _generate_volume_mounts(self) -> list: def _generate_volumes(self) -> list: """Generate volumes for the pod.""" - volumes = DEFAULT_VOLUMES.copy() + volumes = [] # Add custom volumes if specified if hasattr(self, "volumes") and self.volumes: diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 76e396ee..65478087 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -169,11 +169,6 @@ def submit(self) -> str: # Extract files from entrypoint and runtime_env working_dir files = extract_all_local_files(self) - # Create Secret for files (will be mounted to submitter pod) - secret_name = None - if files: - secret_name = f"{self.name}-files" - rayjob_cr = self._build_rayjob_cr() logger.info(f"Submitting RayJob {self.name} to Kuberay operator") diff --git a/src/codeflare_sdk/ray/rayjobs/runtime_env.py b/src/codeflare_sdk/ray/rayjobs/runtime_env.py index 401bc89a..93606d73 100644 --- a/src/codeflare_sdk/ray/rayjobs/runtime_env.py +++ b/src/codeflare_sdk/ray/rayjobs/runtime_env.py @@ -113,7 +113,7 @@ def extract_all_local_files(job: RayJob) -> Optional[Dict[str, str]]: def _zip_directory(directory_path: str) -> Optional[bytes]: """ - Zip entire directory preserving structure, excluding Jupyter notebook files. + Zip entire directory preserving structure, excluding Jupyter notebook and markdown files. 
Args: directory_path: Path to directory to zip @@ -338,29 +338,22 @@ def create_secret_from_spec( metadata = client.V1ObjectMeta(**secret_spec["metadata"]) - # Add owner reference if we have the RayJob result - if ( - rayjob_result - and isinstance(rayjob_result, dict) - and rayjob_result.get("metadata", {}).get("uid") - ): - logger.info( - f"Adding owner reference to Secret '{secret_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" - ) - metadata.owner_references = [ - client.V1OwnerReference( - api_version="ray.io/v1", - kind="RayJob", - name=job.name, - uid=rayjob_result["metadata"]["uid"], - controller=True, - block_owner_deletion=True, - ) - ] - else: - logger.warning( - f"No valid RayJob result with UID found, Secret '{secret_name}' will not have owner reference. Result: {rayjob_result}" + # Add owner reference to ensure proper cleanup + # We can trust that rayjob_result contains UID since submit_job() only returns + # complete K8s resources or None, and we already validated result exists + logger.info( + f"Adding owner reference to Secret '{secret_name}' with RayJob UID: {rayjob_result['metadata']['uid']}" + ) + metadata.owner_references = [ + client.V1OwnerReference( + api_version="ray.io/v1", + kind="RayJob", + name=job.name, + uid=rayjob_result["metadata"]["uid"], + controller=True, + block_owner_deletion=True, ) + ] # Convert dict spec to V1Secret # Use stringData instead of data to avoid double base64 encoding @@ -409,5 +402,4 @@ def create_file_secret( ) # Create Secret with owner reference - # TODO Error handling create_secret_from_spec(job, secret_spec, rayjob_result) diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py index 58edc091..e059a8d3 100644 --- a/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py @@ -176,7 +176,16 @@ def test_create_secret_from_spec(auto_mock_setup): "data": {"test.py": 
"print('test')"}, } - result = create_secret_from_spec(rayjob, secret_spec) + # Provide valid RayJob result with UID as KubeRay client would + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-12345", + } + } + + result = create_secret_from_spec(rayjob, secret_spec, rayjob_result) assert result == "test-files" mock_api_instance.create_namespaced_secret.assert_called_once() @@ -205,7 +214,16 @@ def test_create_secret_already_exists(auto_mock_setup): "data": {"test.py": "print('test')"}, } - result = create_secret_from_spec(rayjob, secret_spec) + # Provide valid RayJob result with UID as KubeRay client would + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-67890", + } + } + + result = create_secret_from_spec(rayjob, secret_spec, rayjob_result) assert result == "test-files" mock_api_instance.create_namespaced_secret.assert_called_once() @@ -271,92 +289,6 @@ def test_create_secret_with_owner_reference_basic(mocker, auto_mock_setup, caplo mock_api_instance.create_namespaced_secret.assert_called_once() -def test_create_secret_without_owner_reference_no_uid(mocker, auto_mock_setup, caplog): - """ - Test creating Secret without owner reference when RayJob has no UID. 
- """ - mock_api_instance = auto_mock_setup["k8s_api"] - - mock_v1_metadata = mocker.patch("kubernetes.client.V1ObjectMeta") - mock_metadata_instance = MagicMock() - mock_v1_metadata.return_value = mock_metadata_instance - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - secret_spec = { - "apiVersion": "v1", - "kind": "Secret", - "type": "Opaque", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } - - # RayJob result without UID - rayjob_result = { - "metadata": { - "name": "test-job", - "namespace": "test-namespace", - # No UID field - } - } - - with caplog.at_level("WARNING"): - result = create_secret_from_spec(rayjob, secret_spec, rayjob_result) - - assert result == "test-files" - - # Verify warning was logged and no owner reference was set - assert ( - "No valid RayJob result with UID found, Secret 'test-files' will not have owner reference" - in caplog.text - ) - - # The important part is that the warning was logged, indicating no owner reference was set - mock_api_instance.create_namespaced_secret.assert_called_once() - - -def test_create_secret_with_invalid_rayjob_result(auto_mock_setup, caplog): - """ - Test creating Secret with None or invalid rayjob_result. 
- """ - mock_api_instance = auto_mock_setup["k8s_api"] - - rayjob = RayJob( - job_name="test-job", - cluster_name="existing-cluster", - entrypoint="python test.py", - namespace="test-namespace", - ) - - secret_spec = { - "apiVersion": "v1", - "kind": "Secret", - "type": "Opaque", - "metadata": {"name": "test-files", "namespace": "test-namespace"}, - "data": {"test.py": "print('test')"}, - } - - # Test with None - with caplog.at_level("WARNING"): - result = create_secret_from_spec(rayjob, secret_spec, None) - - assert result == "test-files" - assert "No valid RayJob result with UID found" in caplog.text - - # Test with string instead of dict - caplog.clear() - with caplog.at_level("WARNING"): - result = create_secret_from_spec(rayjob, secret_spec, "not-a-dict") - - assert result == "test-files" - assert "No valid RayJob result with UID found" in caplog.text - - def test_file_handling_kubernetes_best_practice_flow(mocker, tmp_path): """ Test the Kubernetes best practice flow: pre-declare volume, submit, create Secret. @@ -446,7 +378,13 @@ def test_rayjob_submit_with_files_new_cluster(auto_mock_setup, tmp_path): Test RayJob submission with file detection for new cluster. 
""" mock_api_instance = auto_mock_setup["rayjob_api"] - mock_api_instance.submit_job.return_value = True + mock_api_instance.submit_job.return_value = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-files-12345", + } + } mock_k8s_instance = auto_mock_setup["k8s_api"] @@ -507,8 +445,17 @@ def test_create_secret_api_error_non_409(auto_mock_setup): "data": {"test.py": "print('test')"}, } + # Provide valid RayJob result with UID as KubeRay client would + rayjob_result = { + "metadata": { + "name": "test-job", + "namespace": "test-namespace", + "uid": "test-uid-api-error", + } + } + with pytest.raises(RuntimeError, match="Failed to create Secret"): - create_secret_from_spec(rayjob, secret_spec) + create_secret_from_spec(rayjob, secret_spec, rayjob_result) def test_add_file_volumes_existing_volume_skip(): From c3b831a5cd4ee7574af0f6bb177cbf60f9f85daf Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 15 Oct 2025 16:32:58 +0100 Subject: [PATCH 29/33] no-jira: fix coverage pull request step --- .github/workflows/coverage-badge.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/coverage-badge.yaml b/.github/workflows/coverage-badge.yaml index 05084d31..2a92010b 100644 --- a/.github/workflows/coverage-badge.yaml +++ b/.github/workflows/coverage-badge.yaml @@ -50,7 +50,14 @@ jobs: - name: Create Pull Request if: steps.changed_files.outputs.files_changed == 'true' - uses: peter-evans/create-pull-request@v4 + uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.GITHUB_TOKEN }} title: "[Automatic] Coverage Badge Update" + commit-message: "Updated coverage.svg" + branch: create-pull-request/coverage-badge-update + delete-branch: true + body: | + This is an automated pull request to update the coverage badge. 
+ + - Updated coverage.svg based on latest test results From afe21b302a562419e9cc5b6099c17538932b1c5d Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Wed, 15 Oct 2025 17:20:40 +0100 Subject: [PATCH 30/33] Update pyproject.toml to v0.32.0 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6d13354e..cda1337f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [project] name = "codeflare-sdk" -version = "0.31.1" +version = "0.32.0" [tool.poetry] name = "codeflare-sdk" -version = "0.31.1" +version = "0.32.0" description = "Python SDK for codeflare client" license = "Apache-2.0" From 538e78039debf713943c23340e884cee70d9c4b2 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Tue, 14 Oct 2025 17:11:23 +0100 Subject: [PATCH 31/33] task(RHOAIENG-35928): Add HTTPRoute detection --- src/codeflare_sdk/ray/cluster/cluster.py | 170 ++++++-- src/codeflare_sdk/ray/cluster/test_cluster.py | 362 +++++++++++++++++- 2 files changed, 486 insertions(+), 46 deletions(-) diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index cd188816..8538dba3 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -495,8 +495,19 @@ def cluster_uri(self) -> str: def cluster_dashboard_uri(self) -> str: """ Returns a string containing the cluster's dashboard URI. + Tries HTTPRoute first (RHOAI v3.0+), then falls back to OpenShift Routes or Ingresses. 
""" config_check() + + # Try HTTPRoute first (RHOAI v3.0+) + # This will return None if HTTPRoute is not found (SDK v0.31.1 and below or Kind clusters) + httproute_url = _get_dashboard_url_from_httproute( + self.config.name, self.config.namespace + ) + if httproute_url: + return httproute_url + + # Fall back to OpenShift Routes (pre-v3.0) or Ingresses (Kind) if _is_openshift_cluster(): try: api_instance = client.CustomObjectsApi(get_api_client()) @@ -1001,45 +1012,51 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]: status = RayClusterStatus.UNKNOWN config_check() dashboard_url = None - if _is_openshift_cluster(): - try: - api_instance = client.CustomObjectsApi(get_api_client()) - routes = api_instance.list_namespaced_custom_object( - group="route.openshift.io", - version="v1", - namespace=rc["metadata"]["namespace"], - plural="routes", - ) - except Exception as e: # pragma: no cover - return _kube_api_error_handling(e) - for route in routes["items"]: - rc_name = rc["metadata"]["name"] - if route["metadata"]["name"] == f"ray-dashboard-{rc_name}" or route[ - "metadata" - ]["name"].startswith(f"{rc_name}-ingress"): - protocol = "https" if route["spec"].get("tls") else "http" - dashboard_url = f"{protocol}://{route['spec']['host']}" - else: - try: - api_instance = client.NetworkingV1Api(get_api_client()) - ingresses = api_instance.list_namespaced_ingress( - rc["metadata"]["namespace"] - ) - except Exception as e: # pragma no cover - return _kube_api_error_handling(e) - for ingress in ingresses.items: - annotations = ingress.metadata.annotations - protocol = "http" - if ( - ingress.metadata.name == f"ray-dashboard-{rc['metadata']['name']}" - or ingress.metadata.name.startswith(f"{rc['metadata']['name']}-ingress") - ): - if annotations == None: - protocol = "http" - elif "route.openshift.io/termination" in annotations: - protocol = "https" - dashboard_url = f"{protocol}://{ingress.spec.rules[0].host}" + # Try HTTPRoute first (RHOAI v3.0+) + rc_name = 
rc["metadata"]["name"] + rc_namespace = rc["metadata"]["namespace"] + dashboard_url = _get_dashboard_url_from_httproute(rc_name, rc_namespace) + + # Fall back to OpenShift Routes or Ingresses if HTTPRoute not found + if not dashboard_url: + if _is_openshift_cluster(): + try: + api_instance = client.CustomObjectsApi(get_api_client()) + routes = api_instance.list_namespaced_custom_object( + group="route.openshift.io", + version="v1", + namespace=rc_namespace, + plural="routes", + ) + except Exception as e: # pragma: no cover + return _kube_api_error_handling(e) + + for route in routes["items"]: + if route["metadata"]["name"] == f"ray-dashboard-{rc_name}" or route[ + "metadata" + ]["name"].startswith(f"{rc_name}-ingress"): + protocol = "https" if route["spec"].get("tls") else "http" + dashboard_url = f"{protocol}://{route['spec']['host']}" + break + else: + try: + api_instance = client.NetworkingV1Api(get_api_client()) + ingresses = api_instance.list_namespaced_ingress(rc_namespace) + except Exception as e: # pragma no cover + return _kube_api_error_handling(e) + for ingress in ingresses.items: + annotations = ingress.metadata.annotations + protocol = "http" + if ( + ingress.metadata.name == f"ray-dashboard-{rc_name}" + or ingress.metadata.name.startswith(f"{rc_name}-ingress") + ): + if annotations == None: + protocol = "http" + elif "route.openshift.io/termination" in annotations: + protocol = "https" + dashboard_url = f"{protocol}://{ingress.spec.rules[0].host}" ( head_extended_resources, @@ -1129,3 +1146,80 @@ def _is_openshift_cluster(): return False except Exception as e: # pragma: no cover return _kube_api_error_handling(e) + + +# Get dashboard URL from HTTPRoute (RHOAI v3.0+) +def _get_dashboard_url_from_httproute( + cluster_name: str, namespace: str +) -> Optional[str]: + """ + Attempts to get the Ray dashboard URL from an HTTPRoute resource. + This is used for RHOAI v3.0+ clusters that use Gateway API. 
+ + Args: + cluster_name: Name of the Ray cluster + namespace: Namespace of the Ray cluster + + Returns: + Dashboard URL if HTTPRoute is found, None otherwise + """ + try: + config_check() + api_instance = client.CustomObjectsApi(get_api_client()) + + # Try to get HTTPRoute for this Ray cluster + try: + httproute = api_instance.get_namespaced_custom_object( + group="gateway.networking.k8s.io", + version="v1", + namespace=namespace, + plural="httproutes", + name=cluster_name, + ) + except client.exceptions.ApiException as e: + if e.status == 404: + # HTTPRoute not found - this is expected for SDK v0.31.1 and below or Kind clusters + return None + raise + + # Get the Gateway reference from HTTPRoute + parent_refs = httproute.get("spec", {}).get("parentRefs", []) + if not parent_refs: + return None + + gateway_ref = parent_refs[0] + gateway_name = gateway_ref.get("name") + gateway_namespace = gateway_ref.get("namespace") + + if not gateway_name or not gateway_namespace: + return None + + # Get the Gateway to retrieve the hostname + gateway = api_instance.get_namespaced_custom_object( + group="gateway.networking.k8s.io", + version="v1", + namespace=gateway_namespace, + plural="gateways", + name=gateway_name, + ) + + # Extract hostname from Gateway listeners + listeners = gateway.get("spec", {}).get("listeners", []) + if not listeners: + return None + + hostname = listeners[0].get("hostname") + if not hostname: + return None + + # Construct the dashboard URL using RHOAI v3.0+ Gateway API pattern + # The HTTPRoute existence confirms v3.0+, so we use the standard path pattern + # Format: https://{hostname}/ray/{namespace}/{cluster-name} + protocol = "https" # Gateway API uses HTTPS + dashboard_url = f"{protocol}://{hostname}/ray/{namespace}/{cluster_name}" + + return dashboard_url + + except Exception as e: # pragma: no cover + # If any error occurs, return None to fall back to OpenShift Route + return None diff --git a/src/codeflare_sdk/ray/cluster/test_cluster.py 
b/src/codeflare_sdk/ray/cluster/test_cluster.py index 164b3a81..c8742a3e 100644 --- a/src/codeflare_sdk/ray/cluster/test_cluster.py +++ b/src/codeflare_sdk/ray/cluster/test_cluster.py @@ -954,13 +954,359 @@ def test_cluster_namespace_type_error(mocker): Cluster(config) +def test_get_dashboard_url_from_httproute(mocker): + """ + Test the HTTPRoute dashboard URL generation for RHOAI v3.0+ + """ + from codeflare_sdk.ray.cluster.cluster import _get_dashboard_url_from_httproute + + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + + # Test successful HTTPRoute and Gateway lookup + mock_httproute = { + "metadata": {"name": "test-cluster", "namespace": "test-ns"}, + "spec": { + "parentRefs": [ + { + "group": "gateway.networking.k8s.io", + "kind": "Gateway", + "name": "data-science-gateway", + "namespace": "openshift-ingress", + } + ] + }, + } + + mock_gateway = { + "metadata": {"name": "data-science-gateway", "namespace": "openshift-ingress"}, + "spec": { + "listeners": [ + { + "name": "https", + "hostname": "data-science-gateway.apps.example.com", + "port": 443, + "protocol": "HTTPS", + } + ] + }, + } + + # Mock the CustomObjectsApi to return HTTPRoute and Gateway + def mock_get_namespaced_custom_object(group, version, namespace, plural, name): + if plural == "httproutes": + return mock_httproute + elif plural == "gateways": + return mock_gateway + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_get_namespaced_custom_object, + ) + + # Test successful URL generation + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + expected_url = ( + "https://data-science-gateway.apps.example.com/ray/test-ns/test-cluster" + ) + assert result == expected_url, f"Expected {expected_url}, got {result}" + + # Test HTTPRoute not found (404) - should return None + def mock_404_error(group, version, namespace, plural, name): + error = 
client.exceptions.ApiException(status=404) + error.status = 404 + raise error + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_404_error, + ) + + result = _get_dashboard_url_from_httproute("nonexistent-cluster", "test-ns") + assert result is None, "Should return None when HTTPRoute not found" + + # Test HTTPRoute with empty parentRefs - should return None + mock_httproute_no_parents = { + "metadata": {"name": "test-cluster", "namespace": "test-ns"}, + "spec": {"parentRefs": []}, # Empty parentRefs + } + + def mock_httproute_no_parents_fn(group, version, namespace, plural, name): + if plural == "httproutes": + return mock_httproute_no_parents + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_httproute_no_parents_fn, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert result is None, "Should return None when HTTPRoute has empty parentRefs" + + # Test HTTPRoute with missing gateway name - should return None + mock_httproute_no_name = { + "metadata": {"name": "test-cluster", "namespace": "test-ns"}, + "spec": { + "parentRefs": [ + { + "group": "gateway.networking.k8s.io", + "kind": "Gateway", + # Missing "name" field + "namespace": "openshift-ingress", + } + ] + }, + } + + def mock_httproute_no_name_fn(group, version, namespace, plural, name): + if plural == "httproutes": + return mock_httproute_no_name + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_httproute_no_name_fn, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert result is None, "Should return None when gateway reference missing name" + + # Test HTTPRoute with missing gateway namespace - should return None + mock_httproute_no_namespace = { + "metadata": {"name": "test-cluster", "namespace": 
"test-ns"}, + "spec": { + "parentRefs": [ + { + "group": "gateway.networking.k8s.io", + "kind": "Gateway", + "name": "data-science-gateway", + # Missing "namespace" field + } + ] + }, + } + + def mock_httproute_no_namespace_fn(group, version, namespace, plural, name): + if plural == "httproutes": + return mock_httproute_no_namespace + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_httproute_no_namespace_fn, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert result is None, "Should return None when gateway reference missing namespace" + + # Test Gateway with empty listeners - should return None + mock_httproute_valid = { + "metadata": {"name": "test-cluster", "namespace": "test-ns"}, + "spec": { + "parentRefs": [ + { + "group": "gateway.networking.k8s.io", + "kind": "Gateway", + "name": "data-science-gateway", + "namespace": "openshift-ingress", + } + ] + }, + } + + mock_gateway_no_listeners = { + "metadata": {"name": "data-science-gateway", "namespace": "openshift-ingress"}, + "spec": {"listeners": []}, # Empty listeners + } + + def mock_gateway_no_listeners_fn(group, version, namespace, plural, name): + if plural == "httproutes": + return mock_httproute_valid + elif plural == "gateways": + return mock_gateway_no_listeners + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_gateway_no_listeners_fn, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert result is None, "Should return None when Gateway has empty listeners" + + # Test Gateway listener with missing hostname - should return None + mock_gateway_no_hostname = { + "metadata": {"name": "data-science-gateway", "namespace": "openshift-ingress"}, + "spec": { + "listeners": [ + { + "name": "https", + # Missing "hostname" field + "port": 443, + "protocol": 
"HTTPS", + } + ] + }, + } + + def mock_gateway_no_hostname_fn(group, version, namespace, plural, name): + if plural == "httproutes": + return mock_httproute_valid + elif plural == "gateways": + return mock_gateway_no_hostname + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_gateway_no_hostname_fn, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert result is None, "Should return None when listener missing hostname" + + # Test non-404 ApiException - should be re-raised then caught by outer handler + # The function is designed to return None for any unexpected errors via outer try-catch + def mock_403_error(group, version, namespace, plural, name): + error = client.exceptions.ApiException(status=403) + error.status = 403 + raise error + + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_403_error, + ) + + # Should return None (the inner handler re-raises, outer handler catches and returns None) + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert ( + result is None + ), "Should return None when non-404 exception occurs (caught by outer handler)" + + +def test_cluster_dashboard_uri_httproute_first(mocker): + """ + Test that cluster_dashboard_uri() tries HTTPRoute first, then falls back to OpenShift Routes + """ + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + + # Test 1: HTTPRoute exists - should return HTTPRoute URL + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._is_openshift_cluster", return_value=True + ) + + httproute_url = ( + 
"https://data-science-gateway.apps.example.com/ray/ns/unit-test-cluster" + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._get_dashboard_url_from_httproute", + return_value=httproute_url, + ) + + cluster = create_cluster(mocker) + result = cluster.cluster_dashboard_uri() + assert result == httproute_url, "Should return HTTPRoute URL when available" + + # Test 2: HTTPRoute not found - should fall back to OpenShift Route + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._get_dashboard_url_from_httproute", + return_value=None, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value={ + "items": [ + { + "metadata": {"name": "ray-dashboard-unit-test-cluster"}, + "spec": { + "host": "ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org", + "tls": {"termination": "passthrough"}, + }, + } + ] + }, + ) + + cluster = create_cluster(mocker) + result = cluster.cluster_dashboard_uri() + expected = "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" + assert ( + result == expected + ), f"Should fall back to OpenShift Route. 
Expected {expected}, got {result}" + + +def test_map_to_ray_cluster_httproute(mocker): + """ + Test that _map_to_ray_cluster() uses HTTPRoute-first logic + """ + from codeflare_sdk.ray.cluster.cluster import _map_to_ray_cluster + + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._is_openshift_cluster", return_value=True + ) + + # Test with HTTPRoute available + httproute_url = ( + "https://data-science-gateway.apps.example.com/ray/ns/test-cluster-a" + ) + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._get_dashboard_url_from_httproute", + return_value=httproute_url, + ) + + rc = get_ray_obj("ray.io", "v1", "ns", "rayclusters")["items"][0] + result = _map_to_ray_cluster(rc) + + assert ( + result.dashboard == httproute_url + ), f"Expected HTTPRoute URL, got {result.dashboard}" + + # Test with HTTPRoute not available - should fall back to OpenShift Route + mocker.patch( + "codeflare_sdk.ray.cluster.cluster._get_dashboard_url_from_httproute", + return_value=None, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value={ + "items": [ + { + "kind": "Route", + "metadata": { + "name": "ray-dashboard-test-cluster-a", + "namespace": "ns", + }, + "spec": {"host": "ray-dashboard-test-cluster-a.apps.example.com"}, + } + ] + }, + ) + + rc = get_ray_obj("ray.io", "v1", "ns", "rayclusters")["items"][0] + result = _map_to_ray_cluster(rc) + + expected_fallback = "http://ray-dashboard-test-cluster-a.apps.example.com" + assert ( + result.dashboard == expected_fallback + ), f"Expected OpenShift Route fallback URL, got {result.dashboard}" + + # Make sure to always keep this function last def test_cleanup(): - # Remove files only if they exist - test_file = f"{aw_dir}test-all-params.yaml" - if os.path.exists(test_file): - os.remove(test_file) - - aw_file = f"{aw_dir}aw-all-params.yaml" - if os.path.exists(aw_file): - os.remove(aw_file) + # Clean up test 
files if they exist + # Using try-except to handle cases where files weren't created (e.g., when running full test suite) + try: + os.remove(f"{aw_dir}test-all-params.yaml") + except FileNotFoundError: + pass # File doesn't exist, nothing to clean up + + try: + os.remove(f"{aw_dir}aw-all-params.yaml") + except FileNotFoundError: + pass # File doesn't exist, nothing to clean up From 67df4f5d5f474d8e2a65cd40f6b158bd50adea4f Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Thu, 16 Oct 2025 11:48:00 +0100 Subject: [PATCH 32/33] no-jira: Vendor kuberay python client --- .github/workflows/coverage-badge.yaml | 2 +- .github/workflows/unit-tests.yml | 2 +- poetry.lock | 22 +- pyproject.toml | 12 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 4 +- src/codeflare_sdk/vendored/.gitignore | 35 ++ src/codeflare_sdk/vendored/LICENSE | 272 +++++++++ src/codeflare_sdk/vendored/README.md | 158 +++++ src/codeflare_sdk/vendored/__init__.py | 14 + .../vendored/examples/complete-example.py | 144 +++++ .../vendored/examples/use-builder.py | 79 +++ .../vendored/examples/use-director.py | 98 +++ .../examples/use-raw-config_map_with-api.py | 213 +++++++ .../vendored/examples/use-raw-with-api.py | 195 ++++++ .../vendored/examples/use-utils.py | 117 ++++ src/codeflare_sdk/vendored/poetry.lock | 439 ++++++++++++++ src/codeflare_sdk/vendored/pyproject.toml | 26 + .../vendored/python_client/__init__.py | 1 + .../vendored/python_client/constants.py | 13 + .../python_client/kuberay_cluster_api.py | 311 ++++++++++ .../vendored/python_client/kuberay_job_api.py | 381 ++++++++++++ .../vendored/python_client/utils/__init__.py | 0 .../utils/kuberay_cluster_builder.py | 326 ++++++++++ .../utils/kuberay_cluster_utils.py | 473 +++++++++++++++ .../vendored/python_client_test/README.md | 29 + .../vendored/python_client_test/helpers.py | 135 +++++ .../python_client_test/test_cluster_api.py | 345 +++++++++++ .../python_client_test/test_director.py | 121 ++++ .../python_client_test/test_job_api.py | 567 
++++++++++++++++++ .../vendored/python_client_test/test_utils.py | 352 +++++++++++ .../rayjob/rayjob_existing_cluster_test.py | 2 +- .../rayjob/rayjob_lifecycled_cluster_test.py | 4 +- 32 files changed, 4862 insertions(+), 30 deletions(-) create mode 100644 src/codeflare_sdk/vendored/.gitignore create mode 100644 src/codeflare_sdk/vendored/LICENSE create mode 100644 src/codeflare_sdk/vendored/README.md create mode 100644 src/codeflare_sdk/vendored/__init__.py create mode 100644 src/codeflare_sdk/vendored/examples/complete-example.py create mode 100644 src/codeflare_sdk/vendored/examples/use-builder.py create mode 100644 src/codeflare_sdk/vendored/examples/use-director.py create mode 100644 src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py create mode 100644 src/codeflare_sdk/vendored/examples/use-raw-with-api.py create mode 100644 src/codeflare_sdk/vendored/examples/use-utils.py create mode 100644 src/codeflare_sdk/vendored/poetry.lock create mode 100755 src/codeflare_sdk/vendored/pyproject.toml create mode 100644 src/codeflare_sdk/vendored/python_client/__init__.py create mode 100644 src/codeflare_sdk/vendored/python_client/constants.py create mode 100644 src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py create mode 100644 src/codeflare_sdk/vendored/python_client/kuberay_job_api.py create mode 100644 src/codeflare_sdk/vendored/python_client/utils/__init__.py create mode 100644 src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_builder.py create mode 100644 src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/README.md create mode 100644 src/codeflare_sdk/vendored/python_client_test/helpers.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_director.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_job_api.py 
create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_utils.py diff --git a/.github/workflows/coverage-badge.yaml b/.github/workflows/coverage-badge.yaml index 2a92010b..d793a699 100644 --- a/.github/workflows/coverage-badge.yaml +++ b/.github/workflows/coverage-badge.yaml @@ -29,7 +29,7 @@ jobs: poetry install --with test - name: Generate coverage report run: | - coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py" -m pytest + coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py,src/codeflare_sdk/vendored/**" -m pytest - name: Coverage Badge uses: tj-actions/coverage-badge-py@v2 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index e38e6973..e276ee3e 100755 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -26,7 +26,7 @@ jobs: poetry install --with test - name: Test with pytest and check coverage run: | - coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py" -m pytest + coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py,src/codeflare_sdk/vendored/**" -m pytest coverage=$(coverage report -m | tail -1 | tail -c 4 | head -c 2) if (( $coverage < 90 )); then echo "Coverage failed at ${coverage}%"; exit 1; else echo "Coverage passed, ${coverage}%"; fi - name: Upload to Codecov diff --git a/poetry.lock b/poetry.lock index 88ceb3cb..0e2d4eac 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3416,26 +3416,6 @@ files = [ [package.dependencies] pytest = ">=7.0.0" -[[package]] -name = "python-client" -version = "0.0.0-dev" -description = "Python Client for Kuberay" -optional = false -python-versions = "^3.11" -groups = ["main"] -files = [] -develop = false - -[package.dependencies] -kubernetes = ">=25.0.0" - -[package.source] -type = "git" -url = "https://github.com/ray-project/kuberay.git" -reference = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" 
-resolved_reference = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" -subdirectory = "clients/python-client" - [[package]] name = "python-dateutil" version = "3.9.0" @@ -4823,4 +4803,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "9a052e3a816450844fa2cf3427e4660715977aca3b14561d1e9991899624b7c2" +content-hash = "1a3968dbde8f4356b4d93b17f5bcf75f2bc38587553273742de05d9f0f6ee87c" diff --git a/pyproject.toml b/pyproject.toml index cda1337f..0a9d3228 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,15 @@ description = "Python SDK for codeflare client" license = "Apache-2.0" +# Exclude vendored tests, examples, and build files from the package +exclude = [ + "src/codeflare_sdk/vendored/python_client_test", + "src/codeflare_sdk/vendored/examples", + "src/codeflare_sdk/vendored/pyproject.toml", + "src/codeflare_sdk/vendored/poetry.lock", + "src/codeflare_sdk/vendored/README.md" +] + authors = [ "Michael Clifford ", "Mustafa Eyceoz ", @@ -33,7 +42,6 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = ">= 2.10.6" ipywidgets = "8.1.2" -python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" } [[tool.poetry.source]] name = "pypi" @@ -73,7 +81,7 @@ markers = [ "openshift", "nvidia_gpu" ] -addopts = "--timeout=900" +addopts = "--timeout=900 --ignore=src/codeflare_sdk/vendored" testpaths = ["src/codeflare_sdk"] collect_ignore = ["src/codeflare_sdk/common/utils/unit_test_support.py"] diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 65478087..c06c596e 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -27,8 +27,8 @@ from codeflare_sdk.common.utils.constants import MOUNT_PATH from codeflare_sdk.common.utils.utils import get_ray_image_for_python_version -from python_client.kuberay_job_api import RayjobApi -from 
python_client.kuberay_cluster_api import RayClusterApi +from codeflare_sdk.vendored.python_client.kuberay_job_api import RayjobApi +from codeflare_sdk.vendored.python_client.kuberay_cluster_api import RayClusterApi from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from codeflare_sdk.ray.rayjobs.runtime_env import ( create_file_secret, diff --git a/src/codeflare_sdk/vendored/.gitignore b/src/codeflare_sdk/vendored/.gitignore new file mode 100644 index 00000000..d6d73f9c --- /dev/null +++ b/src/codeflare_sdk/vendored/.gitignore @@ -0,0 +1,35 @@ + + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + +# Distribution / packaging +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +.tox/ +htmlcov +.coverage +.cache +nosetests.xml +coverage.xml diff --git a/src/codeflare_sdk/vendored/LICENSE b/src/codeflare_sdk/vendored/LICENSE new file mode 100644 index 00000000..1dcfa84a --- /dev/null +++ b/src/codeflare_sdk/vendored/LICENSE @@ -0,0 +1,272 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +Code in python/ray/rllib/{evolution_strategies, dqn} adapted from +https://github.com/openai (MIT License) + +Copyright (c) 2016 OpenAI (http://openai.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +-------------------------------------------------------------------------------- + +Code in python/ray/rllib/impala/vtrace.py from +https://github.com/deepmind/scalable_agent + +Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- +Code in python/ray/rllib/ars is adapted from https://github.com/modestyachts/ARS + +Copyright (c) 2018, ARS contributors (Horia Mania, Aurelia Guy, Benjamin Recht) +All rights reserved. + +Redistribution and use of ARS in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/codeflare_sdk/vendored/README.md b/src/codeflare_sdk/vendored/README.md new file mode 100644 index 00000000..2189cb5b --- /dev/null +++ b/src/codeflare_sdk/vendored/README.md @@ -0,0 +1,158 @@ +# Overview + +This python client library provides APIs to handle `raycluster` and `rayjobs` from your python application. + +## Prerequisites + +It is assumed that your `k8s cluster is already set up`. Your kubectl configuration is expected to be +in `~/.kube/config` if you are running the code directly from your terminal. + +It is also expected that the `kuberay operator` is installed. +[Installation instructions are here][quick-start] + +## Usage + +There are multiple levels of using the API with increasing levels of complexity. + +### director + +This is the easiest form of using the API to create rayclusters with predefined cluster sizes + +```python +my_kuberay_api = kuberay_cluster_api.RayClusterApi() + +my_cluster_director = kuberay_cluster_builder.Director() + +cluster0 = my_cluster_director.build_small_cluster(name="new-cluster0") + +if cluster0: + my_kuberay_api.create_ray_cluster(body=cluster0) +``` + +the director creates the cluster definition, and the `cluster_api` acts as the HTTP client sending +the create (post) request to the k8s api-server + +### cluster_builder + +The builder allows you to build the cluster piece by piece. You can customize the cluster more.
+ +```python +cluster1 = ( + my_cluster_builder.build_meta(name="new-cluster1") + .build_head() + .build_worker(group_name="workers", replicas=3) + .get_cluster() + ) + +if not my_cluster_builder.succeeded: + return + +my_kuberay_api.create_ray_cluster(body=cluster1) +``` + +### cluster_utils + +`cluster_utils` gives you even more options to modify your cluster definition, add/remove worker +groups, change replicas in a worker group, duplicate a worker group, etc. + +```python +my_Cluster_utils = kuberay_cluster_utils.ClusterUtils() + +cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( + cluster2, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 +) + +if succeeded: + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) +``` + +### cluster_api + +The `cluster_api` is the one you always use to implement your cluster change in k8s. You can +use it with raw `JSON` if you wish. The `director/cluster_builder/cluster_utils` are just tools to +shield the user from using raw `JSON`. + +### job_api + +Finally, the `job_api` can be used to submit RayJobs to a pre-existing RayCluster. 
+ +#### Submitting to Existing Cluster + +```python +from codeflare_sdk.vendored.python_client import kuberay_job_api, kuberay_cluster_api, constants + +job_body = { + "apiVersion": "ray.io/v1", + "kind": "RayJob", + "metadata": {...}, + "spec": { + "clusterSelector": { + "ray.io/cluster": "ray-cluster-name", + }, + "entrypoint": 'python training_script.py', + "submissionMode": "K8sJobMode", + }, +} + +kuberay_job_api.submit_job( + job=job_body, + k8s_namespace=namespace, +) +``` + +## Code Organization + +```text +clients/ +└── python-client + ├── examples + │ ├── complete-example.py + │ ├── use-builder.py + │ ├── use-director.py + │ ├── use-raw-config_map_with-api.py + │ ├── use-raw-with-api.py + │ └── use-utils.py + ├── LICENSE + ├── poetry.lock + ├── pyproject.toml + ├── python_client + │ ├── __init__.py + │ ├── constants.py + │ ├── kuberay_cluster_api.py + │ ├── kuberay_job_api.py + │ └── utils + │ ├── __init__.py + │ ├── kuberay_cluster_builder.py + │ └── kuberay_cluster_utils.py + ├── python_client_test + │ ├── README.md + │ ├── test_cluster_api.py + │ ├── test_director.py + │ ├── test_job_api.py + │ └── test_utils.py + └── README.md +``` + +## For developers + +Make sure you have installed setuptools + +`pip install -U pip setuptools` + +### run the pip command + +from the directory `path/to/kuberay/clients/python-client` + +`pip install -e .` + +### to uninstall the module run + +`pip uninstall python-client` + +### For testing run + + `python -m unittest discover 'path/to/kuberay/clients/python-client/python_client_test/'` + +[quick-start]: https://github.com/ray-project/kuberay#quick-start diff --git a/src/codeflare_sdk/vendored/__init__.py b/src/codeflare_sdk/vendored/__init__.py new file mode 100644 index 00000000..93f1b14f --- /dev/null +++ b/src/codeflare_sdk/vendored/__init__.py @@ -0,0 +1,14 @@ +""" +Vendored third-party dependencies.
+ +This directory contains code from external projects that are bundled +with codeflare-sdk to avoid PyPI publishing restrictions. + +Contents: +- python_client: KubeRay Python client from ray-project/kuberay + Source: https://github.com/ray-project/kuberay @ b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570 + License: Apache 2.0 (see LICENSE file) + + Vendored because the python-client is not published to PyPI and PyPI + does not allow direct git dependencies. +""" diff --git a/src/codeflare_sdk/vendored/examples/complete-example.py b/src/codeflare_sdk/vendored/examples/complete-example.py new file mode 100644 index 00000000..8cfdfdcc --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/complete-example.py @@ -0,0 +1,144 @@ +import sys +import os +from os import path + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. +""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" + +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import ( + kuberay_cluster_utils, + kuberay_cluster_builder, +) + + +def main(): + print("starting cluster handler...") + my_kuberay_api = kuberay_cluster_api.RayClusterApi() # this is the main api object + + my_cluster_director = ( + kuberay_cluster_builder.Director() + ) # this is the director object, to create a cluster with a single line of code + + my_cluster_builder = ( + kuberay_cluster_builder.ClusterBuilder() + ) # this is the builder object, to create a cluster with a more granular control + 
+ my_Cluster_utils = ( + kuberay_cluster_utils.ClusterUtils() + ) # this is the utils object, to perform operations on a cluster + + cluster0 = my_cluster_director.build_small_cluster( + name="new-cluster0", labels={"demo-cluster": "yes"} + ) # this is the cluster object, it is a dict + + if cluster0: + my_kuberay_api.create_ray_cluster( + body=cluster0 + ) # this is the api call to create the cluster0 in k8s + + cluster1 = ( + my_cluster_builder.build_meta( + name="new-cluster1", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + my_kuberay_api.create_ray_cluster( + body=cluster1 + ) # this is the api call to create the cluster1 in k8s + + cluster2 = ( + my_cluster_builder.build_meta( + name="new-cluster2", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + + my_kuberay_api.create_ray_cluster( + body=cluster2 + ) # this is the api call to create the cluster2 in k8s + + # modifying the number of replicas in the workergroup + cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( + cluster2, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 + ) + + if succeeded: + print( + "trying to patch raycluster = {}".format( + cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) # this is the api call to patch the cluster2 in k8s + + cluster_to_patch, succeeded = my_Cluster_utils.duplicate_worker_group( + cluster1, group_name="workers", new_group_name="new-workers" + ) # this is the call to duplicate the worker group in cluster1 + if succeeded: + print( + "trying to patch raycluster = {}".format( + 
cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) # this is the api call to patch the cluster1 in k8s + + kube_ray_list = my_kuberay_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) # this is the api call to list all the clusters in k8s + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kuberay_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) # this is the api call to delete the cluster in k8s + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-builder.py b/src/codeflare_sdk/vendored/examples/use-builder.py new file mode 100644 index 00000000..5309fc00 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-builder.py @@ -0,0 +1,79 @@ +import sys +import os +from os import path +import json + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. 
+""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" + +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +def main(): + print("starting cluster handler...") + my_kuberay_api = kuberay_cluster_api.RayClusterApi() + + my_cluster_builder = kuberay_cluster_builder.ClusterBuilder() + + cluster1 = ( + my_cluster_builder.build_meta( + name="new-cluster1", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + + print("creating raycluster = {}".format(cluster1["metadata"]["name"])) + my_kuberay_api.create_ray_cluster(body=cluster1) + + # the rest of the code is simply to list and cleanup the created cluster + kube_ray_list = my_kuberay_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kuberay_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) + + +if __name__ == "__main__": + main() diff --git 
a/src/codeflare_sdk/vendored/examples/use-director.py b/src/codeflare_sdk/vendored/examples/use-director.py new file mode 100644 index 00000000..2608c154 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-director.py @@ -0,0 +1,98 @@ +import sys +import os +from os import path +import json +import time + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. +""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) + +""" +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +def wait(duration: int = 5, step_name: str = "next"): + print("waiting for {} seconds before {} step".format(duration, step_name)) + for i in range(duration, 0, -1): + sys.stdout.write(str(i) + " ") + sys.stdout.flush() + time.sleep(1) + print() + + +def main(): + print("starting cluster handler...") + + my_kube_ray_api = kuberay_cluster_api.RayClusterApi() + + my_cluster_director = kuberay_cluster_builder.Director() + + # building the raycluster representation + cluster_body = my_cluster_director.build_small_cluster( + name="new-small-cluster", k8s_namespace="default" + ) + + # creating the raycluster in k8s + if cluster_body: + print("creating the cluster...") + my_kube_ray_api.create_ray_cluster(body=cluster_body) + + # now the cluster should be created. 
+ # the rest of the code is simply to fetch, print and cleanup the created cluster + + print("fetching the cluster...") + # fetching the raycluster from k8s api-server + kube_ray_cluster = my_kube_ray_api.get_ray_cluster( + name=cluster_body["metadata"]["name"], k8s_namespace="default" + ) + + if kube_ray_cluster: + print( + "try: kubectl -n {} get raycluster {} -o yaml".format( + kube_ray_cluster["metadata"]["namespace"], + kube_ray_cluster["metadata"]["name"], + ) + ) + wait(step_name="print created cluster in JSON") + print("printing the raycluster JSON representation...") + json_formatted_str = json.dumps(kube_ray_cluster, indent=2) + print(json_formatted_str) + + # waiting until the cluster is running, and has its status updated + is_running = my_kube_ray_api.wait_until_ray_cluster_running( + name=kube_ray_cluster["metadata"]["name"], + k8s_namespace=kube_ray_cluster["metadata"]["namespace"], + ) + + print( + "raycluster {} status is {}".format( + kube_ray_cluster["metadata"]["name"], "Running" if is_running else "unknown" + ) + ) + + wait(step_name="cleaning up") + print("deleting raycluster {}.".format(kube_ray_cluster["metadata"]["name"])) + + my_kube_ray_api.delete_ray_cluster( + name=kube_ray_cluster["metadata"]["name"], + k8s_namespace=kube_ray_cluster["metadata"]["namespace"], + ) + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py b/src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py new file mode 100644 index 00000000..97ac6a57 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py @@ -0,0 +1,213 @@ +import json +from os import path +import os +import sys +import time +from kubernetes.client.rest import ApiException +from kubernetes import client +from kubernetes.stream import stream + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by 
uncommenting the following code. +""" + +""" +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + + +configmap_body: dict = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "ray-code-single"}, + "data": { + "sample_code.py": 'import ray\nfrom os import environ\nredis_pass = environ.get("REDIS_PASSWORD") \nprint("trying to connect to Ray!")\nray.init(address="auto", _redis_password=redis_pass)\nprint("now executing some code with Ray!")\nimport time\nstart = time.time()\n@ray.remote\ndef f():\n time.sleep(0.01)\n return ray._private.services.get_node_ip_address()\nvalues=set(ray.get([f.remote() for _ in range(1000)]))\nprint("Ray Nodes: ",str(values))\nfile = open("/tmp/ray_nodes.txt","a")\nfile.write("available nodes: %s\\n" % str(values))\nfile.close()\nend = time.time()\nprint("Execution time = ",end - start)\n' + }, +} + +cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0", "demo-cluster": "yes"}, + "name": "raycluster-getting-started", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": { + "dashboard-host": "0.0.0.0", + "num-cpus": "2", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "volumeMounts": [{"mountPath": "/opt", "name": "config"}], + } + ], + "resources": { + "limits": {"cpu": "2", "memory": "3G"}, + "requests": {"cpu": "1500m", "memory": "3G"}, + }, + "volumes": [ + { + "name": "config", + "configMap": { + "name": configmap_body["metadata"]["name"], + "items": [ + {"key": "sample_code.py", "path": 
"sample_code.py"} + ], + }, + } + ], + } + }, + }, + }, +} + +""" +the following code is simply to create a configmap and a raycluster using the kuberay_cluster_api + +after the cluster is created, the code will execute a python command in the head pod of the cluster + +then it will print the logs of the head pod + +then it will list the clusters and delete the cluster and the configmap +""" + + +def main(): + print("starting cluster handler...") + + my_kube_ray_api = kuberay_cluster_api.RayClusterApi() # creating the api object + + try: + my_kube_ray_api.core_v1_api.create_namespaced_config_map( + "default", configmap_body + ) + + except ApiException as e: + if e.status == 409: + print( + "configmap {} already exists = {} moving on...".format( + configmap_body["metadata"]["name"], e + ) + ) + else: + print("error creating configmap: {}".format(e)) + + # waiting for the configmap tp be created + time.sleep(3) + + my_kube_ray_api.create_ray_cluster(body=cluster_body) # creating the cluster + + # the rest of the code is simply to fetch, print and cleanup the created cluster + kube_ray_cluster = my_kube_ray_api.get_ray_cluster( + name=cluster_body["metadata"]["name"], k8s_namespace="default" + ) + + if kube_ray_cluster: + print("printing the raycluster json representation...") + json_formatted_str = json.dumps(kube_ray_cluster, indent=2) + print(json_formatted_str) + else: + print("Unable to fetch cluster {}".format(cluster_body["metadata"]["name"])) + return + + print( + "try: kubectl -n default get raycluster {} -o yaml".format( + kube_ray_cluster["metadata"]["name"] + ) + ) + # the rest of the code is simply to list and cleanup the created cluster + + time.sleep(3) + try: + pod_list: client.V1PodList = my_kube_ray_api.core_v1_api.list_namespaced_pod( + namespace="default", + label_selector="ray.io/cluster={}".format(cluster_body["metadata"]["name"]), + ) # getting the pods of the cluster + if pod_list != None: + for pod in pod_list.items: + try: + # Calling exec 
and waiting for response + exec_command = ["python", "/opt/sample_code.py"] + + print( + "executing a Python command in the raycluster: {}".format( + exec_command + ) + ) + # executing a ray command in the head pod + resp = stream( + my_kube_ray_api.core_v1_api.connect_get_namespaced_pod_exec, + pod.metadata.name, + "default", + command=exec_command, + stderr=True, + stdin=False, + stdout=True, + tty=False, + ) + print("Response: " + resp) + + # getting the logs from the pod + time.sleep(3) + print( + "getting the logs from the raycluster pod: {}".format( + pod.metadata.name + ) + ) + api_response = my_kube_ray_api.core_v1_api.read_namespaced_pod_log( + name=pod.metadata.name, namespace="default" + ) + print(api_response) + + except ApiException as e: + print("An exception has ocurred in reading the logs {}".format(e)) + except ApiException as e: + print("An exception has ocurred in listing pods the logs".format(e)) + + kube_ray_list = my_kube_ray_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kube_ray_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) # deleting the cluster + + try: + my_kube_ray_api.core_v1_api.delete_namespaced_config_map( + configmap_body["metadata"]["name"], "default" + ) # deleting the configmap + print("deleting configmap: {}".format(configmap_body["metadata"]["name"])) + except ApiException as e: + if e.status == 404: + print( + "configmap = {}, does not exist moving on...".format( + configmap_body["metadata"]["name"], e + ) + ) + else: + print("error deleting configmap: {}".format(e)) + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-raw-with-api.py b/src/codeflare_sdk/vendored/examples/use-raw-with-api.py new file mode 100644 index 
00000000..5ab89586 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-raw-with-api.py @@ -0,0 +1,195 @@ +import json +from os import path +import os +import sys + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. +""" + +""" +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0", "demo-cluster": "yes"}, + "name": "raycluster-mini-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": { + "dashboard-host": "0.0.0.0", + "num-cpus": "1", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "resources": { + "limits": {"cpu": 1, "memory": "2Gi"}, + "requests": {"cpu": "500m", "memory": "2Gi"}, + }, + "ports": [ + {"containerPort": 6379, "name": "gcs-server"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + } + ] + } + }, + }, + }, +} + + +cluster_body2: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0", "demo-cluster": "yes"}, + "name": "raycluster-complete-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": {"dashboard-host": "0.0.0.0"}, + "template": { + "metadata": {"labels": {}}, + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "ports": [ + 
{"containerPort": 6379, "name": "gcs"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "2G"}, + "requests": {"cpu": "500m", "memory": "2G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + }, + }, + }, + "workerGroupSpecs": [ + { + "replicas": 1, + "minReplicas": 1, + "maxReplicas": 10, + "groupName": "small-group", + "rayStartParams": {}, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.46.0", + "lifecycle": { + "preStop": { + "exec": { + "command": ["/bin/sh", "-c", "ray stop"] + } + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "2", "memory": "3G"}, + "requests": {"cpu": "1500m", "memory": "3G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + } + }, + } + ], + }, +} + + +def main(): + print("starting cluster handler...") + + my_kube_ray_api = kuberay_cluster_api.RayClusterApi() + + my_kube_ray_api.create_ray_cluster(body=cluster_body) + + my_kube_ray_api.create_ray_cluster(body=cluster_body2) + + # the rest of the code is simply to fetch, print and cleanup the created cluster + kube_ray_cluster = my_kube_ray_api.get_ray_cluster( + name=cluster_body["metadata"]["name"], k8s_namespace="default" + ) + + if kube_ray_cluster: + print("printing the raycluster json representation...") + json_formatted_str = json.dumps(kube_ray_cluster, indent=2) + print(json_formatted_str) + else: + print("Unable to fetch cluster {}".format(cluster_body["metadata"]["name"])) + return + + print( + "try: kubectl -n default get raycluster {} -o yaml".format( + kube_ray_cluster["metadata"]["name"] + ) + ) + # the rest of the code is simply to list and cleanup 
the created cluster + kube_ray_list = my_kube_ray_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kube_ray_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-utils.py b/src/codeflare_sdk/vendored/examples/use-utils.py new file mode 100644 index 00000000..ab3e3736 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-utils.py @@ -0,0 +1,117 @@ +import sys +import os +from os import path +import json + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. 
+""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" + +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import ( + kuberay_cluster_utils, + kuberay_cluster_builder, +) + + +def main(): + print("starting cluster handler...") + my_kuberay_api = kuberay_cluster_api.RayClusterApi() # this is the main api object + + my_cluster_builder = ( + kuberay_cluster_builder.ClusterBuilder() + ) # this is the builder object, to create a cluster with a more granular control + + my_Cluster_utils = ( + kuberay_cluster_utils.ClusterUtils() + ) # this is the utils object, to perform operations on a cluster + + cluster1 = ( + my_cluster_builder.build_meta( + name="new-cluster1", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) # this is the cluster object, it is a dict + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + + print("creating raycluster = {}".format(cluster1["metadata"]["name"])) + my_kuberay_api.create_ray_cluster( + body=cluster1 + ) # this is the api call to create the cluster1 in k8s + + cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( + cluster1, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 + ) + + if succeeded: + print( + "trying to patch raycluster = {}".format( + cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) # this is the api call to patch the cluster1 in k8s + + cluster_to_patch, 
succeeded = my_Cluster_utils.duplicate_worker_group( + cluster1, group_name="workers", new_group_name="duplicate-workers" + ) # this is the api call to duplicate the worker group in the cluster1 + if succeeded: + print( + "trying to patch raycluster = {}".format( + cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) + + # the rest of the code is simply to list and cleanup the created cluster + kube_ray_list = my_kuberay_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) # this is the api call to list the clusters in k8s + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kuberay_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) # this is the api call to delete the cluster in k8s + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/poetry.lock b/src/codeflare_sdk/vendored/poetry.lock new file mode 100644 index 00000000..b8d82ccc --- /dev/null +++ b/src/codeflare_sdk/vendored/poetry.lock @@ -0,0 +1,439 @@ +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. 
+ +[[package]] +name = "cachetools" +version = "5.5.2" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, + {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, +] + +[[package]] +name = "certifi" +version = "2025.6.15" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"}, + {file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db"}, + {file = 
"charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c"}, + {file = 
"charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cad5f45b3146325bb38d6855642f6fd609c3f7cad4dbaf75549bf3b904d3184"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2680962a4848b3c4f155dc2ee64505a9c57186d0d56b43123b17ca3de18f0fa"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36b31da18b8890a76ec181c3cf44326bf2c48e36d393ca1b72b3f484113ea344"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4074c5a429281bf056ddd4c5d3b740ebca4d43ffffe2ef4bf4d2d05114299da"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9e36a97bee9b86ef9a1cf7bb96747eb7a15c2f22bdb5b516434b00f2a599f02"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_aarch64.whl", 
hash = "sha256:1b1bde144d98e446b056ef98e59c256e9294f6b74d7af6846bf5ffdafd687a7d"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:915f3849a011c1f593ab99092f3cecfcb4d65d8feb4a64cf1bf2d22074dc0ec4"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:fb707f3e15060adf5b7ada797624a6c6e0138e2a26baa089df64c68ee98e040f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:25a23ea5c7edc53e0f29bae2c44fcb5a1aa10591aae107f2a2b2583a9c5cbc64"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:770cab594ecf99ae64c236bc9ee3439c3f46be49796e265ce0cc8bc17b10294f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win32.whl", hash = "sha256:6a0289e4589e8bdfef02a80478f1dfcb14f0ab696b5a00e1f4b8a14a307a3c58"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6fc1f5b51fa4cecaa18f2bd7a003f3dd039dd615cd69a2afd6d3b19aed6775f2"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76af085e67e56c8816c3ccf256ebd136def2ed9654525348cfa744b6802b69eb"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45ba65510e2647721e35323d6ef54c7974959f6081b58d4ef5d87c60c84919a"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75d10d37a47afee94919c4fab4c22b9bc2a8bf7d4f46f87363bcf0573f3ff4f5"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6333b3aa5a12c26b2a4d4e7335a28f1475e0e5e17d69d55141ee3cab736f66d1"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash 
= "sha256:e8323a9b031aa0393768b87f04b4164a40037fb2a3c11ac06a03ffecd3618027"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:24498ba8ed6c2e0b56d4acbf83f2d989720a93b41d712ebd4f4979660db4417b"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:844da2b5728b5ce0e32d863af26f32b5ce61bc4273a9c720a9f3aa9df73b1455"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:65c981bdbd3f57670af8b59777cbfae75364b483fa8a9f420f08094531d54a01"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3c21d4fca343c805a52c0c78edc01e3477f6dd1ad7c47653241cf2a206d4fc58"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dc7039885fa1baf9be153a0626e337aa7ec8bf96b0128605fb0d77788ddc1681"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win32.whl", hash = "sha256:8272b73e1c5603666618805fe821edba66892e2870058c94c53147602eab29c7"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:70f7172939fdf8790425ba31915bfbe8335030f05b9913d7ae00a87d4395620a"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e92fca20c46e9f5e1bb485887d074918b13543b1c2a1185e69bb8d17ab6236a7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50bf98d5e563b83cc29471fa114366e6806bc06bc7a25fd59641e41445327836"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:721c76e84fe669be19c5791da68232ca2e05ba5185575086e384352e2c309597"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:82d8fd25b7f4675d0c47cf95b594d4e7b158aca33b76aa63d07186e13c0e0ab7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3daeac64d5b371dea99714f08ffc2c208522ec6b06fbc7866a450dd446f5c0f"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dccab8d5fa1ef9bfba0590ecf4d46df048d18ffe3eec01eeb73a42e0d9e7a8ba"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aaf27faa992bfee0264dc1f03f4c75e9fcdda66a519db6b957a3f826e285cf12"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:eb30abc20df9ab0814b5a2524f23d75dcf83cde762c161917a2b4b7b55b1e518"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c72fbbe68c6f32f251bdc08b8611c7b3060612236e960ef848e0a517ddbe76c5"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:982bb1e8b4ffda883b3d0a521e23abcd6fd17418f6d2c4118d257a10199c0ce3"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win32.whl", hash = "sha256:43e0933a0eff183ee85833f341ec567c0980dae57c464d8a508e1b2ceb336471"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:d11b54acf878eef558599658b0ffca78138c8c3655cf4f3a4a673c437e67732e"}, + {file = "charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0"}, + {file = "charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63"}, +] + +[[package]] +name = "durationpy" +version = "0.10" +description = "Module for converting between datetime.timedelta and Go's Duration strings." 
+optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286"}, + {file = "durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba"}, +] + +[[package]] +name = "google-auth" +version = "2.40.3" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca"}, + {file = "google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0)", "requests (>=2.20.0,<3.0.0)"] +enterprise-cert = ["cryptography", "pyopenssl"] +pyjwt = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] +pyopenssl = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0)"] +testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] +urllib3 = ["packaging", "urllib3"] + +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "idna-3.10-py3-none-any.whl", 
hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "kubernetes" +version = "33.1.0" +description = "Kubernetes python client" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "kubernetes-33.1.0-py2.py3-none-any.whl", hash = "sha256:544de42b24b64287f7e0aa9513c93cb503f7f40eea39b20f66810011a86eabc5"}, + {file = "kubernetes-33.1.0.tar.gz", hash = "sha256:f64d829843a54c251061a8e7a14523b521f2dc5c896cf6d65ccf348648a88993"}, +] + +[package.dependencies] +certifi = ">=14.05.14" +durationpy = ">=0.7" +google-auth = ">=1.0.1" +oauthlib = ">=3.2.2" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4.1" +requests = "*" +requests-oauthlib = "*" +six = ">=1.9.0" +urllib3 = ">=1.24.2" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" + +[package.extras] +adal = ["adal (>=1.0.2)"] + +[[package]] +name = "oauthlib" +version = "3.3.1" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1"}, + {file = "oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "pyasn1" +version = "0.6.1" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyasn1-0.6.1-py3-none-any.whl", 
hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, + {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, + {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, +] + +[package.dependencies] +pyasn1 = ">=0.6.1,<0.7.0" + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = 
"sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = 
"sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, +] + +[[package]] +name = "requests" +version = "2.32.4" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, + {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset_normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=3.4" +groups = ["main"] +files = [ + {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, + {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "rsa" +version = "4.9.1" +description = "Pure-Python RSA implementation" +optional = false +python-versions = "<4,>=3.6" +groups = ["main"] +files = [ + {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, + {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = 
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, + {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "websocket-client" +version = "1.8.0" +description = "WebSocket client for Python with low level API options" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[metadata] +lock-version = "2.1" +python-versions = "^3.11" +content-hash = "b36691a561f80767983438fab17d96ee0064eac943f5055abc2dcfee84c07dd7" diff --git a/src/codeflare_sdk/vendored/pyproject.toml b/src/codeflare_sdk/vendored/pyproject.toml new file mode 100755 index 00000000..916829ba --- /dev/null +++ b/src/codeflare_sdk/vendored/pyproject.toml @@ -0,0 +1,26 @@ +[tool.poetry] +name = "python-client" +version = "0.0.0-dev" +description = "Python Client for Kuberay" +license = "Apache-2.0" + +readme = "README.md" +repository = 
"https://github.com/ray-project/kuberay" +homepage = "https://github.com/ray-project/kuberay" +keywords = ["kuberay", "python", "client"] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent" +] +packages = [ + { include = "python_client" } +] + +[tool.poetry.dependencies] +python = "^3.11" +kubernetes = ">=25.0.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/src/codeflare_sdk/vendored/python_client/__init__.py b/src/codeflare_sdk/vendored/python_client/__init__.py new file mode 100644 index 00000000..6849410a --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/__init__.py @@ -0,0 +1 @@ +__version__ = "1.1.0" diff --git a/src/codeflare_sdk/vendored/python_client/constants.py b/src/codeflare_sdk/vendored/python_client/constants.py new file mode 100644 index 00000000..d47e270d --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/constants.py @@ -0,0 +1,13 @@ +# Declares the constants that are used by the client +import logging + +# Group, Version, Plural +GROUP = "ray.io" +CLUSTER_VERSION = "v1" +JOB_VERSION = "v1" +CLUSTER_PLURAL = "rayclusters" +JOB_PLURAL = "rayjobs" +CLUSTER_KIND = "RayCluster" +JOB_KIND = "RayJob" +# log level +LOGLEVEL = logging.INFO diff --git a/src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py b/src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py new file mode 100644 index 00000000..8307cdda --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py @@ -0,0 +1,311 @@ +""" +Set of APIs to manage rayclusters. +""" + +__copyright__ = "Copyright 2021, Microsoft Corp." 
class RayClusterApi:
    """
    RayClusterApi provides APIs to list, get, create, delete and patch rayclusters.

    Kubernetes ``ApiException`` errors raised by the API calls are logged and
    reported through the return value (``None`` or ``False``); the methods of
    this class do not re-raise them.

    Methods:
    - list_ray_clusters(k8s_namespace: str = "default", label_selector: str = "", async_req: bool = False) -> Any:
    - get_ray_cluster(name: str, k8s_namespace: str = "default") -> Any:
    - get_ray_cluster_status(name: str, k8s_namespace: str = "default", timeout: int = 60, delay_between_attempts: int = 5) -> Any:
    - wait_until_ray_cluster_running(name: str, k8s_namespace: str = "default", timeout: int = 60, delay_between_attempts: int = 5) -> bool:
    - create_ray_cluster(body: Any, k8s_namespace: str = "default") -> Any:
    - delete_ray_cluster(name: str, k8s_namespace: str = "default") -> Any:
    - patch_ray_cluster(name: str, ray_patch: Any, k8s_namespace: str = "default") -> bool:
    """

    # initial config to setup the kube client
    def __init__(self):
        """Load the Kubernetes configuration and create the API clients.

        The local kubeconfig is tried first; if that fails with a
        ``ConfigException`` the in-cluster configuration is tried.

        Raises:
            config.ConfigException: If neither configuration can be loaded.
        """
        try:
            self.kube_config: Optional[Any] = config.load_kube_config()
        except config.ConfigException:
            # No kubeconfig found, try in-cluster config
            try:
                self.kube_config = config.load_incluster_config()
            except config.ConfigException:
                log.error("Failed to load both kubeconfig and in-cluster config")
                raise

        self.api = client.CustomObjectsApi()
        self.core_v1_api = client.CoreV1Api()

    def __del__(self):
        """Drop the references to the API client and the loaded configuration."""
        self.api = None
        self.kube_config = None

    def list_ray_clusters(
        self,
        k8s_namespace: str = "default",
        label_selector: str = "",
        async_req: bool = False,
    ) -> Any:
        """List Ray clusters in a given namespace.

        Parameters:
        - k8s_namespace (str, optional): The namespace in which to list the Ray clusters. Defaults to "default".
        - label_selector (str, optional): Label selector forwarded to the list call. Defaults to "".
        - async_req (bool, optional): Whether to make the request asynchronously. Defaults to False.

        Returns:
            Any: The list response when it contains an "items" key, otherwise None.
            None is also returned when the API call fails (the error is logged).
            When async_req is True, whatever the client returns for the pending
            request is passed through unchanged.
        """
        try:
            resource: Any = self.api.list_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.CLUSTER_VERSION,
                plural=constants.CLUSTER_PLURAL,
                namespace=k8s_namespace,
                label_selector=label_selector,
                async_req=async_req,
            )
        except ApiException as e:
            if e.status == 404:
                log.error("raycluster resource is not found. error = {}".format(e))
            else:
                log.error("error fetching custom resource: {}".format(e))
            return None

        if async_req:
            # An asynchronous call hands back a handle for the pending request
            # rather than the decoded response, so the "items" membership test
            # below must not be applied to it.
            return resource
        if "items" in resource:
            return resource
        return None

    def get_ray_cluster(self, name: str, k8s_namespace: str = "default") -> Any:
        """Get a specific Ray cluster in a given namespace.

        Parameters:
        - name (str): The name of the Ray cluster custom resource.
        - k8s_namespace (str, optional): The namespace in which to retrieve the Ray cluster. Defaults to "default".

        Returns:
            Any: The custom resource for the specified Ray cluster, or None if it
            is not found or the API call fails (the error is logged).
        """
        try:
            resource: Any = self.api.get_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.CLUSTER_VERSION,
                plural=constants.CLUSTER_PLURAL,
                name=name,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            if e.status == 404:
                log.error("raycluster resource is not found. error = {}".format(e))
            else:
                log.error("error fetching custom resource: {}".format(e))
            return None
        return resource

    def get_ray_cluster_status(
        self,
        name: str,
        k8s_namespace: str = "default",
        timeout: int = 60,
        delay_between_attempts: int = 5,
    ) -> Any:
        """Get a specific Ray cluster status in a given namespace.

        This method polls until the cluster resource has a non-empty status field.

        Parameters:
        - name (str): The name of the Ray cluster custom resource.
        - k8s_namespace (str, optional): The namespace in which to retrieve the Ray cluster. Defaults to "default".
        - timeout (int, optional): The duration in seconds after which we stop trying to get status. Defaults to 60 seconds.
        - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds.

        Returns:
            Any: The status of the specified Ray cluster, or None if the API call
            fails or no status appears before the timeout.
        """
        # A wall-clock deadline bounds the total wait even when the delay is
        # zero or negative (a plain "timeout -= delay" would never finish).
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                resource: Any = self.api.get_namespaced_custom_object_status(
                    group=constants.GROUP,
                    version=constants.CLUSTER_VERSION,
                    plural=constants.CLUSTER_PLURAL,
                    name=name,
                    namespace=k8s_namespace,
                )
            except ApiException as e:
                if e.status == 404:
                    log.error("raycluster resource is not found. error = {}".format(e))
                else:
                    log.error("error fetching custom resource: {}".format(e))
                return None

            if resource and "status" in resource and resource["status"]:
                return resource["status"]

            log.info("raycluster {} status not set yet, waiting...".format(name))
            # Never sleep past the deadline or for a negative duration.
            pause = min(delay_between_attempts, deadline - time.monotonic())
            time.sleep(max(pause, 0))

        log.info("timed out waiting for raycluster {} status".format(name))
        return None

    def wait_until_ray_cluster_running(
        self,
        name: str,
        k8s_namespace: str = "default",
        timeout: int = 60,
        delay_between_attempts: int = 5,
    ) -> bool:
        """Wait until a Ray cluster is in ready state.

        This method waits for the cluster status to have a state field with value 'ready'.

        Parameters:
        - name (str): The name of the Ray cluster custom resource.
        - k8s_namespace (str, optional): The namespace in which to retrieve the Ray cluster. Defaults to "default".
        - timeout (int, optional): The total duration in seconds after which we stop trying. Defaults to 60 seconds.
        - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds.

        Returns:
            bool: True if the raycluster state is 'ready', False if the timeout expires first.
        """
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break

            # Only the time that is actually left is handed to the nested
            # poll, so the overall wait stays within ``timeout``.
            status = self.get_ray_cluster_status(
                name, k8s_namespace, remaining, delay_between_attempts
            )

            if status and "state" in status:
                current_state = status["state"]
                if current_state == "ready":
                    log.info(
                        "raycluster {} is ready with state: {}".format(
                            name, current_state
                        )
                    )
                    return True
                log.info(
                    "raycluster {} is in state: {} (waiting for ready)".format(
                        name, current_state
                    )
                )
            else:
                log.info(
                    "raycluster {} state field not available yet, waiting...".format(
                        name
                    )
                )

            pause = min(delay_between_attempts, deadline - time.monotonic())
            time.sleep(max(pause, 0))

        log.info("raycluster {} has not become ready before timeout".format(name))
        return False

    def create_ray_cluster(self, body: Any, k8s_namespace: str = "default") -> Any:
        """Create a new Ray cluster custom resource.

        Parameters:
        - body (Any): The data of the custom resource to create.
        - k8s_namespace (str, optional): The namespace in which to create the custom resource. Defaults to "default".

        Returns:
            Any: The created custom resource, or None if it already exists or there was an error.
        """
        try:
            resource: Any = self.api.create_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.CLUSTER_VERSION,
                plural=constants.CLUSTER_PLURAL,
                body=body,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            if e.status == 409:
                log.error(
                    "raycluster resource already exists. error = {}".format(e.reason)
                )
            else:
                log.error("error creating custom resource: {}".format(e))
            return None
        return resource

    def delete_ray_cluster(self, name: str, k8s_namespace: str = "default") -> Any:
        """Delete a Ray cluster custom resource.

        Parameters:
        - name (str): The name of the Ray cluster custom resource to delete.
        - k8s_namespace (str, optional): The namespace in which the Ray cluster exists. Defaults to "default".

        Returns:
            Any: The response of the delete call, or None if the resource was
            already deleted or there was an error.
        """
        try:
            resource: Any = self.api.delete_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.CLUSTER_VERSION,
                plural=constants.CLUSTER_PLURAL,
                name=name,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            if e.status == 404:
                log.error(
                    "raycluster custom resource already deleted. error = {}".format(
                        e.reason
                    )
                )
            else:
                log.error(
                    "error deleting the raycluster custom resource: {}".format(e.reason)
                )
            return None
        return resource

    def patch_ray_cluster(
        self, name: str, ray_patch: Any, k8s_namespace: str = "default"
    ) -> bool:
        """Patch an existing Ray cluster custom resource.

        Parameters:
        - name (str): The name of the Ray cluster custom resource to be patched.
        - ray_patch (Any): The patch data for the Ray cluster.
        - k8s_namespace (str, optional): The namespace in which the Ray cluster exists. Defaults to "default".

        Returns:
            bool: True if the patch was successful, False otherwise.
        """
        try:
            # we patch the existing raycluster with the new config
            self.api.patch_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.CLUSTER_VERSION,
                plural=constants.CLUSTER_PLURAL,
                name=name,
                body=ray_patch,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            log.error("raycluster `{}` failed to patch, with error: {}".format(name, e))
            return False

        log.info("raycluster `%s` is patched successfully", name)
        return True
class RayjobApi:
    """
    RayjobApi provides APIs to submit, inspect, suspend, resubmit and delete rayjobs.

    Kubernetes ``ApiException`` errors raised by the API calls are logged and
    reported through the return value (``None`` or ``False``); the methods of
    this class do not re-raise them.

    Methods:
    - submit_job(k8s_namespace: str, job: Any) -> Any: Submit and execute a job asynchronously.
    - suspend_job(name: str, k8s_namespace: str) -> bool: Stop a job by suspending it.
    - resubmit_job(name: str, k8s_namespace: str) -> bool: Resubmit a job that has been suspended.
    - get_job(name: str, k8s_namespace: str) -> Any: Get a job.
    - list_jobs(k8s_namespace: str) -> Any: List all jobs.
    - get_job_status(name: str, k8s_namespace: str, timeout: int, delay_between_attempts: int) -> Any: Get the most recent status of a job.
    - wait_until_job_finished(name: str, k8s_namespace: str, timeout: int, delay_between_attempts: int) -> bool: Wait until a job is completed.
    - wait_until_job_running(name: str, k8s_namespace: str, timeout: int, delay_between_attempts: int) -> bool: Wait until a job reaches running state.
    - delete_job(name: str, k8s_namespace: str) -> bool: Delete a job and all of its associated data.
    """

    # initial config to setup the kube client
    def __init__(self):
        """Load the Kubernetes configuration and create the custom objects client.

        The local kubeconfig is tried first; if that fails with a
        ``ConfigException`` the in-cluster configuration is tried.

        Raises:
            config.ConfigException: If neither configuration can be loaded.
        """
        try:
            self.kube_config: Optional[Any] = config.load_kube_config()
        except config.ConfigException:
            # No kubeconfig found, try in-cluster config
            try:
                self.kube_config = config.load_incluster_config()
            except config.ConfigException:
                log.error("Failed to load both kubeconfig and in-cluster config")
                raise

        self.api = client.CustomObjectsApi()

    def __del__(self):
        """Drop the references to the API client and the loaded configuration."""
        self.api = None
        self.kube_config = None

    def submit_job(self, k8s_namespace: str = "default", job: Any = None) -> Any:
        """Submit a Ray job to a given namespace.

        Parameters:
        - k8s_namespace (str, optional): The namespace in which to create the Ray job. Defaults to "default".
        - job (Any): The RayJob custom resource body to create.

        Returns:
            Any: The created custom resource, or None if the API call fails (the error is logged).
        """
        try:
            return self.api.create_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.JOB_VERSION,
                plural=constants.JOB_PLURAL,
                body=job,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            log.error("error submitting ray job: {}".format(e))
            return None

    def get_job_status(
        self,
        name: str,
        k8s_namespace: str = "default",
        timeout: int = 60,
        delay_between_attempts: int = 5,
    ) -> Any:
        """Get a specific Ray job status in a given namespace.

        This method polls until the job resource has a non-empty status field.

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to retrieve the Ray job. Defaults to "default".
        - timeout (int, optional): The duration in seconds after which we stop trying to get status. Defaults to 60 seconds.
        - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds.

        Returns:
            Any: The status of the specified Ray job, or None if the API call
            fails or no status appears before the timeout.
        """
        # A wall-clock deadline bounds the total wait even when the delay is
        # zero or negative (a plain "timeout -= delay" would never finish).
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                resource: Any = self.api.get_namespaced_custom_object_status(
                    group=constants.GROUP,
                    version=constants.JOB_VERSION,
                    plural=constants.JOB_PLURAL,
                    name=name,
                    namespace=k8s_namespace,
                )
            except ApiException as e:
                if e.status == 404:
                    log.error("rayjob resource is not found. error = {}".format(e))
                else:
                    log.error("error fetching custom resource: {}".format(e))
                return None

            if resource and "status" in resource and resource["status"]:
                return resource["status"]

            log.info("rayjob {} status not set yet, waiting...".format(name))
            # Never sleep past the deadline or for a negative duration.
            pause = min(delay_between_attempts, deadline - time.monotonic())
            time.sleep(max(pause, 0))

        log.info("rayjob {} status not set yet, timing out...".format(name))
        return None

    def wait_until_job_finished(
        self,
        name: str,
        k8s_namespace: str = "default",
        timeout: int = 60,
        delay_between_attempts: int = 5,
    ) -> bool:
        """Wait until a Ray job reaches a terminal status.

        This method waits for the job to reach a terminal state by checking both
        jobDeploymentStatus (Complete, Failed) and jobStatus (the values listed
        in TERMINAL_JOB_STATUSES).

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to retrieve the Ray job. Defaults to "default".
        - timeout (int, optional): The total duration in seconds after which we stop trying. Defaults to 60 seconds.
        - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds.

        Returns:
            bool: True if the rayjob reaches a terminal status, False if the timeout expires first.
        """
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break

            # Only the time that is actually left is handed to the nested
            # poll, so the overall wait stays within ``timeout``.
            status = self.get_job_status(
                name, k8s_namespace, remaining, delay_between_attempts
            )

            if status:
                if "jobDeploymentStatus" in status:
                    deployment_status = status["jobDeploymentStatus"]
                    if deployment_status in ["Complete", "Failed"]:
                        log.info(
                            "rayjob {} has finished with deployment status: {}".format(
                                name, deployment_status
                            )
                        )
                        return True
                    elif deployment_status == "Suspended":
                        # Suspended is not terminal, continue waiting
                        log.info("rayjob {} is suspended".format(name))
                    elif deployment_status in ["Initializing", "Running", "Suspending"]:
                        log.info(
                            "rayjob {} is {}".format(name, deployment_status.lower())
                        )
                    elif deployment_status:
                        log.info(
                            "rayjob {} deployment status: {}".format(
                                name, deployment_status
                            )
                        )

                if "jobStatus" in status:
                    current_status = status["jobStatus"]
                    if current_status in ["", "PENDING"]:
                        log.info("rayjob {} has not started yet".format(name))
                    elif current_status == "RUNNING":
                        log.info("rayjob {} is running".format(name))
                    elif current_status in TERMINAL_JOB_STATUSES:
                        log.info(
                            "rayjob {} has finished with status {}!".format(
                                name, current_status
                            )
                        )
                        return True
                    else:
                        log.info(
                            "rayjob {} has an unknown status: {}".format(
                                name, current_status
                            )
                        )
                elif "jobDeploymentStatus" not in status:
                    log.info(
                        "rayjob {} status fields not available yet, waiting...".format(
                            name
                        )
                    )

            pause = min(delay_between_attempts, deadline - time.monotonic())
            time.sleep(max(pause, 0))

        log.info(
            "rayjob {} has not reached terminal status before timeout".format(name)
        )
        return False

    def wait_until_job_running(
        self,
        name: str,
        k8s_namespace: str = "default",
        timeout: int = 60,
        delay_between_attempts: int = 5,
    ) -> bool:
        """Wait until a Ray job reaches Running state.

        This method waits for the job's jobDeploymentStatus to reach "Running".
        It gives up early when the deployment status becomes Complete, Failed or
        Suspended before Running was observed.

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to retrieve the Ray job. Defaults to "default".
        - timeout (int, optional): The total duration in seconds after which we stop trying. Defaults to 60 seconds.
        - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds.

        Returns:
            bool: True if the rayjob reaches Running status, False otherwise.
        """
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break

            # Only the time that is actually left is handed to the nested
            # poll, so the overall wait stays within ``timeout``.
            status = self.get_job_status(
                name, k8s_namespace, remaining, delay_between_attempts
            )

            if status and "jobDeploymentStatus" in status:
                deployment_status = status["jobDeploymentStatus"]
                if deployment_status == "Running":
                    log.info("rayjob {} is running".format(name))
                    return True
                elif deployment_status in ["Complete", "Failed", "Suspended"]:
                    log.info(
                        "rayjob {} reached terminal/suspended status {} before running".format(
                            name, deployment_status
                        )
                    )
                    return False
                elif deployment_status:
                    log.info("rayjob {} is {}".format(name, deployment_status.lower()))
                else:
                    log.info("rayjob {} deployment status not set yet".format(name))
            else:
                log.info("rayjob {} status not available yet, waiting...".format(name))

            pause = min(delay_between_attempts, deadline - time.monotonic())
            time.sleep(max(pause, 0))

        log.info("rayjob {} has not reached running status before timeout".format(name))
        return False

    def suspend_job(self, name: str, k8s_namespace: str = "default") -> bool:
        """Stop a Ray job by patching its spec.suspend field to True.

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to stop the Ray job. Defaults to "default".

        Returns:
            bool: True if the patch request succeeded, False otherwise.
        """
        try:
            patch_body = {"spec": {"suspend": True}}
            self.api.patch_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.JOB_VERSION,
                plural=constants.JOB_PLURAL,
                name=name,
                namespace=k8s_namespace,
                body=patch_body,
            )
        except ApiException as e:
            if e.status == 404:
                log.error(f"rayjob {name} not found in namespace {k8s_namespace}")
            else:
                log.error(f"error stopping rayjob {name}: {e.reason}")
            return False

        log.info(f"Successfully suspended rayjob {name} in namespace {k8s_namespace}")
        return True

    def resubmit_job(self, name: str, k8s_namespace: str = "default") -> bool:
        """Resubmit a suspended Ray job by patching its spec.suspend field to False.

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to resubmit the Ray job. Defaults to "default".

        Returns:
            bool: True if the patch request succeeded, False otherwise.
        """
        try:
            # Patch the RayJob to set suspend=false
            patch_body = {"spec": {"suspend": False}}
            self.api.patch_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.JOB_VERSION,
                plural=constants.JOB_PLURAL,
                name=name,
                namespace=k8s_namespace,
                body=patch_body,
            )
        except ApiException as e:
            if e.status == 404:
                log.error(f"rayjob {name} not found in namespace {k8s_namespace}")
            else:
                log.error(f"error resubmitting rayjob {name}: {e.reason}")
            return False

        log.info(f"Successfully resubmitted rayjob {name} in namespace {k8s_namespace}")
        return True

    def delete_job(self, name: str, k8s_namespace: str = "default") -> bool:
        """Delete a Ray job custom resource.

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to delete the Ray job. Defaults to "default".

        Returns:
            bool: True if the delete request succeeded, False otherwise
            (including when the job no longer exists).
        """
        try:
            self.api.delete_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.JOB_VERSION,
                plural=constants.JOB_PLURAL,
                name=name,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            if e.status == 404:
                log.error(f"rayjob custom resource already deleted. error = {e.reason}")
            else:
                log.error(f"error deleting the rayjob custom resource: {e.reason}")
            return False

        log.info(f"Successfully deleted rayjob {name} in namespace {k8s_namespace}")
        return True

    def get_job(self, name: str, k8s_namespace: str = "default") -> Any:
        """Get a Ray job in a given namespace.

        Parameters:
        - name (str): The name of the Ray job custom resource.
        - k8s_namespace (str, optional): The namespace in which to look up the Ray job. Defaults to "default".

        Returns:
            Any: The custom resource, or None if it is not found or the API call fails.
        """
        try:
            return self.api.get_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.JOB_VERSION,
                plural=constants.JOB_PLURAL,
                name=name,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            if e.status == 404:
                log.error(f"rayjob {name} not found in namespace {k8s_namespace}")
            else:
                log.error(f"error fetching rayjob {name}: {e.reason}")
            return None

    def list_jobs(self, k8s_namespace: str = "default") -> Any:
        """List all Ray jobs in a given namespace.

        Parameters:
        - k8s_namespace (str, optional): The namespace in which to list Ray jobs. Defaults to "default".

        Returns:
            Any: The list response, or None if the API call fails (the error is logged).
        """
        try:
            return self.api.list_namespaced_custom_object(
                group=constants.GROUP,
                version=constants.JOB_VERSION,
                plural=constants.JOB_PLURAL,
                namespace=k8s_namespace,
            )
        except ApiException as e:
            log.error(f"error fetching rayjobs: {e.reason}")
            return None
class IClusterBuilder(metaclass=ABCMeta):
    """
    IClusterBuilder is an interface for building a cluster.

    The class defines abstract methods for building the metadata, head pod, worker groups, and retrieving the built cluster.
    """

    @staticmethod
    @abstractmethod
    def build_meta():
        "builds the cluster metadata"

    @staticmethod
    @abstractmethod
    def build_head():
        "builds the head pod"

    @staticmethod
    @abstractmethod
    def build_worker():
        "builds a worker group"

    @staticmethod
    @abstractmethod
    def get_cluster():
        "Returns the built cluster"


# Concrete implementation of the builder interface
class ClusterBuilder(IClusterBuilder):
    """
    ClusterBuilder implements the abstract methods of IClusterBuilder to build a cluster.

    The build_* methods return the builder itself so calls can be chained;
    ``succeeded`` holds the outcome reported by the most recent head/worker step.
    """

    def __init__(self):
        self.cluster: dict[str, Any] = {}
        self.succeeded: bool = False
        self.cluster_utils = kuberay_cluster_utils.ClusterUtils()

    def build_meta(
        self,
        name: str,
        k8s_namespace: str = "default",
        labels: dict = None,
        ray_version: str = "2.46.0",
    ) -> "ClusterBuilder":
        """Builds the metadata and ray version of the cluster.

        Parameters:
        - name (str): The name of the cluster.
        - k8s_namespace (str, optional): The namespace in which the Ray cluster exists. Defaults to "default".
        - labels (dict, optional): A dictionary of key-value pairs to add as labels to the cluster. Defaults to None.
        - ray_version (str, optional): The version of Ray to use for the cluster. Defaults to "2.46.0".

        Returns:
            ClusterBuilder: This builder, to allow chaining.
        """
        self.cluster = self.cluster_utils.populate_meta(
            cluster=self.cluster,
            name=name,
            k8s_namespace=k8s_namespace,
            labels=labels,
            ray_version=ray_version,
        )
        return self

    def build_head(
        self,
        ray_image: str = "rayproject/ray:2.46.0",
        service_type: str = "ClusterIP",
        cpu_requests: str = "2",
        memory_requests: str = "3G",
        cpu_limits: str = "2",
        memory_limits: str = "3G",
        ray_start_params: dict = None,
    ) -> "ClusterBuilder":
        """Build head node of the ray cluster.

        Parameters:
        - ray_image (str): Docker image for the head node. Default value is "rayproject/ray:2.46.0".
        - service_type (str): Service type of the head node. Default value is "ClusterIP".
        - cpu_requests (str): CPU requests for the head node. Default value is "2".
        - memory_requests (str): Memory requests for the head node. Default value is "3G".
        - cpu_limits (str): CPU limits for the head node. Default value is "2".
        - memory_limits (str): Memory limits for the head node. Default value is "3G".
        - ray_start_params (dict): Dictionary of start parameters for the head node.
            When omitted, {"dashboard-host": "0.0.0.0"} is used.

        Returns:
            ClusterBuilder: This builder, to allow chaining.
        """
        if ray_start_params is None:
            # Build the default per call so separate clusters never share
            # (and cannot mutate) one dictionary object.
            ray_start_params = {"dashboard-host": "0.0.0.0"}

        self.cluster, self.succeeded = self.cluster_utils.populate_ray_head(
            self.cluster,
            ray_image=ray_image,
            service_type=service_type,
            cpu_requests=cpu_requests,
            memory_requests=memory_requests,
            cpu_limits=cpu_limits,
            memory_limits=memory_limits,
            ray_start_params=ray_start_params,
        )
        return self

    def build_worker(
        self,
        group_name: str,
        ray_image: str = "rayproject/ray:2.46.0",
        ray_command: Any = None,
        init_image: str = "busybox:1.28",
        cpu_requests: str = "1",
        memory_requests: str = "1G",
        cpu_limits: str = "2",
        memory_limits: str = "2G",
        replicas: int = 1,
        min_replicas: int = -1,
        max_replicas: int = -1,
        ray_start_params: dict = None,
    ) -> "ClusterBuilder":
        """Build worker specifications of the cluster.

        Appends one worker group to the cluster's spec.workerGroupSpecs list.
        Calling it several times adds several groups. The cluster must already
        contain a "spec" section, otherwise an error is logged and
        ``succeeded`` is set to False.

        Parameters:
        - group_name (str): name of the worker group.
        - ray_image (str, optional): Docker image for the Ray process. Default is "rayproject/ray:2.46.0".
        - ray_command (Any, optional): Command to run in the Docker container. When omitted, ["/bin/bash", "-lc"] is used.
        - init_image (str, optional): Docker image for the init container. Default is "busybox:1.28".
        - cpu_requests (str, optional): CPU requests for the worker pods. Default is "1".
        - memory_requests (str, optional): Memory requests for the worker pods. Default is "1G".
        - cpu_limits (str, optional): CPU limits for the worker pods. Default is "2".
        - memory_limits (str, optional): Memory limits for the worker pods. Default is "2G".
        - replicas (int, optional): Number of worker pods to run. Default is 1.
        - min_replicas (int, optional): Minimum number of worker pods to run. A negative value (the default, -1) means ceil(replicas / 2).
        - max_replicas (int, optional): Maximum number of worker pods to run. A negative value (the default, -1) means replicas * 3.
        - ray_start_params (dict, optional): Additional parameters to pass to the ray start command. When omitted, {} is used.

        Returns:
            ClusterBuilder: This builder, to allow chaining.
        """
        # Fresh objects per call: mutable defaults in the signature would be
        # shared between every worker group built by this process.
        if ray_command is None:
            ray_command = ["/bin/bash", "-lc"]
        if ray_start_params is None:
            ray_start_params = {}

        if min_replicas < 0:
            min_replicas = int(math.ceil(replicas / 2))
        if max_replicas < 0:
            max_replicas = int(replicas * 3)

        if "spec" not in self.cluster:
            log.error(
                "error creating custom resource: {meta}, the spec section is missing, did you run build_head()?".format(
                    meta=self.cluster.get("metadata")
                )
            )
            self.succeeded = False
            return self

        spec = self.cluster["spec"]
        # The list lives under "spec"; create it only once so groups added by
        # earlier build_worker() calls are kept.
        if "workerGroupSpecs" not in spec:
            log.info(
                "setting the workerGroupSpecs for group_name {}".format(group_name)
            )
            spec["workerGroupSpecs"] = []

        worker_group, self.succeeded = self.cluster_utils.populate_worker_group(
            group_name,
            ray_image,
            ray_command,
            init_image,
            cpu_requests,
            memory_requests,
            cpu_limits,
            memory_limits,
            replicas,
            min_replicas,
            max_replicas,
            ray_start_params,
        )

        if self.succeeded:
            spec["workerGroupSpecs"].append(worker_group)
        return self

    def get_cluster(self):
        """Return a deep copy of the cluster built so far."""
        return copy.deepcopy(self.cluster)
+ """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None + + def build_small_cluster( + self, name: str, k8s_namespace: str = "default", labels: dict = None + ) -> dict: + """Builds a small cluster with the given name and k8s_namespace parameters with 1 workergroup, + the workgroup has 1 replica with 2 cpu and 2G memory limits + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The small cluster as a dictionary. + """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .build_worker( + group_name="{}-workers".format(name), + replicas=1, + min_replicas=0, + max_replicas=2, + cpu_requests="1", + memory_requests="1G", + cpu_limits="2", + memory_limits="2G", + ) + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None + + def build_medium_cluster( + self, name: str, k8s_namespace: str = "default", labels: str = None + ) -> dict: + """Builds a medium cluster with the given name and k8s_namespace parameters with 1 workergroup, + the workgroup has 3 replicas with 4 cpu and 4G memory limits + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The small cluster as a dictionary. 
+ """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .build_worker( + group_name="{}-workers".format(name), + replicas=3, + min_replicas=0, + max_replicas=6, + cpu_requests="2", + memory_requests="2G", + cpu_limits="4", + memory_limits="4G", + ) + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None + + def build_large_cluster( + self, name: str, k8s_namespace: str = "default", labels: dict = None + ) -> dict: + """Builds a medium cluster with the given name and k8s_namespace parameters. with 1 workergroup, + the workgroup has 6 replicas with 6 cpu and 6G memory limits + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The small cluster as a dictionary. + """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .build_worker( + group_name="{}-workers".format(name), + replicas=6, + min_replicas=0, + max_replicas=12, + cpu_requests="3", + memory_requests="4G", + cpu_limits="6", + memory_limits="8G", + ) + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None diff --git a/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py b/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py new file mode 100644 index 00000000..ac36fa93 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py @@ -0,0 +1,473 @@ +""" +Set of helper methods to manage rayclusters. 
Requires Python 3.6 and higher +""" + +import logging +import copy +import re +from typing import Any, Tuple +from codeflare_sdk.vendored.python_client import constants + + +log = logging.getLogger(__name__) +if logging.getLevelName(log.level) == "NOTSET": + logging.basicConfig(format="%(asctime)s %(message)s", level=constants.LOGLEVEL) + +""" +ClusterUtils contains methods to facilitate modifying/populating the config of a raycluster +""" + + +class ClusterUtils: + """ + ClusterUtils - Utility class for populating cluster information + + Methods: + - populate_meta(cluster: dict, name: str, k8s_namespace: str, labels: dict, ray_version: str) -> dict: + - populate_ray_head(cluster: dict, ray_image: str,service_type: str, cpu_requests: str, memory_requests: str, cpu_limits: str, memory_limits: str, ray_start_params: dict) -> Tuple[dict, bool]: + - populate_worker_group(cluster: dict, group_name: str, ray_image: str, ray_command: Any, init_image: str, cpu_requests: str, memory_requests: str, cpu_limits: str, memory_limits: str, replicas: int, min_replicas: int, max_replicas: int, ray_start_params: dict) -> Tuple[dict, bool]: + - update_worker_group_replicas(cluster: dict, group_name: str, max_replicas: int, min_replicas: int, replicas: int) -> Tuple[dict, bool]: + """ + + def populate_meta( + self, + cluster: dict, + name: str, + k8s_namespace: str, + labels: dict, + ray_version: str, + ) -> dict: + """Populate the metadata and ray version of the cluster. + + Parameters: + - cluster (dict): A dictionary representing a cluster. + - name (str): The name of the cluster. + - k8s_namespace (str): The namespace of the cluster. + - labels (dict): A dictionary of labels to be applied to the cluster. + - ray_version (str): The version of Ray to use in the cluster. + + Returns: + dict: The updated cluster dictionary with metadata and ray version populated. 
+ """ + + assert self.is_valid_name(name) + + cluster["apiVersion"] = "{group}/{version}".format( + group=constants.GROUP, version=constants.CLUSTER_VERSION + ) + cluster["kind"] = constants.CLUSTER_KIND + cluster["metadata"] = { + "name": name, + "namespace": k8s_namespace, + "labels": labels, + } + cluster["spec"] = {"rayVersion": ray_version} + return cluster + + def populate_ray_head( + self, + cluster: dict, + ray_image: str, + service_type: str, + cpu_requests: str, + memory_requests: str, + cpu_limits: str, + memory_limits: str, + ray_start_params: dict, + ) -> Tuple[dict, bool]: + """Populate the ray head specs of the cluster + Parameters: + - cluster (dict): The dictionary representation of the cluster. + - ray_image (str): The name of the ray image to use for the head node. + - service_type (str): The type of service to run for the head node. + - cpu_requests (str): The CPU resource requests for the head node. + - memory_requests (str): The memory resource requests for the head node. + - cpu_limits (str): The CPU resource limits for the head node. + - memory_limits (str): The memory resource limits for the head node. + - ray_start_params (dict): The parameters for starting the Ray cluster. + + Returns: + - Tuple (dict, bool): The updated cluster, and a boolean indicating whether the update was successful. + """ + # validate arguments + try: + arguments = locals() + for k, v in arguments.items(): + assert v + except AssertionError as e: + log.error( + "error creating ray head, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return cluster, False + + # make sure metadata exists + if "spec" in cluster.keys(): + if "headGroupSpec" not in cluster.keys(): + log.info( + "setting the headGroupSpec for cluster {}".format( + cluster["metadata"]["name"] + ) + ) + cluster["spec"]["headGroupSpec"] = [] + else: + log.error("error creating ray head, the spec and/or metadata is not define") + return cluster, False + + # populate headGroupSpec + cluster["spec"]["headGroupSpec"] = { + "serviceType": service_type, + "rayStartParams": ray_start_params, + "template": { + "spec": { + "containers": [ + { + "image": ray_image, + "name": "ray-head", + "ports": [ + { + "containerPort": 6379, + "name": "gcs-server", + "protocol": "TCP", + }, + { + "containerPort": 8265, + "name": "dashboard", + "protocol": "TCP", + }, + { + "containerPort": 10001, + "name": "client", + "protocol": "TCP", + }, + ], + "resources": { + "requests": { + "cpu": cpu_requests, + "memory": memory_requests, + }, + "limits": {"cpu": cpu_limits, "memory": memory_limits}, + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + } + ], + "volumes": [{"emptyDir": {}, "name": "ray-logs"}], + } + }, + } + + return cluster, True + + def populate_worker_group( + self, + group_name: str, + ray_image: str, + ray_command: Any, + init_image: str, + cpu_requests: str, + memory_requests: str, + cpu_limits: str, + memory_limits: str, + replicas: int, + min_replicas: int, + max_replicas: int, + ray_start_params: dict, + ) -> Tuple[dict, bool]: + """Populate the worker group specification in the cluster dictionary. + + Parameters: + - cluster (dict): Dictionary representing the cluster spec. + - group_name (str): The name of the worker group. + - ray_image (str): The image to use for the Ray worker containers. + - ray_command (Any): The command to run in the Ray worker containers. + - init_image (str): The init container image to use. 
+ - cpu_requests (str): The requested CPU resources for the worker containers. + - memory_requests (str): The requested memory resources for the worker containers. + - cpu_limits (str): The limit on CPU resources for the worker containers. + - memory_limits (str): The limit on memory resources for the worker containers. + - replicas (int): The desired number of replicas for the worker group. + - min_replicas (int): The minimum number of replicas for the worker group. + - max_replicas (int): The maximum number of replicas for the worker group. + - ray_start_params (dict): The parameters to pass to the Ray worker start command. + + Returns: + - Tuple[dict, bool]: A tuple of the cluster specification and a boolean indicating + whether the worker group was successfully populated. + """ + # validate arguments + try: + arguments = locals() + for k, v in arguments.items(): + if k != "min_replicas" and k != "ray_start_params": + assert v + except AssertionError as e: + log.error( + "error populating worker group, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return None, False + + assert self.is_valid_name(group_name) + assert max_replicas >= min_replicas + + worker_group: dict[str, Any] = { + "groupName": group_name, + "maxReplicas": max_replicas, + "minReplicas": min_replicas, + "rayStartParams": ray_start_params, + "replicas": replicas, + "template": { + "spec": { + "containers": [ + { + "image": ray_image, + "command": ray_command, + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "name": "ray-worker", + "resources": { + "requests": { + "cpu": cpu_requests, + "memory": memory_requests, + }, + "limits": { + "cpu": cpu_limits, + "memory": memory_limits, + }, + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + } + ], + "volumes": [{"emptyDir": {}, "name": "ray-logs"}], + } + }, + } + + return worker_group, True + + def update_worker_group_replicas( + self, + cluster: dict, + group_name: str, + max_replicas: int, + min_replicas: int, + replicas: int, + ) -> Tuple[dict, bool]: + """Update the number of replicas for a worker group in the cluster. + + Parameters: + - cluster (dict): The cluster to update. + - group_name (str): The name of the worker group to update. + - max_replicas (int): The maximum number of replicas for the worker group. + - min_replicas (int): The minimum number of replicas for the worker group. + - replicas (int): The desired number of replicas for the worker group. + + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster and a flag indicating whether the update was successful. + """ + try: + arguments = locals() + for k, v in arguments.items(): + if k != "min_replicas": + assert v + except AssertionError as e: + log.error( + "error updating worker group, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return cluster, False + + assert cluster["spec"]["workerGroupSpecs"] + assert max_replicas >= min_replicas + + for i in range(len(cluster["spec"]["workerGroupSpecs"])): + if cluster["spec"]["workerGroupSpecs"][i]["groupName"] == group_name: + cluster["spec"]["workerGroupSpecs"][i]["maxReplicas"] = max_replicas + cluster["spec"]["workerGroupSpecs"][i]["minReplicas"] = min_replicas + cluster["spec"]["workerGroupSpecs"][i]["replicas"] = replicas + return cluster, True + + return cluster, False + + def update_worker_group_resources( + self, + cluster: dict, + group_name: str, + cpu_requests: str, + memory_requests: str, + cpu_limits: str, + memory_limits: str, + container_name="unspecified", + ) -> Tuple[dict, bool]: + """Update the resources for a worker group pods in the cluster. + + Parameters: + - cluster (dict): The cluster to update. + - group_name (str): The name of the worker group to update. + - cpu_requests (str): CPU requests for the worker pods. + - memory_requests (str): Memory requests for the worker pods. + - cpu_limits (str): CPU limits for the worker pods. + - memory_limits (str): Memory limits for the worker pods. + + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster and a flag indicating whether the update was successful. + """ + try: + arguments = locals() + for k, v in arguments.items(): + if k != "min_replicas": + assert v + except AssertionError as e: + log.error( + "error updating worker group, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return cluster, False + + assert cluster["spec"]["workerGroupSpecs"] + + worker_groups = cluster["spec"]["workerGroupSpecs"] + + def add_values(group_index: int, container_index: int): + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["requests"]["cpu"] = cpu_requests + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["requests"]["memory"] = memory_requests + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["limits"]["cpu"] = cpu_limits + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["limits"]["memory"] = memory_limits + + for group_index, worker_group in enumerate(worker_groups): + if worker_group["groupName"] != group_name: + continue + + containers = worker_group["template"]["spec"]["containers"] + container_names = [container["name"] for container in containers] + + if len(containers) == 0: + log.error( + f"error updating container resources, the worker group {group_name} has no containers" + ) + return cluster, False + + if container_name == "unspecified": + add_values(group_index, 0) + return cluster, True + elif container_name == "all_containers": + for container_index in range(len(containers)): + add_values(group_index, container_index) + return cluster, True + elif container_name in container_names: + container_index = container_names.index(container_name) + add_values(group_index, container_index) + return cluster, True + + return cluster, False + + def duplicate_worker_group( + self, + cluster: dict, + group_name: str, + new_group_name: str, + ) -> Tuple[dict, bool]: + """Duplicate a worker group in the cluster. + + Parameters: + - cluster (dict): The cluster definition. + - group_name (str): The name of the worker group to be duplicated. + - new_group_name (str): The name for the duplicated worker group. 
+ + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster definition and a boolean indicating the success of the operation. + """ + try: + arguments = locals() + for k, v in arguments.items(): + assert v + except AssertionError as e: + log.error( + f"error duplicating worker group, the parameters are not fully defined. {k} = {v}" + ) + return cluster, False + assert self.is_valid_name(new_group_name) + assert cluster["spec"]["workerGroupSpecs"] + + worker_groups = cluster["spec"]["workerGroupSpecs"] + for _, worker_group in enumerate(worker_groups): + if worker_group["groupName"] == group_name: + duplicate_group = copy.deepcopy(worker_group) + duplicate_group["groupName"] = new_group_name + worker_groups.append(duplicate_group) + return cluster, True + + log.error( + f"error duplicating worker group, no match was found for {group_name}" + ) + return cluster, False + + def delete_worker_group( + self, + cluster: dict, + group_name: str, + ) -> Tuple[dict, bool]: + """Deletes a worker group in the cluster. + + Parameters: + - cluster (dict): The cluster definition. + - group_name (str): The name of the worker group to be duplicated. + + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster definition and a boolean indicating the success of the operation. + """ + try: + arguments = locals() + for k, v in arguments.items(): + assert v + except AssertionError as e: + log.error( + f"error creating ray head, the parameters are not fully defined. 
{k} = {v}" + ) + return cluster, False + + assert cluster["spec"]["workerGroupSpecs"] + + worker_groups = cluster["spec"]["workerGroupSpecs"] + first_or_none = next( + (x for x in worker_groups if x["groupName"] == group_name), None + ) + if first_or_none: + worker_groups.remove(first_or_none) + return cluster, True + + log.error(f"error removing worker group, no match was found for {group_name}") + return cluster, False + + def is_valid_name(self, name: str) -> bool: + msg = "The name must be 63 characters or less, begin and end with an alphanumeric character, and contain only dashes, dots, and alphanumerics." + if len(name) > 63 or not bool(re.match("^[a-z0-9]([-.]*[a-z0-9])+$", name)): + log.error(msg) + return False + return True + + def is_valid_label(self, name: str) -> bool: + msg = "The label name must be 63 characters or less, begin and end with an alphanumeric character, and contain only dashes, underscores, dots, and alphanumerics." + if len(name) > 63 or not bool(re.match("^[a-z0-9]([-._]*[a-z0-9])+$", name)): + log.error(msg) + return False + return True diff --git a/src/codeflare_sdk/vendored/python_client_test/README.md b/src/codeflare_sdk/vendored/python_client_test/README.md new file mode 100644 index 00000000..6c32e260 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/README.md @@ -0,0 +1,29 @@ +# Overview + +## For developers + +1. `pip install -U pip setuptools` +1. `cd clients/python-client && pip install -e .` + +Uninstall with `pip uninstall python-client`. 
+ +## For testing run + +`python -m unittest discover 'clients/python-client/python_client_test/'` + +### Coverage report + +#### Pre-requisites + +* `sudo apt install libsqlite3-dev` +* `pyenv install 3.6.5` # or your Python version +* `pip install db-sqlite3 coverage` + +__To gather data__ +`python -m coverage run -m unittest` + +__to generate a coverage report__ +`python -m coverage report` + +__to generate the test coverage report in HTML format__ +`python -m coverage html` diff --git a/src/codeflare_sdk/vendored/python_client_test/helpers.py b/src/codeflare_sdk/vendored/python_client_test/helpers.py new file mode 100644 index 00000000..1bcfdbc2 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/helpers.py @@ -0,0 +1,135 @@ +import time +from codeflare_sdk.vendored.python_client import constants + + +def create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + entrypoint="python -c \"import ray; ray.init(); @ray.remote\ndef hello(): return 'Hello from Ray!'; print(ray.get(hello.remote()))\"", + labels=None, +): + job_body = { + "apiVersion": constants.GROUP + "/" + constants.JOB_VERSION, + "kind": constants.JOB_KIND, + "metadata": { + "name": job_name, + "namespace": namespace, + "labels": { + "app.kubernetes.io/name": job_name, + "app.kubernetes.io/managed-by": "kuberay", + }, + }, + "spec": { + "clusterSelector": { + "ray.io/cluster": cluster_name, + }, + "entrypoint": entrypoint, + "submissionMode": "K8sJobMode", + }, + } + + # Add any additional labels if provided + if labels: + job_body["metadata"]["labels"].update(labels) + + return job_body + + +def create_job_with_ray_cluster_spec( + job_name, + namespace, + entrypoint="python -c \"import ray; ray.init(); @ray.remote\ndef hello(): return 'Hello from Ray!'; print(ray.get(hello.remote()))\"", + labels=None, +): + job_body = { + "apiVersion": constants.GROUP + "/" + constants.JOB_VERSION, + "kind": constants.JOB_KIND, + "metadata": { + "name": job_name, + "namespace": 
namespace, + "labels": { + "app.kubernetes.io/name": job_name, + "app.kubernetes.io/managed-by": "kuberay", + }, + }, + "spec": { + "rayClusterSpec": { + "headGroupSpec": { + "serviceType": "ClusterIP", + "replicas": 1, + "rayStartParams": { + "dashboard-host": "0.0.0.0", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.48.0", + "ports": [ + {"containerPort": 6379, "name": "gcs"}, + { + "containerPort": 8265, + "name": "dashboard", + }, + { + "containerPort": 10001, + "name": "client", + }, + ], + "resources": { + "limits": { + "cpu": "1", + "memory": "2Gi", + }, + "requests": { + "cpu": "500m", + "memory": "1Gi", + }, + }, + } + ] + } + }, + }, + "workerGroupSpecs": [ + { + "groupName": "small-worker", + "replicas": 1, + "rayStartParams": { + "num-cpus": "1", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.48.0", + "resources": { + "limits": { + "cpu": "1", + "memory": "1Gi", + }, + "requests": { + "cpu": "500m", + "memory": "512Mi", + }, + }, + } + ] + } + }, + } + ], + }, + "entrypoint": entrypoint, + "submissionMode": "K8sJobMode", + "shutdownAfterJobFinishes": True, + }, + } + + if labels: + job_body["metadata"]["labels"].update(labels) + + return job_body diff --git a/src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py b/src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py new file mode 100644 index 00000000..3fdb18e7 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py @@ -0,0 +1,345 @@ +import unittest +from codeflare_sdk.vendored.python_client import kuberay_cluster_api, constants +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +# Keep the original test cluster body for reference if needed +test_cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0"}, + "name": 
"raycluster-complete-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": {"dashboard-host": "0.0.0.0"}, + "template": { + "metadata": {"labels": {}}, + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "ports": [ + {"containerPort": 6379, "name": "gcs"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "2G"}, + "requests": {"cpu": "500m", "memory": "2G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + }, + }, + }, + "workerGroupSpecs": [ + { + "replicas": 1, + "minReplicas": 1, + "maxReplicas": 10, + "groupName": "small-group", + "rayStartParams": {}, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.46.0", + "lifecycle": { + "preStop": { + "exec": { + "command": ["/bin/sh", "-c", "ray stop"] + } + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "1G"}, + "requests": {"cpu": "500m", "memory": "1G"}, + }, + }, + { + "name": "side-car", + "image": "rayproject/ray:2.46.0", + "resources": { + "limits": {"cpu": "1", "memory": "1G"}, + "requests": {"cpu": "500m", "memory": "1G"}, + }, + }, + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + } + }, + } + ], + }, + "status": { + "state": "ready", + "availableWorkerReplicas": 2, + "desiredWorkerReplicas": 1, + "endpoints": {"client": "10001", "dashboard": "8265", "gcs-server": "6379"}, + "head": {"serviceIP": "10.152.183.194"}, + "lastUpdateTime": "2023-02-16T05:15:17Z", + "maxWorkerReplicas": 2, + }, +} + + +class TestClusterApi(unittest.TestCase): + """Comprehensive test suite for RayClusterApi functionality.""" + + 
def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) + self.api = kuberay_cluster_api.RayClusterApi() + self.director = kuberay_cluster_builder.Director() + + def test_create_and_get_ray_cluster(self): + """Test creating a cluster and retrieving it.""" + cluster_name = "test-create-cluster" + namespace = "default" + + # Build a small cluster using the director + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"test": "create-cluster"}, + ) + + # Ensure cluster was built successfully + self.assertIsNotNone(cluster_body, "Cluster should be built successfully") + self.assertEqual(cluster_body["metadata"]["name"], cluster_name) + + try: + # Create the cluster + created_cluster = self.api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone( + created_cluster, "Cluster should be created successfully" + ) + self.assertEqual(created_cluster["metadata"]["name"], cluster_name) + + # Get the cluster and verify it exists + retrieved_cluster = self.api.get_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.assertIsNotNone( + retrieved_cluster, "Cluster should be retrieved successfully" + ) + self.assertEqual(retrieved_cluster["metadata"]["name"], cluster_name) + self.assertEqual( + retrieved_cluster["spec"]["rayVersion"], + cluster_body["spec"]["rayVersion"], + ) + + finally: + # Clean up + self.api.delete_ray_cluster(name=cluster_name, k8s_namespace=namespace) + + def test_list_ray_clusters(self): + """Test listing Ray clusters in a namespace.""" + cluster_name_1 = "test-list-cluster-1" + cluster_name_2 = "test-list-cluster-2" + namespace = "default" + test_label = "test-list-clusters" + + # Build two small clusters + cluster_body_1 = self.director.build_small_cluster( + name=cluster_name_1, + k8s_namespace=namespace, + labels={"test": test_label}, + ) + cluster_body_2 = self.director.build_small_cluster( + name=cluster_name_2, + 
k8s_namespace=namespace, + labels={"test": test_label}, + ) + + try: + # Create both clusters + created_cluster_1 = self.api.create_ray_cluster( + body=cluster_body_1, k8s_namespace=namespace + ) + created_cluster_2 = self.api.create_ray_cluster( + body=cluster_body_2, k8s_namespace=namespace + ) + + self.assertIsNotNone(created_cluster_1, "First cluster should be created") + self.assertIsNotNone(created_cluster_2, "Second cluster should be created") + + # List all clusters + clusters_list = self.api.list_ray_clusters(k8s_namespace=namespace) + self.assertIsNotNone(clusters_list, "Should be able to list clusters") + self.assertIn("items", clusters_list, "Response should contain items") + + # Verify our test clusters are in the list + cluster_names = [ + item["metadata"]["name"] for item in clusters_list["items"] + ] + self.assertIn( + cluster_name_1, + cluster_names, + "First test cluster should be in the list", + ) + self.assertIn( + cluster_name_2, + cluster_names, + "Second test cluster should be in the list", + ) + + # Test listing with label selector + labeled_clusters = self.api.list_ray_clusters( + k8s_namespace=namespace, label_selector=f"test={test_label}" + ) + self.assertIsNotNone( + labeled_clusters, "Should be able to list clusters with label selector" + ) + labeled_cluster_names = [ + item["metadata"]["name"] for item in labeled_clusters["items"] + ] + self.assertIn( + cluster_name_1, + labeled_cluster_names, + "First test cluster should match label", + ) + self.assertIn( + cluster_name_2, + labeled_cluster_names, + "Second test cluster should match label", + ) + + finally: + # Clean up both clusters + self.api.delete_ray_cluster(name=cluster_name_1, k8s_namespace=namespace) + self.api.delete_ray_cluster(name=cluster_name_2, k8s_namespace=namespace) + + def test_cluster_status_and_wait_until_running(self): + """Test getting cluster status and waiting for cluster to be ready.""" + cluster_name = "test-status-cluster" + namespace = "default" + + # Build 
a small cluster + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"test": "status-cluster"}, + ) + + try: + # Create the cluster + created_cluster = self.api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone( + created_cluster, "Cluster should be created successfully" + ) + + # Test getting cluster status (may take some time to populate) + status = self.api.get_ray_cluster_status( + name=cluster_name, + k8s_namespace=namespace, + timeout=120, + delay_between_attempts=5, + ) + self.assertIsNotNone(status, "Cluster status should be retrieved") + + # Test waiting for cluster to be running + is_running = self.api.wait_until_ray_cluster_running( + name=cluster_name, + k8s_namespace=namespace, + timeout=180, + delay_between_attempts=10, + ) + self.assertTrue(is_running, "Cluster should become ready within timeout") + + # Verify final status after cluster is ready + final_status = self.api.get_ray_cluster_status( + name=cluster_name, + k8s_namespace=namespace, + timeout=10, + delay_between_attempts=2, + ) + self.assertIsNotNone(final_status, "Final status should be available") + self.assertIn("state", final_status, "Status should contain state field") + self.assertEqual( + final_status["state"], "ready", "Cluster should be in ready state" + ) + + finally: + # Clean up + self.api.delete_ray_cluster(name=cluster_name, k8s_namespace=namespace) + + def test_patch_ray_cluster(self): + """Test patching an existing Ray cluster.""" + cluster_name = "test-patch-cluster" + namespace = "default" + + # Build a small cluster + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"test": "patch-cluster"}, + ) + + try: + # Create the cluster + created_cluster = self.api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone( + created_cluster, "Cluster should be created successfully" + ) + + # 
Wait for cluster to be ready before patching + self.api.wait_until_ray_cluster_running( + name=cluster_name, + k8s_namespace=namespace, + timeout=180, + delay_between_attempts=10, + ) + + # Create a patch to update the cluster (e.g., add a label) + patch_data = { + "metadata": {"labels": {"test": "patch-cluster", "patched": "true"}} + } + + # Apply the patch + patch_result = self.api.patch_ray_cluster( + name=cluster_name, ray_patch=patch_data, k8s_namespace=namespace + ) + self.assertTrue(patch_result, "Patch operation should succeed") + + # Verify the patch was applied + updated_cluster = self.api.get_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.assertIsNotNone(updated_cluster, "Updated cluster should be retrieved") + self.assertIn( + "patched", + updated_cluster["metadata"]["labels"], + "Patched label should be present", + ) + self.assertEqual( + updated_cluster["metadata"]["labels"]["patched"], + "true", + "Patched label should have correct value", + ) + + finally: + # Clean up + self.api.delete_ray_cluster(name=cluster_name, k8s_namespace=namespace) diff --git a/src/codeflare_sdk/vendored/python_client_test/test_director.py b/src/codeflare_sdk/vendored/python_client_test/test_director.py new file mode 100644 index 00000000..07536971 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_director.py @@ -0,0 +1,121 @@ +import unittest +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +class TestDirector(unittest.TestCase): + def __init__(self, methodName: str = ...) 
-> None: + super().__init__(methodName) + self.director = kuberay_cluster_builder.Director() + + def test_build_basic_cluster(self): + cluster = self.director.build_basic_cluster(name="basic-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "basic-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + def test_build_small_cluster(self): + cluster = self.director.build_small_cluster(name="small-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "small-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 1 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"] + expected = "1" + self.assertEqual(actual, expected) + + def test_build_medium_cluster(self): + cluster = self.director.build_medium_cluster(name="medium-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "medium-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + 
self.assertEqual(actual, expected) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 3 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["groupName"] + expected = "medium-cluster-workers" + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + def test_build_large_cluster(self): + cluster = self.director.build_large_cluster(name="large-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "large-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 6 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["groupName"] + expected = "large-cluster-workers" + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"] + expected = "3" + self.assertEqual(actual, expected) diff --git a/src/codeflare_sdk/vendored/python_client_test/test_job_api.py b/src/codeflare_sdk/vendored/python_client_test/test_job_api.py new file mode 100644 index 00000000..bad75edc --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_job_api.py @@ -0,0 +1,567 @@ +import time +import unittest +from codeflare_sdk.vendored.python_client import kuberay_job_api, kuberay_cluster_api +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder +from helpers 
import create_job_with_cluster_selector, create_job_with_ray_cluster_spec + +namespace = "default" + + +class TestJobApi(unittest.TestCase): + def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) + + self.api = kuberay_job_api.RayjobApi() + self.cluster_api = kuberay_cluster_api.RayClusterApi() + self.director = kuberay_cluster_builder.Director() + + def test_submit_ray_job_to_existing_cluster(self): + """Test submitting a job to an existing cluster using clusterSelector.""" + cluster_name = "premade" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + self.assertIsNotNone(cluster_body, "Cluster should be built successfully") + self.assertEqual(cluster_body["metadata"]["name"], cluster_name) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running(cluster_name, namespace, 60, 10) + job_name = "premade-cluster-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + self.assertEqual(submitted_job["metadata"]["name"], job_name) + self.assertEqual( + submitted_job["spec"]["clusterSelector"]["ray.io/cluster"], cluster_name + ) + + self.api.wait_until_job_finished(job_name, namespace, 120, 10) + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + + self.api.delete_job(job_name, namespace) + + def test_get_job_status(self): + """Test getting job status for a running job.""" + cluster_name = "status-test-cluster" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + 
k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running(cluster_name, namespace, 60, 10) + + job_name = "status-test-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + status = self.api.get_job_status( + job_name, namespace, timeout=30, delay_between_attempts=2 + ) + self.assertIsNotNone(status, "Job status should be retrieved") + + # Verify expected status fields + self.assertIn( + "jobDeploymentStatus", + status, + "Status should contain jobDeploymentStatus field", + ) + self.assertIn("jobId", status, "Status should contain jobId field") + self.assertIn( + "rayClusterName", status, "Status should contain rayClusterName field" + ) + + self.api.wait_until_job_finished(job_name, namespace, 60, 5) + + final_status = self.api.get_job_status( + job_name, namespace, timeout=10, delay_between_attempts=1 + ) + self.assertIsNotNone(final_status, "Final job status should be retrieved") + + self.assertIn( + "jobDeploymentStatus", + final_status, + "Final status should contain jobDeploymentStatus field", + ) + + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.api.delete_job(job_name, namespace) + + def test_wait_until_job_finished(self): + """Test waiting for job completion.""" + cluster_name = "wait-test-cluster" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, 
k8s_namespace=namespace + ) + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running( + cluster_name, namespace, 180, 10 + ) + + job_name = "wait-test-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_finished( + job_name, namespace, timeout=180, delay_between_attempts=2 + ) + self.assertTrue(result, "Job should complete successfully within timeout") + + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.api.delete_job(job_name, namespace) + + def test_delete_job(self): + """Test deleting a job.""" + cluster_name = "delete-test-cluster" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running(cluster_name, namespace, 60, 10) + + job_name = "delete-test-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + self.api.wait_until_job_finished(job_name, namespace, 60, 5) + + delete_result = self.api.delete_job(job_name, namespace) + self.assertTrue(delete_result, "Job should be deleted successfully") + + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + + def 
test_get_job_status_nonexistent_job(self): + """Test getting status for a non-existent job.""" + status = self.api.get_job_status( + "nonexistent-job", namespace, timeout=2, delay_between_attempts=2 + ) + self.assertIsNone(status, "Status should be None for non-existent job") + + def test_wait_until_job_finished_nonexistent_job(self): + """Test waiting for completion of a non-existent job.""" + result = self.api.wait_until_job_finished( + "nonexistent-job", namespace, timeout=2, delay_between_attempts=2 + ) + self.assertFalse(result, "Should return False for non-existent job") + + def test_delete_job_nonexistent_job(self): + """Test deleting a non-existent job.""" + result = self.api.delete_job("nonexistent-job", namespace) + self.assertFalse(result, "Should return False for non-existent job") + + def test_submit_job_invalid_spec(self): + """Test submitting a job with invalid specification.""" + invalid_job = { + "apiVersion": "invalid/version", + "kind": "InvalidKind", + "metadata": { + "name": "invalid-job", + "namespace": namespace, + }, + "spec": { + "invalidField": "invalidValue", + }, + } + + result = self.api.submit_job(job=invalid_job, k8s_namespace=namespace) + self.assertIsNone(result, "Should return None for invalid job specification") + + def test_submit_job_with_ray_cluster_spec(self): + """Test submitting a job with rayClusterSpec - KubeRay will create and manage the cluster lifecycle.""" + job_name = "cluster-spec-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + self.assertEqual(submitted_job["metadata"]["name"], job_name) + + # Verify rayClusterSpec structure + self.assertIn( + "rayClusterSpec", + submitted_job["spec"], + "Job should have rayClusterSpec", + ) + self.assertIn( + "headGroupSpec", + 
submitted_job["spec"]["rayClusterSpec"], + "rayClusterSpec should have headGroupSpec", + ) + self.assertIn( + "workerGroupSpecs", + submitted_job["spec"]["rayClusterSpec"], + "rayClusterSpec should have workerGroupSpecs", + ) + + result = self.api.wait_until_job_finished(job_name, namespace, 300, 10) + self.assertTrue(result, "Job should complete successfully within timeout") + + final_status = self.api.get_job_status( + job_name, namespace, timeout=10, delay_between_attempts=1 + ) + self.assertIsNotNone(final_status, "Final job status should be retrieved") + self.assertIn( + "jobDeploymentStatus", + final_status, + "Final status should contain jobDeploymentStatus field", + ) + + finally: + self.api.delete_job(job_name, namespace) + + def test_suspend_job(self): + """Test stopping a running job.""" + job_name = "stop-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Job should reach running state before suspension") + + stop_result = self.api.suspend_job(job_name, namespace) + self.assertTrue(stop_result, "Job should be suspended successfully") + + suspended = self.wait_for_job_status( + job_name, namespace, "Suspended", timeout=30 + ) + self.assertTrue(suspended, "Job deployment status should be Suspended") + + finally: + self.api.delete_job(job_name, namespace) + + def test_resubmit_job(self): + """Test resubmitting a suspended job.""" + job_name = "resubmit-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job 
should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Job should reach running state before suspension") + + stop_result = self.api.suspend_job(job_name, namespace) + self.assertTrue(stop_result, "Job should be suspended successfully") + + suspended = self.wait_for_job_status( + job_name, namespace, "Suspended", timeout=30 + ) + self.assertTrue( + suspended, "Job should be in Suspended status before resubmission" + ) + + resubmit_result = self.api.resubmit_job(job_name, namespace) + self.assertTrue(resubmit_result, "Job should be resubmitted successfully") + + result = self.api.wait_until_job_finished( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Resubmitted job should complete successfully") + + finally: + self.api.delete_job(job_name, namespace) + + def test_stop_and_resubmit_job(self): + """Test the full stop and resubmit cycle.""" + job_name = "stop-resubmit-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue( + result, + "Job should reach running state before suspension, completion, or failure", + ) + + stop_result = self.api.suspend_job(job_name, namespace) + self.assertTrue(stop_result, "Job should be suspended successfully") + + suspended = self.wait_for_job_status( + job_name, namespace, "Suspended", timeout=30 + ) + self.assertTrue( + suspended, "Job should reach Suspended status within 30 seconds" + ) + + resubmit_result = self.api.resubmit_job(job_name, namespace) + self.assertTrue(resubmit_result, "Job should be resubmitted successfully") + + 
result = self.api.wait_until_job_finished( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Resubmitted job should complete successfully") + + finally: + self.api.delete_job(job_name, namespace) + + def test_suspend_job_nonexistent(self): + """Test stopping a non-existent job.""" + result = self.api.suspend_job("nonexistent-job", namespace) + self.assertFalse(result, "Should return False for non-existent job") + + def test_resubmit_job_nonexistent(self): + """Test resubmitting a non-existent job.""" + result = self.api.resubmit_job("nonexistent-job", namespace) + self.assertFalse(result, "Should return False for non-existent job") + + def test_wait_until_job_running(self): + """Test waiting for a job to reach running state.""" + job_name = "wait-running-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=60, delay_between_attempts=3 + ) + self.assertTrue(result, "Job should reach running state") + + self.api.wait_until_job_finished(job_name, namespace, 60, 5) + + finally: + self.api.delete_job(job_name, namespace) + + def test_get_job(self): + """Test getting a job.""" + job_name = "get-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + status = self.api.get_job_status( + job_name, namespace, timeout=30, delay_between_attempts=2 + ) + self.assertIsNotNone(status, "Job status should be available") + + job = self.api.get_job(job_name, namespace) + self.assertIsNotNone(job, "Job should be retrieved 
successfully") + self.assertEqual(job["metadata"]["name"], job_name) + finally: + self.api.delete_job(job_name, namespace) + + def test_list_jobs(self): + """Test listing all jobs in a namespace.""" + created_jobs = [] + + try: + initial_result = self.api.list_jobs(k8s_namespace=namespace) + self.assertIsNotNone(initial_result, "List jobs should return a result") + self.assertIn( + "items", initial_result, "Result should contain 'items' field" + ) + initial_count = len(initial_result.get("items", [])) + + test_jobs = [ + {"name": "list-test-job-1", "type": "cluster_spec"}, + {"name": "list-test-job-2", "type": "cluster_spec"}, + {"name": "list-test-job-3", "type": "cluster_spec"}, + ] + + for job_info in test_jobs: + job_body = create_job_with_ray_cluster_spec( + job_name=job_info["name"], + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone( + submitted_job, + f"Job {job_info['name']} should be submitted successfully", + ) + created_jobs.append(job_info["name"]) + + status = self.api.get_job_status( + job_info["name"], namespace, timeout=10, delay_between_attempts=1 + ) + self.assertIsNotNone( + status, f"Job {job_info['name']} status should be available" + ) + + result = self.api.list_jobs(k8s_namespace=namespace) + self.assertIsNotNone(result, "List jobs should return a result") + self.assertIn("items", result, "Result should contain 'items' field") + + items = result.get("items", []) + current_count = len(items) + + self.assertGreaterEqual( + current_count, + initial_count + len(test_jobs), + f"Should have at least {len(test_jobs)} more jobs than initially", + ) + + job_names_in_list = [item.get("metadata", {}).get("name") for item in items] + for job_name in created_jobs: + self.assertIn( + job_name, job_names_in_list, f"Job {job_name} should be in the list" + ) + + finally: + for job_name in created_jobs: + try: + self.api.delete_job(job_name, namespace) + except Exception as 
e: + print(f"Failed to delete job {job_name}: {e}") + + def wait_for_job_status( + self, job_name, namespace, expected_status, timeout=60, check_interval=3 + ): + """Wait for a job to reach a specific status with polling.""" + start_time = time.time() + while time.time() - start_time < timeout: + status = self.api.get_job_status( + job_name, namespace, timeout=5, delay_between_attempts=1 + ) + current_status = status.get("jobDeploymentStatus") if status else None + + if current_status == expected_status: + return True + + time.sleep(check_interval) + + return False diff --git a/src/codeflare_sdk/vendored/python_client_test/test_utils.py b/src/codeflare_sdk/vendored/python_client_test/test_utils.py new file mode 100644 index 00000000..93d79db9 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_utils.py @@ -0,0 +1,352 @@ +import unittest +import copy +from codeflare_sdk.vendored.python_client.utils import ( + kuberay_cluster_utils, + kuberay_cluster_builder, +) + + +test_cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0"}, + "name": "raycluster-complete-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": {"dashboard-host": "0.0.0.0"}, + "template": { + "metadata": {"labels": {}}, + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "ports": [ + {"containerPort": 6379, "name": "gcs"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "2G"}, + "requests": {"cpu": "500m", "memory": "2G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + }, + }, + }, + "workerGroupSpecs": [ + { + "replicas": 1, + 
"minReplicas": 1, + "maxReplicas": 10, + "groupName": "small-group", + "rayStartParams": {}, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.46.0", + "lifecycle": { + "preStop": { + "exec": { + "command": ["/bin/sh", "-c", "ray stop"] + } + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "1G"}, + "requests": {"cpu": "500m", "memory": "1G"}, + }, + }, + { + "name": "side-car", + "image": "rayproject/ray:2.46.0", + "resources": { + "limits": {"cpu": "1", "memory": "1G"}, + "requests": {"cpu": "500m", "memory": "1G"}, + }, + }, + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + } + }, + } + ], + }, + "status": { + "availableWorkerReplicas": 2, + "desiredWorkerReplicas": 1, + "endpoints": {"client": "10001", "dashboard": "8265", "gcs-server": "6379"}, + "head": {"serviceIP": "10.152.183.194"}, + "lastUpdateTime": "2023-02-16T05:15:17Z", + "maxWorkerReplicas": 2, + }, +} + + +class TestUtils(unittest.TestCase): + def __init__(self, methodName: str = ...) 
-> None: + super().__init__(methodName) + self.director = kuberay_cluster_builder.Director() + self.utils = kuberay_cluster_utils.ClusterUtils() + + def test_populate_worker_group(self): + worker_group, succeeded = self.utils.populate_worker_group( + group_name="small-group", + ray_image="rayproject/ray:2.46.0", + ray_command=["/bin/bash", "-lc"], + init_image="busybox:1.28", + cpu_requests="3", + memory_requests="1G", + cpu_limits="5", + memory_limits="10G", + replicas=1, + min_replicas=1, + max_replicas=3, + ray_start_params={"block": "True"}, + ) + self.assertIsNotNone(worker_group) + self.assertEqual(succeeded, True) + + self.assertEqual(worker_group["groupName"], "small-group") + self.assertEqual(worker_group["maxReplicas"], 3) + self.assertEqual(worker_group["minReplicas"], 1) + self.assertEqual(worker_group["rayStartParams"], {"block": "True"}) + self.assertEqual(worker_group["replicas"], 1) + + container = worker_group["template"]["spec"]["containers"][0] + self.assertEqual(container["image"], "rayproject/ray:2.46.0") + self.assertEqual(container["command"], ["/bin/bash", "-lc"]) + + resources = container["resources"] + self.assertEqual(resources["requests"]["cpu"], "3") + self.assertEqual(resources["requests"]["memory"], "1G") + self.assertEqual(resources["limits"]["cpu"], "5") + self.assertEqual(resources["limits"]["memory"], "10G") + + # min_replicas can be 0 and ray_start_params can be an empty dict. 
+ worker_group, succeeded = self.utils.populate_worker_group( + group_name="small-group", + ray_image="rayproject/ray:2.46.0", + ray_command=["/bin/bash", "-lc"], + init_image="busybox:1.28", + cpu_requests="3", + memory_requests="1G", + cpu_limits="5", + memory_limits="10G", + replicas=1, + min_replicas=0, + max_replicas=3, + ray_start_params={}, + ) + self.assertIsNotNone(worker_group) + self.assertEqual(succeeded, True) + self.assertEqual(worker_group["rayStartParams"], {}) + self.assertEqual(worker_group["minReplicas"], 0) + + def test_update_worker_group_replicas(self): + cluster = self.director.build_small_cluster(name="small-cluster") + + actual = cluster["metadata"]["name"] + expected = "small-cluster" + self.assertEqual(actual, expected) + + cluster, succeeded = self.utils.update_worker_group_replicas( + cluster, + group_name="small-cluster-workers", + max_replicas=10, + min_replicas=1, + replicas=5, + ) + + self.assertEqual(succeeded, True) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 5 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["maxReplicas"] + expected = 10 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["minReplicas"] + expected = 1 + self.assertEqual(actual, expected) + + def test_update_worker_group_resources(self): + cluster: dict = copy.deepcopy(test_cluster_body) + actual = cluster["metadata"]["name"] + expected = "raycluster-complete-raw" + self.assertEqual(actual, expected) + + cluster, succeeded = self.utils.update_worker_group_resources( + cluster, + group_name="small-group", + cpu_requests="3", + memory_requests="5G", + cpu_limits="5", + memory_limits="10G", + container_name="unspecified", + ) + self.assertEqual(succeeded, True) + self.assertEqual( + cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"], + "3", + ) + self.assertEqual( + 
cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][1][ + "resources" + ]["requests"]["cpu"], + "500m", + ) + + cluster, succeeded = self.utils.update_worker_group_resources( + cluster, + group_name="small-group", + cpu_requests="4", + memory_requests="5G", + cpu_limits="5", + memory_limits="10G", + container_name="side-car", + ) + self.assertEqual(succeeded, True) + self.assertEqual( + cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][1][ + "resources" + ]["requests"]["cpu"], + "4", + ) + + cluster, succeeded = self.utils.update_worker_group_resources( + cluster, + group_name="small-group", + cpu_requests="4", + memory_requests="15G", + cpu_limits="5", + memory_limits="25G", + container_name="all_containers", + ) + self.assertEqual(succeeded, True) + self.assertEqual( + cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][1][ + "resources" + ]["requests"]["memory"], + "15G", + ) + + cluster, succeeded = self.utils.update_worker_group_resources( + cluster, + group_name="small-group", + cpu_requests="4", + memory_requests="15G", + cpu_limits="5", + memory_limits="25G", + container_name="wrong_name", + ) + self.assertEqual(succeeded, False) + + # missing parameter test + with self.assertRaises(TypeError): + cluster, succeeded = self.utils.update_worker_group_resources( + cluster, + group_name="small-group", + cpu_requests="4", + ) + + def test_duplicate_worker_group(self): + cluster = self.director.build_small_cluster(name="small-cluster") + actual = cluster["metadata"]["name"] + expected = "small-cluster" + self.assertEqual(actual, expected) + + cluster, succeeded = self.utils.duplicate_worker_group( + cluster, + group_name="small-cluster-workers", + new_group_name="new-small-group-workers", + ) + self.assertEqual(succeeded, True) + self.assertEqual( + cluster["spec"]["workerGroupSpecs"][1]["groupName"], + "new-small-group-workers", + ) + self.assertEqual( + 
cluster["spec"]["workerGroupSpecs"][1]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"], + "1", + ) + + # missing parameter test + with self.assertRaises(TypeError): + cluster, succeeded = self.utils.duplicate_worker_group( + cluster, + group_name="small-cluster-workers", + ) + + def test_delete_worker_group(self): + """ + Test delete_worker_group + """ + cluster = self.director.build_small_cluster(name="small-cluster") + actual = cluster["metadata"]["name"] + expected = "small-cluster" + self.assertEqual(actual, expected) + + cluster, succeeded = self.utils.delete_worker_group( + cluster, + group_name="small-cluster-workers", + ) + self.assertEqual(succeeded, True) + self.assertEqual(len(cluster["spec"]["workerGroupSpecs"]), 0) + + # deleting the same worker group again should fail + with self.assertRaises(AssertionError): + cluster, succeeded = self.utils.delete_worker_group( + cluster, + group_name="small-cluster-workers", + ) + + def test_name(self): + self.assertEqual(self.utils.is_valid_name("name"), True) + self.assertEqual(self.utils.is_valid_name("name-"), False) + self.assertEqual(self.utils.is_valid_name(".name"), False) + self.assertEqual(self.utils.is_valid_name("name_something"), False) + self.assertEqual( + self.utils.is_valid_name( + "toooooooooooooooooooooooooooooooooooooooooo-loooooooooooooooooooong" + ), + False, + ) + + def test_label(self): + self.assertEqual(self.utils.is_valid_label("name"), True) + self.assertEqual(self.utils.is_valid_label("name-"), False) + self.assertEqual(self.utils.is_valid_label(".name"), False) + self.assertEqual(self.utils.is_valid_label("name_something"), True) + self.assertEqual(self.utils.is_valid_label("good.name"), True) + self.assertEqual( + self.utils.is_valid_label( + "toooooooooooooooooooooooooooooooooooooooooo-loooooooooooooooooooong" + ), + False, + ) diff --git a/tests/e2e/rayjob/rayjob_existing_cluster_test.py b/tests/e2e/rayjob/rayjob_existing_cluster_test.py index 
82858d28..8f6f0c3b 100644 --- a/tests/e2e/rayjob/rayjob_existing_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_existing_cluster_test.py @@ -12,7 +12,7 @@ ) from codeflare_sdk import RayJob, TokenAuthentication from codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus -from python_client.kuberay_job_api import RayjobApi +from codeflare_sdk.vendored.python_client.kuberay_job_api import RayjobApi class TestRayJobExistingCluster: diff --git a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py index 10390011..2256f06f 100644 --- a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py @@ -10,8 +10,8 @@ from codeflare_sdk import RayJob, ManagedClusterConfig from kubernetes import client -from python_client.kuberay_job_api import RayjobApi -from python_client.kuberay_cluster_api import RayClusterApi +from codeflare_sdk.vendored.python_client.kuberay_job_api import RayjobApi +from codeflare_sdk.vendored.python_client.kuberay_cluster_api import RayClusterApi class TestRayJobLifecycledCluster: From c56eecdd9f3f4ec855e310f10d4cd9a0fd9224f9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 16 Oct 2025 11:51:11 +0000 Subject: [PATCH 33/33] Updated coverage.svg --- coverage.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coverage.svg b/coverage.svg index 59d64b37..fe061433 100644 --- a/coverage.svg +++ b/coverage.svg @@ -15,7 +15,7 @@ coverage coverage - 93% - 93% + 94% + 94%