Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,362 changes: 701 additions & 661 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ cryptography = "43.0.3"
executing = "1.2.0"
pydantic = "< 2"
ipywidgets = "8.1.2"
odh-kuberay-client = {version = "0.0.0.dev40", source = "testpypi"}

[[tool.poetry.source]]
name = "pypi"

[[tool.poetry.source]]
name = "testpypi"
url = "https://test.pypi.org/simple/"

[tool.poetry.group.docs]
optional = true
Expand Down
1 change: 1 addition & 0 deletions src/codeflare_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
AWManager,
AppWrapperStatus,
RayJobClient,
RayJob,
)

from .common.widgets import view_clusters
Expand Down
7 changes: 7 additions & 0 deletions src/codeflare_sdk/ray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
RayJobClient,
)

from .rayjobs import (
RayJob,
RayJobDeploymentStatus,
CodeflareRayJobStatus,
RayJobInfo,
)

from .cluster import (
Cluster,
ClusterConfiguration,
Expand Down
2 changes: 2 additions & 0 deletions src/codeflare_sdk/ray/cluster/build_ray_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def build_ray_cluster(cluster: "codeflare_sdk.ray.cluster.Cluster"):
"enableIngress": False,
"rayStartParams": {
"dashboard-host": "0.0.0.0",
"dashboard-port": "8265",
"block": "true",
"num-gpus": str(head_gpu_count),
"resources": head_resources,
Expand Down Expand Up @@ -245,6 +246,7 @@ def get_labels(cluster: "codeflare_sdk.ray.cluster.Cluster"):
"""
labels = {
"controller-tools.k8s.io": "1.0",
"ray.io/cluster": cluster.config.name, # Enforced label always present
}
if cluster.config.labels != {}:
labels.update(cluster.config.labels)
Expand Down
8 changes: 6 additions & 2 deletions src/codeflare_sdk/ray/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@

from time import sleep
from typing import List, Optional, Tuple, Dict
import copy

from ray.job_submission import JobSubmissionClient
from ray.job_submission import JobSubmissionClient, JobStatus
import time
import uuid
import warnings

from ...common.kubernetes_cluster.auth import (
config_check,
Expand Down Expand Up @@ -57,7 +61,6 @@
from kubernetes.client.rest import ApiException

from kubernetes.client.rest import ApiException
import warnings

CF_SDK_FIELD_MANAGER = "codeflare-sdk"

Expand Down Expand Up @@ -760,6 +763,7 @@ def get_cluster(
head_extended_resource_requests=head_extended_resources,
worker_extended_resource_requests=worker_extended_resources,
)

# Ignore the warning here for the lack of a ClusterConfiguration
with warnings.catch_warnings():
warnings.filterwarnings(
Expand Down
10 changes: 8 additions & 2 deletions src/codeflare_sdk/ray/cluster/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,5 +758,11 @@ def custom_side_effect(group, version, namespace, plural, **kwargs):

# Make sure to always keep this function last
def test_cleanup():
    """
    Delete the YAML files generated by the tests in this module.

    Each file is removed at most once, and a file that does not exist is
    skipped instead of failing the test run.
    """
    for file_name in ("test-all-params.yaml", "aw-all-params.yaml"):
        try:
            os.remove(f"{aw_dir}{file_name}")
        except FileNotFoundError:
            # Nothing to clean up for this file.
            pass
2 changes: 2 additions & 0 deletions src/codeflare_sdk/ray/rayjobs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .rayjob import RayJob
from .status import RayJobDeploymentStatus, CodeflareRayJobStatus, RayJobInfo
117 changes: 117 additions & 0 deletions src/codeflare_sdk/ray/rayjobs/pretty_print.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright 2025 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This sub-module exists primarily to be used internally by the RayJob object
(in the rayjob sub-module) for pretty-printing job status and details.
"""

from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from typing import Tuple, Optional

from .status import RayJobDeploymentStatus, RayJobInfo


def print_job_status(job_info: RayJobInfo):
    """
    Render a RayJob status summary to the console.

    The output always lists the job ID, the status value, the RayCluster name
    and the namespace. A start-time row is added only when
    ``job_info.start_time`` is truthy, and a failed-attempts row only when
    ``job_info.failed_attempts`` is greater than zero.

    Args:
        job_info: Job details to display. The attributes read here are
            ``name``, ``job_id``, ``status``, ``cluster_name``, ``namespace``,
            ``start_time`` and ``failed_attempts``.
    """
    status_text, header_markup = _get_status_display(job_info.status)
    details = _create_info_table(header_markup, job_info.name, status_text)

    # Rows that are shown for every job, in display order.
    always_shown = (
        f"[bold]Job ID:[/bold] {job_info.job_id}",
        f"[bold]Status:[/bold] {job_info.status.value}",
        f"[bold]RayCluster:[/bold] {job_info.cluster_name}",
        f"[bold]Namespace:[/bold] {job_info.namespace}",
    )
    for line in always_shown:
        details.add_row(line)

    # Timing is separated from the rows above by a blank row.
    if job_info.start_time:
        details.add_row()
        details.add_row(f"[bold]Started:[/bold] {job_info.start_time}")

    # The attempt count is only shown once at least one attempt has failed.
    if job_info.failed_attempts > 0:
        details.add_row(f"[bold]Failed Attempts:[/bold] {job_info.failed_attempts}")

    _print_table_in_panel(details)


def print_no_job_found(job_name: str, namespace: str):
    """
    Print a "No RayJob found" notice for the given job name.

    Args:
        job_name: Name shown in the notice.
        namespace: Namespace shown on the last row of the notice.
    """
    header_markup = "[white on red][bold]Name"
    status_text = "[bold red]No RayJob found"
    notice = _create_info_table(header_markup, job_name, status_text)

    # A blank row above and below the hint separates it from the other rows.
    notice.add_row()
    notice.add_row("Have you run rayjob.submit() yet?")
    notice.add_row()

    namespace_line = f"[bold]Namespace:[/bold] {namespace}"
    notice.add_row(namespace_line)

    _print_table_in_panel(notice)


def _get_status_display(status: RayJobDeploymentStatus) -> Tuple[str, str]:
    """
    Look up the status label and header markup for a job status.

    Args:
        status: The status to look up.

    Returns:
        Tuple of (status_display, header_color). A status with no entry in
        the lookup table gets the "Unknown" label with the red header.
    """
    unknown = ("Unknown :question:", "[white on red][bold]Name")

    known = {
        RayJobDeploymentStatus.COMPLETE: (
            "Complete :white_heavy_check_mark:",
            "[white on green][bold]Name",
        ),
        RayJobDeploymentStatus.RUNNING: (
            "Running :gear:",
            "[white on blue][bold]Name",
        ),
        RayJobDeploymentStatus.FAILED: (
            "Failed :x:",
            "[white on red][bold]Name",
        ),
        RayJobDeploymentStatus.SUSPENDED: (
            "Suspended :pause_button:",
            "[white on yellow][bold]Name",
        ),
    }

    try:
        return known[status]
    except KeyError:
        return unknown


def _create_info_table(header_color: str, name: str, status_display: str) -> Table:
    """
    Build the borderless, headerless table that the status panels start from.

    Args:
        header_color: Markup string placed alone on the first row.
        name: Name shown bold and underlined on the second row.
        status_display: Status text shown next to the name.

    Returns:
        Table holding the header row, the name/status row and a blank
        separator row, in that order.
    """
    name_cell = "[bold underline]" + name

    info = Table(box=None, show_header=False)
    info.add_row(header_color)
    info.add_row(name_cell, status_display)
    # The trailing blank row separates this heading from rows added by callers.
    info.add_row()
    return info


def _print_table_in_panel(table: Table):
    """
    Print a table inside a fitted panel under the RayJob status title.

    Args:
        table: The table to wrap and print.
    """
    console = Console()
    title = "[bold] :package: CodeFlare RayJob Status :package:"

    # The outer borderless table exists only to carry the title above the panel.
    wrapper = Table(box=None, title=title)
    wrapper.add_row(Panel.fit(table))

    console.print(wrapper)
Loading