
Commit cdce225

Add serve command-line option to list all supported model-ids (#221)
* add modelid serve
* add test
* fix
* fix
* fix
* test error
* test error
* fix
* fix
* fix test benchmark
* fix review
* fix

Parent: 3e69237

4 files changed: +62 −22 lines

README.md

Lines changed: 8 additions & 1 deletion

@@ -71,7 +71,14 @@ Deploy a model on Ray and expose an endpoint for serving. This command uses GPT2
 ```bash
 llm_on_ray-serve --config_file llm_on_ray/inference/models/gpt2.yaml
 ```
-
+You can also use model_ids to serve directly:
+```bash
+llm_on_ray-serve --models gpt2
+```
+List all supported model_ids with their config file paths:
+```bash
+llm_on_ray-serve --list_model_ids
+```
 The default served method is to provide an OpenAI-compatible API server ([OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)), you can access and test it in many ways:
 ```bash
 # using curl
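
The hunk is truncated just before the README's curl example. As a rough, hypothetical sketch only, a request to the OpenAI-compatible server could look like the following, assuming the default port 8000 used in this repo's tests and the standard /v1/chat/completions route; adjust host, port, and model name to your deployment:

```bash
# Hypothetical request; localhost:8000 and the /v1/chat/completions route are
# assumptions, and "gpt2" must match a model id that is actually being served.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "gpt2",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'
```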

benchmarks/run_benchmark.sh

Lines changed: 1 addition & 1 deletion

@@ -229,4 +229,4 @@ then
 fi
 output_tokens_length=32
 get_best_latency $iter "${input_tokens_length[*]}" $output_tokens_length $benchmark_dir
-fi
+fi

llm_on_ray/inference/serve.py

Lines changed: 16 additions & 1 deletion

@@ -20,7 +20,11 @@
 from llm_on_ray.inference.api_server_simple import serve_run
 from llm_on_ray.inference.api_server_openai import openai_serve_run
 from llm_on_ray.inference.predictor_deployment import PredictorDeployment
-from llm_on_ray.inference.inference_config import ModelDescription, InferenceConfig, all_models
+from llm_on_ray.inference.inference_config import (
+    ModelDescription,
+    InferenceConfig,
+    all_models,
+)


 def get_deployed_models(args):
@@ -90,6 +94,11 @@ def main(argv=None):
         type=str,
         help=f"Only used when config_file is None, valid values can be any items in {list(all_models.keys())}.",
     )
+    parser.add_argument(
+        "--list_model_ids",
+        action="store_true",
+        help="List all supported model IDs with config file path",
+    )
     parser.add_argument(
         "--simple",
         action="store_true",
@@ -130,6 +139,12 @@ def main(argv=None):

     args = parser.parse_args(argv)

+    all_models_name = list(all_models.keys())
+    if args.list_model_ids:
+        for model in all_models_name:
+            print(f"{model}: \tllm_on_ray/inference/models/{model}.yaml")
+        sys.exit(0)
+
     ray.init(address="auto")
     deployments, model_list = get_deployed_models(args)
     if args.simple:
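
With this change, passing --list_model_ids short-circuits serving: serve.py prints one `<model_id>: <config path>` line per entry in all_models and exits before Ray is initialized. A minimal usage sketch follows; gpt2 is the only model id confirmed elsewhere in this commit, and the full list depends on the YAML files shipped under llm_on_ray/inference/models/:

```bash
# Print the supported model ids and exit without starting a Ray cluster.
# Expected output format, one line per model id, for example:
#   gpt2: 	llm_on_ray/inference/models/gpt2.yaml
llm_on_ray-serve --list_model_ids
```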

tests/inference/test_serve.py

Lines changed: 37 additions & 19 deletions

@@ -20,20 +20,22 @@

 # Parametrize the test function with different combinations of parameters
 @pytest.mark.parametrize(
-    "config_file, models, port, simple, keep_serve_termimal",
+    "config_file, models, port, simple, keep_serve_termimal, list_model_ids",
     [
         (
             config_file,
             models,
             port,
             simple,
             keep_serve_termimal,
+            list_model_ids,
         )
         for config_file in ["../.github/workflows/config/gpt2-ci.yaml"]
         for models in ["gpt2"]
         for port in [8000]
         for simple in [False]
         for keep_serve_termimal in [False]
+        for list_model_ids in [False, True]
     ],
 )
 def test_script(
@@ -42,25 +44,41 @@ def test_script(
     port,
     simple,
     keep_serve_termimal,
+    list_model_ids,
 ):
-    cmd_serve = ["python", "../llm_on_ray/inference/serve.py"]
-    if config_file is not None:
-        cmd_serve.append("--config_file")
-        cmd_serve.append(str(config_file))
-    if models is not None:
-        cmd_serve.append("--models")
-        cmd_serve.append(str(models))
-    if port is not None:
-        cmd_serve.append("--port")
-        cmd_serve.append(str(port))
-    if simple:
-        cmd_serve.append("--simple")
-    if keep_serve_termimal:
-        cmd_serve.append("--keep_serve_termimal")
+    cmd_serve = ["llm_on_ray-serve"]
+    if list_model_ids:
+        cmd_serve.append("--list_model_ids")
+    else:
+        if config_file is not None:
+            cmd_serve.append("--config_file")
+            cmd_serve.append(str(config_file))
+        elif models is not None:
+            cmd_serve.append("--models")
+            cmd_serve.append(str(models))
+        if port is not None:
+            cmd_serve.append("--port")
+            cmd_serve.append(str(port))
+        if simple:
+            cmd_serve.append("--simple")
+        if keep_serve_termimal:
+            cmd_serve.append("--keep_serve_termimal")

+    print(cmd_serve)
     result_serve = subprocess.run(cmd_serve, capture_output=True, text=True)
+    if list_model_ids:
+        output = result_serve.stdout.strip()
+        lines = output.split("\n")
+        assert len(lines) > 0, "No model IDs found in the output"

-    assert "Error" not in result_serve.stderr
-    assert result_serve.returncode == 0
-    print("Output of stderr:")
-    print(result_serve.stderr)
+        # Check if the model IDs are listed
+        for line in lines:
+            parts = line.split()
+            assert len(parts) == 2, f"Invalid line format: {line}"
+            model_id, config_path = parts
+
+            assert config_path.endswith(".yaml"), f"Invalid config path format: {config_path}"
+
+    assert result_serve.returncode == 0, print(
+        "\n" + "Output of stderr: " + "\n", result_serve.stderr
+    )
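
The relative config path in the parametrization suggests the suite is run from the tests/ directory. A rough sketch of a local run, assuming pytest, Ray, and the llm_on_ray-serve entry point are installed:

```bash
# Run the serve tests from the tests/ directory so the relative
# ../.github/workflows/config/gpt2-ci.yaml path resolves.
cd tests
pytest inference/test_serve.py -v
```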
