Skip to content

Commit 4398538

Browse files
toslali-ibm authored and markurtz committed
Add response id to chat and audio
Signed-off-by: Mert Toslali <[email protected]>
1 parent 96082d3 commit 4398538

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

src/guidellm/backends/response_handlers.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ def add_streaming_line(self, line: str) -> int | None:
164164
"""
165165
if not (data := self.extract_line_data(line)):
166166
return None if data is None else 0
167-
167+
168168
if "id" in data and self.streaming_response_id is None:
169169
self.streaming_response_id = data["id"]
170170

@@ -312,6 +312,7 @@ def compile_non_streaming(
312312
request_args=str(
313313
request.arguments.model_dump() if request.arguments else None
314314
),
315+
response_id=response.get("id"), # use vLLM ID if available
315316
text=text,
316317
input_metrics=input_metrics,
317318
output_metrics=output_metrics,
@@ -330,6 +331,9 @@ def add_streaming_line(self, line: str) -> int | None:
330331
if not (data := self.extract_line_data(line)):
331332
return None if data is None else 0
332333

334+
if "id" in data and self.streaming_response_id is None:
335+
self.streaming_response_id = data["id"]
336+
333337
updated = False
334338
choices, usage = self.extract_choices_and_usage(data)
335339
choice: dict[str, dict] = choices[0] if choices else {}
@@ -358,6 +362,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
358362
request_args=str(
359363
request.arguments.model_dump() if request.arguments else None
360364
),
365+
response_id=self.streaming_response_id, # use vLLM ID if available
361366
text=text,
362367
input_metrics=input_metrics,
363368
output_metrics=output_metrics,
@@ -391,6 +396,8 @@ def __init__(self):
391396
self.streaming_buffer: bytearray = bytearray()
392397
self.streaming_texts: list[str] = []
393398
self.streaming_usage: dict[str, int | dict[str, int]] | None = None
399+
self.streaming_response_id: str | None = None
400+
394401

395402
def compile_non_streaming(
396403
self, request: GenerationRequest, response: dict
@@ -414,6 +421,7 @@ def compile_non_streaming(
414421
request_args=str(
415422
request.arguments.model_dump() if request.arguments else None
416423
),
424+
response_id=response.get("id"), # use vLLM ID if available
417425
text=text,
418426
input_metrics=input_metrics,
419427
output_metrics=output_metrics,
@@ -438,6 +446,9 @@ def add_streaming_line(self, line: str) -> int | None:
438446
data: dict[str, Any] = json.loads(line)
439447
updated = False
440448

449+
if "id" in data and self.streaming_response_id is None:
450+
self.streaming_response_id = data["id"]
451+
441452
if text := data.get("text"):
442453
self.streaming_texts.append(text)
443454
updated = True
@@ -462,6 +473,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
462473
request_args=str(
463474
request.arguments.model_dump() if request.arguments else None
464475
),
476+
response_id=self.streaming_response_id,
465477
text=text,
466478
input_metrics=input_metrics,
467479
output_metrics=output_metrics,

0 commit comments

Comments
 (0)