@@ -268,7 +268,7 @@ def parse_combined_approach(model: str, known_approaches: list, plugin_approache
268268
269269 return operation , approaches , actual_model
270270
271- def execute_single_approach (approach , system_prompt , initial_query , client , model ):
271+ def execute_single_approach (approach , system_prompt , initial_query , client , model , request_config : dict = None ):
272272 if approach in known_approaches :
273273 if approach == 'none' :
274274 # Extract kwargs from the request data
@@ -313,31 +313,42 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
313313 elif approach == 're2' :
314314 return re2_approach (system_prompt , initial_query , client , model , n = server_config ['n' ])
315315 elif approach == 'cepo' :
316- return cepo (system_prompt , initial_query , client , model , cepo_config )
316+ return cepo (system_prompt , initial_query , client , model , cepo_config )
317317 elif approach in plugin_approaches :
318- return plugin_approaches [approach ](system_prompt , initial_query , client , model )
318+ # Check if the plugin accepts request_config
319+ plugin_func = plugin_approaches [approach ]
320+ import inspect
321+ sig = inspect .signature (plugin_func )
322+
323+ if 'request_config' in sig .parameters :
324+ # Plugin supports request_config
325+ return plugin_func (system_prompt , initial_query , client , model , request_config = request_config )
326+ else :
327+ # Legacy plugin without request_config support
328+ return plugin_func (system_prompt , initial_query , client , model )
319329 else :
320330 raise ValueError (f"Unknown approach: { approach } " )
321331
def execute_combined_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
    """Run several approaches one after another as a pipeline.

    The first approach receives ``initial_query``; every following approach
    receives the response produced by the one before it.

    Args:
        approaches: Iterable of approach names, executed in order.
        system_prompt: System prompt passed unchanged to every approach.
        initial_query: Query handed to the first approach.
        client: Client object passed through to ``execute_single_approach``.
        model: Model identifier passed through to ``execute_single_approach``.
        request_config: Optional per-request settings, forwarded unchanged to
            every ``execute_single_approach`` call.

    Returns:
        A ``(final_response, total_tokens)`` tuple: the response of the last
        approach and the sum of the token counts reported by all approaches.
        With no approaches this is ``(initial_query, 0)``.
    """
    final_response = initial_query
    total_tokens = 0
    for approach in approaches:
        # The previous response becomes the query of the next stage.
        final_response, tokens = execute_single_approach(
            approach, system_prompt, final_response, client, model, request_config
        )
        total_tokens += tokens
    return final_response, total_tokens
330340
async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
    """Run every approach concurrently on the same query.

    Each approach is executed through ``execute_single_approach`` in a worker
    thread via ``asyncio.to_thread`` and all of them are awaited together.

    Args:
        approaches: Sequence of approach names to run.
        system_prompt: System prompt passed unchanged to every approach.
        initial_query: Query passed unchanged to every approach.
        client: Client object passed through to ``execute_single_approach``.
        model: Model identifier passed through to ``execute_single_approach``.
        request_config: Optional per-request settings, forwarded unchanged to
            every ``execute_single_approach`` call.

    Returns:
        A ``(responses, total_tokens)`` tuple: the list of responses in the
        same order as ``approaches`` and the sum of all token counts.
        With no approaches this is ``([], 0)``.
    """
    # Without this guard, zip(*[]) yields nothing and the two-name unpack
    # below raises ValueError.
    if not approaches:
        return [], 0

    async def run_approach(approach):
        return await asyncio.to_thread(
            execute_single_approach,
            approach,
            system_prompt,
            initial_query,
            client,
            model,
            request_config,
        )

    # gather() returns results in the order the awaitables were passed in.
    results = await asyncio.gather(*(run_approach(approach) for approach in approaches))
    responses, tokens = zip(*results)
    return list(responses), sum(tokens)
339349
340- def execute_n_times (n : int , approaches , operation : str , system_prompt : str , initial_query : str , client : Any , model : str ) -> Tuple [Union [str , List [str ]], int ]:
350+ def execute_n_times (n : int , approaches , operation : str , system_prompt : str , initial_query : str , client : Any , model : str ,
351+ request_config : dict = None ) -> Tuple [Union [str , List [str ]], int ]:
341352 """
342353 Execute the pipeline n times and return n responses.
343354
@@ -358,13 +369,13 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
358369
359370 for _ in range (n ):
360371 if operation == 'SINGLE' :
361- response , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
372+ response , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model , request_config )
362373 elif operation == 'AND' :
363- response , tokens = execute_combined_approaches (approaches , system_prompt , initial_query , client , model )
374+ response , tokens = execute_combined_approaches (approaches , system_prompt , initial_query , client , model , request_config )
364375 elif operation == 'OR' :
365376 loop = asyncio .new_event_loop ()
366377 asyncio .set_event_loop (loop )
367- response , tokens = loop .run_until_complete (execute_parallel_approaches (approaches , system_prompt , initial_query , client , model ))
378+ response , tokens = loop .run_until_complete (execute_parallel_approaches (approaches , system_prompt , initial_query , client , model , request_config ))
368379 loop .close ()
369380 else :
370381 raise ValueError (f"Unknown operation: { operation } " )
@@ -534,6 +545,15 @@ def proxy():
534545 messages = data .get ('messages' , [])
535546 model = data .get ('model' , server_config ['model' ])
536547 n = data .get ('n' , server_config ['n' ]) # Get n value from request or config
548+ # Extract response_format if present
549+ response_format = data .get ("response_format" , None )
550+
551+ # Create request config with all parameters
552+ request_config = {
553+ "stream" : stream ,
554+ "n" : n ,
555+ "response_format" : response_format # Add response_format to config
556+ }
537557
538558 optillm_approach = data .get ('optillm_approach' , server_config ['approach' ])
539559 logger .debug (data )
@@ -574,12 +594,12 @@ def proxy():
574594 responses = []
575595 completion_tokens = 0
576596 for _ in range (n ):
577- result , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
597+ result , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model , request_config )
578598 responses .append (result )
579599 completion_tokens += tokens
580600 result = responses
581601 else :
582- result , completion_tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
602+ result , completion_tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model , request_config )
583603
584604 logger .debug (f'Direct proxy response: { result } ' )
585605
@@ -593,7 +613,7 @@ def proxy():
593613 raise ValueError ("'none' approach cannot be combined with other approaches" )
594614
595615 # Handle non-none approaches with n attempts
596- response , completion_tokens = execute_n_times (n , approaches , operation , system_prompt , initial_query , client , model )
616+ response , completion_tokens = execute_n_times (n , approaches , operation , system_prompt , initial_query , client , model , request_config )
597617
598618 except Exception as e :
599619 logger .error (f"Error processing request: { str (e )} " )
0 commit comments