From 45cbb2341e9da718752b78ba5da7d1c107031b77 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Wed, 20 Nov 2024 15:24:13 +0800 Subject: [PATCH 1/7] Update optillm.py add plugin_approaches as well to args --- optillm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optillm.py b/optillm.py index b7f0d35a..d78353e5 100644 --- a/optillm.py +++ b/optillm.py @@ -522,7 +522,7 @@ def parse_args(): # Define arguments and their corresponding environment variables args_env = [ ("--optillm-api-key", "OPTILLM_API_KEY", str, "", "Optional API key for client authentication to optillm"), - ("--approach", "OPTILLM_APPROACH", str, "auto", "Inference approach to use", known_approaches), + ("--approach", "OPTILLM_APPROACH", str, "auto", "Inference approach to use", known_approaches + list(plugin_approaches.keys())), ("--mcts-simulations", "OPTILLM_SIMULATIONS", int, 2, "Number of MCTS simulations"), ("--mcts-exploration", "OPTILLM_EXPLORATION", float, 0.2, "Exploration weight for MCTS"), ("--mcts-depth", "OPTILLM_DEPTH", int, 1, "Simulation depth for MCTS"), @@ -571,10 +571,10 @@ def parse_args(): def main(): global server_config - args = parse_args() - # Call this function at the start of main() load_plugins() + args = parse_args() + # Update server_config with all argument values server_config.update(vars(args)) From f18b9210debe54a9904e61bd0ea9305b2895b344 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Thu, 21 Nov 2024 12:11:34 +0800 Subject: [PATCH 2/7] add new chain of code approach --- optillm.py | 4 + optillm/plugins/coc_plugin.py | 162 ++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 optillm/plugins/coc_plugin.py diff --git a/optillm.py b/optillm.py index d78353e5..c8b90795 100644 --- a/optillm.py +++ b/optillm.py @@ -395,6 +395,10 @@ def proxy(): model = data.get('model', server_config['model']) optillm_approach = data.get('optillm_approach', server_config['approach']) + logger.debug(data) + server_config['mcts_depth'] = data.get('mcts_depth', server_config['mcts_depth']) + server_config['mcts_exploration' ] = data.get('mcts_exploration', server_config['mcts_exploration']) + server_config['mcts_simulations'] = data.get('mcts_simulations', server_config['mcts_simulations']) system_prompt, initial_query, message_optillm_approach = parse_conversation(messages) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py new file mode 100644 index 00000000..003ddc62 --- /dev/null +++ b/optillm/plugins/coc_plugin.py @@ -0,0 +1,162 @@ +import re +import logging +from typing import Tuple, Dict, Any, List +import ast +import traceback + +logger = logging.getLogger(__name__) + +# Plugin identifier +SLUG = "coc" + +# Prompts +CHAIN_OF_CODE_PROMPT = ''' +You are an AI assistant that uses Chain of Code (CoC) approach to solve problems. Follow these steps: + +1. Write Python code that breaks down the problem into clear steps +2. Each step should either be: + - Executable Python code that performs computations + - Pseudocode that you will simulate with natural language understanding +3. Track program state after each line execution +4. Return the final answer within the tags + +Format your response using: +```python +[Your code here] +``` + +And track state after each line with: +delta_state: {...} + +Finally provide output as: + +[Your final answer] + +''' + +STATE_SIMULATION_PROMPT = '''You are simulating the execution of Python code. +Given the current program state and a line of code, return ONLY a Python dictionary representing the new state variables. +Do not include any other text, code blocks, or formatting - just the Python dict. + +For example: +state = {'x': 5} +code = "y = x + 3" +You should return: +{'y': 8} +''' + +def extract_code_blocks(text: str) -> List[str]: + """Extract Python code blocks from text.""" + pattern = r'```python\s*(.*?)\s*```' + matches = re.findall(pattern, text, re.DOTALL) + return [m.strip() for m in matches] + +def extract_output(text: str) -> str: + """Extract content from output tags.""" + pattern = r'(.*?)' + match = re.search(pattern, text, re.DOTALL) + return match.group(1).strip() if match else text.strip() + +def extract_state_updates(text: str) -> List[Dict[str, Any]]: + """Extract state updates from delta_state markers.""" + pattern = r'delta_state:\s*({.*?})' + matches = re.findall(pattern, text, re.DOTALL) + states = [] + for m in matches: + try: + # Clean up the state string before evaluation + cleaned = re.sub(r'```python\s*|\s*```', '', m) + state = ast.literal_eval(cleaned) + states.append(state) + except: + logger.warning(f"Could not parse state update: {m}") + return states + +def clean_state_response(response: str) -> str: + """Clean up LM state response to get just the dictionary.""" + # Remove any code blocks + response = re.sub(r'```python\s*|\s*```', '', response) + # Remove any natural language before or after the dict + response = re.sub(r'^[^{]*', '', response) + response = re.sub(r'[^}]*$', '', response) + return response.strip() + +def execute_line(line: str, state: Dict[str, Any], client, model: str) -> Tuple[Any, Dict[str, Any]]: + """Execute a single line of code, either with Python or LM simulation.""" + try: + # Try executing with Python + # Create a copy of state for local execution + local_state = state.copy() + exec(line, globals(), local_state) + # Extract any new/modified variables + new_state = {k:v for k,v in local_state.items() + if k not in state or state[k] != v} + return None, new_state + except Exception as e: + # If Python execution fails, simulate with LM + context = f"Current program state: {state}\nExecute line: {line}" + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": STATE_SIMULATION_PROMPT}, + {"role": "user", "content": context} + ], + temperature=0.2 + ) + try: + # Clean and parse LM response + cleaned_response = clean_state_response(response.choices[0].message.content) + new_state = ast.literal_eval(cleaned_response) + return response.usage.completion_tokens, new_state + except Exception as e: + logger.error(f"Could not parse LM state response: {response.choices[0].message.content}") + logger.error(f"Error: {str(e)}") + logger.error(f"Cleaned response: {cleaned_response}") + return response.usage.completion_tokens, {} + +def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: + """Main Chain of Code execution function.""" + # Generate initial code solution + messages = [ + {"role": "system", "content": system_prompt + "\n" + CHAIN_OF_CODE_PROMPT}, + {"role": "user", "content": initial_query} + ] + + response = client.chat.completions.create( + model=model, + messages=messages, + temperature=0.7 + ) + initial_response = response.choices[0].message.content + total_tokens = response.usage.completion_tokens + + # Extract code blocks + code_blocks = extract_code_blocks(initial_response) + if not code_blocks: + logger.warning("No code blocks found in response") + return initial_response, total_tokens + + # Execute code blocks line by line + final_state = {} + code = code_blocks[0] # Take first code block + + # Split into lines and filter empty lines + lines = [line.strip() for line in code.split('\n') if line.strip()] + + for line in lines: + if not line or line.startswith('#'): + continue + + tokens, new_state = execute_line(line, final_state, client, model) + if tokens: + total_tokens += tokens + final_state.update(new_state) + logger.debug(f"Executed line: {line}") + logger.debug(f"New state: {new_state}") + + # Extract output tags from the initial response, or use answer from state + final_answer = extract_output(initial_response) + if not final_answer and 'answer' in final_state: + final_answer = str(final_state['answer']) + + return final_answer, total_tokens \ No newline at end of file From 1d4bad4518d0711f629d4e631ce5ee7013d6e5bf Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Thu, 21 Nov 2024 16:07:35 +0800 Subject: [PATCH 3/7] Update coc_plugin.py fix coc --- optillm/plugins/coc_plugin.py | 81 +++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py index 003ddc62..a3df0c50 100644 --- a/optillm/plugins/coc_plugin.py +++ b/optillm/plugins/coc_plugin.py @@ -3,12 +3,19 @@ from typing import Tuple, Dict, Any, List import ast import traceback +import math +import importlib logger = logging.getLogger(__name__) # Plugin identifier SLUG = "coc" +# List of allowed modules for execution +ALLOWED_MODULES = { + 'math': math, +} + # Prompts CHAIN_OF_CODE_PROMPT = ''' You are an AI assistant that uses Chain of Code (CoC) approach to solve problems. Follow these steps: @@ -36,13 +43,18 @@ STATE_SIMULATION_PROMPT = '''You are simulating the execution of Python code. Given the current program state and a line of code, return ONLY a Python dictionary representing the new state variables. -Do not include any other text, code blocks, or formatting - just the Python dict. +For module imports and references, return string representations. + +Return ONLY primitive types (numbers, strings, lists, dicts) - no module references or complex objects. For example: state = {'x': 5} code = "y = x + 3" -You should return: -{'y': 8} +Return: {'y': 8} + +For modules: +code = "import math" +Return: {'math': 'module:math'} ''' def extract_code_blocks(text: str) -> List[str]: @@ -64,9 +76,8 @@ def extract_state_updates(text: str) -> List[Dict[str, Any]]: states = [] for m in matches: try: - # Clean up the state string before evaluation cleaned = re.sub(r'```python\s*|\s*```', '', m) - state = ast.literal_eval(cleaned) + state = parse_state_dict(cleaned) states.append(state) except: logger.warning(f"Could not parse state update: {m}") @@ -81,17 +92,67 @@ def clean_state_response(response: str) -> str: response = re.sub(r'[^}]*$', '', response) return response.strip() +def parse_state_dict(state_str: str) -> Dict[str, Any]: + """Safely parse state dictionary, handling module references.""" + try: + # First try direct evaluation + return ast.literal_eval(state_str) + except: + # If that fails, try to parse manually + state_dict = {} + try: + # Use a custom safe eval that handles module references + # Remove brackets and split by commas + items = state_str.strip('{}').split(',') + for item in items: + if ':' not in item: + continue + key, value = item.split(':', 1) + key = key.strip().strip("'").strip('"') + value = value.strip() + + # Handle module references + if 'module' in value: + module_name = value.split("'")[1] if "'" in value else value.split(':')[1].strip() + if module_name in ALLOWED_MODULES: + state_dict[key] = ALLOWED_MODULES[module_name] + # Handle normal values + else: + try: + state_dict[key] = ast.literal_eval(value) + except: + state_dict[key] = value.strip("'").strip('"') + return state_dict + except Exception as e: + logger.error(f"Failed to parse state dict: {state_str}") + logger.error(f"Error: {str(e)}") + return {} + def execute_line(line: str, state: Dict[str, Any], client, model: str) -> Tuple[Any, Dict[str, Any]]: """Execute a single line of code, either with Python or LM simulation.""" try: + # Handle imports specially + if line.startswith('import '): + module_name = line.split()[1] + if module_name in ALLOWED_MODULES: + return None, {module_name: ALLOWED_MODULES[module_name]} + else: + logger.warning(f"Skipping import of unauthorized module: {module_name}") + return None, {} + # Try executing with Python - # Create a copy of state for local execution local_state = state.copy() + # Add allowed modules to local state + for mod_name, mod in ALLOWED_MODULES.items(): + if mod_name in state: + local_state[mod_name] = mod + exec(line, globals(), local_state) # Extract any new/modified variables new_state = {k:v for k,v in local_state.items() if k not in state or state[k] != v} return None, new_state + except Exception as e: # If Python execution fails, simulate with LM context = f"Current program state: {state}\nExecute line: {line}" @@ -104,9 +165,8 @@ def execute_line(line: str, state: Dict[str, Any], client, model: str) -> Tuple[ temperature=0.2 ) try: - # Clean and parse LM response cleaned_response = clean_state_response(response.choices[0].message.content) - new_state = ast.literal_eval(cleaned_response) + new_state = parse_state_dict(cleaned_response) return response.usage.completion_tokens, new_state except Exception as e: logger.error(f"Could not parse LM state response: {response.choices[0].message.content}") @@ -116,7 +176,6 @@ def execute_line(line: str, state: Dict[str, Any], client, model: str) -> Tuple[ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: """Main Chain of Code execution function.""" - # Generate initial code solution messages = [ {"role": "system", "content": system_prompt + "\n" + CHAIN_OF_CODE_PROMPT}, {"role": "user", "content": initial_query} @@ -130,17 +189,14 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str initial_response = response.choices[0].message.content total_tokens = response.usage.completion_tokens - # Extract code blocks code_blocks = extract_code_blocks(initial_response) if not code_blocks: logger.warning("No code blocks found in response") return initial_response, total_tokens - # Execute code blocks line by line final_state = {} code = code_blocks[0] # Take first code block - # Split into lines and filter empty lines lines = [line.strip() for line in code.split('\n') if line.strip()] for line in lines: @@ -154,7 +210,6 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str logger.debug(f"Executed line: {line}") logger.debug(f"New state: {new_state}") - # Extract output tags from the initial response, or use answer from state final_answer = extract_output(initial_response) if not final_answer and 'answer' in final_state: final_answer = str(final_state['answer']) From 2107637826b6d6509dd8956c62579240d60cd361 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 24 Nov 2024 11:47:33 +0800 Subject: [PATCH 4/7] Update coc_plugin.py --- optillm/plugins/coc_plugin.py | 203 ++++++++++++---------------------- 1 file changed, 71 insertions(+), 132 deletions(-) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py index a3df0c50..581baf98 100644 --- a/optillm/plugins/coc_plugin.py +++ b/optillm/plugins/coc_plugin.py @@ -5,6 +5,7 @@ import traceback import math import importlib +import json logger = logging.getLogger(__name__) @@ -24,163 +25,105 @@ 2. Each step should either be: - Executable Python code that performs computations - Pseudocode that you will simulate with natural language understanding -3. Track program state after each line execution +3. Track final result in an 'answer' variable 4. Return the final answer within the tags Format your response using: ```python -[Your code here] +[Your complete Python program here] ``` -And track state after each line with: -delta_state: {...} - Finally provide output as: [Your final answer] ''' -STATE_SIMULATION_PROMPT = '''You are simulating the execution of Python code. -Given the current program state and a line of code, return ONLY a Python dictionary representing the new state variables. -For module imports and references, return string representations. - -Return ONLY primitive types (numbers, strings, lists, dicts) - no module references or complex objects. +STATE_SIMULATION_PROMPT = '''You are simulating the execution of a Python program. +Given the code below, simulate its execution and return the final value that would be in the 'answer' variable. +Return ONLY the final value, no explanations or additional text. -For example: -state = {'x': 5} -code = "y = x + 3" -Return: {'y': 8} - -For modules: -code = "import math" -Return: {'math': 'module:math'} +Code to simulate: +{code} ''' def extract_code_blocks(text: str) -> List[str]: """Extract Python code blocks from text.""" pattern = r'```python\s*(.*?)\s*```' matches = re.findall(pattern, text, re.DOTALL) - return [m.strip() for m in matches] + blocks = [m.strip() for m in matches] + logger.info(f"Extracted {len(blocks)} code blocks") + for i, block in enumerate(blocks): + logger.info(f"Code block {i+1}:\n{block}") + return blocks def extract_output(text: str) -> str: """Extract content from output tags.""" pattern = r'(.*?)' match = re.search(pattern, text, re.DOTALL) - return match.group(1).strip() if match else text.strip() - -def extract_state_updates(text: str) -> List[Dict[str, Any]]: - """Extract state updates from delta_state markers.""" - pattern = r'delta_state:\s*({.*?})' - matches = re.findall(pattern, text, re.DOTALL) - states = [] - for m in matches: - try: - cleaned = re.sub(r'```python\s*|\s*```', '', m) - state = parse_state_dict(cleaned) - states.append(state) - except: - logger.warning(f"Could not parse state update: {m}") - return states - -def clean_state_response(response: str) -> str: - """Clean up LM state response to get just the dictionary.""" - # Remove any code blocks - response = re.sub(r'```python\s*|\s*```', '', response) - # Remove any natural language before or after the dict - response = re.sub(r'^[^{]*', '', response) - response = re.sub(r'[^}]*$', '', response) - return response.strip() - -def parse_state_dict(state_str: str) -> Dict[str, Any]: - """Safely parse state dictionary, handling module references.""" - try: - # First try direct evaluation - return ast.literal_eval(state_str) - except: - # If that fails, try to parse manually - state_dict = {} - try: - # Use a custom safe eval that handles module references - # Remove brackets and split by commas - items = state_str.strip('{}').split(',') - for item in items: - if ':' not in item: - continue - key, value = item.split(':', 1) - key = key.strip().strip("'").strip('"') - value = value.strip() - - # Handle module references - if 'module' in value: - module_name = value.split("'")[1] if "'" in value else value.split(':')[1].strip() - if module_name in ALLOWED_MODULES: - state_dict[key] = ALLOWED_MODULES[module_name] - # Handle normal values - else: - try: - state_dict[key] = ast.literal_eval(value) - except: - state_dict[key] = value.strip("'").strip('"') - return state_dict - except Exception as e: - logger.error(f"Failed to parse state dict: {state_str}") - logger.error(f"Error: {str(e)}") - return {} - -def execute_line(line: str, state: Dict[str, Any], client, model: str) -> Tuple[Any, Dict[str, Any]]: - """Execute a single line of code, either with Python or LM simulation.""" + result = match.group(1).strip() if match else text.strip() + logger.info(f"Extracted output: {result}") + return result + +def execute_code(code: str, client, model: str) -> Tuple[Any, int]: + """Execute full code block either with Python or LM simulation.""" + logger.info("Attempting to execute complete code block") + logger.info(f"Code:\n{code}") + + # Add imports + execution_env = {} + for mod_name, mod in ALLOWED_MODULES.items(): + execution_env[mod_name] = mod + try: - # Handle imports specially - if line.startswith('import '): - module_name = line.split()[1] - if module_name in ALLOWED_MODULES: - return None, {module_name: ALLOWED_MODULES[module_name]} - else: - logger.warning(f"Skipping import of unauthorized module: {module_name}") - return None, {} - - # Try executing with Python - local_state = state.copy() - # Add allowed modules to local state - for mod_name, mod in ALLOWED_MODULES.items(): - if mod_name in state: - local_state[mod_name] = mod - - exec(line, globals(), local_state) - # Extract any new/modified variables - new_state = {k:v for k,v in local_state.items() - if k not in state or state[k] != v} - return None, new_state + # Try executing the complete code block with Python + logger.info("Attempting Python execution") + exec(code, execution_env) + answer = execution_env.get('answer') + logger.info(f"Python execution successful. Answer: {answer}") + return answer, 0 except Exception as e: + logger.info(f"Python execution failed: {str(e)}") + logger.info("Falling back to LM simulation") + # If Python execution fails, simulate with LM - context = f"Current program state: {state}\nExecute line: {line}" response = client.chat.completions.create( model=model, messages=[ - {"role": "system", "content": STATE_SIMULATION_PROMPT}, - {"role": "user", "content": context} + {"role": "system", "content": STATE_SIMULATION_PROMPT.format(code=code)}, + {"role": "user", "content": "Simulate this code and return the final value of 'answer'."} ], temperature=0.2 ) + try: - cleaned_response = clean_state_response(response.choices[0].message.content) - new_state = parse_state_dict(cleaned_response) - return response.usage.completion_tokens, new_state + answer = response.choices[0].message.content.strip() + logger.info(f"LM simulation successful. Answer: {answer}") + + # Try to convert to number if possible + try: + answer = ast.literal_eval(answer) + except: + pass + + return answer, response.usage.completion_tokens + except Exception as e: - logger.error(f"Could not parse LM state response: {response.choices[0].message.content}") - logger.error(f"Error: {str(e)}") - logger.error(f"Cleaned response: {cleaned_response}") - return response.usage.completion_tokens, {} + logger.error(f"Could not parse LM simulation response: {str(e)}") + return None, response.usage.completion_tokens def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: """Main Chain of Code execution function.""" + logger.info("Starting Chain of Code execution") + logger.info(f"Query: {initial_query}") + messages = [ {"role": "system", "content": system_prompt + "\n" + CHAIN_OF_CODE_PROMPT}, {"role": "user", "content": initial_query} ] + logger.info("Generating code solution") response = client.chat.completions.create( model=model, messages=messages, @@ -188,30 +131,26 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str ) initial_response = response.choices[0].message.content total_tokens = response.usage.completion_tokens + + logger.info("Initial response from LM:") + logger.info(initial_response) code_blocks = extract_code_blocks(initial_response) if not code_blocks: logger.warning("No code blocks found in response") return initial_response, total_tokens - final_state = {} - code = code_blocks[0] # Take first code block - - lines = [line.strip() for line in code.split('\n') if line.strip()] - - for line in lines: - if not line or line.startswith('#'): - continue + # Execute the complete code block + code = code_blocks[0] + answer, execution_tokens = execute_code(code, client, model) + total_tokens += execution_tokens + + # If we got an answer from code execution, use it + if answer is not None: + final_answer = str(answer) + else: + # Fall back to output tags if code execution failed + final_answer = extract_output(initial_response) - tokens, new_state = execute_line(line, final_state, client, model) - if tokens: - total_tokens += tokens - final_state.update(new_state) - logger.debug(f"Executed line: {line}") - logger.debug(f"New state: {new_state}") - - final_answer = extract_output(initial_response) - if not final_answer and 'answer' in final_state: - final_answer = str(final_state['answer']) - + logger.info(f"Chain of Code execution completed. Final answer: {final_answer}") return final_answer, total_tokens \ No newline at end of file From 7276138307571a7f81335cdd2d05754dd3ca758b Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 24 Nov 2024 17:47:58 +0800 Subject: [PATCH 5/7] Update coc_plugin.py --- optillm/plugins/coc_plugin.py | 265 +++++++++++++++++++++++----------- 1 file changed, 182 insertions(+), 83 deletions(-) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py index 581baf98..b575d2f0 100644 --- a/optillm/plugins/coc_plugin.py +++ b/optillm/plugins/coc_plugin.py @@ -12,39 +12,70 @@ # Plugin identifier SLUG = "coc" +# Maximum attempts to fix code +MAX_FIX_ATTEMPTS = 3 + # List of allowed modules for execution ALLOWED_MODULES = { 'math': math, } -# Prompts +# Initial code generation prompt CHAIN_OF_CODE_PROMPT = ''' -You are an AI assistant that uses Chain of Code (CoC) approach to solve problems. Follow these steps: - -1. Write Python code that breaks down the problem into clear steps -2. Each step should either be: - - Executable Python code that performs computations - - Pseudocode that you will simulate with natural language understanding -3. Track final result in an 'answer' variable -4. Return the final answer within the tags +Write Python code to solve this problem. The code should: +1. Break down the problem into clear computational steps +2. Use standard Python features and math operations +3. Store the final result in a variable named 'answer' +4. Include error handling where appropriate +5. Be complete and executable Format your response using: ```python [Your complete Python program here] ``` +''' + +# Code fix prompt +CODE_FIX_PROMPT = ''' +The following Python code failed to execute. Fix the code to make it work. +Original code: +```python +{code} +``` -Finally provide output as: - -[Your final answer] - +Error encountered: +{error} + +Please provide a complete, fixed version of the code that: +1. Addresses the error message +2. Maintains the same logic and approach +3. Stores the final result in 'answer' +4. Is complete and executable + +Return only the fixed code in a code block: +```python +[Your fixed code here] +``` ''' -STATE_SIMULATION_PROMPT = '''You are simulating the execution of a Python program. -Given the code below, simulate its execution and return the final value that would be in the 'answer' variable. -Return ONLY the final value, no explanations or additional text. +# Simulation prompt +SIMULATION_PROMPT = ''' +The following Python code could not be executed after several attempts. +Please simulate its execution and determine the final value that would be in the 'answer' variable. Code to simulate: +```python {code} +``` + +Last error encountered: +{error} + +Important: +1. Follow the logic of the code exactly +2. Perform all calculations carefully +3. Return ONLY the final numeric or string value, no explanations +4. If the code contains semantic functions (like text analysis), use your judgment to simulate them ''' def extract_code_blocks(text: str) -> List[str]: @@ -57,100 +88,168 @@ def extract_code_blocks(text: str) -> List[str]: logger.info(f"Code block {i+1}:\n{block}") return blocks -def extract_output(text: str) -> str: - """Extract content from output tags.""" - pattern = r'(.*?)' - match = re.search(pattern, text, re.DOTALL) - result = match.group(1).strip() if match else text.strip() - logger.info(f"Extracted output: {result}") - return result - -def execute_code(code: str, client, model: str) -> Tuple[Any, int]: - """Execute full code block either with Python or LM simulation.""" - logger.info("Attempting to execute complete code block") +def sanitize_code(code: str) -> str: + """Prepare code for execution by adding necessary imports and safety checks.""" + # Add standard imports + imports = "\n".join(f"import {mod}" for mod in ALLOWED_MODULES) + + # Add safety wrapper + wrapper = f""" +{imports} + +def safe_execute(): + {code.replace('\n', '\n ')} + return answer if 'answer' in locals() else None + +result = safe_execute() +answer = result +""" + return wrapper + +def execute_code(code: str) -> Tuple[Any, str]: + """Attempt to execute the code and return result or error.""" + logger.info("Attempting to execute code") logger.info(f"Code:\n{code}") - # Add imports execution_env = {} - for mod_name, mod in ALLOWED_MODULES.items(): - execution_env[mod_name] = mod - try: - # Try executing the complete code block with Python - logger.info("Attempting Python execution") - exec(code, execution_env) + sanitized_code = sanitize_code(code) + exec(sanitized_code, execution_env) answer = execution_env.get('answer') - logger.info(f"Python execution successful. Answer: {answer}") - return answer, 0 + if answer is not None: + logger.info(f"Execution successful. Answer: {answer}") + return answer, None + else: + error = "Code executed but did not produce an answer" + logger.warning(error) + return None, error + except Exception as e: - logger.info(f"Python execution failed: {str(e)}") - logger.info("Falling back to LM simulation") - - # If Python execution fails, simulate with LM - response = client.chat.completions.create( - model=model, - messages=[ - {"role": "system", "content": STATE_SIMULATION_PROMPT.format(code=code)}, - {"role": "user", "content": "Simulate this code and return the final value of 'answer'."} - ], - temperature=0.2 - ) - + error = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}" + logger.error(f"Execution failed: {error}") + return None, error + +def generate_fixed_code(original_code: str, error: str, client, model: str) -> Tuple[str, int]: + """Ask LLM to fix the broken code.""" + logger.info("Requesting code fix from LLM") + logger.info(f"Original error: {error}") + + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": CODE_FIX_PROMPT.format( + code=original_code, error=error)}, + {"role": "user", "content": "Fix the code to make it work."} + ], + temperature=0.2 + ) + + fixed_code = response.choices[0].message.content + code_blocks = extract_code_blocks(fixed_code) + + if code_blocks: + logger.info("Received fixed code from LLM") + return code_blocks[0], response.usage.completion_tokens + else: + logger.warning("No code block found in LLM response") + return None, response.usage.completion_tokens + +def simulate_execution(code: str, error: str, client, model: str) -> Tuple[Any, int]: + """Ask LLM to simulate code execution.""" + logger.info("Attempting code simulation with LLM") + + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": SIMULATION_PROMPT.format( + code=code, error=error)}, + {"role": "user", "content": "Simulate this code and return the final answer value."} + ], + temperature=0.2 + ) + + try: + result = response.choices[0].message.content.strip() + # Try to convert to appropriate type try: - answer = response.choices[0].message.content.strip() - logger.info(f"LM simulation successful. Answer: {answer}") - - # Try to convert to number if possible - try: - answer = ast.literal_eval(answer) - except: - pass - - return answer, response.usage.completion_tokens - - except Exception as e: - logger.error(f"Could not parse LM simulation response: {str(e)}") - return None, response.usage.completion_tokens + answer = ast.literal_eval(result) + except: + answer = result + logger.info(f"Simulation successful. Result: {answer}") + return answer, response.usage.completion_tokens + except Exception as e: + logger.error(f"Failed to parse simulation result: {str(e)}") + return None, response.usage.completion_tokens def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: """Main Chain of Code execution function.""" logger.info("Starting Chain of Code execution") logger.info(f"Query: {initial_query}") + # Initial code generation messages = [ {"role": "system", "content": system_prompt + "\n" + CHAIN_OF_CODE_PROMPT}, {"role": "user", "content": initial_query} ] - logger.info("Generating code solution") response = client.chat.completions.create( model=model, messages=messages, temperature=0.7 ) - initial_response = response.choices[0].message.content total_tokens = response.usage.completion_tokens - logger.info("Initial response from LM:") - logger.info(initial_response) - - code_blocks = extract_code_blocks(initial_response) + # Extract initial code + code_blocks = extract_code_blocks(response.choices[0].message.content) if not code_blocks: logger.warning("No code blocks found in response") - return initial_response, total_tokens - - # Execute the complete code block - code = code_blocks[0] - answer, execution_tokens = execute_code(code, client, model) - total_tokens += execution_tokens + return response.choices[0].message.content, total_tokens - # If we got an answer from code execution, use it - if answer is not None: - final_answer = str(answer) - else: - # Fall back to output tags if code execution failed - final_answer = extract_output(initial_response) + current_code = code_blocks[0] + fix_attempts = 0 + last_error = None + + # Strategy 1: Direct execution and fix attempts + while fix_attempts < MAX_FIX_ATTEMPTS: + fix_attempts += 1 + logger.info(f"Execution attempt {fix_attempts}/{MAX_FIX_ATTEMPTS}") + + # Try to execute current code + answer, error = execute_code(current_code) + + # If successful, return the answer + if error is None: + logger.info(f"Successful execution on attempt {fix_attempts}") + return str(answer), total_tokens - logger.info(f"Chain of Code execution completed. Final answer: {final_answer}") - return final_answer, total_tokens \ No newline at end of file + last_error = error + + # If we hit max attempts, break to try simulation + if fix_attempts >= MAX_FIX_ATTEMPTS: + logger.warning(f"Failed after {fix_attempts} fix attempts") + break + + # Otherwise, try to get fixed code from LLM + logger.info(f"Requesting code fix, attempt {fix_attempts}") + fixed_code, fix_tokens = generate_fixed_code(current_code, error, client, model) + total_tokens += fix_tokens + + if fixed_code: + current_code = fixed_code + else: + logger.error("Failed to get fixed code from LLM") + break + + # Strategy 2: If all execution attempts failed, try simulation + logger.info("All execution attempts failed, trying simulation") + simulated_answer, sim_tokens = simulate_execution(current_code, last_error, client, model) + total_tokens += sim_tokens + + if simulated_answer is not None: + logger.info("Successfully got answer from simulation") + return str(simulated_answer), total_tokens + + # If we get here, everything failed + logger.warning("All strategies failed") + return f"Error: Could not solve problem after all attempts. Last error: {last_error}", total_tokens \ No newline at end of file From 3d4f97094373c023b68af1cde7ddcaf74dc3cb92 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 24 Nov 2024 22:58:29 +0800 Subject: [PATCH 6/7] Update coc_plugin.py fix visualization error --- optillm/plugins/coc_plugin.py | 50 +++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/optillm/plugins/coc_plugin.py b/optillm/plugins/coc_plugin.py index b575d2f0..62ef5ce6 100644 --- a/optillm/plugins/coc_plugin.py +++ b/optillm/plugins/coc_plugin.py @@ -18,6 +18,7 @@ # List of allowed modules for execution ALLOWED_MODULES = { 'math': math, + 'numpy': 'numpy', # String indicates module should be imported in execution context } # Initial code generation prompt @@ -60,22 +61,21 @@ # Simulation prompt SIMULATION_PROMPT = ''' -The following Python code could not be executed after several attempts. -Please simulate its execution and determine the final value that would be in the 'answer' variable. +The following Python code could not be executed directly. Analyze the code and determine what the answer would be. +Pay special attention to: +1. The core computational logic, ignoring any visualization or display code +2. The key mathematical operations that determine the final answer +3. Any logic that affects the 'answer' variable -Code to simulate: +Code to analyze: ```python {code} ``` -Last error encountered: +Runtime error encountered: {error} -Important: -1. Follow the logic of the code exactly -2. Perform all calculations carefully -3. Return ONLY the final numeric or string value, no explanations -4. If the code contains semantic functions (like text analysis), use your judgment to simulate them +Return ONLY the final value that would be in the 'answer' variable. Return just the value, no explanations. ''' def extract_code_blocks(text: str) -> List[str]: @@ -93,12 +93,25 @@ def sanitize_code(code: str) -> str: # Add standard imports imports = "\n".join(f"import {mod}" for mod in ALLOWED_MODULES) + # Remove or modify problematic visualization code + lines = code.split('\n') + safe_lines = [] + for line in lines: + # Skip matplotlib-related imports and plotting commands + if any(x in line.lower() for x in ['matplotlib', 'plt.', '.plot(', '.show(', 'figure', 'subplot']): + continue + # Keep the line if it's not visualization-related + safe_lines.append(line) + + safe_code = '\n'.join(safe_lines) + # Add safety wrapper wrapper = f""" {imports} def safe_execute(): - {code.replace('\n', '\n ')} + import numpy as np # Always allow numpy + {safe_code.replace('\n', '\n ')} return answer if 'answer' in locals() else None result = safe_execute() @@ -111,22 +124,25 @@ def execute_code(code: str) -> Tuple[Any, str]: logger.info("Attempting to execute code") logger.info(f"Code:\n{code}") - execution_env = {} try: - sanitized_code = sanitize_code(code) - exec(sanitized_code, execution_env) - answer = execution_env.get('answer') + # Create a clean environment + execution_env = {} + + # Execute the code as-is + exec(code, execution_env) - if answer is not None: + # Look for answer variable + if 'answer' in execution_env: + answer = execution_env['answer'] logger.info(f"Execution successful. Answer: {answer}") return answer, None else: - error = "Code executed but did not produce an answer" + error = "Code executed but did not produce an answer variable" logger.warning(error) return None, error except Exception as e: - error = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}" + error = str(e) logger.error(f"Execution failed: {error}") return None, error From 90bef2e7c618c24c72f7abd6be793d62e41b4404 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 25 Nov 2024 12:34:01 +0800 Subject: [PATCH 7/7] Update README.md --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index c7354e99..818036d5 100644 --- a/README.md +++ b/README.md @@ -218,6 +218,7 @@ response = client.chat.completions.create( | Plugin | Slug | Description | | ----------------------- | ------------------ | ---------------------------------------------------------------------------------------------- | | Router | `router` | Uses the [optillm-bert-uncased](https://huggingface.co/codelion/optillm-bert-uncased) model to route requests to different approaches based on the user prompt | +| Chain-of-Code | `coc` | Implements a chain of code approach that combines CoT with code execution and LLM based code simulation | | Memory | `memory` | Implements a short term memory layer, enables you to use unbounded context length with any LLM | | Privacy | `privacy` | Anonymize PII data in request and deanonymize it back to original value in response | | Read URLs | `readurls` | Reads all URLs found in the request, fetches the content at the URL and adds it to the context | @@ -290,6 +291,20 @@ Authorization: Bearer your_secret_api_key ``` ## SOTA results on benchmarks with optillm +### coc-claude-3-5-sonnet-20241022 on AIME 2024 pass@1 (Nov 2024) + +| Model | Score | +|-------|-----:| +| o1-mini | 56.67 | +| coc-claude-3-5-sonnet-20241022 | 46.67 | +| coc-gemini/gemini-exp-1121 | 46.67 | +| o1-preview | 40.00 | +| f1-preview | 40.00 | +| gemini-exp-1114 | 36.67 | +| claude-3-5-sonnet-20241022 | 20.00 | +| gemini-1.5-pro-002 | 20.00 | +| gemini-1.5-flash-002 | 16.67 | + ### readurls&memory-gpt-4o-mini on Google FRAMES Benchmark (Oct 2024) | Model | Accuracy | | ----- | -------- | @@ -324,6 +339,7 @@ called patchflows. We saw huge performance gains across all the supported patchf ## References +- [Chain of Code: Reasoning with a Language Model-Augmented Code Emulator](https://arxiv.org/abs/2312.04474) - [Implementation](https://github.com/codelion/optillm/blob/main/optillm/plugins/coc_plugin.py) - [Entropy Based Sampling and Parallel CoT Decoding](https://github.com/xjdr-alt/entropix) - [Implementation](https://github.com/codelion/optillm/blob/main/optillm/entropy_decoding.py) - [Fact, Fetch, and Reason: A Unified Evaluation of Retrieval-Augmented Generation](https://arxiv.org/abs/2409.12941) - [Evaluation script](https://github.com/codelion/optillm/blob/main/scripts/eval_frames_benchmark.py) - [Writing in the Margins: Better Inference Pattern for Long Context Retrieval](https://www.arxiv.org/abs/2408.14906) - [Inspired the implementation of the memory plugin](https://github.com/codelion/optillm/blob/main/optillm/plugins/memory_plugin.py)