From ecf14c9c54b84bbf9a67c2564521bbe80f8cf688 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Wed, 25 Sep 2024 07:57:15 -0700 Subject: [PATCH 1/9] Create wim.py --- optillm/wim.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 optillm/wim.py diff --git a/optillm/wim.py b/optillm/wim.py new file mode 100644 index 00000000..69c5a84c --- /dev/null +++ b/optillm/wim.py @@ -0,0 +1,107 @@ +from collections import deque +import tiktoken + +class WiMInfiniteContextAPI: + def __init__(self, system_prompt, client, model, max_context_tokens=8192, max_margins=50, chunk_size=2000): + self.model = model + self.max_context_tokens = max_context_tokens + self.max_margins = max_margins + self.chunk_size = chunk_size + self.context_buffer = deque() + self.margins = deque(maxlen=max_margins) + self.tokenizer = tiktoken.encoding_for_model(model) + self.system_message = system_prompt + self.client = client + + def count_tokens(self, text): + return len(self.tokenizer.encode(text)) + + def trim_context_buffer(self): + while self.count_tokens("".join(self.context_buffer)) > self.max_context_tokens: + self.context_buffer.popleft() + + def generate_margin(self, chunk, query): + messages = [ + {"role": "system", "content": self.system_message}, + {"role": "user", "content": f""" +'''text +{chunk} +''' +Copy over all context relevant to the query: {query} +Provide the answer in the format: #. +Here are rules: +- If you don't know how to answer the query - start your answer with NO# +- If the text is not related to the query - start your answer with NO# +- If you can extract relevant information - start your answer with YES# +- If the text does not mention the person by name - start your answer with NO# +Example answers: +- YES#Western philosophy originated in Ancient Greece in the 6th century BCE with the pre-Socratics. +- NO#No relevant context. +"""} + ] + response = self.client.ChatCompletion.create( + model=self.model, + messages=messages + ) + return response.choices[0].message['content'] + + def classify_margin(self, margin): + return margin.startswith("YES#") + + def process_chunk(self, chunk, query): + self.context_buffer.append(chunk) + self.trim_context_buffer() + margin = self.generate_margin(chunk, query) + if self.classify_margin(margin): + self.margins.append(margin.split("#", 1)[1]) + + def process_stream(self, text_stream, query): + for chunk in text_stream: + self.process_chunk(chunk, query) + + def generate_final_answer(self, query): + context = "".join(self.context_buffer) + margins = "\n".join(self.margins) + messages = [ + {"role": "system", "content": self.system_message}, + {"role": "user", "content": f""" +'''text +{context} +''' +I asked my assistant to read and analyse the above content page by page to help you complete this task. These are margin notes left on each page: +'''text +{margins} +''' +Read again the note(s) and the provided content, take a deep breath and answer the query. 
+{self.instruction} +{query} +"""} + ] + response = self.client.ChatCompletion.create( + model=self.model, + messages=messages + ) + return response.choices[0].message['content'] + + def run(self, text_stream, query): + self.process_stream(text_stream, query) + return self.generate_final_answer(query) + + @property + def instruction(self): + return "Answer the following question based on the provided context and margin notes:" + +# Usage +def text_stream_generator(text, chunk_size): + for i in range(0, len(text), chunk_size): + yield text[i:i+chunk_size] + +api_key = "your-api-key-here" +wim = WiMInfiniteContextAPI(api_key) + +text = "Very long text..." # Your infinite context here +query = "What is the main topic?" + +text_stream = text_stream_generator(text, wim.chunk_size) +final_answer = wim.run(text_stream, query) +print(final_answer) From 6f9032c3d5e6ba82e00e46861750dd96e006d878 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Wed, 25 Sep 2024 15:15:00 -0700 Subject: [PATCH 2/9] init --- optillm.py | 6 +++++- optillm/wim.py | 55 ++++++++++++++++++++++++++++++------------------ requirements.txt | 1 + 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/optillm.py b/optillm.py index 241bd0a6..d54ab98a 100644 --- a/optillm.py +++ b/optillm.py @@ -18,6 +18,7 @@ from optillm.plansearch import plansearch from optillm.leap import leap from optillm.reread import re2_approach +from optillm.wim import WiMInfiniteContextAPI # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -72,7 +73,7 @@ # List of known approaches known_approaches = ["mcts", "bon", "moa", "rto", "z3", "self_consistency", "pvg", "rstar", - "cot_reflection", "plansearch", "leap", "re2"] + "cot_reflection", "plansearch", "leap", "re2", "wim"] # Optional API key configuration to secure the proxy @app.before_request @@ -155,6 +156,9 @@ def proxy(): final_response, completion_tokens = leap(system_prompt, initial_query, client, model) elif approach == 're2': final_response, completion_tokens = re2_approach(system_prompt, initial_query, client, model, n=n) + elif approach == "wim": + wim_context = WiMInfiniteContextAPI(system_prompt, client, model, max_context_tokens=64000, max_margins=10, chunk_size=16000) + final_response, completion_tokens = wim_context.process_query(initial_query) else: raise ValueError(f"Unknown approach: {approach}") except Exception as e: diff --git a/optillm/wim.py b/optillm/wim.py index 69c5a84c..56596a08 100644 --- a/optillm/wim.py +++ b/optillm/wim.py @@ -1,17 +1,22 @@ from collections import deque import tiktoken +import re class WiMInfiniteContextAPI: - def __init__(self, system_prompt, client, model, max_context_tokens=8192, max_margins=50, chunk_size=2000): + def __init__(self, system_prompt, client, model, max_context_tokens=64000, max_margins=10, chunk_size=16000): self.model = model self.max_context_tokens = max_context_tokens self.max_margins = max_margins self.chunk_size = chunk_size self.context_buffer = deque() self.margins = deque(maxlen=max_margins) - self.tokenizer = tiktoken.encoding_for_model(model) + try: + self.tokenizer = tiktoken.encoding_for_model(model) + except: + self.tokenizer = tiktoken.get_encoding("o200k_base") self.system_message = system_prompt self.client = client + self.win_completion_tokens = 0 def count_tokens(self, text): return len(self.tokenizer.encode(text)) @@ -39,14 +44,27 @@ def generate_margin(self, chunk, query): - NO#No relevant context. 
"""} ] - response = self.client.ChatCompletion.create( + response = self.client.chat.completions.create( model=self.model, - messages=messages + messages=messages, + max_tokens = 512 ) - return response.choices[0].message['content'] + self.win_completion_tokens += response.usage.completion_tokens + return response.choices[0].message.content def classify_margin(self, margin): return margin.startswith("YES#") + + def extract_query(self, text): + # Split the text into sentences + sentences = re.split(r'(?<=[.!?])\s+', text) + + # Check if the last sentence starts with "Query:" + if sentences[-1].startswith("Query:"): + return sentences[-1][6:].strip(), "".join(sentences[:-1]) + + # If not, assume the last sentence is the query + return sentences[-1].strip(), "".join(sentences[:-1]) def process_chunk(self, chunk, query): self.context_buffer.append(chunk) @@ -77,11 +95,12 @@ def generate_final_answer(self, query): {query} """} ] - response = self.client.ChatCompletion.create( + response = self.client.chat.completions.create( model=self.model, messages=messages ) - return response.choices[0].message['content'] + self.win_completion_tokens += response.usage.completion_tokens + return response.choices[0].message.content def run(self, text_stream, query): self.process_stream(text_stream, query) @@ -91,17 +110,13 @@ def run(self, text_stream, query): def instruction(self): return "Answer the following question based on the provided context and margin notes:" -# Usage -def text_stream_generator(text, chunk_size): - for i in range(0, len(text), chunk_size): - yield text[i:i+chunk_size] - -api_key = "your-api-key-here" -wim = WiMInfiniteContextAPI(api_key) - -text = "Very long text..." # Your infinite context here -query = "What is the main topic?" + # Usage + def text_stream_generator(self, text): + for i in range(0, len(text), self.chunk_size): + yield text[i:i+self.chunk_size] -text_stream = text_stream_generator(text, wim.chunk_size) -final_answer = wim.run(text_stream, query) -print(final_answer) + def process_query(self, initial_query): + query, context = self.extract_query(initial_query) + text_stream = self.text_stream_generator(context) + final_answer = self.run(text_stream, query) + return final_answer, self.win_completion_tokens \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f771d90d..a6e431ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ flask torch transformers azure.identity +tiktoken From b4f6defc0912c5ed3891ce866c514d88b368546d Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 30 Sep 2024 02:00:24 -0700 Subject: [PATCH 3/9] add plugins system --- optillm.py | 27 ++++++-- optillm/plugins/memory_plugin.py | 103 +++++++++++++++++++++++++++++++ requirements.txt | 1 + test_cases.json | 5 ++ 4 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 optillm/plugins/memory_plugin.py diff --git a/optillm.py b/optillm.py index 31a7d878..4d95aefd 100644 --- a/optillm.py +++ b/optillm.py @@ -6,6 +6,8 @@ from openai import AzureOpenAI, OpenAI from flask import Response import json +import importlib +import glob # Import approach modules from optillm.mcts import chat_with_mcts @@ -20,7 +22,6 @@ from optillm.plansearch import plansearch from optillm.leap import leap from optillm.reread import re2_approach -from optillm.wim import WiMInfiniteContextAPI # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -77,6 +78,23 @@ known_approaches = ["mcts", "bon", "moa", 
"rto", "z3", "self_consistency", "pvg", "rstar", "cot_reflection", "plansearch", "leap", "re2", "wim"] +plugin_approaches = {} + +def load_plugins(): + plugin_dir = os.path.join(os.path.dirname(__file__), 'plugins') + plugin_files = glob.glob(os.path.join(plugin_dir, '*.py')) + + for plugin_file in plugin_files: + module_name = os.path.basename(plugin_file)[:-3] # Remove .py extension + spec = importlib.util.spec_from_file_location(module_name, plugin_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + if hasattr(module, 'SLUG') and hasattr(module, 'run'): + plugin_approaches[module.SLUG] = module.run + logger.info(f"Loaded plugin: {module.SLUG}") + + def generate_streaming_response(final_response, model): # Yield the final response if isinstance(final_response, list): @@ -191,9 +209,8 @@ def proxy(): final_response, completion_tokens = leap(system_prompt, initial_query, client, model) elif approach == 're2': final_response, completion_tokens = re2_approach(system_prompt, initial_query, client, model, n=n) - elif approach == "wim": - wim_context = WiMInfiniteContextAPI(system_prompt, client, model, max_context_tokens=64000, max_margins=10, chunk_size=16000) - final_response, completion_tokens = wim_context.process_query(initial_query) + elif approach in plugin_approaches: + final_response, completion_tokens = plugin_approaches[approach](system_prompt, initial_query, client, model) else: raise ValueError(f"Unknown approach: {approach}") except Exception as e: @@ -320,6 +337,8 @@ def main(): logger.error(f"Please set the OPENAI_API_KEY environment variable before using the proxy") exit(1) + # Call this function at the start of main() + load_plugins() # Update server_config with all argument values server_config.update(vars(args)) diff --git a/optillm/plugins/memory_plugin.py b/optillm/plugins/memory_plugin.py new file mode 100644 index 00000000..6525dc19 --- /dev/null +++ b/optillm/plugins/memory_plugin.py @@ -0,0 +1,103 @@ +import re +from typing import Tuple, List +import numpy as np +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + +SLUG = "memory" + +class Memory: + def __init__(self, max_size: int = 1000): + self.max_size = max_size + self.items: List[str] = [] + self.vectorizer = TfidfVectorizer() + self.vectors = None + self.completion_tokens = 0 + + def add(self, item: str): + if len(self.items) >= self.max_size: + self.items.pop(0) + self.items.append(item) + self.vectors = None # Reset vectors to force recalculation + + def get_relevant(self, query: str, n: int = 5) -> List[str]: + if not self.items: + return [] + + if self.vectors is None: + self.vectors = self.vectorizer.fit_transform(self.items) + + query_vector = self.vectorizer.transform([query]) + similarities = cosine_similarity(query_vector, self.vectors).flatten() + top_indices = similarities.argsort()[-n:][::-1] + + return [self.items[i] for i in top_indices] + +def extract_query(text: str) -> Tuple[str, str]: + query_index = text.rfind("Query:") + + if query_index != -1: + context = text[:query_index].strip() + query = text[query_index + 6:].strip() + else: + sentences = re.split(r'(?<=[.!?])\s+', text.strip()) + if len(sentences) > 1: + context = ' '.join(sentences[:-1]) + query = sentences[-1] + else: + context = text + query = "What is the main point of this text?" 
+ return query, context + +def extract_key_information(text: str, client, model: str) -> List[str]: + prompt = f"""Extract key information from the following text. Provide a list of important facts or concepts, each on a new line: + +{text} + +Key information:""" + + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + max_tokens=150 + ) + + key_info = response.choices[0].message.content.strip().split('\n') + + return [info.strip('- ') for info in key_info if info.strip()], response.usage.completion_tokens + +def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: + memory = Memory() + query, context = extract_query(initial_query) + completion_tokens = 0 + + # Process context and add to memory + chunk_size = 1000 + for i in range(0, len(context), chunk_size): + chunk = context[i:i+chunk_size] + key_info, tokens = extract_key_information(chunk, client, model) + completion_tokens += tokens + for info in key_info: + memory.add(info) + + # Retrieve relevant information from memory + relevant_info = memory.get_relevant(query) + + # Generate response using relevant information + prompt = f"""System: {system_prompt} + +Context: {' '.join(relevant_info)} + +{query} +""" + + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + max_tokens=500 + ) + + final_response = response.choices[0].message.content.strip() + completion_tokens += response.usage.completion_tokens + + return final_response, completion_tokens \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a6e431ec..c10dbe97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ torch transformers azure.identity tiktoken +scikit-learn \ No newline at end of file diff --git a/test_cases.json b/test_cases.json index a9c8df7a..fadf3e08 100644 --- a/test_cases.json +++ b/test_cases.json @@ -28,5 +28,10 @@ "name" : "reddit", "system_prompt": "", "query" : "There are 24 volunteers. Over the next 3 weeks, each volunteer is assigned to a different task. There are 8 tasks. Each week, the volunteers switch tasks. Each task has 3 volunteers assigned to it. Volunteers cannot be assigned to the same task more than once, and volunteers cannot share the same task more than once." 
+ }, + { + "name" : "GH", + "system_prompt" : "", + "query" : "Find the largest possible real part of[(75+117i)z+\frac{96+144i}{z}]where z is a complex number with |z|=4" } ] From e1794f72455817c4b2eb112b48213c06e4dfadd5 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 30 Sep 2024 02:24:23 -0700 Subject: [PATCH 4/9] add readurls plugin --- optillm.py | 4 +-- optillm/plugins/readurls_plugin.py | 47 ++++++++++++++++++++++++++++++ requirements.txt | 2 ++ setup.py | 5 ++++ 4 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 optillm/plugins/readurls_plugin.py diff --git a/optillm.py b/optillm.py index c0739044..c7f23925 100644 --- a/optillm.py +++ b/optillm.py @@ -84,9 +84,9 @@ plugin_approaches = {} def load_plugins(): - plugin_dir = os.path.join(os.path.dirname(__file__), 'plugins') + plugin_dir = os.path.join(os.path.dirname(__file__), 'optillm/plugins') plugin_files = glob.glob(os.path.join(plugin_dir, '*.py')) - + for plugin_file in plugin_files: module_name = os.path.basename(plugin_file)[:-3] # Remove .py extension spec = importlib.util.spec_from_file_location(module_name, plugin_file) diff --git a/optillm/plugins/readurls_plugin.py b/optillm/plugins/readurls_plugin.py new file mode 100644 index 00000000..74d97549 --- /dev/null +++ b/optillm/plugins/readurls_plugin.py @@ -0,0 +1,47 @@ +import re +from typing import Tuple, List +import requests +from bs4 import BeautifulSoup +from urllib.parse import urlparse + +SLUG = "url_content" + +def extract_urls(text: str) -> List[str]: + url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') + return url_pattern.findall(text) + +def fetch_webpage_content(url: str, max_length: int = 1000) -> str: + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + soup = BeautifulSoup(response.content, 'html.parser') + + # Remove script and style elements + for script in soup(["script", "style"]): + script.decompose() + + # Get text content + text = soup.get_text() + + # Break into lines and remove leading and trailing space on each + lines = (line.strip() for line in text.splitlines()) + + # Break multi-headlines into a line each + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + + # Join the lines and truncate if necessary + content = ' '.join(chunk for chunk in chunks if chunk) + return content[:max_length] + ('...' 
if len(content) > max_length else '') + except Exception as e: + return f"Error fetching content: {str(e)}" + +def run(system_prompt, initial_query: str, client=None, model=None) -> Tuple[str, int]: + urls = extract_urls(initial_query) + modified_query = initial_query + + for url in urls: + content = fetch_webpage_content(url) + domain = urlparse(url).netloc + modified_query = modified_query.replace(url, f"{url} [Content from {domain}: {content}]") + + return modified_query, 0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a57748f7..09885d7c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ azure.identity tiktoken scikit-learn litellm +requests +beautifulsoup4 \ No newline at end of file diff --git a/setup.py b/setup.py index 61579c36..108548d1 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,11 @@ "torch", "transformers", "azure-identity", + "tiktoken", + "scikit-learn", + "litellm", + "requests", + "beautifulsoup4", ], author="codelion", author_email="codelion@okyasoft.com", From 8efd43c1505fd5dd7cfb7993441ec8c4bfa95172 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 30 Sep 2024 03:58:01 -0700 Subject: [PATCH 5/9] fix bugs --- optillm.py | 148 +++++++++++++++++++++---------- scripts/eval_frames_benchmark.py | 130 +++++++++++++++++++++++++++ scripts/requirements.txt | 1 + 3 files changed, 232 insertions(+), 47 deletions(-) create mode 100644 scripts/eval_frames_benchmark.py create mode 100644 scripts/requirements.txt diff --git a/optillm.py b/optillm.py index c7f23925..4ab36a0b 100644 --- a/optillm.py +++ b/optillm.py @@ -8,6 +8,8 @@ import json import importlib import glob +import asyncio +from concurrent.futures import ThreadPoolExecutor # Import the LiteLLM wrapper from litellm_wrapper import LiteLLMWrapper @@ -97,6 +99,94 @@ def load_plugins(): plugin_approaches[module.SLUG] = module.run logger.info(f"Loaded plugin: {module.SLUG}") +def parse_combined_approach(model: str, known_approaches: list, plugin_approaches: dict): + if model == 'auto': + return 'SINGLE', ['bon'], model + + parts = model.split('-') + approaches = [] + operation = 'SINGLE' + model_parts = [] + parsing_approaches = True + + for part in parts: + if parsing_approaches: + if part in known_approaches or part in plugin_approaches: + approaches.append(part) + elif '&' in part: + operation = 'AND' + approaches.extend(part.split('&')) + elif '|' in part: + operation = 'OR' + approaches.extend(part.split('|')) + else: + parsing_approaches = False + model_parts.append(part) + else: + model_parts.append(part) + + if not approaches: + approaches = ['bon'] + operation = 'SINGLE' + + actual_model = '-'.join(model_parts) + + return operation, approaches, actual_model + +def execute_single_approach(approach, system_prompt, initial_query, client, model): + if approach in known_approaches: + # Execute known approaches + if approach == 'mcts': + return chat_with_mcts(system_prompt, initial_query, client, model, server_config['mcts_simulations'], + server_config['mcts_exploration'], server_config['mcts_depth']) + elif approach == 'bon': + return best_of_n_sampling(system_prompt, initial_query, client, model, server_config['best_of_n']) + elif approach == 'moa': + return mixture_of_agents(system_prompt, initial_query, client, model) + elif approach == 'rto': + return round_trip_optimization(system_prompt, initial_query, client, model) + elif approach == 'z3': + z3_solver = Z3SolverSystem(system_prompt, client, model) + return z3_solver.process_query(initial_query) + 
elif approach == "self_consistency": + return advanced_self_consistency_approach(system_prompt, initial_query, client, model) + elif approach == "pvg": + return inference_time_pv_game(system_prompt, initial_query, client, model) + elif approach == "rstar": + rstar = RStar(system_prompt, client, model, + max_depth=server_config['rstar_max_depth'], num_rollouts=server_config['rstar_num_rollouts'], + c=server_config['rstar_c']) + return rstar.solve(initial_query) + elif approach == "cot_reflection": + return cot_reflection(system_prompt, initial_query, client, model, return_full_response=server_config['return_full_response']) + elif approach == 'plansearch': + return plansearch(system_prompt, initial_query, client, model, n=n) + elif approach == 'leap': + return leap(system_prompt, initial_query, client, model) + elif approach == 're2': + return re2_approach(system_prompt, initial_query, client, model, n=n) + elif approach in plugin_approaches: + return plugin_approaches[approach](system_prompt, initial_query, client, model) + else: + raise ValueError(f"Unknown approach: {approach}") + +def execute_combined_approaches(approaches, system_prompt, initial_query, client, model): + final_response = initial_query + total_tokens = 0 + for approach in approaches: + response, tokens = execute_single_approach(approach, system_prompt, final_response, client, model) + final_response = response + total_tokens += tokens + return final_response, total_tokens + +async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model): + async def run_approach(approach): + return await asyncio.to_thread(execute_single_approach, approach, system_prompt, initial_query, client, model) + + tasks = [run_approach(approach) for approach in approaches] + results = await asyncio.gather(*tasks) + responses, tokens = zip(*results) + return list(responses), sum(tokens) def generate_streaming_response(final_response, model): # Yield the final response @@ -167,55 +257,20 @@ def proxy(): else: client = default_client - # Handle 'auto' approach - if approach == 'auto': - for known_approach in known_approaches: - if model.startswith(f"{known_approach}-"): - approach = known_approach - model = model[len(known_approach)+1:] - break - else: - # If no known approach is found in the model name, default to 'bon' - approach = 'bon' - - - logger.info(f'Using approach {approach}, with {model}') - completion_tokens = 0 + operation, approaches, model = parse_combined_approach(model, known_approaches, plugin_approaches) + logger.info(f'Using approach(es) {approaches}, operation {operation}, with model {model}') try: - if approach == 'mcts': - final_response, completion_tokens = chat_with_mcts(system_prompt, initial_query, client, model, server_config['mcts_simulations'], - server_config['mcts_exploration'], server_config['mcts_depth']) - elif approach == 'bon': - final_response, completion_tokens = best_of_n_sampling(system_prompt, initial_query, client, model, server_config['best_of_n']) - elif approach == 'moa': - final_response, completion_tokens = mixture_of_agents(system_prompt, initial_query, client, model) - elif approach == 'rto': - final_response, completion_tokens = round_trip_optimization(system_prompt, initial_query, client, model) - elif approach == 'z3': - z3_solver = Z3SolverSystem(system_prompt, client, model) - final_response, completion_tokens = z3_solver.process_query(initial_query) - elif approach == "self_consistency": - final_response, completion_tokens = 
advanced_self_consistency_approach(system_prompt, initial_query, client, model) - elif approach == "pvg": - final_response, completion_tokens = inference_time_pv_game(system_prompt, initial_query, client, model) - elif approach == "rstar": - rstar = RStar(system_prompt, client, model, - max_depth=server_config['rstar_max_depth'], num_rollouts=server_config['rstar_num_rollouts'], - c=server_config['rstar_c']) - final_response, completion_tokens = rstar.solve(initial_query) - elif approach == "cot_reflection": - final_response, completion_tokens = cot_reflection(system_prompt, initial_query, client, model, return_full_response=server_config['return_full_response']) - elif approach == 'plansearch': - final_response, completion_tokens = plansearch(system_prompt, initial_query, client, model, n=n) - elif approach == 'leap': - final_response, completion_tokens = leap(system_prompt, initial_query, client, model) - elif approach == 're2': - final_response, completion_tokens = re2_approach(system_prompt, initial_query, client, model, n=n) - elif approach in plugin_approaches: - final_response, completion_tokens = plugin_approaches[approach](system_prompt, initial_query, client, model) + if operation == 'SINGLE': + final_response, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model) + elif operation == 'AND': + final_response, completion_tokens = execute_combined_approaches(approaches, system_prompt, initial_query, client, model) + elif operation == 'OR': + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + final_response, completion_tokens = loop.run_until_complete(execute_parallel_approaches(approaches, system_prompt, initial_query, client, model)) else: - raise ValueError(f"Unknown approach: {approach}") + raise ValueError(f"Unknown operation: {operation}") except Exception as e: logger.error(f"Error processing request: {str(e)}") return jsonify({"error": str(e)}), 500 @@ -254,7 +309,6 @@ def proxy(): logger.debug(f'API response: {response_data}') return jsonify(response_data), 200 - @app.route('/v1/models', methods=['GET']) def proxy_models(): logger.info('Received request to /v1/models') diff --git a/scripts/eval_frames_benchmark.py b/scripts/eval_frames_benchmark.py new file mode 100644 index 00000000..8e0491e2 --- /dev/null +++ b/scripts/eval_frames_benchmark.py @@ -0,0 +1,130 @@ +import argparse +import json +import os +import time +from typing import List, Dict + +from openai import OpenAI +from datasets import load_dataset +from tqdm import tqdm + +client = OpenAI(api_key="none", base_url="http://localhost:8000/v1") +SLEEP_INTERVAL = 10 + +def generate_llm_prompt(prompt: str, wiki_links: List[str]) -> str: + wiki_links_str = "\n".join(wiki_links) + return f"{prompt}\n\nHere are the relevant Wikipedia articles:\n{wiki_links_str}\n\nBased on all this, answer the query." + +def get_llm_response(prompt: str, model: str) -> str: + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt} + ], + max_tokens=1000, + n=1, + stop=None, + temperature=0.7, + ) + return response.choices[0].message.content.strip() + +def evaluate_response(question: str, llm_response: str, ground_truth: str, model: str) -> Dict[str, str]: + evaluation_prompt = f"""===Task=== +I need your help in evaluating an answer provided by an LLM against a ground +truth answer. 
Your task is to determine if the ground truth answer is present in the LLM's +response. Please analyze the provided data and make a decision. +===Instructions=== +1. Carefully compare the "Predicted Answer" with the "Ground Truth Answer". +2. Consider the substance of the answers – look for equivalent information or correct answers. +Do not focus on exact wording unless the exact wording is crucial to the meaning. +3. Your final decision should be based on whether the meaning and the vital facts of the +"Ground Truth Answer" are present in the "Predicted Answer:" +===Input Data=== +- Question: {question} +- Predicted Answer: {llm_response} +- Ground Truth Answer: {ground_truth} +===Output Format=== +Provide your final evaluation in the following format: +"Explanation:" (How you made the decision?) +"Decision:" ("TRUE" or "FALSE" ) +Please proceed with the evaluation.""" + + evaluation_response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": evaluation_prompt} + ], + max_tokens=300, + n=1, + stop=None, + temperature=0.3, + ) + + evaluation_text = evaluation_response.choices[0].message.content.strip() + + # Extract the decision and explanation + lines = evaluation_text.split('\n') + decision = "FALSE" + explanation = "" + for line in lines: + if line.startswith("Decision:"): + decision = line.split(":")[1].strip().upper() + elif line.startswith("Explanation:"): + explanation = line.split(":", 1)[1].strip() + + return {"decision": decision, "explanation": explanation} + +def main(model: str): + + # Load the dataset + dataset = load_dataset("google/frames-benchmark", split="test") + + results = [] + + for item in tqdm(dataset, desc="Processing samples"): + # print(item) + prompt = generate_llm_prompt(item['Prompt'], item['wiki_links']) + llm_response = get_llm_response(prompt, model) + evaluation = evaluate_response(item['Prompt'], llm_response, item['Answer'], model) + + result = { + "prompt": item['Prompt'], + "ground_truth": item['Answer'], + "llm_response": llm_response, + "evaluation_decision": evaluation['decision'], + "evaluation_explanation": evaluation['explanation'], + "reasoning_type": item['reasoning_types'] + } + results.append(result) + time.sleep(SLEEP_INTERVAL) + + # Save results to a JSON file + with open(f"evaluation_results_{model.replace('/', '_')}.json", "w") as f: + json.dump(results, f, indent=2) + + # Calculate and print summary statistics + total_samples = len(results) + correct_answers = sum(1 for r in results if r['evaluation_decision'] == 'TRUE') + accuracy = correct_answers / total_samples + + print(f"Model: {model}") + print(f"Total samples: {total_samples}") + print(f"Correct answers: {correct_answers}") + print(f"Accuracy: {accuracy:.2%}") + + # Print accuracy by reasoning type + reasoning_types = set(r['reasoning_types'] for r in results) + for rt in reasoning_types: + rt_samples = [r for r in results if r['reasoning_types'] == rt] + rt_correct = sum(1 for r in rt_samples if r['evaluation_decision'] == 'TRUE') + rt_accuracy = rt_correct / len(rt_samples) + print(f"Accuracy for {rt}: {rt_accuracy:.2%}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Evaluate LLM performance on google/frames-benchmark") + parser.add_argument("--model", type=str, required=True, help="OpenAI model to use (e.g., gpt-4o, gpt-4o-mini)") + args = parser.parse_args() + + main(args.model) \ No newline at end of file diff --git 
a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..aee11b28 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +datasets From 0bda103947f6235011891806bac420773c10be14 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 30 Sep 2024 14:22:03 -0700 Subject: [PATCH 6/9] Fix bugs to get the eval script to run --- optillm.py | 5 +-- optillm/plugins/memory_plugin.py | 8 ++-- optillm/plugins/readurls_plugin.py | 65 +++++++++++++++++++++++------- requirements.txt | 3 +- scripts/eval_frames_benchmark.py | 6 +-- setup.py | 1 + 6 files changed, 62 insertions(+), 26 deletions(-) diff --git a/optillm.py b/optillm.py index 4ab36a0b..656d7791 100644 --- a/optillm.py +++ b/optillm.py @@ -160,11 +160,11 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode elif approach == "cot_reflection": return cot_reflection(system_prompt, initial_query, client, model, return_full_response=server_config['return_full_response']) elif approach == 'plansearch': - return plansearch(system_prompt, initial_query, client, model, n=n) + return plansearch(system_prompt, initial_query, client, model, n=server_config['n']) elif approach == 'leap': return leap(system_prompt, initial_query, client, model) elif approach == 're2': - return re2_approach(system_prompt, initial_query, client, model, n=n) + return re2_approach(system_prompt, initial_query, client, model, n=server_config['n']) elif approach in plugin_approaches: return plugin_approaches[approach](system_prompt, initial_query, client, model) else: @@ -249,7 +249,6 @@ def proxy(): system_prompt, initial_query = parse_conversation(messages) - approach = server_config['approach'] base_url = server_config['base_url'] if base_url != "": diff --git a/optillm/plugins/memory_plugin.py b/optillm/plugins/memory_plugin.py index 6525dc19..00d2bade 100644 --- a/optillm/plugins/memory_plugin.py +++ b/optillm/plugins/memory_plugin.py @@ -7,7 +7,7 @@ SLUG = "memory" class Memory: - def __init__(self, max_size: int = 1000): + def __init__(self, max_size: int = 100): self.max_size = max_size self.items: List[str] = [] self.vectorizer = TfidfVectorizer() @@ -59,7 +59,7 @@ def extract_key_information(text: str, client, model: str) -> List[str]: response = client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], - max_tokens=150 + max_tokens=1000 ) key_info = response.choices[0].message.content.strip().split('\n') @@ -72,7 +72,7 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str completion_tokens = 0 # Process context and add to memory - chunk_size = 1000 + chunk_size = 10000 for i in range(0, len(context), chunk_size): chunk = context[i:i+chunk_size] key_info, tokens = extract_key_information(chunk, client, model) @@ -94,7 +94,7 @@ def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str response = client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], - max_tokens=500 + max_tokens=1000 ) final_response = response.choices[0].message.content.strip() diff --git a/optillm/plugins/readurls_plugin.py b/optillm/plugins/readurls_plugin.py index 74d97549..bffeb72b 100644 --- a/optillm/plugins/readurls_plugin.py +++ b/optillm/plugins/readurls_plugin.py @@ -7,41 +7,76 @@ SLUG = "url_content" def extract_urls(text: str) -> List[str]: - url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') - return url_pattern.findall(text) + # Updated 
regex pattern to be more precise + url_pattern = re.compile(r'https?://[^\s\'"]+') + + # Find all matches + urls = url_pattern.findall(text) + + # Clean up the URLs + cleaned_urls = [] + for url in urls: + # Remove trailing punctuation and quotes + url = re.sub(r'[,\'\"\)\]]+$', '', url) + cleaned_urls.append(url) + + return cleaned_urls -def fetch_webpage_content(url: str, max_length: int = 1000) -> str: +def fetch_webpage_content(url: str, max_length: int = 40000) -> str: try: - response = requests.get(url, timeout=10) + headers = { + 'User-Agent': 'optillm/0.0.1 (hhttps://github.com/codelion/optillm)' + } + + response = requests.get(url, headers=headers, timeout=10) response.raise_for_status() - soup = BeautifulSoup(response.content, 'html.parser') + + # Make a soup + soup = BeautifulSoup(response.content, 'lxml') # Remove script and style elements for script in soup(["script", "style"]): script.decompose() - # Get text content - text = soup.get_text() + # Get text from various elements + text_elements = [] + + # Prioritize content from main content tags + for tag in ['article', 'main', 'div[role="main"]', '.main-content']: + content = soup.select_one(tag) + if content: + text_elements.extend(content.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p'])) + break - # Break into lines and remove leading and trailing space on each - lines = (line.strip() for line in text.splitlines()) + # If no main content found, fall back to all headers and paragraphs + if not text_elements: + text_elements = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']) - # Break multi-headlines into a line each - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + # Extract text from elements + text = ' '.join(element.get_text(strip=True) for element in text_elements) - # Join the lines and truncate if necessary - content = ' '.join(chunk for chunk in chunks if chunk) - return content[:max_length] + ('...' if len(content) > max_length else '') + # Remove extra whitespace + text = re.sub(r'\s+', ' ', text).strip() + + # Remove footnote superscripts in brackets + text = re.sub(r"\[.*?\]+", '', text) + + # Truncate to max_length + if len(text) > max_length: + text = text[:max_length] + '...' 
+ + return text except Exception as e: return f"Error fetching content: {str(e)}" def run(system_prompt, initial_query: str, client=None, model=None) -> Tuple[str, int]: urls = extract_urls(initial_query) + # print(urls) modified_query = initial_query for url in urls: content = fetch_webpage_content(url) domain = urlparse(url).netloc modified_query = modified_query.replace(url, f"{url} [Content from {domain}: {content}]") - + # print(modified_query) return modified_query, 0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 09885d7c..47c69eb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,5 @@ tiktoken scikit-learn litellm requests -beautifulsoup4 \ No newline at end of file +beautifulsoup4 +lxml \ No newline at end of file diff --git a/scripts/eval_frames_benchmark.py b/scripts/eval_frames_benchmark.py index 8e0491e2..f3804511 100644 --- a/scripts/eval_frames_benchmark.py +++ b/scripts/eval_frames_benchmark.py @@ -9,11 +9,10 @@ from tqdm import tqdm client = OpenAI(api_key="none", base_url="http://localhost:8000/v1") -SLEEP_INTERVAL = 10 +SLEEP_INTERVAL = 30 def generate_llm_prompt(prompt: str, wiki_links: List[str]) -> str: - wiki_links_str = "\n".join(wiki_links) - return f"{prompt}\n\nHere are the relevant Wikipedia articles:\n{wiki_links_str}\n\nBased on all this, answer the query." + return f"Here are the relevant Wikipedia articles:\n{wiki_links}\n\nBased on all the information, answer the query. \n\nQuery: {prompt}\n\n" def get_llm_response(prompt: str, model: str) -> str: response = client.chat.completions.create( @@ -98,6 +97,7 @@ def main(model: str): "reasoning_type": item['reasoning_types'] } results.append(result) + print(result["evaluation_decision"]) time.sleep(SLEEP_INTERVAL) # Save results to a JSON file diff --git a/setup.py b/setup.py index 108548d1..8097c206 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ "litellm", "requests", "beautifulsoup4", + "lxml", ], author="codelion", author_email="codelion@okyasoft.com", From b18e64a52979ba3ce7aa9f2616bd7f16a316783c Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 30 Sep 2024 21:14:47 -0700 Subject: [PATCH 7/9] fix --- optillm/plugins/readurls_plugin.py | 2 +- scripts/eval_frames_benchmark.py | 42 +++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/optillm/plugins/readurls_plugin.py b/optillm/plugins/readurls_plugin.py index bffeb72b..4392f2c4 100644 --- a/optillm/plugins/readurls_plugin.py +++ b/optillm/plugins/readurls_plugin.py @@ -4,7 +4,7 @@ from bs4 import BeautifulSoup from urllib.parse import urlparse -SLUG = "url_content" +SLUG = "readurls" def extract_urls(text: str) -> List[str]: # Updated regex pattern to be more precise diff --git a/scripts/eval_frames_benchmark.py b/scripts/eval_frames_benchmark.py index f3804511..16c44c09 100644 --- a/scripts/eval_frames_benchmark.py +++ b/scripts/eval_frames_benchmark.py @@ -9,7 +9,26 @@ from tqdm import tqdm client = OpenAI(api_key="none", base_url="http://localhost:8000/v1") -SLEEP_INTERVAL = 30 +SLEEP_INTERVAL = 60 + +def load_existing_results(filename: str) -> List[Dict]: + try: + with open(filename, 'r') as f: + return json.load(f) + except FileNotFoundError: + return [] + +def save_result(filename: str, result: Dict): + results = load_existing_results(filename) + results.append(result) + with open(filename, 'w') as f: + json.dump(results, f, indent=2) + +def get_last_processed_index(results: List[Dict]) -> int: + if not results: + return -1 + return 
max(int(r.get('index', -1)) for r in results) + def generate_llm_prompt(prompt: str, wiki_links: List[str]) -> str: return f"Here are the relevant Wikipedia articles:\n{wiki_links}\n\nBased on all the information, answer the query. \n\nQuery: {prompt}\n\n" @@ -76,19 +95,24 @@ def evaluate_response(question: str, llm_response: str, ground_truth: str, model return {"decision": decision, "explanation": explanation} def main(model: str): - # Load the dataset dataset = load_dataset("google/frames-benchmark", split="test") - results = [] + filename = f"evaluation_results_{model.replace('/', '_')}.json" + existing_results = load_existing_results(filename) + last_processed_index = get_last_processed_index(existing_results) for item in tqdm(dataset, desc="Processing samples"): - # print(item) + index = int(item['Unnamed: 0']) + if index <= last_processed_index: + continue + prompt = generate_llm_prompt(item['Prompt'], item['wiki_links']) llm_response = get_llm_response(prompt, model) evaluation = evaluate_response(item['Prompt'], llm_response, item['Answer'], model) result = { + "index": index, "prompt": item['Prompt'], "ground_truth": item['Answer'], "llm_response": llm_response, @@ -96,15 +120,13 @@ def main(model: str): "evaluation_explanation": evaluation['explanation'], "reasoning_type": item['reasoning_types'] } - results.append(result) - print(result["evaluation_decision"]) + + save_result(filename, result) + print(f"Index: {index}, Decision: {result['evaluation_decision']}") time.sleep(SLEEP_INTERVAL) - # Save results to a JSON file - with open(f"evaluation_results_{model.replace('/', '_')}.json", "w") as f: - json.dump(results, f, indent=2) - # Calculate and print summary statistics + results = load_existing_results(filename) total_samples = len(results) correct_answers = sum(1 for r in results if r['evaluation_decision'] == 'TRUE') accuracy = correct_answers / total_samples From 6604dce4c35b97ffb4cedfaacb13ec8331030467 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Tue, 1 Oct 2024 02:04:33 -0700 Subject: [PATCH 8/9] support extra_body and prompt config --- optillm.py | 34 +++++++++--- scripts/gen_optillm_dataset.py | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 6 deletions(-) create mode 100644 scripts/gen_optillm_dataset.py diff --git a/optillm.py b/optillm.py index 656d7791..ce79e70e 100644 --- a/optillm.py +++ b/optillm.py @@ -9,6 +9,7 @@ import importlib import glob import asyncio +import re from concurrent.futures import ThreadPoolExecutor # Import the LiteLLM wrapper @@ -208,18 +209,31 @@ def generate_streaming_response(final_response, model): def parse_conversation(messages): system_prompt = "" conversation = [] + optillm_approach = None for message in messages: role = message['role'] content = message['content'] if role == 'system': - system_prompt = content - elif role in ['user', 'assistant']: - conversation.append(f"{role.capitalize()}: {content}") + system_prompt, optillm_approach = extract_optillm_approach(content) + elif role == 'user': + if not optillm_approach: + content, optillm_approach = extract_optillm_approach(content) + conversation.append(f"User: {content}") + elif role == 'assistant': + conversation.append(f"Assistant: {content}") initial_query = "\n".join(conversation) - return system_prompt, initial_query + return system_prompt, initial_query, optillm_approach + +def extract_optillm_approach(content): + match = re.search(r'(.*?)', content) + if match: + approach = match.group(1) + content = re.sub(r'.*?', '', content).strip() + 
return content, approach + return content, None # Optional API key configuration to secure the proxy @app.before_request @@ -245,9 +259,17 @@ def proxy(): stream = data.get('stream', False) messages = data.get('messages', []) model = data.get('model', server_config['model']) - n = data.get('n', server_config['n']) - system_prompt, initial_query = parse_conversation(messages) + optillm_approach = data.get('optillm_approach', {}) + + system_prompt, initial_query, message_optillm_approach = parse_conversation(messages) + + # Use optillm_approach from extra_body if present, otherwise use from messages + if not optillm_approach and message_optillm_approach: + optillm_approach = message_optillm_approach + + if optillm_approach: + model = f"{optillm_approach}-{model}" base_url = server_config['base_url'] diff --git a/scripts/gen_optillm_dataset.py b/scripts/gen_optillm_dataset.py new file mode 100644 index 00000000..9a3b3cd4 --- /dev/null +++ b/scripts/gen_optillm_dataset.py @@ -0,0 +1,96 @@ +import os +import json +import argparse +import asyncio +from tqdm import tqdm +from datasets import load_dataset +from openai import AsyncOpenAI +from typing import List, Dict, Any +import random + +# OptILM approaches +APPROACHES = ["none", "mcts", "bon", "moa", "rto", "z3", "self_consistency", "pvg", "rstar", "cot_reflection", "plansearch", "leap", "re2"] + +async def generate_response(prompt: str, approach: str) -> Dict[str, Any]: + """Generate a response using the specified approach.""" + if approach == "none": + # Use the base model without any optimization technique + client = AsyncOpenAI() + response = await client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": prompt}], + ) + return { + "content": response.choices[0].message.content, + "tokens": response.usage.completion_tokens, + } + else: + # Use OptILM with the specified approach + client = AsyncOpenAI(api_key="none", base_url="http://localhost:8000/v1") + response = await client.chat.completions.create( + model=f"{approach}-gpt-4o-mini", # Assuming OptILM uses this naming convention + messages=[{"role": "user", "content": prompt}], + ) + return { + "content": response.choices[0].message.content, + "tokens": response.usage.completion_tokens, + } + +async def rank_responses(prompt: str, responses: List[Dict[str, Any]]) -> List[int]: + """Rank the responses using the LLM.""" + ranking_prompt = f"Given the following prompt:\n\n{prompt}\n\nRank the following responses from best to worst, considering accuracy, completeness, and relevance. Provide the ranking as a comma-separated list of indices (0-indexed). 
Do not add any explanations or any other text other than the comma-separated list.\n\n" + for i, response in enumerate(responses): + ranking_prompt += f"Response {i}:\n{response['content']}\n\n" + client = AsyncOpenAI() + ranking_response = await client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": ranking_prompt}], + ) + + ranking_str = ranking_response.choices[0].message.content.strip() + print(ranking_str) + return [int(idx) for idx in ranking_str.split(",")] + +async def process_sample(sample: Dict[str, Any]) -> Dict[str, Any]: + """Process a single sample from the dataset.""" + prompt = sample["turns"][0]["content"] + results = [] + + # Generate responses for each approach + for approach in APPROACHES: + response = await generate_response(prompt, approach) + results.append({"approach": approach, **response}) + + random.shuffle(results) + # Rank the responses + rankings = await rank_responses(prompt, results) + + # Add rankings to results + for rank, idx in enumerate(rankings): + results[idx]["rank"] = rank + + return { + "prompt": prompt, + "results": results, + } + +async def generate_dataset(num_samples: int, output_file: str): + """Generate the dataset and save it to a JSONL file.""" + dataset = load_dataset("lmsys/arena-hard-auto-v0.1", split="train") + + with open(output_file, "w") as f: + for sample in tqdm(dataset.select(range(num_samples)), total=num_samples): + result = await process_sample(sample) + f.write(json.dumps(result) + "\n") + +def main(): + parser = argparse.ArgumentParser(description="Generate OptILM dataset") + parser.add_argument("--num_samples", type=int, default=100, help="Number of samples to process") + parser.add_argument("--output_file", type=str, default="optillm_dataset.jsonl", help="Output file path") + args = parser.parse_args() + + asyncio.run(generate_dataset(args.num_samples, args.output_file)) + print(f"Dataset generated and saved to {args.output_file}") + +if __name__ == "__main__": + main() From 4083dc640d712051d5eb680459c06b5b423d2015 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Tue, 1 Oct 2024 02:05:47 -0700 Subject: [PATCH 9/9] Update optillm.py --- optillm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optillm.py b/optillm.py index ce79e70e..de767acd 100644 --- a/optillm.py +++ b/optillm.py @@ -82,7 +82,7 @@ # List of known approaches known_approaches = ["mcts", "bon", "moa", "rto", "z3", "self_consistency", "pvg", "rstar", - "cot_reflection", "plansearch", "leap", "re2", "wim"] + "cot_reflection", "plansearch", "leap", "re2"] plugin_approaches = {}
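Usage sketch for the features introduced in this series: patch 3 adds the plugin system with a memory plugin, patch 4 adds a readurls plugin (its slug becomes "readurls" in patch 7), patch 5 adds combined-approach parsing where '&' chains approaches sequentially and '|' runs them in parallel, and patch 8 adds approach selection through the request body or the prompt itself. The Python sketch below is illustrative only; it assumes the proxy is running locally on port 8000 (as the eval and dataset scripts in this series do), that "gpt-4o-mini" is the upstream model, and that the tag parsed by extract_optillm_approach is named <optillm_approach>, which is inferred from the function name rather than stated in the diff.

from openai import OpenAI

# Sketch only: assumes the optillm proxy from this patch series is listening on
# localhost:8000, matching scripts/eval_frames_benchmark.py.
client = OpenAI(api_key="none", base_url="http://localhost:8000/v1")

# 1) Combined approaches via the model prefix (patch 5): readurls fetches the
#    linked page into the query, then memory condenses it before answering.
response = client.chat.completions.create(
    model="readurls&memory-gpt-4o-mini",
    messages=[{"role": "user", "content": "Summarize https://en.wikipedia.org/wiki/Philosophy"}],
)
print(response.choices[0].message.content)

# 2) Approach in the request body (patch 8): the proxy reads an
#    'optillm_approach' field, which the OpenAI client can attach via extra_body.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "How many r's are there in strawberry?"}],
    extra_body={"optillm_approach": "cot_reflection"},
)

# 3) Approach embedded in the message (patch 8): extract_optillm_approach strips
#    an <optillm_approach>...</optillm_approach> tag (tag name assumed) from the
#    system or user content; an explicit extra_body value takes precedence.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "<optillm_approach>re2</optillm_approach> What is the capital of France?"}],
)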