Fix z3_solver: fall back to standard LLM inference when Z3 processing fails

codelion · codelion · commit 88dcaf156ad1 · 2024-09-20T23:24:52.000-07:00
Add a new test case that was failing with the Z3 solver.
diff --git a/optillm/z3_solver.py b/optillm/z3_solver.py
@@ -35,7 +35,8 @@ def process_query(self, query: str) -> str:
             
             return self.generate_response(query, analysis, solver_result)
         except Exception as e:
-            return f"An error occurred while processing the query: {str(e)}"
+            logging.error(f"An error occurred while processing the query with Z3, returning standard llm inference results: {str(e)}")
+            return self.standard_llm_inference(query)
 
     def analyze_query(self, query: str) -> str:
         analysis_prompt = f"""Analyze the given query and determine if it can be solved using Z3:
diff --git a/test.py b/test.py
@@ -18,15 +18,13 @@
 from optillm.cot_reflection import cot_reflection
 from optillm.plansearch import plansearch
 from optillm.leap import leap
-from optillm.agent import agent_approach
 
 # Setup logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
 # OpenAI API configuration
 API_KEY = os.environ.get("OPENAI_API_KEY")
-client = OpenAI(api_key=API_KEY)
 
 # Mock OpenAI client for testing purposes
 class MockOpenAIClient:
@@ -49,7 +47,6 @@ def __init__(self):
     'cot_reflection': cot_reflection,
     'plansearch': plansearch,
     'leap': leap,
-    'agent': agent_approach,
 }
 
 def load_test_cases(file_path: str) -> List[Dict]:
@@ -93,12 +90,15 @@ def run_test_case(test_case: Dict, approaches: List[str], client, model: str) ->
         'results': results
     }
 
-def run_tests(test_cases: List[Dict], approaches: List[str], client, model: str) -> List[Dict]:
+def run_tests(test_cases: List[Dict], approaches: List[str], client, model: str, single_test_name: str = None) -> List[Dict]:
     results = []
     for test_case in test_cases:
-        result = run_test_case(test_case, approaches, client, model)
-        results.append(result)
-        logger.info(f"Completed test case: {test_case['name']}")
+        if single_test_name is None or test_case['name'] == single_test_name:
+            result = run_test_case(test_case, approaches, client, model)
+            results.append(result)
+            logger.info(f"Completed test case: {test_case['name']}")
+        if single_test_name and test_case['name'] == single_test_name:
+            break
     return results
 
 def print_summary(results: List[Dict]):
@@ -116,15 +116,23 @@ def main():
     parser.add_argument("--test_cases", type=str, default="test_cases.json", help="Path to test cases JSON file")
     parser.add_argument("--approaches", nargs='+', default=list(APPROACHES.keys()), help="Approaches to test")
     parser.add_argument("--model", type=str, default="gpt-4o-mini", help="Model to use for testing")
+    parser.add_argument("--base-url", type=str, default=None, help="The base_url for the OpenAI API compatible endpoint")
+    parser.add_argument("--single-test", type=str, default=None, help="Name of a single test case to run")
     args = parser.parse_args()
 
     test_cases = load_test_cases(args.test_cases)
-    results = run_tests(test_cases, args.approaches, client, args.model)
+
+    if args.base_url:
+        client = OpenAI(api_key=API_KEY, base_url=args.base_url)
+    else:
+        client = OpenAI(api_key=API_KEY)
+
+    results = run_tests(test_cases, args.approaches, client, args.model, args.single_test)
     print_summary(results)
 
     # Optionally, save detailed results to a file
     with open('test_results.json', 'w') as f:
         json.dump(results, f, indent=2)
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/test_cases.json b/test_cases.json
@@ -23,5 +23,10 @@
     "name": "r/LocalLLaMA",
     "system_prompt": "",
     "query" : "I have a dish of potatoes. The following statements are true: No potatoes of mine, that are new, have >been boiled. All my potatoes in this dish are fit to eat. No unboiled potatoes of mine are fit to eat. Are there any new potatoes in this dish?"
+  },
+  {
+    "name" : "reddit",
+    "system_prompt": "",
+    "query" : "There are 24 volunteers. Over the next 3 weeks, each volunteer is assigned to a different task. There are 8 tasks. Each week, the volunteers switch tasks. Each task has 3 volunteers assigned to it. Volunteers cannot be assigned to the same task more than once, and volunteers cannot share the same task more than once."
   }
 ]

Original file line number	Diff line number	Diff line change
`@@ -23,5 +23,10 @@`
`23`	`23`	`"name": "r/LocalLLaMA",`
`24`	`24`	`"system_prompt": "",`
`25`	`25`	`"query" : "I have a dish of potatoes. The following statements are true: No potatoes of mine, that are new, have >been boiled. All my potatoes in this dish are fit to eat. No unboiled potatoes of mine are fit to eat. Are there any new potatoes in this dish?"`
	`26`	`+ },`
	`27`	`+ {`
	`28`	`+ "name" : "reddit",`
	`29`	`+ "system_prompt": "",`
	`30`	`+ "query" : "There are 24 volunteers. Over the next 3 weeks, each volunteer is assigned to a different task. There are 8 tasks. Each week, the volunteers switch tasks. Each task has 3 volunteers assigned to it. Volunteers cannot be assigned to the same task more than once, and volunteers cannot share the same task more than once."`
`26`	`31`	`}`
`27`	`32`	`]`