Skip to content

Commit 88dcaf1

Browse files
committed
fix z3_solver
add a new test case that was failing with z3.
1 parent 61a319b commit 88dcaf1

File tree

3 files changed

+24
-10
lines changed

3 files changed

+24
-10
lines changed

optillm/z3_solver.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ def process_query(self, query: str) -> str:
3535

3636
return self.generate_response(query, analysis, solver_result)
3737
except Exception as e:
38-
return f"An error occurred while processing the query: {str(e)}"
38+
logging.error(f"An error occurred while processing the query with Z3, returning standard llm inference results: {str(e)}")
39+
return self.standard_llm_inference(query)
3940

4041
def analyze_query(self, query: str) -> str:
4142
analysis_prompt = f"""Analyze the given query and determine if it can be solved using Z3:

test.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,13 @@
1818
from optillm.cot_reflection import cot_reflection
1919
from optillm.plansearch import plansearch
2020
from optillm.leap import leap
21-
from optillm.agent import agent_approach
2221

2322
# Setup logging
2423
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
2524
logger = logging.getLogger(__name__)
2625

2726
# OpenAI API configuration
2827
API_KEY = os.environ.get("OPENAI_API_KEY")
29-
client = OpenAI(api_key=API_KEY)
3028

3129
# Mock OpenAI client for testing purposes
3230
class MockOpenAIClient:
@@ -49,7 +47,6 @@ def __init__(self):
4947
'cot_reflection': cot_reflection,
5048
'plansearch': plansearch,
5149
'leap': leap,
52-
'agent': agent_approach,
5350
}
5451

5552
def load_test_cases(file_path: str) -> List[Dict]:
@@ -93,12 +90,15 @@ def run_test_case(test_case: Dict, approaches: List[str], client, model: str) ->
9390
'results': results
9491
}
9592

96-
def run_tests(test_cases: List[Dict], approaches: List[str], client, model: str) -> List[Dict]:
93+
def run_tests(test_cases: List[Dict], approaches: List[str], client, model: str, single_test_name: str = None) -> List[Dict]:
9794
results = []
9895
for test_case in test_cases:
99-
result = run_test_case(test_case, approaches, client, model)
100-
results.append(result)
101-
logger.info(f"Completed test case: {test_case['name']}")
96+
if single_test_name is None or test_case['name'] == single_test_name:
97+
result = run_test_case(test_case, approaches, client, model)
98+
results.append(result)
99+
logger.info(f"Completed test case: {test_case['name']}")
100+
if single_test_name and test_case['name'] == single_test_name:
101+
break
102102
return results
103103

104104
def print_summary(results: List[Dict]):
@@ -116,15 +116,23 @@ def main():
116116
parser.add_argument("--test_cases", type=str, default="test_cases.json", help="Path to test cases JSON file")
117117
parser.add_argument("--approaches", nargs='+', default=list(APPROACHES.keys()), help="Approaches to test")
118118
parser.add_argument("--model", type=str, default="gpt-4o-mini", help="Model to use for testing")
119+
parser.add_argument("--base-url", type=str, default=None, help="The base_url for the OpenAI API compatible endpoint")
120+
parser.add_argument("--single-test", type=str, default=None, help="Name of a single test case to run")
119121
args = parser.parse_args()
120122

121123
test_cases = load_test_cases(args.test_cases)
122-
results = run_tests(test_cases, args.approaches, client, args.model)
124+
125+
if args.base_url:
126+
client = OpenAI(api_key=API_KEY, base_url=args.base_url)
127+
else:
128+
client = OpenAI(api_key=API_KEY)
129+
130+
results = run_tests(test_cases, args.approaches, client, args.model, args.single_test)
123131
print_summary(results)
124132

125133
# Optionally, save detailed results to a file
126134
with open('test_results.json', 'w') as f:
127135
json.dump(results, f, indent=2)
128136

129137
if __name__ == "__main__":
130-
main()
138+
main()

test_cases.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,10 @@
2323
"name": "r/LocalLLaMA",
2424
"system_prompt": "",
2525
"query" : "I have a dish of potatoes. The following statements are true: No potatoes of mine, that are new, have been boiled. All my potatoes in this dish are fit to eat. No unboiled potatoes of mine are fit to eat. Are there any new potatoes in this dish?"
26+
},
27+
{
28+
"name" : "reddit",
29+
"system_prompt": "",
30+
"query" : "There are 24 volunteers. Over the next 3 weeks, each volunteer is assigned to a different task. There are 8 tasks. Each week, the volunteers switch tasks. Each task has 3 volunteers assigned to it. Volunteers cannot be assigned to the same task more than once, and volunteers cannot share the same task more than once."
2631
}
2732
]

0 commit comments

Comments
 (0)