Final cleanup
This commit is contained in:
@@ -44,27 +44,33 @@ class O3ModelSelectionTest(BaseSimulatorTest):
|
||||
"""Test O3 model selection and usage"""
|
||||
try:
|
||||
self.logger.info(" Test: O3 model selection and usage validation")
|
||||
|
||||
|
||||
# Check which API keys are configured
|
||||
check_cmd = ["docker", "exec", self.container_name, "python", "-c",
|
||||
"import os; print(f'OPENAI_KEY:{bool(os.environ.get(\"OPENAI_API_KEY\"))}|OPENROUTER_KEY:{bool(os.environ.get(\"OPENROUTER_API_KEY\"))}')"]
|
||||
check_cmd = [
|
||||
"docker",
|
||||
"exec",
|
||||
self.container_name,
|
||||
"python",
|
||||
"-c",
|
||||
'import os; print(f\'OPENAI_KEY:{bool(os.environ.get("OPENAI_API_KEY"))}|OPENROUTER_KEY:{bool(os.environ.get("OPENROUTER_API_KEY"))}\')',
|
||||
]
|
||||
result = subprocess.run(check_cmd, capture_output=True, text=True)
|
||||
|
||||
|
||||
has_openai = False
|
||||
has_openrouter = False
|
||||
|
||||
|
||||
if result.returncode == 0:
|
||||
output = result.stdout.strip()
|
||||
if "OPENAI_KEY:True" in output:
|
||||
has_openai = True
|
||||
if "OPENROUTER_KEY:True" in output:
|
||||
has_openrouter = True
|
||||
|
||||
|
||||
# If only OpenRouter is configured, adjust test expectations
|
||||
if has_openrouter and not has_openai:
|
||||
self.logger.info(" ℹ️ Only OpenRouter configured - O3 models will be routed through OpenRouter")
|
||||
return self._run_openrouter_o3_test()
|
||||
|
||||
|
||||
# Original test for when OpenAI is configured
|
||||
self.logger.info(" ℹ️ OpenAI API configured - expecting direct OpenAI API calls")
|
||||
|
||||
@@ -220,10 +226,10 @@ def multiply(x, y):
|
||||
try:
|
||||
# Setup test files
|
||||
self.setup_test_files()
|
||||
|
||||
|
||||
# Test 1: O3 model via OpenRouter
|
||||
self.logger.info(" 1: Testing O3 model via OpenRouter")
|
||||
|
||||
|
||||
response1, _ = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
@@ -232,16 +238,16 @@ def multiply(x, y):
|
||||
"temperature": 1.0,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
if not response1:
|
||||
self.logger.error(" ❌ O3 model test via OpenRouter failed")
|
||||
return False
|
||||
|
||||
|
||||
self.logger.info(" ✅ O3 model call via OpenRouter completed")
|
||||
|
||||
|
||||
# Test 2: O3-mini model via OpenRouter
|
||||
self.logger.info(" 2: Testing O3-mini model via OpenRouter")
|
||||
|
||||
|
||||
response2, _ = self.call_mcp_tool(
|
||||
"chat",
|
||||
{
|
||||
@@ -250,16 +256,16 @@ def multiply(x, y):
|
||||
"temperature": 1.0,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
if not response2:
|
||||
self.logger.error(" ❌ O3-mini model test via OpenRouter failed")
|
||||
return False
|
||||
|
||||
|
||||
self.logger.info(" ✅ O3-mini model call via OpenRouter completed")
|
||||
|
||||
|
||||
# Test 3: Codereview with O3 via OpenRouter
|
||||
self.logger.info(" 3: Testing O3 with codereview tool via OpenRouter")
|
||||
|
||||
|
||||
test_code = """def add(a, b):
|
||||
return a + b
|
||||
|
||||
@@ -267,7 +273,7 @@ def multiply(x, y):
|
||||
return x * y
|
||||
"""
|
||||
test_file = self.create_additional_test_file("simple_math.py", test_code)
|
||||
|
||||
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"codereview",
|
||||
{
|
||||
@@ -277,53 +283,61 @@ def multiply(x, y):
|
||||
"temperature": 1.0,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
if not response3:
|
||||
self.logger.error(" ❌ O3 with codereview tool via OpenRouter failed")
|
||||
return False
|
||||
|
||||
|
||||
self.logger.info(" ✅ O3 with codereview tool via OpenRouter completed")
|
||||
|
||||
|
||||
# Validate OpenRouter usage in logs
|
||||
self.logger.info(" 4: Validating OpenRouter usage in logs")
|
||||
logs = self.get_recent_server_logs()
|
||||
|
||||
|
||||
# Check for OpenRouter API calls
|
||||
openrouter_api_logs = [line for line in logs.split("\n") if "openrouter" in line.lower() and ("API" in line or "request" in line)]
|
||||
|
||||
openrouter_api_logs = [
|
||||
line
|
||||
for line in logs.split("\n")
|
||||
if "openrouter" in line.lower() and ("API" in line or "request" in line)
|
||||
]
|
||||
|
||||
# Check for model resolution through OpenRouter
|
||||
openrouter_model_logs = [line for line in logs.split("\n") if "openrouter" in line.lower() and ("o3" in line or "model" in line)]
|
||||
|
||||
openrouter_model_logs = [
|
||||
line for line in logs.split("\n") if "openrouter" in line.lower() and ("o3" in line or "model" in line)
|
||||
]
|
||||
|
||||
# Check for successful responses
|
||||
openrouter_response_logs = [line for line in logs.split("\n") if "openrouter" in line.lower() and "response" in line]
|
||||
|
||||
openrouter_response_logs = [
|
||||
line for line in logs.split("\n") if "openrouter" in line.lower() and "response" in line
|
||||
]
|
||||
|
||||
self.logger.info(f" OpenRouter API logs: {len(openrouter_api_logs)}")
|
||||
self.logger.info(f" OpenRouter model logs: {len(openrouter_model_logs)}")
|
||||
self.logger.info(f" OpenRouter response logs: {len(openrouter_response_logs)}")
|
||||
|
||||
|
||||
# Success criteria for OpenRouter
|
||||
openrouter_used = len(openrouter_api_logs) >= 3 or len(openrouter_model_logs) >= 3
|
||||
all_calls_succeeded = response1 and response2 and response3
|
||||
|
||||
|
||||
success_criteria = [
|
||||
("All O3 model calls succeeded", all_calls_succeeded),
|
||||
("OpenRouter provider was used", openrouter_used),
|
||||
]
|
||||
|
||||
|
||||
passed_criteria = sum(1 for _, passed in success_criteria if passed)
|
||||
self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
|
||||
|
||||
|
||||
for criterion, passed in success_criteria:
|
||||
status = "✅" if passed else "❌"
|
||||
self.logger.info(f" {status} {criterion}")
|
||||
|
||||
|
||||
if passed_criteria == len(success_criteria):
|
||||
self.logger.info(" ✅ O3 model selection via OpenRouter passed")
|
||||
return True
|
||||
else:
|
||||
self.logger.error(" ❌ O3 model selection via OpenRouter failed")
|
||||
return False
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"OpenRouter O3 test failed: {e}")
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user