fix: improved error reporting; Codex CLI would at times fail to figure out how to handle plain-text / JSON errors

fix: working directory must already exist; raise an error instead of trying to create it
docs: improved API Lookup instructions
* test added to confirm failures
* chat schema more explicit about file paths
This commit is contained in:
Fahad
2025-10-17 23:42:32 +04:00
parent 71796c0c70
commit 95e69a7cb2
24 changed files with 569 additions and 337 deletions

View File

@@ -38,6 +38,8 @@ import asyncio
import json
from typing import Optional
from tools.shared.exceptions import ToolExecutionError
from .base_test import BaseSimulatorTest
@@ -158,7 +160,15 @@ class ConversationBaseTest(BaseSimulatorTest):
params["_resolved_model_name"] = model_name
# Execute tool asynchronously
result = loop.run_until_complete(tool.execute(params))
try:
result = loop.run_until_complete(tool.execute(params))
except ToolExecutionError as exc:
response_text = exc.payload
continuation_id = self._extract_continuation_id_from_response(response_text)
self.logger.debug(f"Tool '{tool_name}' returned error payload in-process")
if self.verbose and response_text:
self.logger.debug(f"Error response preview: {response_text[:500]}...")
return response_text, continuation_id
if not result or len(result) == 0:
return None, None

View File

@@ -12,6 +12,8 @@ Tests the debug tool's 'certain' confidence feature in a realistic simulation:
import json
from typing import Optional
from tools.shared.exceptions import ToolExecutionError
from .conversation_base_test import ConversationBaseTest
@@ -482,7 +484,12 @@ This happens every time a user tries to log in. The error occurs in the password
loop = self._get_event_loop()
# Call the tool's execute method
result = loop.run_until_complete(tool.execute(params))
try:
result = loop.run_until_complete(tool.execute(params))
except ToolExecutionError as exc:
response_text = exc.payload
continuation_id = self._extract_debug_continuation_id(response_text)
return response_text, continuation_id
if not result or len(result) == 0:
self.logger.error(f"Tool '{tool_name}' returned empty result")