fix: improved error reporting; the Codex CLI would at times fail to work out how to handle plain-text / JSON errors

fix: the working directory must already exist; raise an error instead of trying to create it. docs: improved API Lookup instructions. * test added to confirm failures * chat schema is more explicit about file paths
2025-10-17 23:42:32 +04:00
parent 71796c0c70
commit 95e69a7cb2
24 changed files with 569 additions and 337 deletions
--- a/simulator_tests/conversation_base_test.py
+++ b/simulator_tests/conversation_base_test.py
@@ -38,6 +38,8 @@ import asyncio
 import json
 from typing import Optional

+from tools.shared.exceptions import ToolExecutionError
+
 from .base_test import BaseSimulatorTest


@@ -158,7 +160,15 @@ class ConversationBaseTest(BaseSimulatorTest):
            params["_resolved_model_name"] = model_name

            # Execute tool asynchronously
-            result = loop.run_until_complete(tool.execute(params))
+            try:
+                result = loop.run_until_complete(tool.execute(params))
+            except ToolExecutionError as exc:
+                response_text = exc.payload
+                continuation_id = self._extract_continuation_id_from_response(response_text)
+                self.logger.debug(f"Tool '{tool_name}' returned error payload in-process")
+                if self.verbose and response_text:
+                    self.logger.debug(f"Error response preview: {response_text[:500]}...")
+                return response_text, continuation_id

            if not result or len(result) == 0:
                return None, None
--- a/simulator_tests/test_debug_certain_confidence.py
+++ b/simulator_tests/test_debug_certain_confidence.py
@@ -12,6 +12,8 @@ Tests the debug tool's 'certain' confidence feature in a realistic simulation:
 import json
 from typing import Optional

+from tools.shared.exceptions import ToolExecutionError
+
 from .conversation_base_test import ConversationBaseTest


@@ -482,7 +484,12 @@ This happens every time a user tries to log in. The error occurs in the password
            loop = self._get_event_loop()

            # Call the tool's execute method
-            result = loop.run_until_complete(tool.execute(params))
+            try:
+                result = loop.run_until_complete(tool.execute(params))
+            except ToolExecutionError as exc:
+                response_text = exc.payload
+                continuation_id = self._extract_debug_continuation_id(response_text)
+                return response_text, continuation_id

            if not result or len(result) == 0:
                self.logger.error(f"Tool '{tool_name}' returned empty result")