fix: improved error reporting; Codex CLI would at times fail to figure out how to handle plain-text / JSON errors

fix: working directory must already exist; raise an error instead of trying to create it. docs: improved API Lookup instructions. * test added to confirm failures * chat schema more explicit about file paths
2025-10-17 23:42:32 +04:00
parent 71796c0c70
commit 95e69a7cb2
24 changed files with 569 additions and 337 deletions
--- a/tools/apilookup.py
+++ b/tools/apilookup.py
@@ -28,8 +28,9 @@ LOOKUP_PROMPT = """
 MANDATORY: You MUST perform this research in a SEPARATE SUB-TASK using your web search tool.

 CRITICAL RULES - READ CAREFULLY:
- NEVER call `apilookup` / `zen.apilookup` or any other zen tool again for this mission. Launch your environment's dedicated web search capability
-  (for example `websearch`, `web_search`, or another native web-search tool such as the one you use to perform a web search online) to gather sources.
+- Launch your environment's dedicated web search capability (for example `websearch`, `web_search`, or another native
+web-search tool such as the one you use to perform a web search online) to gather sources - do NOT call this `apilookup` tool again
+during the same lookup, this is ONLY an orchestration tool to guide you and has NO web search capability of its own.
 - ALWAYS run the search from a separate sub-task/sub-process so the research happens outside this tool invocation.
 - If the environment does not expose a web search tool, immediately report that limitation instead of invoking `apilookup` again.

--- a/tools/challenge.py
+++ b/tools/challenge.py
@@ -17,6 +17,7 @@ if TYPE_CHECKING:

 from config import TEMPERATURE_ANALYTICAL
 from tools.shared.base_models import ToolRequest
+from tools.shared.exceptions import ToolExecutionError

 from .simple.base import SimpleTool

@@ -138,6 +139,8 @@ class ChallengeTool(SimpleTool):

            return [TextContent(type="text", text=json.dumps(response_data, indent=2, ensure_ascii=False))]

+        except ToolExecutionError:
+            raise
        except Exception as e:
            import logging

@@ -150,7 +153,7 @@ class ChallengeTool(SimpleTool):
                "content": f"Failed to create challenge prompt: {str(e)}",
            }

-            return [TextContent(type="text", text=json.dumps(error_data, ensure_ascii=False))]
+            raise ToolExecutionError(json.dumps(error_data, ensure_ascii=False)) from e

    def _wrap_prompt_for_challenge(self, prompt: str) -> str:
        """
--- a/tools/chat.py
+++ b/tools/chat.py
@@ -30,10 +30,10 @@ CHAT_FIELD_DESCRIPTIONS = {
        "Your question or idea for collaborative thinking. Provide detailed context, including your goal, what you've tried, and any specific challenges. "
        "CRITICAL: To discuss code, use 'files' parameter instead of pasting code blocks here."
    ),
-    "files": "absolute file or folder paths for code context (do NOT shorten).",
-    "images": "Optional absolute image paths or base64 for visual context when helpful.",
+    "files": "Absolute file or folder paths for code context.",
+    "images": "Image paths (absolute) or base64 strings for optional visual context.",
    "working_directory": (
-        "Absolute full directory path where the assistant AI can save generated code for implementation. The directory must already exist"
+        "Absolute directory path where generated code artifacts are stored. The directory must already exist."
    ),
 }

@@ -98,17 +98,11 @@ class ChatTool(SimpleTool):
        """Return the Chat-specific request model"""
        return ChatRequest

-    # === Schema Generation ===
-    # For maximum compatibility, we override get_input_schema() to match the original Chat tool exactly
+    # === Schema Generation Utilities ===

    def get_input_schema(self) -> dict[str, Any]:
-        """
-        Generate input schema matching the original Chat tool exactly.
+        """Generate input schema matching the original Chat tool expectations."""

-        This maintains 100% compatibility with the original Chat tool by using
-        the same schema generation approach while still benefiting from SimpleTool
-        convenience methods.
-        """
        required_fields = ["prompt", "working_directory"]
        if self.is_effective_auto_mode():
            required_fields.append("model")
@@ -152,22 +146,14 @@ class ChatTool(SimpleTool):
                },
            },
            "required": required_fields,
+            "additionalProperties": False,
        }

        return schema

-    # === Tool-specific field definitions (alternative approach for reference) ===
-    # These aren't used since we override get_input_schema(), but they show how
-    # the tool could be implemented using the automatic SimpleTool schema building
-
    def get_tool_fields(self) -> dict[str, dict[str, Any]]:
-        """
-        Tool-specific field definitions for ChatSimple.
+        """Tool-specific field definitions used by SimpleTool scaffolding."""

-        Note: This method isn't used since we override get_input_schema() for
-        exact compatibility, but it demonstrates how ChatSimple could be
-        implemented using automatic schema building.
-        """
        return {
            "prompt": {
                "type": "string",
@@ -204,6 +190,19 @@ class ChatTool(SimpleTool):
    def _validate_file_paths(self, request) -> Optional[str]:
        """Extend validation to cover the working directory path."""

+        files = self.get_request_files(request)
+        if files:
+            expanded_files: list[str] = []
+            for file_path in files:
+                expanded = os.path.expanduser(file_path)
+                if not os.path.isabs(expanded):
+                    return (
+                        "Error: All file paths must be FULL absolute paths to real files / folders - DO NOT SHORTEN. "
+                        f"Received: {file_path}"
+                    )
+                expanded_files.append(expanded)
+            self.set_request_files(request, expanded_files)
+
        error = super()._validate_file_paths(request)
        if error:
            return error
@@ -216,6 +215,10 @@ class ChatTool(SimpleTool):
                    "Error: 'working_directory' must be an absolute path (you may use '~' which will be expanded). "
                    f"Received: {working_directory}"
                )
+            if not os.path.isdir(expanded):
+                return (
+                    "Error: 'working_directory' must reference an existing directory. " f"Received: {working_directory}"
+                )
        return None

    def format_response(self, response: str, request: ChatRequest, model_info: Optional[dict] = None) -> str:
@@ -227,7 +230,7 @@ class ChatTool(SimpleTool):
        recordable_override: Optional[str] = None

        if self._model_supports_code_generation():
-            block, remainder = self._extract_generated_code_block(response)
+            block, remainder, _ = self._extract_generated_code_block(response)
            if block:
                sanitized_text = remainder.strip()
                try:
@@ -239,14 +242,15 @@ class ChatTool(SimpleTool):
                        "Check the path permissions and re-run. The generated code block is included below for manual handling."
                    )

-                    history_copy = self._join_sections(sanitized_text, warning) if sanitized_text else warning
+                    history_copy_base = sanitized_text
+                    history_copy = self._join_sections(history_copy_base, warning) if history_copy_base else warning
                    recordable_override = history_copy

                    sanitized_warning = history_copy.strip()
                    body = f"{sanitized_warning}\n\n{block.strip()}".strip()
                else:
                    if not sanitized_text:
-                        sanitized_text = (
+                        base_message = (
                            "Generated code saved to zen_generated.code.\n"
                            "\n"
                            "CRITICAL: Contains mixed instructions + partial snippets - NOT complete code to copy as-is!\n"
@@ -260,6 +264,7 @@ class ChatTool(SimpleTool):
                            "\n"
                            "Treat as guidance to implement thoughtfully, not ready-to-paste code."
                        )
+                        sanitized_text = base_message

                    instruction = self._build_agent_instruction(artifact_path)
                    body = self._join_sections(sanitized_text, instruction)
@@ -300,26 +305,35 @@ class ChatTool(SimpleTool):

        return bool(capabilities.allow_code_generation)

-    def _extract_generated_code_block(self, text: str) -> tuple[Optional[str], str]:
-        match = re.search(r"<GENERATED-CODE>.*?</GENERATED-CODE>", text, flags=re.DOTALL | re.IGNORECASE)
-        if not match:
-            return None, text
+    def _extract_generated_code_block(self, text: str) -> tuple[Optional[str], str, int]:
+        matches = list(re.finditer(r"<GENERATED-CODE>.*?</GENERATED-CODE>", text, flags=re.DOTALL | re.IGNORECASE))
+        if not matches:
+            return None, text, 0

-        block = match.group(0)
-        before = text[: match.start()].rstrip()
-        after = text[match.end() :].lstrip()
+        blocks = [match.group(0).strip() for match in matches]
+        combined_block = "\n\n".join(blocks)

-        if before and after:
-            remainder = f"{before}\n\n{after}"
-        else:
-            remainder = before or after
+        remainder_parts: list[str] = []
+        last_end = 0
+        for match in matches:
+            start, end = match.span()
+            segment = text[last_end:start]
+            if segment:
+                remainder_parts.append(segment)
+            last_end = end
+        tail = text[last_end:]
+        if tail:
+            remainder_parts.append(tail)

-        return block, remainder or ""
+        remainder = self._join_sections(*remainder_parts)
+
+        return combined_block, remainder, len(blocks)

    def _persist_generated_code_block(self, block: str, working_directory: str) -> Path:
        expanded = os.path.expanduser(working_directory)
        target_dir = Path(expanded).resolve()
-        target_dir.mkdir(parents=True, exist_ok=True)
+        if not target_dir.is_dir():
+            raise FileNotFoundError(f"Working directory '{working_directory}' does not exist")

        target_file = target_dir / "zen_generated.code"
        if target_file.exists():
--- a/tools/clink.py
+++ b/tools/clink.py
@@ -17,6 +17,7 @@ from clink.models import ResolvedCLIClient, ResolvedCLIRole
 from config import TEMPERATURE_BALANCED
 from tools.models import ToolModelCategory, ToolOutput
 from tools.shared.base_models import COMMON_FIELD_DESCRIPTIONS
+from tools.shared.exceptions import ToolExecutionError
 from tools.simple.base import SchemaBuilder, SimpleTool

 logger = logging.getLogger(__name__)
@@ -166,21 +167,21 @@ class CLinkTool(SimpleTool):

        path_error = self._validate_file_paths(request)
        if path_error:
-            return [self._error_response(path_error)]
+            self._raise_tool_error(path_error)

        selected_cli = request.cli_name or self._default_cli_name
        if not selected_cli:
-            return [self._error_response("No CLI clients are configured for clink.")]
+            self._raise_tool_error("No CLI clients are configured for clink.")

        try:
            client_config = self._registry.get_client(selected_cli)
        except KeyError as exc:
-            return [self._error_response(str(exc))]
+            self._raise_tool_error(str(exc))

        try:
            role_config = client_config.get_role(request.role)
        except KeyError as exc:
-            return [self._error_response(str(exc))]
+            self._raise_tool_error(str(exc))

        files = self.get_request_files(request)
        images = self.get_request_images(request)
@@ -200,7 +201,7 @@ class CLinkTool(SimpleTool):
            )
        except Exception as exc:
            logger.exception("Failed to prepare clink prompt")
-            return [self._error_response(f"Failed to prepare prompt: {exc}")]
+            self._raise_tool_error(f"Failed to prepare prompt: {exc}")

        agent = create_agent(client_config)
        try:
@@ -213,13 +214,10 @@ class CLinkTool(SimpleTool):
            )
        except CLIAgentError as exc:
            metadata = self._build_error_metadata(client_config, exc)
-            error_output = ToolOutput(
-                status="error",
-                content=f"CLI '{client_config.name}' execution failed: {exc}",
-                content_type="text",
+            self._raise_tool_error(
+                f"CLI '{client_config.name}' execution failed: {exc}",
                metadata=metadata,
            )
-            return [TextContent(type="text", text=error_output.model_dump_json())]

        metadata = self._build_success_metadata(client_config, role_config, result)
        metadata = self._prune_metadata(metadata, client_config, reason="normal")
@@ -436,9 +434,9 @@ class CLinkTool(SimpleTool):
            metadata["stderr"] = exc.stderr.strip()
        return metadata

-    def _error_response(self, message: str) -> TextContent:
-        error_output = ToolOutput(status="error", content=message, content_type="text")
-        return TextContent(type="text", text=error_output.model_dump_json())
+    def _raise_tool_error(self, message: str, metadata: dict[str, Any] | None = None) -> None:
+        error_output = ToolOutput(status="error", content=message, content_type="text", metadata=metadata)
+        raise ToolExecutionError(error_output.model_dump_json())

    def _agent_capabilities_guidance(self) -> str:
        return (
--- a/tools/shared/exceptions.py
+++ b/tools/shared/exceptions.py
@@ -0,0 +1,20 @@
+"""
+Custom exceptions for Zen MCP tools.
+
+These exceptions allow tools to signal protocol-level errors that should be surfaced
+to MCP clients using the `isError` flag on `CallToolResult`. Raising one of these
+exceptions ensures the low-level server adapter marks the result as an error while
+preserving the structured payload we pass through the exception message.
+"""
+
+
+class ToolExecutionError(RuntimeError):
+    """Raised to indicate a tool-level failure that must set `isError=True`."""
+
+    def __init__(self, payload: str):
+        """
+        Args:
+            payload: Serialized error payload (typically JSON) to return to the client.
+        """
+        super().__init__(payload)
+        self.payload = payload
--- a/tools/simple/base.py
+++ b/tools/simple/base.py
@@ -17,6 +17,7 @@ from typing import Any, Optional

 from tools.shared.base_models import ToolRequest
 from tools.shared.base_tool import BaseTool
+from tools.shared.exceptions import ToolExecutionError
 from tools.shared.schema_builders import SchemaBuilder


@@ -269,7 +270,6 @@ class SimpleTool(BaseTool):

        This method replicates the proven execution pattern while using SimpleTool hooks.
        """
-        import json
        import logging

        from mcp.types import TextContent
@@ -298,7 +298,8 @@ class SimpleTool(BaseTool):
                    content=path_error,
                    content_type="text",
                )
-                return [TextContent(type="text", text=error_output.model_dump_json())]
+                logger.error("Path validation failed for %s: %s", self.get_name(), path_error)
+                raise ToolExecutionError(error_output.model_dump_json())

            # Handle model resolution like old base.py
            model_name = self.get_request_model_name(request)
@@ -389,7 +390,15 @@ class SimpleTool(BaseTool):
                    images, model_context=self._model_context, continuation_id=continuation_id
                )
                if image_validation_error:
-                    return [TextContent(type="text", text=json.dumps(image_validation_error, ensure_ascii=False))]
+                    error_output = ToolOutput(
+                        status=image_validation_error.get("status", "error"),
+                        content=image_validation_error.get("content"),
+                        content_type=image_validation_error.get("content_type", "text"),
+                        metadata=image_validation_error.get("metadata"),
+                    )
+                    payload = error_output.model_dump_json()
+                    logger.error("Image validation failed for %s: %s", self.get_name(), payload)
+                    raise ToolExecutionError(payload)

            # Get and validate temperature against model constraints
            temperature, temp_warnings = self.get_validated_temperature(request, self._model_context)
@@ -552,15 +561,21 @@ class SimpleTool(BaseTool):
                            content_type="text",
                        )

-            # Return the tool output as TextContent
-            return [TextContent(type="text", text=tool_output.model_dump_json())]
+            # Return the tool output as TextContent, marking protocol errors appropriately
+            payload = tool_output.model_dump_json()
+            if tool_output.status == "error":
+                logger.error("%s reported error status - raising ToolExecutionError", self.get_name())
+                raise ToolExecutionError(payload)
+            return [TextContent(type="text", text=payload)]

+        except ToolExecutionError:
+            raise
        except Exception as e:
            # Special handling for MCP size check errors
            if str(e).startswith("MCP_SIZE_CHECK:"):
                # Extract the JSON content after the prefix
                json_content = str(e)[len("MCP_SIZE_CHECK:") :]
-                return [TextContent(type="text", text=json_content)]
+                raise ToolExecutionError(json_content)

            logger.error(f"Error in {self.get_name()}: {str(e)}")
            error_output = ToolOutput(
@@ -568,7 +583,7 @@ class SimpleTool(BaseTool):
                content=f"Error in {self.get_name()}: {str(e)}",
                content_type="text",
            )
-            return [TextContent(type="text", text=error_output.model_dump_json())]
+            raise ToolExecutionError(error_output.model_dump_json()) from e

    def _parse_response(self, raw_text: str, request, model_info: Optional[dict] = None):
        """
--- a/tools/workflow/workflow_mixin.py
+++ b/tools/workflow/workflow_mixin.py
@@ -33,6 +33,7 @@ from config import MCP_PROMPT_SIZE_LIMIT
 from utils.conversation_memory import add_turn, create_thread

 from ..shared.base_models import ConsolidatedFindings
+from ..shared.exceptions import ToolExecutionError

 logger = logging.getLogger(__name__)

@@ -645,7 +646,8 @@ class BaseWorkflowMixin(ABC):
                        content=path_error,
                        content_type="text",
                    )
-                    return [TextContent(type="text", text=error_output.model_dump_json())]
+                    logger.error("Path validation failed for %s: %s", self.get_name(), path_error)
+                    raise ToolExecutionError(error_output.model_dump_json())
            except AttributeError:
                # validate_file_paths method not available - skip validation
                pass
@@ -738,7 +740,13 @@ class BaseWorkflowMixin(ABC):

            return [TextContent(type="text", text=json.dumps(response_data, indent=2, ensure_ascii=False))]

+        except ToolExecutionError:
+            raise
        except Exception as e:
+            if str(e).startswith("MCP_SIZE_CHECK:"):
+                payload = str(e)[len("MCP_SIZE_CHECK:") :]
+                raise ToolExecutionError(payload)
+
            logger.error(f"Error in {self.get_name()} work: {e}", exc_info=True)
            error_data = {
                "status": f"{self.get_name()}_failed",
@@ -749,7 +757,7 @@ class BaseWorkflowMixin(ABC):
            # Add metadata to error responses too
            self._add_workflow_metadata(error_data, arguments)

-            return [TextContent(type="text", text=json.dumps(error_data, indent=2, ensure_ascii=False))]
+            raise ToolExecutionError(json.dumps(error_data, indent=2, ensure_ascii=False)) from e

    # Hook methods for tool customization

@@ -1577,11 +1585,13 @@ class BaseWorkflowMixin(ABC):
                error_data = {"status": "error", "content": "No arguments provided"}
                # Add basic metadata even for validation errors
                error_data["metadata"] = {"tool_name": self.get_name()}
-                return [TextContent(type="text", text=json.dumps(error_data, ensure_ascii=False))]
+                raise ToolExecutionError(json.dumps(error_data, ensure_ascii=False))

            # Delegate to execute_workflow
            return await self.execute_workflow(arguments)

+        except ToolExecutionError:
+            raise
        except Exception as e:
            logger.error(f"Error in {self.get_name()} tool execution: {e}", exc_info=True)
            error_data = {
@@ -1589,12 +1599,7 @@ class BaseWorkflowMixin(ABC):
                "content": f"Error in {self.get_name()}: {str(e)}",
            }  # Add metadata to error responses
            self._add_workflow_metadata(error_data, arguments)
-            return [
-                TextContent(
-                    type="text",
-                    text=json.dumps(error_data, ensure_ascii=False),
-                )
-            ]
+            raise ToolExecutionError(json.dumps(error_data, ensure_ascii=False)) from e

    # Default implementations for methods that workflow-based tools typically don't need