feat!: Full code can now be generated by an external model and shared with the AI tool (Claude Code / Codex etc)!

Model definitions now support a new `allow_code_generation` flag, only to be used with higher-reasoning models such as GPT-5 Pro and Gemini 2.5 Pro

When `true`, the `chat` tool can request the external model to generate a full implementation, update, or set of instructions, and then share the result with the calling agent.

This effectively allows us to utilize more powerful models such as GPT-5 Pro — which is otherwise available only via the API or as part of the $200 Pro plan in the ChatGPT app — to generate code or entire implementations for us.
This commit is contained in:
Fahad
2025-10-07 18:49:13 +04:00
parent 04f7ce5b03
commit ece8a5ebed
29 changed files with 1008 additions and 122 deletions

View File

@@ -205,7 +205,7 @@ Zen activates any provider that has credentials in your `.env`. See `.env.exampl
**Collaboration & Planning** *(Enabled by default)* **Collaboration & Planning** *(Enabled by default)*
- **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.) - **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.)
- **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches - **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches. With capable models (GPT-5 Pro, Gemini 2.5 Pro), generates complete code / implementation
- **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives - **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives
- **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans - **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans
- **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering - **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering

View File

@@ -20,7 +20,8 @@
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.", "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.", "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model", "description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
"allow_code_generation": "Whether this model can generate and suggest fully working code - complete with functions, files, and detailed implementation instructions - for your AI tool to use right away. Only set this to 'true' for a model more capable than the AI model / CLI you're currently using."
} }
}, },
"models": [ "models": [
@@ -44,6 +45,7 @@
"supports_json_mode": true, "supports_json_mode": true,
"supports_images": true, "supports_images": true,
"supports_temperature": true, "supports_temperature": true,
"allow_code_generation": true,
"max_image_size_mb": 32.0 "max_image_size_mb": 32.0
}, },
{ {

View File

@@ -20,7 +20,8 @@
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.", "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.", "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model", "description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
"allow_code_generation": "Whether this model can generate and suggest fully working code - complete with functions, files, and detailed implementation instructions - for your AI tool to use right away. Only set this to 'true' for a model more capable than the AI model / CLI you're currently using."
} }
}, },
"models": [ "models": [
@@ -66,6 +67,7 @@
"max_image_size_mb": 20.0, "max_image_size_mb": 20.0,
"use_openai_response_api": true, "use_openai_response_api": true,
"default_reasoning_effort": "high", "default_reasoning_effort": "high",
"allow_code_generation": true,
"temperature_constraint": "fixed" "temperature_constraint": "fixed"
}, },
{ {

View File

@@ -19,7 +19,8 @@
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.", "use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.", "default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
"description": "Human-readable description of the model", "description": "Human-readable description of the model",
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering" "intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
"allow_code_generation": "Whether this model can generate and suggest fully working code - complete with functions, files, and detailed implementation instructions - for your AI tool to use right away. Only set this to 'true' for a model more capable than the AI model / CLI you're currently using."
} }
}, },
"models": [ "models": [
@@ -100,6 +101,7 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_images": true, "supports_images": true,
"max_image_size_mb": 20.0, "max_image_size_mb": 20.0,
"allow_code_generation": true,
"description": "Google's Gemini 2.5 Pro via OpenRouter with vision", "description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
"intelligence_score": 18 "intelligence_score": 18
}, },
@@ -310,8 +312,9 @@
"temperature_constraint": "fixed", "temperature_constraint": "fixed",
"use_openai_response_api": true, "use_openai_response_api": true,
"default_reasoning_effort": "high", "default_reasoning_effort": "high",
"allow_code_generation": true,
"description": "GPT-5 Pro - Advanced reasoning model with highest quality responses (text+image input, text output only)", "description": "GPT-5 Pro - Advanced reasoning model with highest quality responses (text+image input, text output only)",
"intelligence_score": 17 "intelligence_score": 18
}, },
{ {
"model_name": "openai/gpt-5-codex", "model_name": "openai/gpt-5-codex",

View File

@@ -52,6 +52,9 @@ from tools.simple.base import SimpleTool
class ChatRequest(ToolRequest): class ChatRequest(ToolRequest):
prompt: str = Field(..., description="Your question or idea.") prompt: str = Field(..., description="Your question or idea.")
files: list[str] | None = Field(default_factory=list) files: list[str] | None = Field(default_factory=list)
working_directory: str = Field(
..., description="Absolute full directory path where the assistant AI can save generated code for implementation."
)
class ChatTool(SimpleTool): class ChatTool(SimpleTool):
def get_name(self) -> str: # required by BaseTool def get_name(self) -> str: # required by BaseTool
@@ -67,10 +70,17 @@ class ChatTool(SimpleTool):
return ChatRequest return ChatRequest
def get_tool_fields(self) -> dict[str, dict[str, object]]: def get_tool_fields(self) -> dict[str, dict[str, object]]:
return {"prompt": {"type": "string", "description": "Your question."}, "files": SimpleTool.FILES_FIELD} return {
"prompt": {"type": "string", "description": "Your question."},
"files": SimpleTool.FILES_FIELD,
"working_directory": {
"type": "string",
"description": "Absolute full directory path where the assistant AI can save generated code for implementation.",
},
}
def get_required_fields(self) -> list[str]: def get_required_fields(self) -> list[str]:
return ["prompt"] return ["prompt", "working_directory"]
async def prepare_prompt(self, request: ChatRequest) -> str: async def prepare_prompt(self, request: ChatRequest) -> str:
return self.prepare_chat_style_prompt(request) return self.prepare_chat_style_prompt(request)

View File

@@ -75,7 +75,7 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
- `conf/dial_models.json` DIAL aggregation catalogue (`DIAL_MODELS_CONFIG_PATH`) - `conf/dial_models.json` DIAL aggregation catalogue (`DIAL_MODELS_CONFIG_PATH`)
- `conf/custom_models.json` Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`) - `conf/custom_models.json` Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`)
Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags. Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, or expose additional aliases without touching Python code. Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags (including `allow_code_generation`). Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, enable structured code generation, or expose additional aliases without touching Python code.
The shipped defaults cover: The shipped defaults cover:
@@ -87,7 +87,63 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` | | OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry | | Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support) without editing Python. > **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support, code generation) without editing Python.
### Code Generation Capability
**`allow_code_generation` Flag:**
The `allow_code_generation` capability enables models to generate complete, production-ready implementations in a structured format. When enabled, the `chat` tool will inject special instructions for substantial code generation tasks.
```json
{
"model_name": "gpt-5",
"allow_code_generation": true,
...
}
```
**When to Enable:**
- **Enable for**: Models MORE capable than your primary CLI's model (e.g., GPT-5, GPT-5 Pro when using Claude Code with Sonnet 4.5)
- **Purpose**: Get complete implementations from a more powerful reasoning model that your primary CLI can then review and apply
- **Use case**: Large-scale implementations, major refactoring, complete module creation
**Important Guidelines:**
1. Only enable for models significantly more capable than your primary CLI to ensure high-quality generated code
2. The capability triggers structured code output (`<GENERATED-CODE>` blocks) for substantial implementation requests
3. Minor code changes still use inline code blocks regardless of this setting
4. Generated code is saved to `zen_generated.code` in the user's working directory
5. Your CLI receives instructions to review and apply the generated code systematically
**Example Configuration:**
```json
// OpenAI models configuration (conf/openai_models.json)
{
"models": [
{
"model_name": "gpt-5",
"allow_code_generation": true,
"intelligence_score": 18,
...
},
{
"model_name": "gpt-5-pro",
"allow_code_generation": true,
"intelligence_score": 19,
...
}
]
}
```
**Typical Workflow:**
1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **gpt-5-pro**
2. GPT-5 Pro generates a structured implementation and shares it with Zen
3. Zen saves the code to `zen_generated.code` and asks the AI agent to implement the plan
4. The AI agent continues from the previous context, reads the file, and applies the implementation
### Thinking Mode Configuration ### Thinking Mode Configuration

View File

@@ -39,13 +39,14 @@ word verdict in the end.
- **Collaborative thinking partner** for your analysis and planning - **Collaborative thinking partner** for your analysis and planning
- **Get second opinions** on your designs and approaches - **Get second opinions** on your designs and approaches
- **Brainstorm solutions** and explore alternatives together - **Brainstorm solutions** and explore alternatives together
- **Structured code generation**: When using GPT-5 Pro or Gemini 2.5 Pro, get complete, production-ready implementations saved to `zen_generated.code` for your CLI to review and apply
- **Validate your checklists** and implementation plans - **Validate your checklists** and implementation plans
- **General development questions** and explanations - **General development questions** and explanations
- **Technology comparisons** and best practices - **Technology comparisons** and best practices
- **Architecture and design discussions** - **Architecture and design discussions**
- **File reference support**: `"Use gemini to explain this algorithm with context from algorithm.py"` - **File reference support**: `"Use gemini to explain this algorithm with context from algorithm.py"`
- **Image support**: Include screenshots, diagrams, UI mockups for visual analysis: `"Chat with gemini about this error dialog screenshot to understand the user experience issue"` - **Image support**: Include screenshots, diagrams, UI mockups for visual analysis: `"Chat with gemini about this error dialog screenshot to understand the user experience issue"`
- **Dynamic collaboration**: Gemini can request additional files or context during the conversation if needed for a more thorough response - **Dynamic collaboration**: Models can request additional files or context during the conversation if needed for a more thorough response
- **Web search awareness**: Automatically identifies when online research would help and instructs Claude to perform targeted searches using continuation IDs - **Web search awareness**: Automatically identifies when online research would help and instructs Claude to perform targeted searches using continuation IDs
## Tool Parameters ## Tool Parameters
@@ -54,10 +55,48 @@ word verdict in the end.
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default) - `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
- `files`: Optional files for context (absolute paths) - `files`: Optional files for context (absolute paths)
- `images`: Optional images for visual context (absolute paths) - `images`: Optional images for visual context (absolute paths)
- `working_directory`: **Required** - Absolute directory path where generated code artifacts will be saved
- `temperature`: Response creativity (0-1, default 0.5) - `temperature`: Response creativity (0-1, default 0.5)
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only) - `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
- `continuation_id`: Continue previous conversations - `continuation_id`: Continue previous conversations
## Structured Code Generation
When using advanced reasoning models like **GPT-5 Pro** or **Gemini 2.5 Pro**, the chat tool can generate complete, production-ready code implementations in a structured format.
### How It Works
1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **GPT-5 Pro** or **Gemini 2.5 Pro**
2. The model generates a structured implementation and shares it with Zen
3. Zen saves the code to `zen_generated.code` and asks the AI agent to implement the plan
4. The AI agent continues from the previous context, reads the file, and applies the implementation
### When Code Generation Activates
The structured format activates for **substantial implementation work**:
- Creating new features from scratch with multiple files or significant code
- Major refactoring across multiple files or large sections
- Implementing new modules, components, or subsystems
- Large-scale updates affecting substantial portions of the codebase
- Complete rewrites of functions, algorithms, or approaches
For minor changes (small tweaks, bug fixes, algorithm improvements), the model responds normally with inline code blocks.
### Example Usage
```
chat with gpt-5-pro and ask it to make me a standalone, classic version of the
Pacman game using pygame that I can run from the commandline. Give me a single
script to execute in the end with any / all dependencies setup for me.
Do everything using pygame, we have no external resources / images / audio at
hand. Instead of ghosts, it'll be different geometric shapes moving around
in the maze that Pacman can eat (so there are no baddies). Pacman gets to eat
everything including bread-crumbs and large geometric shapes but make me the
classic maze / walls that it navigates within using keyboard arrow keys.
```
See the [Configuration Guide](../configuration.md#code-generation-capability) for details on the `allow_code_generation` flag.
## Usage Examples ## Usage Examples
**Basic Development Chat:** **Basic Development Chat:**

View File

@@ -28,6 +28,8 @@ class ModelCapabilities:
* Tool selection logic inspects attributes such as * Tool selection logic inspects attributes such as
``supports_extended_thinking`` or ``context_window`` to choose an ``supports_extended_thinking`` or ``context_window`` to choose an
appropriate model for a task. appropriate model for a task.
* The ``allow_code_generation`` flag enables structured code generation
in the chat tool for models more capable than the primary CLI.
""" """
provider: ProviderType provider: ProviderType
@@ -52,6 +54,9 @@ class ModelCapabilities:
supports_temperature: bool = True supports_temperature: bool = True
use_openai_response_api: bool = False use_openai_response_api: bool = False
default_reasoning_effort: Optional[str] = None default_reasoning_effort: Optional[str] = None
allow_code_generation: bool = (
False # Enables structured code generation in chat tool for substantial implementations
)
# Additional attributes # Additional attributes
max_image_size_mb: float = 0.0 max_image_size_mb: float = 0.0

View File

@@ -8,6 +8,7 @@ from .codereview_prompt import CODEREVIEW_PROMPT
from .consensus_prompt import CONSENSUS_PROMPT from .consensus_prompt import CONSENSUS_PROMPT
from .debug_prompt import DEBUG_ISSUE_PROMPT from .debug_prompt import DEBUG_ISSUE_PROMPT
from .docgen_prompt import DOCGEN_PROMPT from .docgen_prompt import DOCGEN_PROMPT
from .generate_code_prompt import GENERATE_CODE_PROMPT
from .planner_prompt import PLANNER_PROMPT from .planner_prompt import PLANNER_PROMPT
from .precommit_prompt import PRECOMMIT_PROMPT from .precommit_prompt import PRECOMMIT_PROMPT
from .refactor_prompt import REFACTOR_PROMPT from .refactor_prompt import REFACTOR_PROMPT
@@ -21,6 +22,7 @@ __all__ = [
"CODEREVIEW_PROMPT", "CODEREVIEW_PROMPT",
"DEBUG_ISSUE_PROMPT", "DEBUG_ISSUE_PROMPT",
"DOCGEN_PROMPT", "DOCGEN_PROMPT",
"GENERATE_CODE_PROMPT",
"ANALYZE_PROMPT", "ANALYZE_PROMPT",
"CHAT_PROMPT", "CHAT_PROMPT",
"CONSENSUS_PROMPT", "CONSENSUS_PROMPT",

View File

@@ -0,0 +1,181 @@
"""System prompt fragment enabling structured code generation exports.
This prompt is injected into the system prompt for models that have the
'allow_code_generation' capability enabled. It instructs the model to output
complete, working code in a structured format that coding agents can parse
and apply automatically.
The structured format uses XML-like tags to clearly delineate:
- New files to create (<NEWFILE>)
- Existing files to update (<UPDATED_EXISTING_FILE>)
- Step-by-step instructions for the coding agent
This enables:
1. Automated code extraction and application
2. Clear separation between instructions and implementation
3. Complete, runnable code without manual edits
4. Precise change tracking across multiple files
"""
GENERATE_CODE_PROMPT = """
# Structured Code Generation Protocol
**WHEN TO USE THIS PROTOCOL:**
Use this structured format ONLY when you are explicitly tasked with substantial code generation, such as:
- Creating new features from scratch with multiple files or significant code and you have been asked to help implement this
- Major refactoring across multiple files or large sections of code and you have been tasked to help do this
- Implementing new modules, components, or subsystems and you have been tasked to help with the implementation
- Large-scale updates affecting substantial portions of the codebase that you have been asked to help implement
**WHEN NOT TO USE THIS PROTOCOL:**
Do NOT use this format for minor changes:
- Small tweaks to existing functions or methods (1-20 lines)
- Bug fixes in isolated sections
- Simple algorithm improvements
- Minor refactoring of a single function
- Adding/removing a few lines of code
- Quick parameter adjustments or config changes
For minor changes:
- Follow the existing instructions provided earlier in your system prompt, such as the CRITICAL LINE NUMBER INSTRUCTIONS.
- Use inline code blocks with proper line number references and direct explanations instead of this structured format.
**IMPORTANT:** This protocol is for SUBSTANTIAL implementation work when explicitly requested, such as:
- "implement feature X"
- "create module Y"
- "refactor system Z"
- "rewrite the authentication logic"
- "redesign the data processing pipeline"
- "rebuild the algorithm from scratch"
- "convert this approach to use a different pattern"
- "create a complete implementation of..."
- "build out the entire workflow for..."
If the request is for explanation, analysis, debugging, planning, or discussion WITHOUT substantial code generation, respond normally without this structured format.
## Core Requirements (for substantial code generation tasks)
1. **Complete, Working Code**: Every code block must be fully functional without requiring additional edits. Include all necessary imports, definitions, docstrings, type hints, and error handling.
2. **Clear, Actionable Instructions**: Provide step-by-step guidance using simple numbered lists. Each instruction should map directly to file blocks that follow.
3. **Structured Output Format**: All generated code MUST be contained within a single `<GENERATED-CODE>` block using the exact structure defined below.
4. **Minimal External Commentary**: Keep any text outside the `<GENERATED-CODE>` block brief. Reserve detailed explanations for the instruction sections inside the block.
## Required Structure
Use this exact format (do not improvise tag names or reorder components):
```
<GENERATED-CODE>
[Step-by-step instructions for the coding agent]
1. Create new file [filename] with [description]
2. Update existing file [filename] by [description]
3. [Additional steps as needed]
<NEWFILE: path/to/new_file.py>
[Complete file contents with all necessary components:
- File-level docstring
- All imports (standard library, third-party, local)
- All class/function definitions with complete implementations
- All necessary helper functions
- Inline comments for complex logic
- Type hints where applicable]
</NEWFILE>
[Additional instructions for the next file, if needed]
<NEWFILE: path/to/another_file.py>
[Complete, working code for this file - no partial implementations or placeholders]
</NEWFILE>
[Instructions for updating existing files]
<UPDATED_EXISTING_FILE: existing/path.py>
[Complete replacement code for the modified sections or routines / lines that need updating:
- Full function/method bodies (not just the changed lines)
- Complete class definitions if modifying class methods
- All necessary imports if adding new dependencies
- Preserve existing code structure and style]
</UPDATED_EXISTING_FILE>
[If additional files need updates (based on existing code that was shared with you earlier), repeat the UPDATED_EXISTING_FILE block]
<UPDATED_EXISTING_FILE: another/existing/file.py>
[Complete code for this file's modifications]
</UPDATED_EXISTING_FILE>
[For file deletions, explicitly state in instructions with justification:
"Delete file path/to/obsolete.py - no longer needed because [reason]"]
</GENERATED-CODE>
```
## Critical Rules
**Completeness:**
- Never output partial code snippets or placeholder comments like "# rest of code here"
- Include complete function/class implementations from start to finish
- Add all required imports at the file level
- Include proper error handling and edge case logic
**Accuracy:**
- Match the existing codebase indentation style (tabs vs spaces)
- Preserve language-specific formatting conventions
- Include trailing newlines where required by language tooling
- Use correct file paths relative to project root
**Clarity:**
- Number instructions sequentially (1, 2, 3...)
- Map each instruction to specific file blocks below it
- Explain *why* changes are needed, not just *what* changes
- Highlight any breaking changes or migration steps required
**Structure:**
- Use `<NEWFILE: ...>` for files that don't exist yet
- Use `<UPDATED_EXISTING_FILE: ...>` for modifying existing files
- Place instructions between file blocks to provide context
- Keep the single `<GENERATED-CODE>` wrapper around everything
## Special Cases
**No Changes Needed:**
If the task doesn't require file creation or modification, explicitly state:
"No file changes required. The existing implementation already handles [requirement]."
Do not emit an empty `<GENERATED-CODE>` block.
**Configuration Changes:**
If modifying configuration files (JSON, YAML, TOML), include complete file contents with the changes applied, not just the changed lines.
**Test Files:**
When generating tests, include complete test suites with:
- All necessary test fixtures and setup
- Multiple test cases covering happy path and edge cases
- Proper teardown and cleanup
- Clear test descriptions and assertions
**Documentation:**
Include docstrings for all public functions, classes, and modules using the project's documentation style (Google, NumPy, Sphinx, etc.).
## Context Awareness
**CRITICAL:** Your implementation builds upon the ongoing conversation context:
- All previously shared files, requirements, and constraints remain relevant
- If updating existing code discussed earlier, reference it and preserve unmodified sections
- If the user shared code for improvement, your generated code should build upon it, not replace everything
- The coding agent has full conversation history—your instructions should reference prior discussion as needed
Your generated code is NOT standalone—it's a continuation of the collaborative session with full context awareness.
## Remember
The coding agent depends on this structured format to:
- Parse and extract code automatically
- Apply changes to the correct files within the conversation context
- Validate completeness before execution
- Track modifications across the codebase
Always prioritize clarity, completeness, correctness, and context awareness over brevity.
"""

File diff suppressed because one or more lines are too long

View File

@@ -137,7 +137,7 @@ class TestAutoMode:
importlib.reload(config) importlib.reload(config)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_auto_mode_requires_model_parameter(self): async def test_auto_mode_requires_model_parameter(self, tmp_path):
"""Test that auto mode enforces model parameter""" """Test that auto mode enforces model parameter"""
# Save original # Save original
original = os.environ.get("DEFAULT_MODEL", "") original = os.environ.get("DEFAULT_MODEL", "")
@@ -154,7 +154,7 @@ class TestAutoMode:
# Mock the provider to avoid real API calls # Mock the provider to avoid real API calls
with patch.object(tool, "get_model_provider"): with patch.object(tool, "get_model_provider"):
# Execute without model parameter # Execute without model parameter
result = await tool.execute({"prompt": "Test prompt"}) result = await tool.execute({"prompt": "Test prompt", "working_directory": str(tmp_path)})
# Should get error # Should get error
assert len(result) == 1 assert len(result) == 1

View File

@@ -200,7 +200,7 @@ class TestAutoModeComprehensive:
assert tool.get_model_category() == expected_category assert tool.get_model_category() == expected_category
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_auto_mode_with_gemini_only_uses_correct_models(self): async def test_auto_mode_with_gemini_only_uses_correct_models(self, tmp_path):
"""Test that auto mode with only Gemini uses flash for fast tools and pro for reasoning tools.""" """Test that auto mode with only Gemini uses flash for fast tools and pro for reasoning tools."""
provider_config = { provider_config = {
@@ -234,9 +234,13 @@ class TestAutoModeComprehensive:
) )
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider): with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
workdir = tmp_path / "chat_artifacts"
workdir.mkdir(parents=True, exist_ok=True)
# Test ChatTool (FAST_RESPONSE) - should prefer flash # Test ChatTool (FAST_RESPONSE) - should prefer flash
chat_tool = ChatTool() chat_tool = ChatTool()
await chat_tool.execute({"prompt": "test", "model": "auto"}) # This should trigger auto selection await chat_tool.execute(
{"prompt": "test", "model": "auto", "working_directory": str(workdir)}
) # This should trigger auto selection
# In auto mode, the tool should get an error requiring model selection # In auto mode, the tool should get an error requiring model selection
# but the suggested model should be flash # but the suggested model should be flash
@@ -355,7 +359,7 @@ class TestAutoModeComprehensive:
# would show models from all providers when called # would show models from all providers when called
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_auto_mode_model_parameter_required_error(self): async def test_auto_mode_model_parameter_required_error(self, tmp_path):
"""Test that auto mode properly requires model parameter and suggests correct model.""" """Test that auto mode properly requires model parameter and suggests correct model."""
provider_config = { provider_config = {
@@ -384,9 +388,12 @@ class TestAutoModeComprehensive:
# Test with ChatTool (FAST_RESPONSE category) # Test with ChatTool (FAST_RESPONSE category)
chat_tool = ChatTool() chat_tool = ChatTool()
workdir = tmp_path / "chat_artifacts"
workdir.mkdir(parents=True, exist_ok=True)
result = await chat_tool.execute( result = await chat_tool.execute(
{ {
"prompt": "test" "prompt": "test",
"working_directory": str(workdir),
# Note: no model parameter provided in auto mode # Note: no model parameter provided in auto mode
} }
) )
@@ -508,7 +515,7 @@ class TestAutoModeComprehensive:
assert fast_response is not None assert fast_response is not None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_actual_model_name_resolution_in_auto_mode(self): async def test_actual_model_name_resolution_in_auto_mode(self, tmp_path):
"""Test that when a model is selected in auto mode, the tool executes successfully.""" """Test that when a model is selected in auto mode, the tool executes successfully."""
provider_config = { provider_config = {
@@ -547,7 +554,11 @@ class TestAutoModeComprehensive:
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider): with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
chat_tool = ChatTool() chat_tool = ChatTool()
result = await chat_tool.execute({"prompt": "test", "model": "flash"}) # Use alias in auto mode workdir = tmp_path / "chat_artifacts"
workdir.mkdir(parents=True, exist_ok=True)
result = await chat_tool.execute(
{"prompt": "test", "model": "flash", "working_directory": str(workdir)}
) # Use alias in auto mode
# Should succeed with proper model resolution # Should succeed with proper model resolution
assert len(result) == 1 assert len(result) == 1

View File

@@ -0,0 +1,113 @@
"""Integration test for Chat tool code generation with Gemini 2.5 Pro.
This test uses the Google Gemini SDK's built-in record/replay support. To refresh the
cassette, delete the existing JSON file under
``tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json`` and run:
```
GEMINI_API_KEY=<real-key> pytest tests/test_chat_codegen_integration.py::test_chat_codegen_saves_file
```
The test will automatically record a new interaction when the cassette is missing and
the environment variable `GEMINI_API_KEY` is set to a valid key.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from providers.gemini import GeminiModelProvider
from providers.registry import ModelProviderRegistry, ProviderType
from tools.chat import ChatTool
# Root directory where the Google GenAI SDK reads/writes record-replay cassettes
# (exported via GOOGLE_GENAI_REPLAYS_DIRECTORY in the test below).
REPLAYS_ROOT = Path(__file__).parent / "gemini_cassettes"
# On-disk location of this test's recorded Gemini interaction.
CASSETTE_DIR = REPLAYS_ROOT / "chat_codegen"
CASSETTE_PATH = CASSETTE_DIR / "gemini25_pro_calculator" / "mldev.json"
# Replay identifier handed to the SDK via GOOGLE_GENAI_REPLAY_ID; it mirrors
# CASSETTE_PATH relative to REPLAYS_ROOT, without the .json extension.
CASSETTE_REPLAY_ID = "chat_codegen/gemini25_pro_calculator/mldev"
@pytest.mark.asyncio
@pytest.mark.no_mock_provider
async def test_chat_codegen_saves_file(monkeypatch, tmp_path):
    """Ensure Gemini 2.5 Pro responses create zen_generated.code when code is emitted."""

    CASSETTE_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Replay the stored cassette when it exists; otherwise record a fresh one,
    # which requires a real GEMINI_API_KEY in the environment.
    gemini_key = os.getenv("GEMINI_API_KEY", "")
    if CASSETTE_PATH.exists():
        gemini_key = "dummy-key-for-replay"
        client_mode = "replay"
    else:
        if not gemini_key or gemini_key.startswith("dummy"):
            pytest.skip("Cassette missing and GEMINI_API_KEY not configured. Provide a real key to record.")
        client_mode = "record"

    with monkeypatch.context() as patched:
        env_settings = {
            "GEMINI_API_KEY": gemini_key,
            "DEFAULT_MODEL": "auto",
            "GOOGLE_ALLOWED_MODELS": "gemini-2.5-pro",
            "GOOGLE_GENAI_CLIENT_MODE": client_mode,
            "GOOGLE_GENAI_REPLAYS_DIRECTORY": str(REPLAYS_ROOT),
            "GOOGLE_GENAI_REPLAY_ID": CASSETTE_REPLAY_ID,
        }
        for name, value in env_settings.items():
            patched.setenv(name, value)

        # Clear other provider keys to avoid unintended routing
        for name in ("OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY", "CUSTOM_API_KEY"):
            patched.delenv(name, raising=False)

        ModelProviderRegistry.reset_for_testing()
        ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)

        workdir = tmp_path / "codegen"
        workdir.mkdir()
        # Seed a stale artifact so we can prove the tool overwrites it.
        stale_artifact = workdir / "zen_generated.code"
        stale_artifact.write_text("stale contents", encoding="utf-8")

        prompt = (
            "Please generate a Python module with functions `add` and `multiply` that perform"
            " basic addition and multiplication. Produce the response using the structured"
            " <GENERATED-CODE> format so the assistant can apply the files directly."
        )

        result = await ChatTool().execute(
            {
                "prompt": prompt,
                "model": "gemini-2.5-pro",
                "working_directory": str(workdir),
            }
        )

        provider = ModelProviderRegistry.get_provider_for_model("gemini-2.5-pro")
        if provider is not None:
            try:
                provider.client.close()
            except AttributeError:
                pass

        # Reset restriction service cache to avoid leaking allowed-model config
        try:
            from utils import model_restrictions

            model_restrictions._restriction_service = None  # type: ignore[attr-defined]
        except Exception:
            pass

        assert result and result[0].type == "text"
        payload = json.loads(result[0].text)
        assert payload["status"] in {"success", "continuation_available"}

        artifact_path = workdir / "zen_generated.code"
        assert artifact_path.exists()
        saved = artifact_path.read_text()
        assert "<GENERATED-CODE>" in saved
        assert "<NEWFILE:" in saved
        assert "def add" in saved and "def multiply" in saved
        assert "stale contents" not in saved
        artifact_path.unlink()

View File

@@ -55,7 +55,7 @@ def _extract_number(text: str) -> str:
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.no_mock_provider @pytest.mark.no_mock_provider
async def test_chat_cross_model_continuation(monkeypatch): async def test_chat_cross_model_continuation(monkeypatch, tmp_path):
"""Verify continuation across Gemini then OpenAI using recorded interactions.""" """Verify continuation across Gemini then OpenAI using recorded interactions."""
env_updates = { env_updates = {
@@ -115,10 +115,13 @@ async def test_chat_cross_model_continuation(monkeypatch):
m.setattr(conversation_memory.uuid, "uuid4", lambda: FIXED_THREAD_ID) m.setattr(conversation_memory.uuid, "uuid4", lambda: FIXED_THREAD_ID)
chat_tool = ChatTool() chat_tool = ChatTool()
working_directory = str(tmp_path)
step1_args = { step1_args = {
"prompt": "Pick a number between 1 and 10 and respond with JUST that number.", "prompt": "Pick a number between 1 and 10 and respond with JUST that number.",
"model": "gemini-2.5-flash", "model": "gemini-2.5-flash",
"temperature": 0.2, "temperature": 0.2,
"working_directory": working_directory,
} }
step1_result = await chat_tool.execute(step1_args) step1_result = await chat_tool.execute(step1_args)
@@ -183,6 +186,7 @@ async def test_chat_cross_model_continuation(monkeypatch):
"model": "gpt-5", "model": "gpt-5",
"continuation_id": continuation_id, "continuation_id": continuation_id,
"temperature": 0.2, "temperature": 0.2,
"working_directory": working_directory,
} }
step2_result = await chat_tool.execute(step2_args) step2_result = await chat_tool.execute(step2_args)

View File

@@ -23,7 +23,7 @@ CASSETTE_CONTINUATION_PATH = CASSETTE_DIR / "chat_gpt5_continuation.json"
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.no_mock_provider @pytest.mark.no_mock_provider
async def test_chat_auto_mode_with_openai(monkeypatch): async def test_chat_auto_mode_with_openai(monkeypatch, tmp_path):
"""Ensure ChatTool in auto mode selects gpt-5 via OpenAI and returns a valid response.""" """Ensure ChatTool in auto mode selects gpt-5 via OpenAI and returns a valid response."""
# Prepare environment so only OpenAI is available in auto mode # Prepare environment so only OpenAI is available in auto mode
env_updates = { env_updates = {
@@ -63,10 +63,12 @@ async def test_chat_auto_mode_with_openai(monkeypatch):
# Execute ChatTool request targeting gpt-5 directly (server normally resolves auto→model) # Execute ChatTool request targeting gpt-5 directly (server normally resolves auto→model)
chat_tool = ChatTool() chat_tool = ChatTool()
working_directory = str(tmp_path)
arguments = { arguments = {
"prompt": "Use chat with gpt5 and ask how far the moon is from earth.", "prompt": "Use chat with gpt5 and ask how far the moon is from earth.",
"model": "gpt-5", "model": "gpt-5",
"temperature": 1.0, "temperature": 1.0,
"working_directory": working_directory,
} }
result = await chat_tool.execute(arguments) result = await chat_tool.execute(arguments)
@@ -87,7 +89,7 @@ async def test_chat_auto_mode_with_openai(monkeypatch):
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.no_mock_provider @pytest.mark.no_mock_provider
async def test_chat_openai_continuation(monkeypatch): async def test_chat_openai_continuation(monkeypatch, tmp_path):
"""Verify continuation_id workflow against gpt-5 using recorded OpenAI responses.""" """Verify continuation_id workflow against gpt-5 using recorded OpenAI responses."""
env_updates = { env_updates = {
@@ -126,12 +128,14 @@ async def test_chat_openai_continuation(monkeypatch):
m.setattr(conversation_memory.uuid, "uuid4", lambda: fixed_thread_id) m.setattr(conversation_memory.uuid, "uuid4", lambda: fixed_thread_id)
chat_tool = ChatTool() chat_tool = ChatTool()
working_directory = str(tmp_path)
# First message: obtain continuation_id # First message: obtain continuation_id
first_args = { first_args = {
"prompt": "In one word, which sells better: iOS app or macOS app?", "prompt": "In one word, which sells better: iOS app or macOS app?",
"model": "gpt-5", "model": "gpt-5",
"temperature": 1.0, "temperature": 1.0,
"working_directory": working_directory,
} }
first_result = await chat_tool.execute(first_args) first_result = await chat_tool.execute(first_args)
@@ -152,6 +156,7 @@ async def test_chat_openai_continuation(monkeypatch):
"model": "gpt-5", "model": "gpt-5",
"continuation_id": continuation_id, "continuation_id": continuation_id,
"temperature": 1.0, "temperature": 1.0,
"working_directory": working_directory,
} }
second_result = await chat_tool.execute(second_args) second_result = await chat_tool.execute(second_args)

View File

@@ -38,12 +38,14 @@ class TestChatTool:
# Required fields # Required fields
assert "prompt" in schema["required"] assert "prompt" in schema["required"]
assert "working_directory" in schema["required"]
# Properties # Properties
properties = schema["properties"] properties = schema["properties"]
assert "prompt" in properties assert "prompt" in properties
assert "files" in properties assert "files" in properties
assert "images" in properties assert "images" in properties
assert "working_directory" in properties
def test_request_model_validation(self): def test_request_model_validation(self):
"""Test that the request model validates correctly""" """Test that the request model validates correctly"""
@@ -54,6 +56,7 @@ class TestChatTool:
"images": ["test.png"], "images": ["test.png"],
"model": "anthropic/claude-opus-4.1", "model": "anthropic/claude-opus-4.1",
"temperature": 0.7, "temperature": 0.7,
"working_directory": "/tmp", # Dummy absolute path
} }
request = ChatRequest(**request_data) request = ChatRequest(**request_data)
@@ -62,6 +65,7 @@ class TestChatTool:
assert request.images == ["test.png"] assert request.images == ["test.png"]
assert request.model == "anthropic/claude-opus-4.1" assert request.model == "anthropic/claude-opus-4.1"
assert request.temperature == 0.7 assert request.temperature == 0.7
assert request.working_directory == "/tmp"
def test_required_fields(self): def test_required_fields(self):
"""Test that required fields are enforced""" """Test that required fields are enforced"""
@@ -69,7 +73,7 @@ class TestChatTool:
from pydantic import ValidationError from pydantic import ValidationError
with pytest.raises(ValidationError): with pytest.raises(ValidationError):
ChatRequest(model="anthropic/claude-opus-4.1") ChatRequest(model="anthropic/claude-opus-4.1", working_directory="/tmp")
def test_model_availability(self): def test_model_availability(self):
"""Test that model availability works""" """Test that model availability works"""
@@ -96,7 +100,7 @@ class TestChatTool:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_prompt_preparation(self): async def test_prompt_preparation(self):
"""Test that prompt preparation works correctly""" """Test that prompt preparation works correctly"""
request = ChatRequest(prompt="Test prompt", files=[]) request = ChatRequest(prompt="Test prompt", files=[], working_directory="/tmp")
# Mock the system prompt and file handling # Mock the system prompt and file handling
with patch.object(self.tool, "get_system_prompt", return_value="System prompt"): with patch.object(self.tool, "get_system_prompt", return_value="System prompt"):
@@ -113,7 +117,7 @@ class TestChatTool:
def test_response_formatting(self): def test_response_formatting(self):
"""Test that response formatting works correctly""" """Test that response formatting works correctly"""
response = "Test response content" response = "Test response content"
request = ChatRequest(prompt="Test") request = ChatRequest(prompt="Test", working_directory="/tmp")
formatted = self.tool.format_response(response, request) formatted = self.tool.format_response(response, request)
@@ -146,6 +150,7 @@ class TestChatTool:
required_fields = self.tool.get_required_fields() required_fields = self.tool.get_required_fields()
assert "prompt" in required_fields assert "prompt" in required_fields
assert "working_directory" in required_fields
class TestChatRequestModel: class TestChatRequestModel:
@@ -160,10 +165,11 @@ class TestChatRequestModel:
assert "context" in CHAT_FIELD_DESCRIPTIONS["prompt"] assert "context" in CHAT_FIELD_DESCRIPTIONS["prompt"]
assert "full-paths" in CHAT_FIELD_DESCRIPTIONS["files"] or "absolute" in CHAT_FIELD_DESCRIPTIONS["files"] assert "full-paths" in CHAT_FIELD_DESCRIPTIONS["files"] or "absolute" in CHAT_FIELD_DESCRIPTIONS["files"]
assert "visual context" in CHAT_FIELD_DESCRIPTIONS["images"] assert "visual context" in CHAT_FIELD_DESCRIPTIONS["images"]
assert "directory" in CHAT_FIELD_DESCRIPTIONS["working_directory"].lower()
def test_default_values(self): def test_default_values(self):
"""Test that default values work correctly""" """Test that default values work correctly"""
request = ChatRequest(prompt="Test") request = ChatRequest(prompt="Test", working_directory="/tmp")
assert request.prompt == "Test" assert request.prompt == "Test"
assert request.files == [] # Should default to empty list assert request.files == [] # Should default to empty list
@@ -173,7 +179,7 @@ class TestChatRequestModel:
"""Test that ChatRequest properly inherits from ToolRequest""" """Test that ChatRequest properly inherits from ToolRequest"""
from tools.shared.base_models import ToolRequest from tools.shared.base_models import ToolRequest
request = ChatRequest(prompt="Test") request = ChatRequest(prompt="Test", working_directory="/tmp")
assert isinstance(request, ToolRequest) assert isinstance(request, ToolRequest)
# Should have inherited fields # Should have inherited fields

View File

@@ -5,7 +5,7 @@ from utils.conversation_memory import get_thread
from utils.storage_backend import get_storage_backend from utils.storage_backend import get_storage_backend
def test_first_response_persisted_in_conversation_history(): def test_first_response_persisted_in_conversation_history(tmp_path):
"""Ensure the assistant's initial reply is stored for newly created threads.""" """Ensure the assistant's initial reply is stored for newly created threads."""
# Clear in-memory storage to avoid cross-test contamination # Clear in-memory storage to avoid cross-test contamination
@@ -13,7 +13,7 @@ def test_first_response_persisted_in_conversation_history():
storage._store.clear() # type: ignore[attr-defined] storage._store.clear() # type: ignore[attr-defined]
tool = ChatTool() tool = ChatTool()
request = ChatRequest(prompt="First question?", model="local-llama") request = ChatRequest(prompt="First question?", model="local-llama", working_directory=str(tmp_path))
response_text = "Here is the initial answer." response_text = "Here is the initial answer."
# Mimic the first tool invocation (no continuation_id supplied) # Mimic the first tool invocation (no continuation_id supplied)

View File

@@ -91,6 +91,7 @@ def helper_function():
"prompt": "Analyze this codebase structure", "prompt": "Analyze this codebase structure",
"files": [directory], # Directory path, not individual files "files": [directory], # Directory path, not individual files
"model": "flash", "model": "flash",
"working_directory": directory,
} }
# Execute the tool # Execute the tool
@@ -168,6 +169,7 @@ def helper_function():
"files": [directory], # Same directory again "files": [directory], # Same directory again
"model": "flash", "model": "flash",
"continuation_id": thread_id, "continuation_id": thread_id,
"working_directory": directory,
} }
# Mock to capture file filtering behavior # Mock to capture file filtering behavior
@@ -299,6 +301,7 @@ def helper_function():
"prompt": "Analyze this code", "prompt": "Analyze this code",
"files": [directory], "files": [directory],
"model": "flash", "model": "flash",
"working_directory": directory,
} }
result = await tool.execute(request_args) result = await tool.execute(request_args)

View File

@@ -56,7 +56,12 @@ class TestLargePromptHandling:
async def test_chat_large_prompt_detection(self, large_prompt): async def test_chat_large_prompt_detection(self, large_prompt):
"""Test that chat tool detects large prompts.""" """Test that chat tool detects large prompts."""
tool = ChatTool() tool = ChatTool()
result = await tool.execute({"prompt": large_prompt}) temp_dir = tempfile.mkdtemp()
temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": large_prompt, "working_directory": temp_dir})
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
assert len(result) == 1 assert len(result) == 1
assert isinstance(result[0], TextContent) assert isinstance(result[0], TextContent)
@@ -73,9 +78,16 @@ class TestLargePromptHandling:
"""Test that chat tool works normally with regular prompts.""" """Test that chat tool works normally with regular prompts."""
tool = ChatTool() tool = ChatTool()
temp_dir = tempfile.mkdtemp()
# This test runs in the test environment which uses dummy keys # This test runs in the test environment which uses dummy keys
# The chat tool will return an error for dummy keys, which is expected # The chat tool will return an error for dummy keys, which is expected
result = await tool.execute({"prompt": normal_prompt, "model": "gemini-2.5-flash"}) try:
result = await tool.execute(
{"prompt": normal_prompt, "model": "gemini-2.5-flash", "working_directory": temp_dir}
)
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
assert len(result) == 1 assert len(result) == 1
output = json.loads(result[0].text) output = json.loads(result[0].text)
@@ -105,7 +117,14 @@ class TestLargePromptHandling:
try: try:
# This test runs in the test environment which uses dummy keys # This test runs in the test environment which uses dummy keys
# The chat tool will return an error for dummy keys, which is expected # The chat tool will return an error for dummy keys, which is expected
result = await tool.execute({"prompt": "", "files": [temp_prompt_file], "model": "gemini-2.5-flash"}) result = await tool.execute(
{
"prompt": "",
"files": [temp_prompt_file],
"model": "gemini-2.5-flash",
"working_directory": temp_dir,
}
)
assert len(result) == 1 assert len(result) == 1
output = json.loads(result[0].text) output = json.loads(result[0].text)
@@ -261,7 +280,13 @@ class TestLargePromptHandling:
mock_prepare_files.return_value = ("File content", [other_file]) mock_prepare_files.return_value = ("File content", [other_file])
# Use a small prompt to avoid triggering size limit # Use a small prompt to avoid triggering size limit
await tool.execute({"prompt": "Test prompt", "files": [temp_prompt_file, other_file]}) await tool.execute(
{
"prompt": "Test prompt",
"files": [temp_prompt_file, other_file],
"working_directory": os.path.dirname(temp_prompt_file),
}
)
# Verify handle_prompt_file was called with the original files list # Verify handle_prompt_file was called with the original files list
mock_handle_prompt.assert_called_once_with([temp_prompt_file, other_file]) mock_handle_prompt.assert_called_once_with([temp_prompt_file, other_file])
@@ -295,7 +320,11 @@ class TestLargePromptHandling:
mock_get_provider.return_value = mock_provider mock_get_provider.return_value = mock_provider
# With the fix, this should now pass because we check at MCP transport boundary before adding internal content # With the fix, this should now pass because we check at MCP transport boundary before adding internal content
result = await tool.execute({"prompt": exact_prompt}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": exact_prompt, "working_directory": temp_dir})
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
output = json.loads(result[0].text) output = json.loads(result[0].text)
assert output["status"] != "resend_prompt" assert output["status"] != "resend_prompt"
@@ -305,7 +334,11 @@ class TestLargePromptHandling:
tool = ChatTool() tool = ChatTool()
over_prompt = "x" * (MCP_PROMPT_SIZE_LIMIT + 1) over_prompt = "x" * (MCP_PROMPT_SIZE_LIMIT + 1)
result = await tool.execute({"prompt": over_prompt}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": over_prompt, "working_directory": temp_dir})
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
output = json.loads(result[0].text) output = json.loads(result[0].text)
assert output["status"] == "resend_prompt" assert output["status"] == "resend_prompt"
@@ -326,7 +359,11 @@ class TestLargePromptHandling:
) )
mock_get_provider.return_value = mock_provider mock_get_provider.return_value = mock_provider
result = await tool.execute({"prompt": ""}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": "", "working_directory": temp_dir})
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
output = json.loads(result[0].text) output = json.loads(result[0].text)
assert output["status"] != "resend_prompt" assert output["status"] != "resend_prompt"
@@ -362,7 +399,11 @@ class TestLargePromptHandling:
mock_model_context_class.return_value = mock_model_context mock_model_context_class.return_value = mock_model_context
# Should continue with empty prompt when file can't be read # Should continue with empty prompt when file can't be read
result = await tool.execute({"prompt": "", "files": [bad_file]}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": "", "files": [bad_file], "working_directory": temp_dir})
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
output = json.loads(result[0].text) output = json.loads(result[0].text)
assert output["status"] != "resend_prompt" assert output["status"] != "resend_prompt"
@@ -408,6 +449,7 @@ class TestLargePromptHandling:
"prompt": "Summarize the design decisions", "prompt": "Summarize the design decisions",
"files": [str(large_file)], "files": [str(large_file)],
"model": "flash", "model": "flash",
"working_directory": str(tmp_path),
"_model_context": dummy_context, "_model_context": dummy_context,
} }
) )
@@ -424,6 +466,7 @@ class TestLargePromptHandling:
This test verifies that even if our internal prompt (with system prompts, history, etc.) This test verifies that even if our internal prompt (with system prompts, history, etc.)
exceeds MCP_PROMPT_SIZE_LIMIT, it should still work as long as the user's input is small. exceeds MCP_PROMPT_SIZE_LIMIT, it should still work as long as the user's input is small.
""" """
tool = ChatTool() tool = ChatTool()
# Small user input that should pass MCP boundary check # Small user input that should pass MCP boundary check
@@ -432,18 +475,20 @@ class TestLargePromptHandling:
# Mock a huge conversation history that would exceed MCP limits if incorrectly checked # Mock a huge conversation history that would exceed MCP limits if incorrectly checked
huge_history = "x" * (MCP_PROMPT_SIZE_LIMIT * 2) # 100K chars = way over 50K limit huge_history = "x" * (MCP_PROMPT_SIZE_LIMIT * 2) # 100K chars = way over 50K limit
temp_dir = tempfile.mkdtemp()
original_prepare_prompt = tool.prepare_prompt
try:
with ( with (
patch.object(tool, "get_model_provider") as mock_get_provider, patch.object(tool, "get_model_provider") as mock_get_provider,
patch("utils.model_context.ModelContext") as mock_model_context_class, patch("utils.model_context.ModelContext") as mock_model_context_class,
): ):
from tests.mock_helpers import create_mock_provider from tests.mock_helpers import create_mock_provider
from utils.model_context import TokenAllocation
mock_provider = create_mock_provider(model_name="flash") mock_provider = create_mock_provider(model_name="flash")
mock_get_provider.return_value = mock_provider mock_get_provider.return_value = mock_provider
# Mock ModelContext to avoid the comparison issue
from utils.model_context import TokenAllocation
mock_model_context = MagicMock() mock_model_context = MagicMock()
mock_model_context.model_name = "flash" mock_model_context.model_name = "flash"
mock_model_context.provider = mock_provider mock_model_context.provider = mock_provider
@@ -456,38 +501,31 @@ class TestLargePromptHandling:
) )
mock_model_context_class.return_value = mock_model_context mock_model_context_class.return_value = mock_model_context
# Mock the prepare_prompt to simulate huge internal context
original_prepare_prompt = tool.prepare_prompt
async def mock_prepare_prompt(request): async def mock_prepare_prompt(request):
# Call original to get normal processing
normal_prompt = await original_prepare_prompt(request) normal_prompt = await original_prepare_prompt(request)
# Add huge internal context (simulating large history, system prompts, files)
huge_internal_prompt = f"{normal_prompt}\n\n=== HUGE INTERNAL CONTEXT ===\n{huge_history}" huge_internal_prompt = f"{normal_prompt}\n\n=== HUGE INTERNAL CONTEXT ===\n{huge_history}"
# Verify the huge internal prompt would exceed MCP limits if incorrectly checked
assert len(huge_internal_prompt) > MCP_PROMPT_SIZE_LIMIT assert len(huge_internal_prompt) > MCP_PROMPT_SIZE_LIMIT
return huge_internal_prompt return huge_internal_prompt
tool.prepare_prompt = mock_prepare_prompt tool.prepare_prompt = mock_prepare_prompt
# This should succeed because we only check user input at MCP boundary result = await tool.execute(
result = await tool.execute({"prompt": small_user_prompt, "model": "flash"}) {"prompt": small_user_prompt, "model": "flash", "working_directory": temp_dir}
)
output = json.loads(result[0].text) output = json.loads(result[0].text)
# Should succeed even though internal context is huge
assert output["status"] != "resend_prompt" assert output["status"] != "resend_prompt"
# Verify the model was actually called with the huge prompt
mock_provider.generate_content.assert_called_once() mock_provider.generate_content.assert_called_once()
call_kwargs = mock_provider.generate_content.call_args[1] call_kwargs = mock_provider.generate_content.call_args[1]
actual_prompt = call_kwargs.get("prompt") actual_prompt = call_kwargs.get("prompt")
# Verify internal prompt was huge (proving we don't limit internal processing)
assert len(actual_prompt) > MCP_PROMPT_SIZE_LIMIT assert len(actual_prompt) > MCP_PROMPT_SIZE_LIMIT
assert huge_history in actual_prompt assert huge_history in actual_prompt
assert small_user_prompt in actual_prompt assert small_user_prompt in actual_prompt
finally:
tool.prepare_prompt = original_prepare_prompt
shutil.rmtree(temp_dir, ignore_errors=True)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_mcp_boundary_vs_internal_processing_distinction(self): async def test_mcp_boundary_vs_internal_processing_distinction(self):
@@ -500,7 +538,9 @@ class TestLargePromptHandling:
# Test case 1: Large user input should fail at MCP boundary # Test case 1: Large user input should fail at MCP boundary
large_user_input = "x" * (MCP_PROMPT_SIZE_LIMIT + 1000) large_user_input = "x" * (MCP_PROMPT_SIZE_LIMIT + 1000)
result = await tool.execute({"prompt": large_user_input, "model": "flash"}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": large_user_input, "model": "flash", "working_directory": temp_dir})
output = json.loads(result[0].text) output = json.loads(result[0].text)
assert output["status"] == "resend_prompt" # Should fail assert output["status"] == "resend_prompt" # Should fail
assert "too large for MCP's token limits" in output["content"] assert "too large for MCP's token limits" in output["content"]
@@ -510,7 +550,13 @@ class TestLargePromptHandling:
# This test runs in the test environment which uses dummy keys # This test runs in the test environment which uses dummy keys
# The chat tool will return an error for dummy keys, which is expected # The chat tool will return an error for dummy keys, which is expected
result = await tool.execute({"prompt": small_user_input, "model": "gemini-2.5-flash"}) result = await tool.execute(
{
"prompt": small_user_input,
"model": "gemini-2.5-flash",
"working_directory": temp_dir,
}
)
output = json.loads(result[0].text) output = json.loads(result[0].text)
# The test will fail with dummy API keys, which is expected behavior # The test will fail with dummy API keys, which is expected behavior
@@ -521,6 +567,8 @@ class TestLargePromptHandling:
else: else:
# If somehow it succeeds (e.g., with mocked provider), check the response # If somehow it succeeds (e.g., with mocked provider), check the response
assert output["status"] != "resend_prompt" assert output["status"] != "resend_prompt"
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_continuation_with_huge_conversation_history(self): async def test_continuation_with_huge_conversation_history(self):
@@ -548,6 +596,8 @@ class TestLargePromptHandling:
# Ensure the history exceeds MCP limits # Ensure the history exceeds MCP limits
assert len(huge_conversation_history) > MCP_PROMPT_SIZE_LIMIT assert len(huge_conversation_history) > MCP_PROMPT_SIZE_LIMIT
temp_dir = tempfile.mkdtemp()
with ( with (
patch.object(tool, "get_model_provider") as mock_get_provider, patch.object(tool, "get_model_provider") as mock_get_provider,
patch("utils.model_context.ModelContext") as mock_model_context_class, patch("utils.model_context.ModelContext") as mock_model_context_class,
@@ -579,6 +629,7 @@ class TestLargePromptHandling:
"prompt": f"{huge_conversation_history}\n\n=== CURRENT REQUEST ===\n{small_continuation_prompt}", "prompt": f"{huge_conversation_history}\n\n=== CURRENT REQUEST ===\n{small_continuation_prompt}",
"model": "flash", "model": "flash",
"continuation_id": "test_thread_123", "continuation_id": "test_thread_123",
"working_directory": temp_dir,
} }
# Mock the conversation history embedding to simulate server.py behavior # Mock the conversation history embedding to simulate server.py behavior
@@ -628,6 +679,7 @@ class TestLargePromptHandling:
finally: finally:
# Restore original execute method # Restore original execute method
tool.__class__.execute = original_execute tool.__class__.execute = original_execute
shutil.rmtree(temp_dir, ignore_errors=True)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -68,6 +68,7 @@ class TestListModelsTool:
assert "`flash` → `gemini-2.5-flash`" in content assert "`flash` → `gemini-2.5-flash`" in content
assert "`pro` → `gemini-2.5-pro`" in content assert "`pro` → `gemini-2.5-pro`" in content
assert "1M context" in content assert "1M context" in content
assert "Supports structured code generation" in content
# Check summary # Check summary
assert "**Configured Providers**: 1" in content assert "**Configured Providers**: 1" in content

View File

@@ -12,6 +12,7 @@ RECORDING: To record new responses, delete the cassette file and run with real A
import logging import logging
import os import os
import tempfile
from pathlib import Path from pathlib import Path
from unittest.mock import patch from unittest.mock import patch
@@ -92,7 +93,13 @@ class TestO3ProOutputTextFix:
async def _execute_chat_tool_test(self): async def _execute_chat_tool_test(self):
"""Execute the ChatTool with o3-pro and return the result.""" """Execute the ChatTool with o3-pro and return the result."""
chat_tool = ChatTool() chat_tool = ChatTool()
arguments = {"prompt": "What is 2 + 2?", "model": "o3-pro", "temperature": 1.0} with tempfile.TemporaryDirectory() as workdir:
arguments = {
"prompt": "What is 2 + 2?",
"model": "o3-pro",
"temperature": 1.0,
"working_directory": workdir,
}
return await chat_tool.execute(arguments) return await chat_tool.execute(arguments)

View File

@@ -4,6 +4,8 @@ Test per-tool model default selection functionality
import json import json
import os import os
import shutil
import tempfile
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
@@ -290,7 +292,13 @@ class TestAutoModeErrorMessages:
mock_get_provider_for.return_value = None mock_get_provider_for.return_value = None
tool = ChatTool() tool = ChatTool()
result = await tool.execute({"prompt": "test", "model": "auto"}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute(
{"prompt": "test", "model": "auto", "working_directory": temp_dir}
)
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
assert len(result) == 1 assert len(result) == 1
# The SimpleTool will wrap the error message # The SimpleTool will wrap the error message
@@ -418,7 +426,13 @@ class TestRuntimeModelSelection:
mock_get_provider.return_value = None mock_get_provider.return_value = None
tool = ChatTool() tool = ChatTool()
result = await tool.execute({"prompt": "test", "model": "gpt-5-turbo"}) temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute(
{"prompt": "test", "model": "gpt-5-turbo", "working_directory": temp_dir}
)
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
# Should require model selection # Should require model selection
assert len(result) == 1 assert len(result) == 1
@@ -515,7 +529,11 @@ class TestUnavailableModelFallback:
mock_get_model_provider.return_value = mock_provider mock_get_model_provider.return_value = mock_provider
tool = ChatTool() tool = ChatTool()
result = await tool.execute({"prompt": "test"}) # No model specified temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute({"prompt": "test", "working_directory": temp_dir})
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
# Should work normally, not require model parameter # Should work normally, not require model parameter
assert len(result) == 1 assert len(result) == 1

View File

@@ -3,6 +3,8 @@ Tests for individual tool implementations
""" """
import json import json
import shutil
import tempfile
import pytest import pytest
@@ -343,12 +345,17 @@ class TestAbsolutePathValidation:
async def test_chat_tool_relative_path_rejected(self): async def test_chat_tool_relative_path_rejected(self):
"""Test that chat tool rejects relative paths""" """Test that chat tool rejects relative paths"""
tool = ChatTool() tool = ChatTool()
temp_dir = tempfile.mkdtemp()
try:
result = await tool.execute( result = await tool.execute(
{ {
"prompt": "Explain this code", "prompt": "Explain this code",
"files": ["code.py"], # relative path without ./ "files": ["code.py"], # relative path without ./
"working_directory": temp_dir,
} }
) )
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
assert len(result) == 1 assert len(result) == 1
response = json.loads(result[0].text) response = json.loads(result[0].text)

View File

@@ -6,15 +6,20 @@ brainstorming, problem-solving, and collaborative thinking. It supports file con
images, and conversation continuation for seamless multi-turn interactions. images, and conversation continuation for seamless multi-turn interactions.
""" """
import logging
import os
import re
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional from typing import TYPE_CHECKING, Any, Optional
from pydantic import Field from pydantic import Field
if TYPE_CHECKING: if TYPE_CHECKING:
from providers.shared import ModelCapabilities
from tools.models import ToolModelCategory from tools.models import ToolModelCategory
from config import TEMPERATURE_BALANCED from config import TEMPERATURE_BALANCED
from systemprompts import CHAT_PROMPT from systemprompts import CHAT_PROMPT, GENERATE_CODE_PROMPT
from tools.shared.base_models import COMMON_FIELD_DESCRIPTIONS, ToolRequest from tools.shared.base_models import COMMON_FIELD_DESCRIPTIONS, ToolRequest
from .simple.base import SimpleTool from .simple.base import SimpleTool
@@ -27,6 +32,9 @@ CHAT_FIELD_DESCRIPTIONS = {
), ),
"files": "absolute file or folder paths for code context (do NOT shorten).", "files": "absolute file or folder paths for code context (do NOT shorten).",
"images": "Optional absolute image paths or base64 for visual context when helpful.", "images": "Optional absolute image paths or base64 for visual context when helpful.",
"working_directory": (
"Absolute full directory path where the assistant AI can save generated code for implementation. The directory must already exist"
),
} }
@@ -36,6 +44,7 @@ class ChatRequest(ToolRequest):
prompt: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["prompt"]) prompt: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["prompt"])
files: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["files"]) files: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["files"])
images: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["images"]) images: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["images"])
working_directory: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["working_directory"])
class ChatTool(SimpleTool): class ChatTool(SimpleTool):
@@ -49,6 +58,10 @@ class ChatTool(SimpleTool):
Chat tool with 100% behavioral compatibility. Chat tool with 100% behavioral compatibility.
""" """
    def __init__(self) -> None:
        super().__init__()
        # Sanitized copy of the most recent response, used when recording the
        # assistant turn into conversation history; set by format_response and
        # consumed (then cleared) by _record_assistant_turn.
        self._last_recordable_response: Optional[str] = None
def get_name(self) -> str: def get_name(self) -> str:
return "chat" return "chat"
@@ -58,9 +71,20 @@ class ChatTool(SimpleTool):
"getting second opinions, and exploring ideas. Use for ideas, validations, questions, and thoughtful explanations." "getting second opinions, and exploring ideas. Use for ideas, validations, questions, and thoughtful explanations."
) )
def get_annotations(self) -> Optional[dict[str, Any]]:
"""Chat writes generated artifacts when code-generation is enabled."""
return {"readOnlyHint": False}
def get_system_prompt(self) -> str: def get_system_prompt(self) -> str:
return CHAT_PROMPT return CHAT_PROMPT
def get_capability_system_prompts(self, capabilities: Optional["ModelCapabilities"]) -> list[str]:
prompts = list(super().get_capability_system_prompts(capabilities))
if capabilities and capabilities.allow_code_generation:
prompts.append(GENERATE_CODE_PROMPT)
return prompts
def get_default_temperature(self) -> float: def get_default_temperature(self) -> float:
return TEMPERATURE_BALANCED return TEMPERATURE_BALANCED
@@ -85,7 +109,7 @@ class ChatTool(SimpleTool):
the same schema generation approach while still benefiting from SimpleTool the same schema generation approach while still benefiting from SimpleTool
convenience methods. convenience methods.
""" """
required_fields = ["prompt"] required_fields = ["prompt", "working_directory"]
if self.is_effective_auto_mode(): if self.is_effective_auto_mode():
required_fields.append("model") required_fields.append("model")
@@ -106,6 +130,10 @@ class ChatTool(SimpleTool):
"items": {"type": "string"}, "items": {"type": "string"},
"description": CHAT_FIELD_DESCRIPTIONS["images"], "description": CHAT_FIELD_DESCRIPTIONS["images"],
}, },
"working_directory": {
"type": "string",
"description": CHAT_FIELD_DESCRIPTIONS["working_directory"],
},
"model": self.get_model_field_schema(), "model": self.get_model_field_schema(),
"temperature": { "temperature": {
"type": "number", "type": "number",
@@ -159,7 +187,7 @@ class ChatTool(SimpleTool):
def get_required_fields(self) -> list[str]: def get_required_fields(self) -> list[str]:
"""Required fields for ChatSimple tool""" """Required fields for ChatSimple tool"""
return ["prompt"] return ["prompt", "working_directory"]
# === Hook Method Implementations === # === Hook Method Implementations ===
@@ -173,17 +201,165 @@ class ChatTool(SimpleTool):
# Use SimpleTool's Chat-style prompt preparation # Use SimpleTool's Chat-style prompt preparation
return self.prepare_chat_style_prompt(request) return self.prepare_chat_style_prompt(request)
def _validate_file_paths(self, request) -> Optional[str]:
"""Extend validation to cover the working directory path."""
error = super()._validate_file_paths(request)
if error:
return error
working_directory = getattr(request, "working_directory", None)
if working_directory:
expanded = os.path.expanduser(working_directory)
if not os.path.isabs(expanded):
return (
"Error: 'working_directory' must be an absolute path (you may use '~' which will be expanded). "
f"Received: {working_directory}"
)
return None
    def format_response(self, response: str, request: ChatRequest, model_info: Optional[dict] = None) -> str:
        """
        Format the chat response to match the original Chat tool exactly.
        """
        # Reset the per-turn override; _record_assistant_turn reads it after this call.
        self._last_recordable_response = None
        body = response
        recordable_override: Optional[str] = None
        if self._model_supports_code_generation():
            # Pull any <GENERATED-CODE>...</GENERATED-CODE> block out of the reply.
            block, remainder = self._extract_generated_code_block(response)
            if block:
                sanitized_text = remainder.strip()
                try:
                    artifact_path = self._persist_generated_code_block(block, request.working_directory)
                except Exception as exc:  # pragma: no cover - rare filesystem failures
                    logger.error("Failed to persist generated code block: %s", exc, exc_info=True)
                    warning = (
                        f"WARNING: Unable to write zen_generated.code inside '{request.working_directory}'. "
                        "Check the path permissions and re-run. The generated code block is included below for manual handling."
                    )
                    # History records only prose + warning; the live reply additionally
                    # carries the raw block so the caller can apply it by hand.
                    history_copy = self._join_sections(sanitized_text, warning) if sanitized_text else warning
                    recordable_override = history_copy
                    sanitized_warning = history_copy.strip()
                    body = f"{sanitized_warning}\n\n{block.strip()}".strip()
                else:
                    if not sanitized_text:
                        # Model emitted only the code block; supply default prose.
                        sanitized_text = "Generated code saved to zen_generated.code. Follow the structured instructions in that file exactly before continuing."
                    instruction = self._build_agent_instruction(artifact_path)
                    body = self._join_sections(sanitized_text, instruction)

        final_output = (
            f"{body}\n\n---\n\nAGENT'S TURN: Evaluate this perspective alongside your analysis to "
            "form a comprehensive solution and continue with the user's request and task at hand."
        )

        if recordable_override is not None:
            # Persist the sanitized copy (without the raw code block) to history.
            self._last_recordable_response = (
                f"{recordable_override}\n\n---\n\nAGENT'S TURN: Evaluate this perspective alongside your analysis to "
                "form a comprehensive solution and continue with the user's request and task at hand."
            )
        else:
            self._last_recordable_response = final_output

        return final_output
def _record_assistant_turn(
self, continuation_id: str, response_text: str, request, model_info: Optional[dict]
) -> None:
recordable = self._last_recordable_response if self._last_recordable_response is not None else response_text
try:
super()._record_assistant_turn(continuation_id, recordable, request, model_info)
finally:
self._last_recordable_response = None
def _model_supports_code_generation(self) -> bool:
context = getattr(self, "_model_context", None)
if not context:
return False
try:
capabilities = context.capabilities
except Exception: # pragma: no cover - defensive fallback
return False
return bool(capabilities.allow_code_generation)
def _extract_generated_code_block(self, text: str) -> tuple[Optional[str], str]:
match = re.search(r"<GENERATED-CODE>.*?</GENERATED-CODE>", text, flags=re.DOTALL | re.IGNORECASE)
if not match:
return None, text
block = match.group(0)
before = text[: match.start()].rstrip()
after = text[match.end() :].lstrip()
if before and after:
remainder = f"{before}\n\n{after}"
else:
remainder = before or after
return block, remainder or ""
    def _persist_generated_code_block(self, block: str, working_directory: str) -> Path:
        """Write the generated-code block to zen_generated.code under working_directory.

        Returns the path of the written artifact. Filesystem errors from
        mkdir/write_text propagate to the caller, which handles them.
        """
        expanded = os.path.expanduser(working_directory)
        target_dir = Path(expanded).resolve()
        # NOTE(review): the request field says the directory "must already exist",
        # yet we create it here anyway — confirm which contract is intended.
        target_dir.mkdir(parents=True, exist_ok=True)
        target_file = target_dir / "zen_generated.code"
        if target_file.exists():
            # Best-effort removal of a stale artifact; write_text below still
            # overwrites the file if the unlink fails.
            try:
                target_file.unlink()
            except OSError as exc:
                logger.warning("Unable to remove existing zen_generated.code: %s", exc)
        # Ensure the artifact ends with a trailing newline.
        content = block if block.endswith("\n") else f"{block}\n"
        target_file.write_text(content, encoding="utf-8")
        logger.info("Generated code artifact written to %s", target_file)
        return target_file
@staticmethod
def _build_agent_instruction(artifact_path: Path) -> str:
return (
f"CONTINUING FROM PREVIOUS DISCUSSION: The coding assistant has analyzed our conversation context and generated "
f"a structured implementation plan at `{artifact_path}`. This is a direct continuation of our discussion—all previous "
"context, requirements, and shared code remain relevant.\n"
"\n"
f"MANDATORY NEXT STEP: Open `{artifact_path}` immediately and review the implementation plan:\n"
"1. Read the step-by-step instructions—they reference our previous discussion. You may need to read the file in parts if it's too long.\n"
"2. Review each <NEWFILE:…> or <UPDATED_EXISTING_FILE:…> section in the context of what we've discussed\n"
"3. Verify the proposed changes align with the requirements and code we've already shared\n"
"4. Check for syntax errors, missing imports, or incomplete implementations\n"
"\n"
"Then systematically apply the changes:\n"
"- Create new files or update existing ones as instructed, maintaining code style consistency\n"
"- If updating existing code we discussed earlier, carefully preserve unmodified sections\n"
"- Run syntax validation after each modification\n"
"- Execute relevant tests to confirm functionality\n"
"- Verify the implementation works end-to-end with existing code\n"
"\n"
"Remember: This builds upon our conversation. The generated code reflects the full context of what we've discussed, "
"including any files, requirements, or constraints mentioned earlier. Proceed with implementation immediately."
"Only after you finish applying ALL the changes completely: delete `zen_generated.code` so stale instructions do not linger."
)
@staticmethod
def _join_sections(*sections: str) -> str:
chunks: list[str] = []
for section in sections:
if section:
trimmed = section.strip()
if trimmed:
chunks.append(trimmed)
return "\n\n".join(chunks)
def get_websearch_guidance(self) -> str: def get_websearch_guidance(self) -> str:
""" """
Return Chat tool-style web search guidance. Return Chat tool-style web search guidance.
""" """
return self.get_chat_style_websearch_guidance() return self.get_chat_style_websearch_guidance()
logger = logging.getLogger(__name__)

View File

@@ -140,6 +140,8 @@ class ListModelsTool(BaseTool):
except AttributeError: except AttributeError:
description = "No description available" description = "No description available"
lines = [header, f" - {context_str}", f" - {description}"] lines = [header, f" - {context_str}", f" - {description}"]
if capabilities.allow_code_generation:
lines.append(" - Supports structured code generation")
return lines return lines
# Check each native provider type # Check each native provider type
@@ -187,6 +189,8 @@ class ListModelsTool(BaseTool):
output_lines.append(f"- `{model_name}` - {context_str}") output_lines.append(f"- `{model_name}` - {context_str}")
output_lines.append(f" - {description}") output_lines.append(f" - {description}")
if capabilities.allow_code_generation:
output_lines.append(" - Supports structured code generation")
for alias in capabilities.aliases or []: for alias in capabilities.aliases or []:
if alias != model_name: if alias != model_name:

View File

@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Any, Optional
from mcp.types import TextContent from mcp.types import TextContent
if TYPE_CHECKING: if TYPE_CHECKING:
from providers.shared import ModelCapabilities
from tools.models import ToolModelCategory from tools.models import ToolModelCategory
from config import MCP_PROMPT_SIZE_LIMIT from config import MCP_PROMPT_SIZE_LIMIT
@@ -165,6 +166,42 @@ class BaseTool(ABC):
""" """
pass pass
def get_capability_system_prompts(self, capabilities: Optional["ModelCapabilities"]) -> list[str]:
"""Return additional system prompt snippets gated on model capabilities.
Subclasses can override this hook to append capability-specific
instructions (for example, enabling code-generation exports when a
model advertises support). The default implementation returns an empty
list so no extra instructions are appended.
Args:
capabilities: The resolved capabilities for the active model.
Returns:
List of prompt fragments to append after the base system prompt.
"""
return []
def _augment_system_prompt_with_capabilities(
self, base_prompt: str, capabilities: Optional["ModelCapabilities"]
) -> str:
"""Merge capability-driven prompt addenda with the base system prompt."""
additions: list[str] = []
if capabilities is not None:
additions = [fragment.strip() for fragment in self.get_capability_system_prompts(capabilities) if fragment]
if not additions:
return base_prompt
addition_text = "\n\n".join(additions)
if not base_prompt:
return addition_text
suffix = "" if base_prompt.endswith("\n\n") else "\n\n"
return f"{base_prompt}{suffix}{addition_text}"
def get_annotations(self) -> Optional[dict[str, Any]]: def get_annotations(self) -> Optional[dict[str, Any]]:
""" """
Return optional annotations for this tool. Return optional annotations for this tool.
@@ -413,13 +450,16 @@ class BaseTool(ABC):
for rank, canonical_name, capabilities in filtered[:limit]: for rank, canonical_name, capabilities in filtered[:limit]:
details: list[str] = [] details: list[str] = []
context_str = self._format_context_window(getattr(capabilities, "context_window", 0)) context_str = self._format_context_window(capabilities.context_window)
if context_str: if context_str:
details.append(context_str) details.append(context_str)
if getattr(capabilities, "supports_extended_thinking", False): if capabilities.supports_extended_thinking:
details.append("thinking") details.append("thinking")
if capabilities.allow_code_generation:
details.append("code-gen")
base = f"{canonical_name} (score {rank}" base = f"{canonical_name} (score {rank}"
if details: if details:
base = f"{base}, {', '.join(details)}" base = f"{base}, {', '.join(details)}"

View File

@@ -404,11 +404,15 @@ class SimpleTool(BaseTool):
# Get the provider from model context (clean OOP - no re-fetching) # Get the provider from model context (clean OOP - no re-fetching)
provider = self._model_context.provider provider = self._model_context.provider
capabilities = self._model_context.capabilities
# Get system prompt for this tool # Get system prompt for this tool
base_system_prompt = self.get_system_prompt() base_system_prompt = self.get_system_prompt()
capability_augmented_prompt = self._augment_system_prompt_with_capabilities(
base_system_prompt, capabilities
)
language_instruction = self.get_language_instruction() language_instruction = self.get_language_instruction()
system_prompt = language_instruction + base_system_prompt system_prompt = language_instruction + capability_augmented_prompt
# Generate AI response using the provider # Generate AI response using the provider
logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.get_name()}") logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.get_name()}")
@@ -423,7 +427,6 @@ class SimpleTool(BaseTool):
logger.debug(f"Prompt length: {len(prompt)} characters (~{estimated_tokens:,} tokens)") logger.debug(f"Prompt length: {len(prompt)} characters (~{estimated_tokens:,} tokens)")
# Resolve model capabilities for feature gating # Resolve model capabilities for feature gating
capabilities = self._model_context.capabilities
supports_thinking = capabilities.supports_extended_thinking supports_thinking = capabilities.supports_extended_thinking
# Generate content with provider abstraction # Generate content with provider abstraction

View File

@@ -1480,8 +1480,11 @@ class BaseWorkflowMixin(ABC):
# Get system prompt for this tool with localization support # Get system prompt for this tool with localization support
base_system_prompt = self.get_system_prompt() base_system_prompt = self.get_system_prompt()
capability_augmented_prompt = self._augment_system_prompt_with_capabilities(
base_system_prompt, getattr(self._model_context, "capabilities", None)
)
language_instruction = self.get_language_instruction() language_instruction = self.get_language_instruction()
system_prompt = language_instruction + base_system_prompt system_prompt = language_instruction + capability_augmented_prompt
# Check if tool wants system prompt embedded in main prompt # Check if tool wants system prompt embedded in main prompt
if self.should_embed_system_prompt(): if self.should_embed_system_prompt():