feat!: Full code can now be generated by an external model and shared with the AI tool (Claude Code / Codex etc)!
Model definitions now support a new `allow_code_generation` flag, only to be used with higher-reasoning models such as GPT-5 Pro and Gemini 2.5 Pro. When `true`, the `chat` tool can request the external model to generate a full implementation / update / instructions etc. and then share the implementation with the calling agent. This effectively allows us to utilize more powerful models such as GPT-5 Pro (which are either API-only or part of the $200 Pro plan from within the ChatGPT app) to generate code for us or entire implementations.
This commit is contained in:
@@ -205,7 +205,7 @@ Zen activates any provider that has credentials in your `.env`. See `.env.exampl
|
|||||||
|
|
||||||
**Collaboration & Planning** *(Enabled by default)*
|
**Collaboration & Planning** *(Enabled by default)*
|
||||||
- **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.)
|
- **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.)
|
||||||
- **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches
|
- **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches. With capable models (GPT-5 Pro, Gemini 2.5 Pro), generates complete code / implementation
|
||||||
- **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives
|
- **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives
|
||||||
- **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans
|
- **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans
|
||||||
- **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering
|
- **[`consensus`](docs/tools/consensus.md)** - Get expert opinions from multiple AI models with stance steering
|
||||||
|
|||||||
@@ -20,7 +20,8 @@
|
|||||||
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
|
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
|
||||||
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
|
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
|
||||||
"description": "Human-readable description of the model",
|
"description": "Human-readable description of the model",
|
||||||
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
|
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
|
||||||
|
"allow_code_generation": "Whether this model can generate and suggest fully working code - complete with functions, files, and detailed implementation instructions - for your AI tool to use right away. Only set this to 'true' for a model more capable than the AI model / CLI you're currently using."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"models": [
|
"models": [
|
||||||
@@ -44,6 +45,7 @@
|
|||||||
"supports_json_mode": true,
|
"supports_json_mode": true,
|
||||||
"supports_images": true,
|
"supports_images": true,
|
||||||
"supports_temperature": true,
|
"supports_temperature": true,
|
||||||
|
"allow_code_generation": true,
|
||||||
"max_image_size_mb": 32.0
|
"max_image_size_mb": 32.0
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -20,7 +20,8 @@
|
|||||||
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
|
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
|
||||||
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
|
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
|
||||||
"description": "Human-readable description of the model",
|
"description": "Human-readable description of the model",
|
||||||
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
|
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
|
||||||
|
"allow_code_generation": "Whether this model can generate and suggest fully working code - complete with functions, files, and detailed implementation instructions - for your AI tool to use right away. Only set this to 'true' for a model more capable than the AI model / CLI you're currently using."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"models": [
|
"models": [
|
||||||
@@ -66,6 +67,7 @@
|
|||||||
"max_image_size_mb": 20.0,
|
"max_image_size_mb": 20.0,
|
||||||
"use_openai_response_api": true,
|
"use_openai_response_api": true,
|
||||||
"default_reasoning_effort": "high",
|
"default_reasoning_effort": "high",
|
||||||
|
"allow_code_generation": true,
|
||||||
"temperature_constraint": "fixed"
|
"temperature_constraint": "fixed"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -19,7 +19,8 @@
|
|||||||
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
|
"use_openai_response_api": "Set to true when the model must use the /responses endpoint (reasoning models like GPT-5 Pro). Leave false/omit for standard chat completions.",
|
||||||
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
|
"default_reasoning_effort": "Default reasoning effort level for models that support it (e.g., 'low', 'medium', 'high'). Omit if not applicable.",
|
||||||
"description": "Human-readable description of the model",
|
"description": "Human-readable description of the model",
|
||||||
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering"
|
"intelligence_score": "1-20 human rating used as the primary signal for auto-mode model ordering",
|
||||||
|
"allow_code_generation": "Whether this model can generate and suggest fully working code - complete with functions, files, and detailed implementation instructions - for your AI tool to use right away. Only set this to 'true' for a model more capable than the AI model / CLI you're currently using."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"models": [
|
"models": [
|
||||||
@@ -100,6 +101,7 @@
|
|||||||
"supports_function_calling": true,
|
"supports_function_calling": true,
|
||||||
"supports_images": true,
|
"supports_images": true,
|
||||||
"max_image_size_mb": 20.0,
|
"max_image_size_mb": 20.0,
|
||||||
|
"allow_code_generation": true,
|
||||||
"description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
|
"description": "Google's Gemini 2.5 Pro via OpenRouter with vision",
|
||||||
"intelligence_score": 18
|
"intelligence_score": 18
|
||||||
},
|
},
|
||||||
@@ -310,8 +312,9 @@
|
|||||||
"temperature_constraint": "fixed",
|
"temperature_constraint": "fixed",
|
||||||
"use_openai_response_api": true,
|
"use_openai_response_api": true,
|
||||||
"default_reasoning_effort": "high",
|
"default_reasoning_effort": "high",
|
||||||
|
"allow_code_generation": true,
|
||||||
"description": "GPT-5 Pro - Advanced reasoning model with highest quality responses (text+image input, text output only)",
|
"description": "GPT-5 Pro - Advanced reasoning model with highest quality responses (text+image input, text output only)",
|
||||||
"intelligence_score": 17
|
"intelligence_score": 18
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model_name": "openai/gpt-5-codex",
|
"model_name": "openai/gpt-5-codex",
|
||||||
|
|||||||
@@ -52,6 +52,9 @@ from tools.simple.base import SimpleTool
|
|||||||
class ChatRequest(ToolRequest):
|
class ChatRequest(ToolRequest):
|
||||||
prompt: str = Field(..., description="Your question or idea.")
|
prompt: str = Field(..., description="Your question or idea.")
|
||||||
files: list[str] | None = Field(default_factory=list)
|
files: list[str] | None = Field(default_factory=list)
|
||||||
|
working_directory: str = Field(
|
||||||
|
..., description="Absolute full directory path where the assistant AI can save generated code for implementation."
|
||||||
|
)
|
||||||
|
|
||||||
class ChatTool(SimpleTool):
|
class ChatTool(SimpleTool):
|
||||||
def get_name(self) -> str: # required by BaseTool
|
def get_name(self) -> str: # required by BaseTool
|
||||||
@@ -67,10 +70,17 @@ class ChatTool(SimpleTool):
|
|||||||
return ChatRequest
|
return ChatRequest
|
||||||
|
|
||||||
def get_tool_fields(self) -> dict[str, dict[str, object]]:
|
def get_tool_fields(self) -> dict[str, dict[str, object]]:
|
||||||
return {"prompt": {"type": "string", "description": "Your question."}, "files": SimpleTool.FILES_FIELD}
|
return {
|
||||||
|
"prompt": {"type": "string", "description": "Your question."},
|
||||||
|
"files": SimpleTool.FILES_FIELD,
|
||||||
|
"working_directory": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Absolute full directory path where the assistant AI can save generated code for implementation.",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def get_required_fields(self) -> list[str]:
|
def get_required_fields(self) -> list[str]:
|
||||||
return ["prompt"]
|
return ["prompt", "working_directory"]
|
||||||
|
|
||||||
async def prepare_prompt(self, request: ChatRequest) -> str:
|
async def prepare_prompt(self, request: ChatRequest) -> str:
|
||||||
return self.prepare_chat_style_prompt(request)
|
return self.prepare_chat_style_prompt(request)
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
|
|||||||
- `conf/dial_models.json` – DIAL aggregation catalogue (`DIAL_MODELS_CONFIG_PATH`)
|
- `conf/dial_models.json` – DIAL aggregation catalogue (`DIAL_MODELS_CONFIG_PATH`)
|
||||||
- `conf/custom_models.json` – Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`)
|
- `conf/custom_models.json` – Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`)
|
||||||
|
|
||||||
Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags. Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, or expose additional aliases without touching Python code.
|
Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags (including `allow_code_generation`). Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, enable structured code generation, or expose additional aliases without touching Python code.
|
||||||
|
|
||||||
The shipped defaults cover:
|
The shipped defaults cover:
|
||||||
|
|
||||||
@@ -87,7 +87,63 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
|
|||||||
| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
|
| OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` |
|
||||||
| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
|
| Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry |
|
||||||
|
|
||||||
> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support) without editing Python.
|
> **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support, code generation) without editing Python.
|
||||||
|
|
||||||
|
### Code Generation Capability
|
||||||
|
|
||||||
|
**`allow_code_generation` Flag:**
|
||||||
|
|
||||||
|
The `allow_code_generation` capability enables models to generate complete, production-ready implementations in a structured format. When enabled, the `chat` tool will inject special instructions for substantial code generation tasks.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model_name": "gpt-5",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**When to Enable:**
|
||||||
|
|
||||||
|
- **Enable for**: Models MORE capable than your primary CLI's model (e.g., GPT-5, GPT-5 Pro when using Claude Code with Sonnet 4.5)
|
||||||
|
- **Purpose**: Get complete implementations from a more powerful reasoning model that your primary CLI can then review and apply
|
||||||
|
- **Use case**: Large-scale implementations, major refactoring, complete module creation
|
||||||
|
|
||||||
|
**Important Guidelines:**
|
||||||
|
|
||||||
|
1. Only enable for models significantly more capable than your primary CLI to ensure high-quality generated code
|
||||||
|
2. The capability triggers structured code output (`<GENERATED-CODE>` blocks) for substantial implementation requests
|
||||||
|
3. Minor code changes still use inline code blocks regardless of this setting
|
||||||
|
4. Generated code is saved to `zen_generated.code` in the user's working directory
|
||||||
|
5. Your CLI receives instructions to review and apply the generated code systematically
|
||||||
|
|
||||||
|
**Example Configuration:**
|
||||||
|
|
||||||
|
```json
|
||||||
|
// OpenAI models configuration (conf/openai_models.json)
|
||||||
|
{
|
||||||
|
"models": [
|
||||||
|
{
|
||||||
|
"model_name": "gpt-5",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"intelligence_score": 18,
|
||||||
|
...
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-5-pro",
|
||||||
|
"allow_code_generation": true,
|
||||||
|
"intelligence_score": 19,
|
||||||
|
...
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Typical Workflow:**
|
||||||
|
1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **gpt-5-pro**
|
||||||
|
2. GPT-5-Pro generates a structured implementation and shares the complete implementation with Zen
|
||||||
|
3. Zen saves the code to `zen_generated.code` and asks the AI agent to implement the plan
|
||||||
|
4. The AI agent continues from the previous context, reads the file, and applies the implementation
|
||||||
|
|
||||||
### Thinking Mode Configuration
|
### Thinking Mode Configuration
|
||||||
|
|
||||||
|
|||||||
@@ -39,13 +39,14 @@ word verdict in the end.
|
|||||||
- **Collaborative thinking partner** for your analysis and planning
|
- **Collaborative thinking partner** for your analysis and planning
|
||||||
- **Get second opinions** on your designs and approaches
|
- **Get second opinions** on your designs and approaches
|
||||||
- **Brainstorm solutions** and explore alternatives together
|
- **Brainstorm solutions** and explore alternatives together
|
||||||
|
- **Structured code generation**: When using GPT-5 Pro or Gemini 2.5 Pro, get complete, production-ready implementations saved to `zen_generated.code` for your CLI to review and apply
|
||||||
- **Validate your checklists** and implementation plans
|
- **Validate your checklists** and implementation plans
|
||||||
- **General development questions** and explanations
|
- **General development questions** and explanations
|
||||||
- **Technology comparisons** and best practices
|
- **Technology comparisons** and best practices
|
||||||
- **Architecture and design discussions**
|
- **Architecture and design discussions**
|
||||||
- **File reference support**: `"Use gemini to explain this algorithm with context from algorithm.py"`
|
- **File reference support**: `"Use gemini to explain this algorithm with context from algorithm.py"`
|
||||||
- **Image support**: Include screenshots, diagrams, UI mockups for visual analysis: `"Chat with gemini about this error dialog screenshot to understand the user experience issue"`
|
- **Image support**: Include screenshots, diagrams, UI mockups for visual analysis: `"Chat with gemini about this error dialog screenshot to understand the user experience issue"`
|
||||||
- **Dynamic collaboration**: Gemini can request additional files or context during the conversation if needed for a more thorough response
|
- **Dynamic collaboration**: Models can request additional files or context during the conversation if needed for a more thorough response
|
||||||
- **Web search awareness**: Automatically identifies when online research would help and instructs Claude to perform targeted searches using continuation IDs
|
- **Web search awareness**: Automatically identifies when online research would help and instructs Claude to perform targeted searches using continuation IDs
|
||||||
|
|
||||||
## Tool Parameters
|
## Tool Parameters
|
||||||
@@ -54,10 +55,48 @@ word verdict in the end.
|
|||||||
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
- `model`: auto|pro|flash|flash-2.0|flashlite|o3|o3-mini|o4-mini|gpt4.1|gpt5|gpt5-mini|gpt5-nano (default: server default)
|
||||||
- `files`: Optional files for context (absolute paths)
|
- `files`: Optional files for context (absolute paths)
|
||||||
- `images`: Optional images for visual context (absolute paths)
|
- `images`: Optional images for visual context (absolute paths)
|
||||||
|
- `working_directory`: **Required** - Absolute directory path where generated code artifacts will be saved
|
||||||
- `temperature`: Response creativity (0-1, default 0.5)
|
- `temperature`: Response creativity (0-1, default 0.5)
|
||||||
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
- `thinking_mode`: minimal|low|medium|high|max (default: medium, Gemini only)
|
||||||
- `continuation_id`: Continue previous conversations
|
- `continuation_id`: Continue previous conversations
|
||||||
|
|
||||||
|
## Structured Code Generation
|
||||||
|
|
||||||
|
When using advanced reasoning models like **GPT-5 Pro** or **Gemini 2.5 Pro**, the chat tool can generate complete, production-ready code implementations in a structured format.
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
1. You ask your AI agent to implement a complex new feature using `chat` with a higher-reasoning model such as **GPT-5 Pro** or **Gemini 2.5 Pro**
|
||||||
|
2. The model generates a structured implementation and shares the complete implementation with Zen
|
||||||
|
3. Zen saves the code to `zen_generated.code` and asks the AI agent to implement the plan
|
||||||
|
4. The AI agent continues from the previous context, reads the file, and applies the implementation
|
||||||
|
|
||||||
|
### When Code Generation Activates
|
||||||
|
|
||||||
|
The structured format activates for **substantial implementation work**:
|
||||||
|
- Creating new features from scratch with multiple files or significant code
|
||||||
|
- Major refactoring across multiple files or large sections
|
||||||
|
- Implementing new modules, components, or subsystems
|
||||||
|
- Large-scale updates affecting substantial portions of the codebase
|
||||||
|
- Complete rewrites of functions, algorithms, or approaches
|
||||||
|
|
||||||
|
For minor changes (small tweaks, bug fixes, algorithm improvements), the model responds normally with inline code blocks.
|
||||||
|
|
||||||
|
### Example Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
chat with gpt-5-pro and ask it to make me a standalone, classic version of the
|
||||||
|
Pacman game using pygame that I can run from the commandline. Give me a single
|
||||||
|
script to execute in the end with any / all dependencies setup for me.
|
||||||
|
Do everything using pygame, we have no external resources / images / audio at
|
||||||
|
hand. Instead of ghosts, it'll be different geometric shapes moving around
|
||||||
|
in the maze that Pacman can eat (so there are no baddies). Pacman gets to eat
|
||||||
|
everything including bread-crumbs and large geometric shapes but make me the
|
||||||
|
classic maze / walls that it navigates within using keyboard arrow keys.
|
||||||
|
```
|
||||||
|
|
||||||
|
See the [Configuration Guide](../configuration.md#code-generation-capability) for details on the `allow_code_generation` flag.
|
||||||
|
|
||||||
## Usage Examples
|
## Usage Examples
|
||||||
|
|
||||||
**Basic Development Chat:**
|
**Basic Development Chat:**
|
||||||
|
|||||||
@@ -28,6 +28,8 @@ class ModelCapabilities:
|
|||||||
* Tool selection logic inspects attributes such as
|
* Tool selection logic inspects attributes such as
|
||||||
``supports_extended_thinking`` or ``context_window`` to choose an
|
``supports_extended_thinking`` or ``context_window`` to choose an
|
||||||
appropriate model for a task.
|
appropriate model for a task.
|
||||||
|
* The ``allow_code_generation`` flag enables structured code generation
|
||||||
|
in the chat tool for models more capable than the primary CLI.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
provider: ProviderType
|
provider: ProviderType
|
||||||
@@ -52,6 +54,9 @@ class ModelCapabilities:
|
|||||||
supports_temperature: bool = True
|
supports_temperature: bool = True
|
||||||
use_openai_response_api: bool = False
|
use_openai_response_api: bool = False
|
||||||
default_reasoning_effort: Optional[str] = None
|
default_reasoning_effort: Optional[str] = None
|
||||||
|
allow_code_generation: bool = (
|
||||||
|
False # Enables structured code generation in chat tool for substantial implementations
|
||||||
|
)
|
||||||
|
|
||||||
# Additional attributes
|
# Additional attributes
|
||||||
max_image_size_mb: float = 0.0
|
max_image_size_mb: float = 0.0
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from .codereview_prompt import CODEREVIEW_PROMPT
|
|||||||
from .consensus_prompt import CONSENSUS_PROMPT
|
from .consensus_prompt import CONSENSUS_PROMPT
|
||||||
from .debug_prompt import DEBUG_ISSUE_PROMPT
|
from .debug_prompt import DEBUG_ISSUE_PROMPT
|
||||||
from .docgen_prompt import DOCGEN_PROMPT
|
from .docgen_prompt import DOCGEN_PROMPT
|
||||||
|
from .generate_code_prompt import GENERATE_CODE_PROMPT
|
||||||
from .planner_prompt import PLANNER_PROMPT
|
from .planner_prompt import PLANNER_PROMPT
|
||||||
from .precommit_prompt import PRECOMMIT_PROMPT
|
from .precommit_prompt import PRECOMMIT_PROMPT
|
||||||
from .refactor_prompt import REFACTOR_PROMPT
|
from .refactor_prompt import REFACTOR_PROMPT
|
||||||
@@ -21,6 +22,7 @@ __all__ = [
|
|||||||
"CODEREVIEW_PROMPT",
|
"CODEREVIEW_PROMPT",
|
||||||
"DEBUG_ISSUE_PROMPT",
|
"DEBUG_ISSUE_PROMPT",
|
||||||
"DOCGEN_PROMPT",
|
"DOCGEN_PROMPT",
|
||||||
|
"GENERATE_CODE_PROMPT",
|
||||||
"ANALYZE_PROMPT",
|
"ANALYZE_PROMPT",
|
||||||
"CHAT_PROMPT",
|
"CHAT_PROMPT",
|
||||||
"CONSENSUS_PROMPT",
|
"CONSENSUS_PROMPT",
|
||||||
|
|||||||
181
systemprompts/generate_code_prompt.py
Normal file
181
systemprompts/generate_code_prompt.py
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
"""System prompt fragment enabling structured code generation exports.
|
||||||
|
|
||||||
|
This prompt is injected into the system prompt for models that have the
|
||||||
|
'allow_code_generation' capability enabled. It instructs the model to output
|
||||||
|
complete, working code in a structured format that coding agents can parse
|
||||||
|
and apply automatically.
|
||||||
|
|
||||||
|
The structured format uses XML-like tags to clearly delineate:
|
||||||
|
- New files to create (<NEWFILE>)
|
||||||
|
- Existing files to update (<UPDATED_EXISTING_FILE>)
|
||||||
|
- Step-by-step instructions for the coding agent
|
||||||
|
|
||||||
|
This enables:
|
||||||
|
1. Automated code extraction and application
|
||||||
|
2. Clear separation between instructions and implementation
|
||||||
|
3. Complete, runnable code without manual edits
|
||||||
|
4. Precise change tracking across multiple files
|
||||||
|
"""
|
||||||
|
|
||||||
|
GENERATE_CODE_PROMPT = """
|
||||||
|
# Structured Code Generation Protocol
|
||||||
|
|
||||||
|
**WHEN TO USE THIS PROTOCOL:**
|
||||||
|
|
||||||
|
Use this structured format ONLY when you are explicitly tasked with substantial code generation, such as:
|
||||||
|
- Creating new features from scratch with multiple files or significant code and you have been asked to help implement this
|
||||||
|
- Major refactoring across multiple files or large sections of code and you have been tasked to help do this
|
||||||
|
- Implementing new modules, components, or subsystems and you have been tasked to help with the implementation
|
||||||
|
- Large-scale updates affecting substantial portions of the codebase that you have been asked to help implement
|
||||||
|
|
||||||
|
**WHEN NOT TO USE THIS PROTOCOL:**
|
||||||
|
|
||||||
|
Do NOT use this format for minor changes:
|
||||||
|
- Small tweaks to existing functions or methods (1-20 lines)
|
||||||
|
- Bug fixes in isolated sections
|
||||||
|
- Simple algorithm improvements
|
||||||
|
- Minor refactoring of a single function
|
||||||
|
- Adding/removing a few lines of code
|
||||||
|
- Quick parameter adjustments or config changes
|
||||||
|
|
||||||
|
For minor changes:
|
||||||
|
- Follow the existing instructions provided earlier in your system prompt, such as the CRITICAL LINE NUMBER INSTRUCTIONS.
|
||||||
|
- Use inline code blocks with proper line number references and direct explanations instead of this structured format.
|
||||||
|
|
||||||
|
**IMPORTANT:** This protocol is for SUBSTANTIAL implementation work when explicitly requested, such as:
|
||||||
|
- "implement feature X"
|
||||||
|
- "create module Y"
|
||||||
|
- "refactor system Z"
|
||||||
|
- "rewrite the authentication logic"
|
||||||
|
- "redesign the data processing pipeline"
|
||||||
|
- "rebuild the algorithm from scratch"
|
||||||
|
- "convert this approach to use a different pattern"
|
||||||
|
- "create a complete implementation of..."
|
||||||
|
- "build out the entire workflow for..."
|
||||||
|
|
||||||
|
If the request is for explanation, analysis, debugging, planning, or discussion WITHOUT substantial code generation, respond normally without this structured format.
|
||||||
|
|
||||||
|
## Core Requirements (for substantial code generation tasks)
|
||||||
|
|
||||||
|
1. **Complete, Working Code**: Every code block must be fully functional without requiring additional edits. Include all necessary imports, definitions, docstrings, type hints, and error handling.
|
||||||
|
|
||||||
|
2. **Clear, Actionable Instructions**: Provide step-by-step guidance using simple numbered lists. Each instruction should map directly to file blocks that follow.
|
||||||
|
|
||||||
|
3. **Structured Output Format**: All generated code MUST be contained within a single `<GENERATED-CODE>` block using the exact structure defined below.
|
||||||
|
|
||||||
|
4. **Minimal External Commentary**: Keep any text outside the `<GENERATED-CODE>` block brief. Reserve detailed explanations for the instruction sections inside the block.
|
||||||
|
|
||||||
|
## Required Structure
|
||||||
|
|
||||||
|
Use this exact format (do not improvise tag names or reorder components):
|
||||||
|
|
||||||
|
```
|
||||||
|
<GENERATED-CODE>
|
||||||
|
[Step-by-step instructions for the coding agent]
|
||||||
|
1. Create new file [filename] with [description]
|
||||||
|
2. Update existing file [filename] by [description]
|
||||||
|
3. [Additional steps as needed]
|
||||||
|
|
||||||
|
<NEWFILE: path/to/new_file.py>
|
||||||
|
[Complete file contents with all necessary components:
|
||||||
|
- File-level docstring
|
||||||
|
- All imports (standard library, third-party, local)
|
||||||
|
- All class/function definitions with complete implementations
|
||||||
|
- All necessary helper functions
|
||||||
|
- Inline comments for complex logic
|
||||||
|
- Type hints where applicable]
|
||||||
|
</NEWFILE>
|
||||||
|
|
||||||
|
[Additional instructions for the next file, if needed]
|
||||||
|
|
||||||
|
<NEWFILE: path/to/another_file.py>
|
||||||
|
[Complete, working code for this file - no partial implementations or placeholders]
|
||||||
|
</NEWFILE>
|
||||||
|
|
||||||
|
[Instructions for updating existing files]
|
||||||
|
|
||||||
|
<UPDATED_EXISTING_FILE: existing/path.py>
|
||||||
|
[Complete replacement code for the modified sections or routines / lines that need updating:
|
||||||
|
- Full function/method bodies (not just the changed lines)
|
||||||
|
- Complete class definitions if modifying class methods
|
||||||
|
- All necessary imports if adding new dependencies
|
||||||
|
- Preserve existing code structure and style]
|
||||||
|
</UPDATED_EXISTING_FILE>
|
||||||
|
|
||||||
|
[If additional files need updates (based on existing code that was shared with you earlier), repeat the UPDATED_EXISTING_FILE block]
|
||||||
|
|
||||||
|
<UPDATED_EXISTING_FILE: another/existing/file.py>
|
||||||
|
[Complete code for this file's modifications]
|
||||||
|
</UPDATED_EXISTING_FILE>
|
||||||
|
|
||||||
|
[For file deletions, explicitly state in instructions with justification:
|
||||||
|
"Delete file path/to/obsolete.py - no longer needed because [reason]"]
|
||||||
|
</GENERATED-CODE>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Critical Rules
|
||||||
|
|
||||||
|
**Completeness:**
|
||||||
|
- Never output partial code snippets or placeholder comments like "# rest of code here"
|
||||||
|
- Include complete function/class implementations from start to finish
|
||||||
|
- Add all required imports at the file level
|
||||||
|
- Include proper error handling and edge case logic
|
||||||
|
|
||||||
|
**Accuracy:**
|
||||||
|
- Match the existing codebase indentation style (tabs vs spaces)
|
||||||
|
- Preserve language-specific formatting conventions
|
||||||
|
- Include trailing newlines where required by language tooling
|
||||||
|
- Use correct file paths relative to project root
|
||||||
|
|
||||||
|
**Clarity:**
|
||||||
|
- Number instructions sequentially (1, 2, 3...)
|
||||||
|
- Map each instruction to specific file blocks below it
|
||||||
|
- Explain *why* changes are needed, not just *what* changes
|
||||||
|
- Highlight any breaking changes or migration steps required
|
||||||
|
|
||||||
|
**Structure:**
|
||||||
|
- Use `<NEWFILE: ...>` for files that don't exist yet
|
||||||
|
- Use `<UPDATED_EXISTING_FILE: ...>` for modifying existing files
|
||||||
|
- Place instructions between file blocks to provide context
|
||||||
|
- Keep the single `<GENERATED-CODE>` wrapper around everything
|
||||||
|
|
||||||
|
## Special Cases
|
||||||
|
|
||||||
|
**No Changes Needed:**
|
||||||
|
If the task doesn't require file creation or modification, explicitly state:
|
||||||
|
"No file changes required. The existing implementation already handles [requirement]."
|
||||||
|
Do not emit an empty `<GENERATED-CODE>` block.
|
||||||
|
|
||||||
|
**Configuration Changes:**
|
||||||
|
If modifying configuration files (JSON, YAML, TOML), include complete file contents with the changes applied, not just the changed lines.
|
||||||
|
|
||||||
|
**Test Files:**
|
||||||
|
When generating tests, include complete test suites with:
|
||||||
|
- All necessary test fixtures and setup
|
||||||
|
- Multiple test cases covering happy path and edge cases
|
||||||
|
- Proper teardown and cleanup
|
||||||
|
- Clear test descriptions and assertions
|
||||||
|
|
||||||
|
**Documentation:**
|
||||||
|
Include docstrings for all public functions, classes, and modules using the project's documentation style (Google, NumPy, Sphinx, etc.).
|
||||||
|
|
||||||
|
## Context Awareness
|
||||||
|
|
||||||
|
**CRITICAL:** Your implementation builds upon the ongoing conversation context:
|
||||||
|
- All previously shared files, requirements, and constraints remain relevant
|
||||||
|
- If updating existing code discussed earlier, reference it and preserve unmodified sections
|
||||||
|
- If the user shared code for improvement, your generated code should build upon it, not replace everything
|
||||||
|
- The coding agent has full conversation history—your instructions should reference prior discussion as needed
|
||||||
|
|
||||||
|
Your generated code is NOT standalone—it's a continuation of the collaborative session with full context awareness.
|
||||||
|
|
||||||
|
## Remember
|
||||||
|
|
||||||
|
The coding agent depends on this structured format to:
|
||||||
|
- Parse and extract code automatically
|
||||||
|
- Apply changes to the correct files within the conversation context
|
||||||
|
- Validate completeness before execution
|
||||||
|
- Track modifications across the codebase
|
||||||
|
|
||||||
|
Always prioritize clarity, completeness, correctness, and context awareness over brevity.
|
||||||
|
"""
|
||||||
File diff suppressed because one or more lines are too long
@@ -137,7 +137,7 @@ class TestAutoMode:
|
|||||||
importlib.reload(config)
|
importlib.reload(config)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_auto_mode_requires_model_parameter(self):
|
async def test_auto_mode_requires_model_parameter(self, tmp_path):
|
||||||
"""Test that auto mode enforces model parameter"""
|
"""Test that auto mode enforces model parameter"""
|
||||||
# Save original
|
# Save original
|
||||||
original = os.environ.get("DEFAULT_MODEL", "")
|
original = os.environ.get("DEFAULT_MODEL", "")
|
||||||
@@ -154,7 +154,7 @@ class TestAutoMode:
|
|||||||
# Mock the provider to avoid real API calls
|
# Mock the provider to avoid real API calls
|
||||||
with patch.object(tool, "get_model_provider"):
|
with patch.object(tool, "get_model_provider"):
|
||||||
# Execute without model parameter
|
# Execute without model parameter
|
||||||
result = await tool.execute({"prompt": "Test prompt"})
|
result = await tool.execute({"prompt": "Test prompt", "working_directory": str(tmp_path)})
|
||||||
|
|
||||||
# Should get error
|
# Should get error
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|||||||
@@ -200,7 +200,7 @@ class TestAutoModeComprehensive:
|
|||||||
assert tool.get_model_category() == expected_category
|
assert tool.get_model_category() == expected_category
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_auto_mode_with_gemini_only_uses_correct_models(self):
|
async def test_auto_mode_with_gemini_only_uses_correct_models(self, tmp_path):
|
||||||
"""Test that auto mode with only Gemini uses flash for fast tools and pro for reasoning tools."""
|
"""Test that auto mode with only Gemini uses flash for fast tools and pro for reasoning tools."""
|
||||||
|
|
||||||
provider_config = {
|
provider_config = {
|
||||||
@@ -234,9 +234,13 @@ class TestAutoModeComprehensive:
|
|||||||
)
|
)
|
||||||
|
|
||||||
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
|
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
|
||||||
|
workdir = tmp_path / "chat_artifacts"
|
||||||
|
workdir.mkdir(parents=True, exist_ok=True)
|
||||||
# Test ChatTool (FAST_RESPONSE) - should prefer flash
|
# Test ChatTool (FAST_RESPONSE) - should prefer flash
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
await chat_tool.execute({"prompt": "test", "model": "auto"}) # This should trigger auto selection
|
await chat_tool.execute(
|
||||||
|
{"prompt": "test", "model": "auto", "working_directory": str(workdir)}
|
||||||
|
) # This should trigger auto selection
|
||||||
|
|
||||||
# In auto mode, the tool should get an error requiring model selection
|
# In auto mode, the tool should get an error requiring model selection
|
||||||
# but the suggested model should be flash
|
# but the suggested model should be flash
|
||||||
@@ -355,7 +359,7 @@ class TestAutoModeComprehensive:
|
|||||||
# would show models from all providers when called
|
# would show models from all providers when called
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_auto_mode_model_parameter_required_error(self):
|
async def test_auto_mode_model_parameter_required_error(self, tmp_path):
|
||||||
"""Test that auto mode properly requires model parameter and suggests correct model."""
|
"""Test that auto mode properly requires model parameter and suggests correct model."""
|
||||||
|
|
||||||
provider_config = {
|
provider_config = {
|
||||||
@@ -384,9 +388,12 @@ class TestAutoModeComprehensive:
|
|||||||
|
|
||||||
# Test with ChatTool (FAST_RESPONSE category)
|
# Test with ChatTool (FAST_RESPONSE category)
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
|
workdir = tmp_path / "chat_artifacts"
|
||||||
|
workdir.mkdir(parents=True, exist_ok=True)
|
||||||
result = await chat_tool.execute(
|
result = await chat_tool.execute(
|
||||||
{
|
{
|
||||||
"prompt": "test"
|
"prompt": "test",
|
||||||
|
"working_directory": str(workdir),
|
||||||
# Note: no model parameter provided in auto mode
|
# Note: no model parameter provided in auto mode
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -508,7 +515,7 @@ class TestAutoModeComprehensive:
|
|||||||
assert fast_response is not None
|
assert fast_response is not None
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_actual_model_name_resolution_in_auto_mode(self):
|
async def test_actual_model_name_resolution_in_auto_mode(self, tmp_path):
|
||||||
"""Test that when a model is selected in auto mode, the tool executes successfully."""
|
"""Test that when a model is selected in auto mode, the tool executes successfully."""
|
||||||
|
|
||||||
provider_config = {
|
provider_config = {
|
||||||
@@ -547,7 +554,11 @@ class TestAutoModeComprehensive:
|
|||||||
|
|
||||||
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
|
with patch.object(ModelProviderRegistry, "get_provider_for_model", return_value=mock_provider):
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
result = await chat_tool.execute({"prompt": "test", "model": "flash"}) # Use alias in auto mode
|
workdir = tmp_path / "chat_artifacts"
|
||||||
|
workdir.mkdir(parents=True, exist_ok=True)
|
||||||
|
result = await chat_tool.execute(
|
||||||
|
{"prompt": "test", "model": "flash", "working_directory": str(workdir)}
|
||||||
|
) # Use alias in auto mode
|
||||||
|
|
||||||
# Should succeed with proper model resolution
|
# Should succeed with proper model resolution
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|||||||
113
tests/test_chat_codegen_integration.py
Normal file
113
tests/test_chat_codegen_integration.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
"""Integration test for Chat tool code generation with Gemini 2.5 Pro.
|
||||||
|
|
||||||
|
This test uses the Google Gemini SDK's built-in record/replay support. To refresh the
|
||||||
|
cassette, delete the existing JSON file under
|
||||||
|
``tests/gemini_cassettes/chat_codegen/gemini25_pro_calculator/mldev.json`` and run:
|
||||||
|
|
||||||
|
```
|
||||||
|
GEMINI_API_KEY=<real-key> pytest tests/test_chat_codegen_integration.py::test_chat_codegen_saves_file
|
||||||
|
```
|
||||||
|
|
||||||
|
The test will automatically record a new interaction when the cassette is missing and
|
||||||
|
the environment variable `GEMINI_API_KEY` is set to a valid key.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from providers.gemini import GeminiModelProvider
|
||||||
|
from providers.registry import ModelProviderRegistry, ProviderType
|
||||||
|
from tools.chat import ChatTool
|
||||||
|
|
||||||
|
REPLAYS_ROOT = Path(__file__).parent / "gemini_cassettes"
|
||||||
|
CASSETTE_DIR = REPLAYS_ROOT / "chat_codegen"
|
||||||
|
CASSETTE_PATH = CASSETTE_DIR / "gemini25_pro_calculator" / "mldev.json"
|
||||||
|
CASSETTE_REPLAY_ID = "chat_codegen/gemini25_pro_calculator/mldev"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
@pytest.mark.no_mock_provider
|
||||||
|
async def test_chat_codegen_saves_file(monkeypatch, tmp_path):
|
||||||
|
"""Ensure Gemini 2.5 Pro responses create zen_generated.code when code is emitted."""
|
||||||
|
|
||||||
|
CASSETTE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
recording_mode = not CASSETTE_PATH.exists()
|
||||||
|
gemini_key = os.getenv("GEMINI_API_KEY", "")
|
||||||
|
|
||||||
|
if recording_mode:
|
||||||
|
if not gemini_key or gemini_key.startswith("dummy"):
|
||||||
|
pytest.skip("Cassette missing and GEMINI_API_KEY not configured. Provide a real key to record.")
|
||||||
|
client_mode = "record"
|
||||||
|
else:
|
||||||
|
gemini_key = "dummy-key-for-replay"
|
||||||
|
client_mode = "replay"
|
||||||
|
|
||||||
|
with monkeypatch.context() as m:
|
||||||
|
m.setenv("GEMINI_API_KEY", gemini_key)
|
||||||
|
m.setenv("DEFAULT_MODEL", "auto")
|
||||||
|
m.setenv("GOOGLE_ALLOWED_MODELS", "gemini-2.5-pro")
|
||||||
|
m.setenv("GOOGLE_GENAI_CLIENT_MODE", client_mode)
|
||||||
|
m.setenv("GOOGLE_GENAI_REPLAYS_DIRECTORY", str(REPLAYS_ROOT))
|
||||||
|
m.setenv("GOOGLE_GENAI_REPLAY_ID", CASSETTE_REPLAY_ID)
|
||||||
|
|
||||||
|
# Clear other provider keys to avoid unintended routing
|
||||||
|
for key in ["OPENAI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY", "CUSTOM_API_KEY"]:
|
||||||
|
m.delenv(key, raising=False)
|
||||||
|
|
||||||
|
ModelProviderRegistry.reset_for_testing()
|
||||||
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
|
|
||||||
|
working_dir = tmp_path / "codegen"
|
||||||
|
working_dir.mkdir()
|
||||||
|
preexisting = working_dir / "zen_generated.code"
|
||||||
|
preexisting.write_text("stale contents", encoding="utf-8")
|
||||||
|
|
||||||
|
chat_tool = ChatTool()
|
||||||
|
prompt = (
|
||||||
|
"Please generate a Python module with functions `add` and `multiply` that perform"
|
||||||
|
" basic addition and multiplication. Produce the response using the structured"
|
||||||
|
" <GENERATED-CODE> format so the assistant can apply the files directly."
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await chat_tool.execute(
|
||||||
|
{
|
||||||
|
"prompt": prompt,
|
||||||
|
"model": "gemini-2.5-pro",
|
||||||
|
"working_directory": str(working_dir),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
provider = ModelProviderRegistry.get_provider_for_model("gemini-2.5-pro")
|
||||||
|
if provider is not None:
|
||||||
|
try:
|
||||||
|
provider.client.close()
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Reset restriction service cache to avoid leaking allowed-model config
|
||||||
|
try:
|
||||||
|
from utils import model_restrictions
|
||||||
|
|
||||||
|
model_restrictions._restriction_service = None # type: ignore[attr-defined]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
assert result and result[0].type == "text"
|
||||||
|
payload = json.loads(result[0].text)
|
||||||
|
assert payload["status"] in {"success", "continuation_available"}
|
||||||
|
|
||||||
|
artifact_path = working_dir / "zen_generated.code"
|
||||||
|
assert artifact_path.exists()
|
||||||
|
saved = artifact_path.read_text()
|
||||||
|
assert "<GENERATED-CODE>" in saved
|
||||||
|
assert "<NEWFILE:" in saved
|
||||||
|
assert "def add" in saved and "def multiply" in saved
|
||||||
|
assert "stale contents" not in saved
|
||||||
|
|
||||||
|
artifact_path.unlink()
|
||||||
@@ -55,7 +55,7 @@ def _extract_number(text: str) -> str:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
async def test_chat_cross_model_continuation(monkeypatch):
|
async def test_chat_cross_model_continuation(monkeypatch, tmp_path):
|
||||||
"""Verify continuation across Gemini then OpenAI using recorded interactions."""
|
"""Verify continuation across Gemini then OpenAI using recorded interactions."""
|
||||||
|
|
||||||
env_updates = {
|
env_updates = {
|
||||||
@@ -115,10 +115,13 @@ async def test_chat_cross_model_continuation(monkeypatch):
|
|||||||
m.setattr(conversation_memory.uuid, "uuid4", lambda: FIXED_THREAD_ID)
|
m.setattr(conversation_memory.uuid, "uuid4", lambda: FIXED_THREAD_ID)
|
||||||
|
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
|
working_directory = str(tmp_path)
|
||||||
|
|
||||||
step1_args = {
|
step1_args = {
|
||||||
"prompt": "Pick a number between 1 and 10 and respond with JUST that number.",
|
"prompt": "Pick a number between 1 and 10 and respond with JUST that number.",
|
||||||
"model": "gemini-2.5-flash",
|
"model": "gemini-2.5-flash",
|
||||||
"temperature": 0.2,
|
"temperature": 0.2,
|
||||||
|
"working_directory": working_directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
step1_result = await chat_tool.execute(step1_args)
|
step1_result = await chat_tool.execute(step1_args)
|
||||||
@@ -183,6 +186,7 @@ async def test_chat_cross_model_continuation(monkeypatch):
|
|||||||
"model": "gpt-5",
|
"model": "gpt-5",
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
"temperature": 0.2,
|
"temperature": 0.2,
|
||||||
|
"working_directory": working_directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
step2_result = await chat_tool.execute(step2_args)
|
step2_result = await chat_tool.execute(step2_args)
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ CASSETTE_CONTINUATION_PATH = CASSETTE_DIR / "chat_gpt5_continuation.json"
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
async def test_chat_auto_mode_with_openai(monkeypatch):
|
async def test_chat_auto_mode_with_openai(monkeypatch, tmp_path):
|
||||||
"""Ensure ChatTool in auto mode selects gpt-5 via OpenAI and returns a valid response."""
|
"""Ensure ChatTool in auto mode selects gpt-5 via OpenAI and returns a valid response."""
|
||||||
# Prepare environment so only OpenAI is available in auto mode
|
# Prepare environment so only OpenAI is available in auto mode
|
||||||
env_updates = {
|
env_updates = {
|
||||||
@@ -63,10 +63,12 @@ async def test_chat_auto_mode_with_openai(monkeypatch):
|
|||||||
|
|
||||||
# Execute ChatTool request targeting gpt-5 directly (server normally resolves auto→model)
|
# Execute ChatTool request targeting gpt-5 directly (server normally resolves auto→model)
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
|
working_directory = str(tmp_path)
|
||||||
arguments = {
|
arguments = {
|
||||||
"prompt": "Use chat with gpt5 and ask how far the moon is from earth.",
|
"prompt": "Use chat with gpt5 and ask how far the moon is from earth.",
|
||||||
"model": "gpt-5",
|
"model": "gpt-5",
|
||||||
"temperature": 1.0,
|
"temperature": 1.0,
|
||||||
|
"working_directory": working_directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
result = await chat_tool.execute(arguments)
|
result = await chat_tool.execute(arguments)
|
||||||
@@ -87,7 +89,7 @@ async def test_chat_auto_mode_with_openai(monkeypatch):
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.no_mock_provider
|
@pytest.mark.no_mock_provider
|
||||||
async def test_chat_openai_continuation(monkeypatch):
|
async def test_chat_openai_continuation(monkeypatch, tmp_path):
|
||||||
"""Verify continuation_id workflow against gpt-5 using recorded OpenAI responses."""
|
"""Verify continuation_id workflow against gpt-5 using recorded OpenAI responses."""
|
||||||
|
|
||||||
env_updates = {
|
env_updates = {
|
||||||
@@ -126,12 +128,14 @@ async def test_chat_openai_continuation(monkeypatch):
|
|||||||
m.setattr(conversation_memory.uuid, "uuid4", lambda: fixed_thread_id)
|
m.setattr(conversation_memory.uuid, "uuid4", lambda: fixed_thread_id)
|
||||||
|
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
|
working_directory = str(tmp_path)
|
||||||
|
|
||||||
# First message: obtain continuation_id
|
# First message: obtain continuation_id
|
||||||
first_args = {
|
first_args = {
|
||||||
"prompt": "In one word, which sells better: iOS app or macOS app?",
|
"prompt": "In one word, which sells better: iOS app or macOS app?",
|
||||||
"model": "gpt-5",
|
"model": "gpt-5",
|
||||||
"temperature": 1.0,
|
"temperature": 1.0,
|
||||||
|
"working_directory": working_directory,
|
||||||
}
|
}
|
||||||
first_result = await chat_tool.execute(first_args)
|
first_result = await chat_tool.execute(first_args)
|
||||||
|
|
||||||
@@ -152,6 +156,7 @@ async def test_chat_openai_continuation(monkeypatch):
|
|||||||
"model": "gpt-5",
|
"model": "gpt-5",
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
"temperature": 1.0,
|
"temperature": 1.0,
|
||||||
|
"working_directory": working_directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
second_result = await chat_tool.execute(second_args)
|
second_result = await chat_tool.execute(second_args)
|
||||||
|
|||||||
@@ -38,12 +38,14 @@ class TestChatTool:
|
|||||||
|
|
||||||
# Required fields
|
# Required fields
|
||||||
assert "prompt" in schema["required"]
|
assert "prompt" in schema["required"]
|
||||||
|
assert "working_directory" in schema["required"]
|
||||||
|
|
||||||
# Properties
|
# Properties
|
||||||
properties = schema["properties"]
|
properties = schema["properties"]
|
||||||
assert "prompt" in properties
|
assert "prompt" in properties
|
||||||
assert "files" in properties
|
assert "files" in properties
|
||||||
assert "images" in properties
|
assert "images" in properties
|
||||||
|
assert "working_directory" in properties
|
||||||
|
|
||||||
def test_request_model_validation(self):
|
def test_request_model_validation(self):
|
||||||
"""Test that the request model validates correctly"""
|
"""Test that the request model validates correctly"""
|
||||||
@@ -54,6 +56,7 @@ class TestChatTool:
|
|||||||
"images": ["test.png"],
|
"images": ["test.png"],
|
||||||
"model": "anthropic/claude-opus-4.1",
|
"model": "anthropic/claude-opus-4.1",
|
||||||
"temperature": 0.7,
|
"temperature": 0.7,
|
||||||
|
"working_directory": "/tmp", # Dummy absolute path
|
||||||
}
|
}
|
||||||
|
|
||||||
request = ChatRequest(**request_data)
|
request = ChatRequest(**request_data)
|
||||||
@@ -62,6 +65,7 @@ class TestChatTool:
|
|||||||
assert request.images == ["test.png"]
|
assert request.images == ["test.png"]
|
||||||
assert request.model == "anthropic/claude-opus-4.1"
|
assert request.model == "anthropic/claude-opus-4.1"
|
||||||
assert request.temperature == 0.7
|
assert request.temperature == 0.7
|
||||||
|
assert request.working_directory == "/tmp"
|
||||||
|
|
||||||
def test_required_fields(self):
|
def test_required_fields(self):
|
||||||
"""Test that required fields are enforced"""
|
"""Test that required fields are enforced"""
|
||||||
@@ -69,7 +73,7 @@ class TestChatTool:
|
|||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
with pytest.raises(ValidationError):
|
with pytest.raises(ValidationError):
|
||||||
ChatRequest(model="anthropic/claude-opus-4.1")
|
ChatRequest(model="anthropic/claude-opus-4.1", working_directory="/tmp")
|
||||||
|
|
||||||
def test_model_availability(self):
|
def test_model_availability(self):
|
||||||
"""Test that model availability works"""
|
"""Test that model availability works"""
|
||||||
@@ -96,7 +100,7 @@ class TestChatTool:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_prompt_preparation(self):
|
async def test_prompt_preparation(self):
|
||||||
"""Test that prompt preparation works correctly"""
|
"""Test that prompt preparation works correctly"""
|
||||||
request = ChatRequest(prompt="Test prompt", files=[])
|
request = ChatRequest(prompt="Test prompt", files=[], working_directory="/tmp")
|
||||||
|
|
||||||
# Mock the system prompt and file handling
|
# Mock the system prompt and file handling
|
||||||
with patch.object(self.tool, "get_system_prompt", return_value="System prompt"):
|
with patch.object(self.tool, "get_system_prompt", return_value="System prompt"):
|
||||||
@@ -113,7 +117,7 @@ class TestChatTool:
|
|||||||
def test_response_formatting(self):
|
def test_response_formatting(self):
|
||||||
"""Test that response formatting works correctly"""
|
"""Test that response formatting works correctly"""
|
||||||
response = "Test response content"
|
response = "Test response content"
|
||||||
request = ChatRequest(prompt="Test")
|
request = ChatRequest(prompt="Test", working_directory="/tmp")
|
||||||
|
|
||||||
formatted = self.tool.format_response(response, request)
|
formatted = self.tool.format_response(response, request)
|
||||||
|
|
||||||
@@ -146,6 +150,7 @@ class TestChatTool:
|
|||||||
|
|
||||||
required_fields = self.tool.get_required_fields()
|
required_fields = self.tool.get_required_fields()
|
||||||
assert "prompt" in required_fields
|
assert "prompt" in required_fields
|
||||||
|
assert "working_directory" in required_fields
|
||||||
|
|
||||||
|
|
||||||
class TestChatRequestModel:
|
class TestChatRequestModel:
|
||||||
@@ -160,10 +165,11 @@ class TestChatRequestModel:
|
|||||||
assert "context" in CHAT_FIELD_DESCRIPTIONS["prompt"]
|
assert "context" in CHAT_FIELD_DESCRIPTIONS["prompt"]
|
||||||
assert "full-paths" in CHAT_FIELD_DESCRIPTIONS["files"] or "absolute" in CHAT_FIELD_DESCRIPTIONS["files"]
|
assert "full-paths" in CHAT_FIELD_DESCRIPTIONS["files"] or "absolute" in CHAT_FIELD_DESCRIPTIONS["files"]
|
||||||
assert "visual context" in CHAT_FIELD_DESCRIPTIONS["images"]
|
assert "visual context" in CHAT_FIELD_DESCRIPTIONS["images"]
|
||||||
|
assert "directory" in CHAT_FIELD_DESCRIPTIONS["working_directory"].lower()
|
||||||
|
|
||||||
def test_default_values(self):
|
def test_default_values(self):
|
||||||
"""Test that default values work correctly"""
|
"""Test that default values work correctly"""
|
||||||
request = ChatRequest(prompt="Test")
|
request = ChatRequest(prompt="Test", working_directory="/tmp")
|
||||||
|
|
||||||
assert request.prompt == "Test"
|
assert request.prompt == "Test"
|
||||||
assert request.files == [] # Should default to empty list
|
assert request.files == [] # Should default to empty list
|
||||||
@@ -173,7 +179,7 @@ class TestChatRequestModel:
|
|||||||
"""Test that ChatRequest properly inherits from ToolRequest"""
|
"""Test that ChatRequest properly inherits from ToolRequest"""
|
||||||
from tools.shared.base_models import ToolRequest
|
from tools.shared.base_models import ToolRequest
|
||||||
|
|
||||||
request = ChatRequest(prompt="Test")
|
request = ChatRequest(prompt="Test", working_directory="/tmp")
|
||||||
assert isinstance(request, ToolRequest)
|
assert isinstance(request, ToolRequest)
|
||||||
|
|
||||||
# Should have inherited fields
|
# Should have inherited fields
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from utils.conversation_memory import get_thread
|
|||||||
from utils.storage_backend import get_storage_backend
|
from utils.storage_backend import get_storage_backend
|
||||||
|
|
||||||
|
|
||||||
def test_first_response_persisted_in_conversation_history():
|
def test_first_response_persisted_in_conversation_history(tmp_path):
|
||||||
"""Ensure the assistant's initial reply is stored for newly created threads."""
|
"""Ensure the assistant's initial reply is stored for newly created threads."""
|
||||||
|
|
||||||
# Clear in-memory storage to avoid cross-test contamination
|
# Clear in-memory storage to avoid cross-test contamination
|
||||||
@@ -13,7 +13,7 @@ def test_first_response_persisted_in_conversation_history():
|
|||||||
storage._store.clear() # type: ignore[attr-defined]
|
storage._store.clear() # type: ignore[attr-defined]
|
||||||
|
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
request = ChatRequest(prompt="First question?", model="local-llama")
|
request = ChatRequest(prompt="First question?", model="local-llama", working_directory=str(tmp_path))
|
||||||
response_text = "Here is the initial answer."
|
response_text = "Here is the initial answer."
|
||||||
|
|
||||||
# Mimic the first tool invocation (no continuation_id supplied)
|
# Mimic the first tool invocation (no continuation_id supplied)
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ def helper_function():
|
|||||||
"prompt": "Analyze this codebase structure",
|
"prompt": "Analyze this codebase structure",
|
||||||
"files": [directory], # Directory path, not individual files
|
"files": [directory], # Directory path, not individual files
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
|
"working_directory": directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Execute the tool
|
# Execute the tool
|
||||||
@@ -168,6 +169,7 @@ def helper_function():
|
|||||||
"files": [directory], # Same directory again
|
"files": [directory], # Same directory again
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
"continuation_id": thread_id,
|
"continuation_id": thread_id,
|
||||||
|
"working_directory": directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Mock to capture file filtering behavior
|
# Mock to capture file filtering behavior
|
||||||
@@ -299,6 +301,7 @@ def helper_function():
|
|||||||
"prompt": "Analyze this code",
|
"prompt": "Analyze this code",
|
||||||
"files": [directory],
|
"files": [directory],
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
|
"working_directory": directory,
|
||||||
}
|
}
|
||||||
|
|
||||||
result = await tool.execute(request_args)
|
result = await tool.execute(request_args)
|
||||||
|
|||||||
@@ -56,7 +56,12 @@ class TestLargePromptHandling:
|
|||||||
async def test_chat_large_prompt_detection(self, large_prompt):
|
async def test_chat_large_prompt_detection(self, large_prompt):
|
||||||
"""Test that chat tool detects large prompts."""
|
"""Test that chat tool detects large prompts."""
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
result = await tool.execute({"prompt": large_prompt})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute({"prompt": large_prompt, "working_directory": temp_dir})
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
assert isinstance(result[0], TextContent)
|
assert isinstance(result[0], TextContent)
|
||||||
@@ -73,9 +78,16 @@ class TestLargePromptHandling:
|
|||||||
"""Test that chat tool works normally with regular prompts."""
|
"""Test that chat tool works normally with regular prompts."""
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
|
|
||||||
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
# This test runs in the test environment which uses dummy keys
|
# This test runs in the test environment which uses dummy keys
|
||||||
# The chat tool will return an error for dummy keys, which is expected
|
# The chat tool will return an error for dummy keys, which is expected
|
||||||
result = await tool.execute({"prompt": normal_prompt, "model": "gemini-2.5-flash"})
|
try:
|
||||||
|
result = await tool.execute(
|
||||||
|
{"prompt": normal_prompt, "model": "gemini-2.5-flash", "working_directory": temp_dir}
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
@@ -105,7 +117,14 @@ class TestLargePromptHandling:
|
|||||||
try:
|
try:
|
||||||
# This test runs in the test environment which uses dummy keys
|
# This test runs in the test environment which uses dummy keys
|
||||||
# The chat tool will return an error for dummy keys, which is expected
|
# The chat tool will return an error for dummy keys, which is expected
|
||||||
result = await tool.execute({"prompt": "", "files": [temp_prompt_file], "model": "gemini-2.5-flash"})
|
result = await tool.execute(
|
||||||
|
{
|
||||||
|
"prompt": "",
|
||||||
|
"files": [temp_prompt_file],
|
||||||
|
"model": "gemini-2.5-flash",
|
||||||
|
"working_directory": temp_dir,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
@@ -261,7 +280,13 @@ class TestLargePromptHandling:
|
|||||||
mock_prepare_files.return_value = ("File content", [other_file])
|
mock_prepare_files.return_value = ("File content", [other_file])
|
||||||
|
|
||||||
# Use a small prompt to avoid triggering size limit
|
# Use a small prompt to avoid triggering size limit
|
||||||
await tool.execute({"prompt": "Test prompt", "files": [temp_prompt_file, other_file]})
|
await tool.execute(
|
||||||
|
{
|
||||||
|
"prompt": "Test prompt",
|
||||||
|
"files": [temp_prompt_file, other_file],
|
||||||
|
"working_directory": os.path.dirname(temp_prompt_file),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Verify handle_prompt_file was called with the original files list
|
# Verify handle_prompt_file was called with the original files list
|
||||||
mock_handle_prompt.assert_called_once_with([temp_prompt_file, other_file])
|
mock_handle_prompt.assert_called_once_with([temp_prompt_file, other_file])
|
||||||
@@ -295,7 +320,11 @@ class TestLargePromptHandling:
|
|||||||
mock_get_provider.return_value = mock_provider
|
mock_get_provider.return_value = mock_provider
|
||||||
|
|
||||||
# With the fix, this should now pass because we check at MCP transport boundary before adding internal content
|
# With the fix, this should now pass because we check at MCP transport boundary before adding internal content
|
||||||
result = await tool.execute({"prompt": exact_prompt})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute({"prompt": exact_prompt, "working_directory": temp_dir})
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
assert output["status"] != "resend_prompt"
|
assert output["status"] != "resend_prompt"
|
||||||
|
|
||||||
@@ -305,7 +334,11 @@ class TestLargePromptHandling:
|
|||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
over_prompt = "x" * (MCP_PROMPT_SIZE_LIMIT + 1)
|
over_prompt = "x" * (MCP_PROMPT_SIZE_LIMIT + 1)
|
||||||
|
|
||||||
result = await tool.execute({"prompt": over_prompt})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute({"prompt": over_prompt, "working_directory": temp_dir})
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
assert output["status"] == "resend_prompt"
|
assert output["status"] == "resend_prompt"
|
||||||
|
|
||||||
@@ -326,7 +359,11 @@ class TestLargePromptHandling:
|
|||||||
)
|
)
|
||||||
mock_get_provider.return_value = mock_provider
|
mock_get_provider.return_value = mock_provider
|
||||||
|
|
||||||
result = await tool.execute({"prompt": ""})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute({"prompt": "", "working_directory": temp_dir})
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
assert output["status"] != "resend_prompt"
|
assert output["status"] != "resend_prompt"
|
||||||
|
|
||||||
@@ -362,7 +399,11 @@ class TestLargePromptHandling:
|
|||||||
mock_model_context_class.return_value = mock_model_context
|
mock_model_context_class.return_value = mock_model_context
|
||||||
|
|
||||||
# Should continue with empty prompt when file can't be read
|
# Should continue with empty prompt when file can't be read
|
||||||
result = await tool.execute({"prompt": "", "files": [bad_file]})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute({"prompt": "", "files": [bad_file], "working_directory": temp_dir})
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
assert output["status"] != "resend_prompt"
|
assert output["status"] != "resend_prompt"
|
||||||
|
|
||||||
@@ -408,6 +449,7 @@ class TestLargePromptHandling:
|
|||||||
"prompt": "Summarize the design decisions",
|
"prompt": "Summarize the design decisions",
|
||||||
"files": [str(large_file)],
|
"files": [str(large_file)],
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
|
"working_directory": str(tmp_path),
|
||||||
"_model_context": dummy_context,
|
"_model_context": dummy_context,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -424,6 +466,7 @@ class TestLargePromptHandling:
|
|||||||
This test verifies that even if our internal prompt (with system prompts, history, etc.)
|
This test verifies that even if our internal prompt (with system prompts, history, etc.)
|
||||||
exceeds MCP_PROMPT_SIZE_LIMIT, it should still work as long as the user's input is small.
|
exceeds MCP_PROMPT_SIZE_LIMIT, it should still work as long as the user's input is small.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
|
|
||||||
# Small user input that should pass MCP boundary check
|
# Small user input that should pass MCP boundary check
|
||||||
@@ -432,62 +475,57 @@ class TestLargePromptHandling:
|
|||||||
# Mock a huge conversation history that would exceed MCP limits if incorrectly checked
|
# Mock a huge conversation history that would exceed MCP limits if incorrectly checked
|
||||||
huge_history = "x" * (MCP_PROMPT_SIZE_LIMIT * 2) # 100K chars = way over 50K limit
|
huge_history = "x" * (MCP_PROMPT_SIZE_LIMIT * 2) # 100K chars = way over 50K limit
|
||||||
|
|
||||||
with (
|
temp_dir = tempfile.mkdtemp()
|
||||||
patch.object(tool, "get_model_provider") as mock_get_provider,
|
original_prepare_prompt = tool.prepare_prompt
|
||||||
patch("utils.model_context.ModelContext") as mock_model_context_class,
|
|
||||||
):
|
|
||||||
from tests.mock_helpers import create_mock_provider
|
|
||||||
|
|
||||||
mock_provider = create_mock_provider(model_name="flash")
|
try:
|
||||||
mock_get_provider.return_value = mock_provider
|
with (
|
||||||
|
patch.object(tool, "get_model_provider") as mock_get_provider,
|
||||||
|
patch("utils.model_context.ModelContext") as mock_model_context_class,
|
||||||
|
):
|
||||||
|
from tests.mock_helpers import create_mock_provider
|
||||||
|
from utils.model_context import TokenAllocation
|
||||||
|
|
||||||
# Mock ModelContext to avoid the comparison issue
|
mock_provider = create_mock_provider(model_name="flash")
|
||||||
from utils.model_context import TokenAllocation
|
mock_get_provider.return_value = mock_provider
|
||||||
|
|
||||||
mock_model_context = MagicMock()
|
mock_model_context = MagicMock()
|
||||||
mock_model_context.model_name = "flash"
|
mock_model_context.model_name = "flash"
|
||||||
mock_model_context.provider = mock_provider
|
mock_model_context.provider = mock_provider
|
||||||
mock_model_context.calculate_token_allocation.return_value = TokenAllocation(
|
mock_model_context.calculate_token_allocation.return_value = TokenAllocation(
|
||||||
total_tokens=1_048_576,
|
total_tokens=1_048_576,
|
||||||
content_tokens=838_861,
|
content_tokens=838_861,
|
||||||
response_tokens=209_715,
|
response_tokens=209_715,
|
||||||
file_tokens=335_544,
|
file_tokens=335_544,
|
||||||
history_tokens=335_544,
|
history_tokens=335_544,
|
||||||
)
|
)
|
||||||
mock_model_context_class.return_value = mock_model_context
|
mock_model_context_class.return_value = mock_model_context
|
||||||
|
|
||||||
# Mock the prepare_prompt to simulate huge internal context
|
async def mock_prepare_prompt(request):
|
||||||
original_prepare_prompt = tool.prepare_prompt
|
normal_prompt = await original_prepare_prompt(request)
|
||||||
|
huge_internal_prompt = f"{normal_prompt}\n\n=== HUGE INTERNAL CONTEXT ===\n{huge_history}"
|
||||||
|
assert len(huge_internal_prompt) > MCP_PROMPT_SIZE_LIMIT
|
||||||
|
return huge_internal_prompt
|
||||||
|
|
||||||
async def mock_prepare_prompt(request):
|
tool.prepare_prompt = mock_prepare_prompt
|
||||||
# Call original to get normal processing
|
|
||||||
normal_prompt = await original_prepare_prompt(request)
|
|
||||||
# Add huge internal context (simulating large history, system prompts, files)
|
|
||||||
huge_internal_prompt = f"{normal_prompt}\n\n=== HUGE INTERNAL CONTEXT ===\n{huge_history}"
|
|
||||||
|
|
||||||
# Verify the huge internal prompt would exceed MCP limits if incorrectly checked
|
result = await tool.execute(
|
||||||
assert len(huge_internal_prompt) > MCP_PROMPT_SIZE_LIMIT
|
{"prompt": small_user_prompt, "model": "flash", "working_directory": temp_dir}
|
||||||
|
)
|
||||||
|
output = json.loads(result[0].text)
|
||||||
|
|
||||||
return huge_internal_prompt
|
assert output["status"] != "resend_prompt"
|
||||||
|
|
||||||
tool.prepare_prompt = mock_prepare_prompt
|
mock_provider.generate_content.assert_called_once()
|
||||||
|
call_kwargs = mock_provider.generate_content.call_args[1]
|
||||||
|
actual_prompt = call_kwargs.get("prompt")
|
||||||
|
|
||||||
# This should succeed because we only check user input at MCP boundary
|
assert len(actual_prompt) > MCP_PROMPT_SIZE_LIMIT
|
||||||
result = await tool.execute({"prompt": small_user_prompt, "model": "flash"})
|
assert huge_history in actual_prompt
|
||||||
output = json.loads(result[0].text)
|
assert small_user_prompt in actual_prompt
|
||||||
|
finally:
|
||||||
# Should succeed even though internal context is huge
|
tool.prepare_prompt = original_prepare_prompt
|
||||||
assert output["status"] != "resend_prompt"
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
# Verify the model was actually called with the huge prompt
|
|
||||||
mock_provider.generate_content.assert_called_once()
|
|
||||||
call_kwargs = mock_provider.generate_content.call_args[1]
|
|
||||||
actual_prompt = call_kwargs.get("prompt")
|
|
||||||
|
|
||||||
# Verify internal prompt was huge (proving we don't limit internal processing)
|
|
||||||
assert len(actual_prompt) > MCP_PROMPT_SIZE_LIMIT
|
|
||||||
assert huge_history in actual_prompt
|
|
||||||
assert small_user_prompt in actual_prompt
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mcp_boundary_vs_internal_processing_distinction(self):
|
async def test_mcp_boundary_vs_internal_processing_distinction(self):
|
||||||
@@ -500,27 +538,37 @@ class TestLargePromptHandling:
|
|||||||
|
|
||||||
# Test case 1: Large user input should fail at MCP boundary
|
# Test case 1: Large user input should fail at MCP boundary
|
||||||
large_user_input = "x" * (MCP_PROMPT_SIZE_LIMIT + 1000)
|
large_user_input = "x" * (MCP_PROMPT_SIZE_LIMIT + 1000)
|
||||||
result = await tool.execute({"prompt": large_user_input, "model": "flash"})
|
temp_dir = tempfile.mkdtemp()
|
||||||
output = json.loads(result[0].text)
|
try:
|
||||||
assert output["status"] == "resend_prompt" # Should fail
|
result = await tool.execute({"prompt": large_user_input, "model": "flash", "working_directory": temp_dir})
|
||||||
assert "too large for MCP's token limits" in output["content"]
|
output = json.loads(result[0].text)
|
||||||
|
assert output["status"] == "resend_prompt" # Should fail
|
||||||
|
assert "too large for MCP's token limits" in output["content"]
|
||||||
|
|
||||||
# Test case 2: Small user input should succeed even with huge internal processing
|
# Test case 2: Small user input should succeed even with huge internal processing
|
||||||
small_user_input = "Hello"
|
small_user_input = "Hello"
|
||||||
|
|
||||||
# This test runs in the test environment which uses dummy keys
|
# This test runs in the test environment which uses dummy keys
|
||||||
# The chat tool will return an error for dummy keys, which is expected
|
# The chat tool will return an error for dummy keys, which is expected
|
||||||
result = await tool.execute({"prompt": small_user_input, "model": "gemini-2.5-flash"})
|
result = await tool.execute(
|
||||||
output = json.loads(result[0].text)
|
{
|
||||||
|
"prompt": small_user_input,
|
||||||
|
"model": "gemini-2.5-flash",
|
||||||
|
"working_directory": temp_dir,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
output = json.loads(result[0].text)
|
||||||
|
|
||||||
# The test will fail with dummy API keys, which is expected behavior
|
# The test will fail with dummy API keys, which is expected behavior
|
||||||
# We're mainly testing that the tool processes small prompts correctly without size errors
|
# We're mainly testing that the tool processes small prompts correctly without size errors
|
||||||
if output["status"] == "error":
|
if output["status"] == "error":
|
||||||
# If it's an API error, that's fine - we're testing prompt handling, not API calls
|
# If it's an API error, that's fine - we're testing prompt handling, not API calls
|
||||||
assert "API" in output["content"] or "key" in output["content"] or "authentication" in output["content"]
|
assert "API" in output["content"] or "key" in output["content"] or "authentication" in output["content"]
|
||||||
else:
|
else:
|
||||||
# If somehow it succeeds (e.g., with mocked provider), check the response
|
# If somehow it succeeds (e.g., with mocked provider), check the response
|
||||||
assert output["status"] != "resend_prompt"
|
assert output["status"] != "resend_prompt"
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_continuation_with_huge_conversation_history(self):
|
async def test_continuation_with_huge_conversation_history(self):
|
||||||
@@ -548,6 +596,8 @@ class TestLargePromptHandling:
|
|||||||
# Ensure the history exceeds MCP limits
|
# Ensure the history exceeds MCP limits
|
||||||
assert len(huge_conversation_history) > MCP_PROMPT_SIZE_LIMIT
|
assert len(huge_conversation_history) > MCP_PROMPT_SIZE_LIMIT
|
||||||
|
|
||||||
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch.object(tool, "get_model_provider") as mock_get_provider,
|
patch.object(tool, "get_model_provider") as mock_get_provider,
|
||||||
patch("utils.model_context.ModelContext") as mock_model_context_class,
|
patch("utils.model_context.ModelContext") as mock_model_context_class,
|
||||||
@@ -579,6 +629,7 @@ class TestLargePromptHandling:
|
|||||||
"prompt": f"{huge_conversation_history}\n\n=== CURRENT REQUEST ===\n{small_continuation_prompt}",
|
"prompt": f"{huge_conversation_history}\n\n=== CURRENT REQUEST ===\n{small_continuation_prompt}",
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
"continuation_id": "test_thread_123",
|
"continuation_id": "test_thread_123",
|
||||||
|
"working_directory": temp_dir,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Mock the conversation history embedding to simulate server.py behavior
|
# Mock the conversation history embedding to simulate server.py behavior
|
||||||
@@ -628,6 +679,7 @@ class TestLargePromptHandling:
|
|||||||
finally:
|
finally:
|
||||||
# Restore original execute method
|
# Restore original execute method
|
||||||
tool.__class__.execute = original_execute
|
tool.__class__.execute = original_execute
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -68,6 +68,7 @@ class TestListModelsTool:
|
|||||||
assert "`flash` → `gemini-2.5-flash`" in content
|
assert "`flash` → `gemini-2.5-flash`" in content
|
||||||
assert "`pro` → `gemini-2.5-pro`" in content
|
assert "`pro` → `gemini-2.5-pro`" in content
|
||||||
assert "1M context" in content
|
assert "1M context" in content
|
||||||
|
assert "Supports structured code generation" in content
|
||||||
|
|
||||||
# Check summary
|
# Check summary
|
||||||
assert "**Configured Providers**: 1" in content
|
assert "**Configured Providers**: 1" in content
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ RECORDING: To record new responses, delete the cassette file and run with real A
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
@@ -92,9 +93,15 @@ class TestO3ProOutputTextFix:
|
|||||||
async def _execute_chat_tool_test(self):
|
async def _execute_chat_tool_test(self):
|
||||||
"""Execute the ChatTool with o3-pro and return the result."""
|
"""Execute the ChatTool with o3-pro and return the result."""
|
||||||
chat_tool = ChatTool()
|
chat_tool = ChatTool()
|
||||||
arguments = {"prompt": "What is 2 + 2?", "model": "o3-pro", "temperature": 1.0}
|
with tempfile.TemporaryDirectory() as workdir:
|
||||||
|
arguments = {
|
||||||
|
"prompt": "What is 2 + 2?",
|
||||||
|
"model": "o3-pro",
|
||||||
|
"temperature": 1.0,
|
||||||
|
"working_directory": workdir,
|
||||||
|
}
|
||||||
|
|
||||||
return await chat_tool.execute(arguments)
|
return await chat_tool.execute(arguments)
|
||||||
|
|
||||||
def _verify_chat_tool_response(self, result):
|
def _verify_chat_tool_response(self, result):
|
||||||
"""Verify the ChatTool response contains expected data."""
|
"""Verify the ChatTool response contains expected data."""
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ Test per-tool model default selection functionality
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -290,7 +292,13 @@ class TestAutoModeErrorMessages:
|
|||||||
mock_get_provider_for.return_value = None
|
mock_get_provider_for.return_value = None
|
||||||
|
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
result = await tool.execute({"prompt": "test", "model": "auto"})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute(
|
||||||
|
{"prompt": "test", "model": "auto", "working_directory": temp_dir}
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
# The SimpleTool will wrap the error message
|
# The SimpleTool will wrap the error message
|
||||||
@@ -418,7 +426,13 @@ class TestRuntimeModelSelection:
|
|||||||
mock_get_provider.return_value = None
|
mock_get_provider.return_value = None
|
||||||
|
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
result = await tool.execute({"prompt": "test", "model": "gpt-5-turbo"})
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute(
|
||||||
|
{"prompt": "test", "model": "gpt-5-turbo", "working_directory": temp_dir}
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
# Should require model selection
|
# Should require model selection
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
@@ -515,7 +529,11 @@ class TestUnavailableModelFallback:
|
|||||||
mock_get_model_provider.return_value = mock_provider
|
mock_get_model_provider.return_value = mock_provider
|
||||||
|
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
result = await tool.execute({"prompt": "test"}) # No model specified
|
temp_dir = tempfile.mkdtemp()
|
||||||
|
try:
|
||||||
|
result = await tool.execute({"prompt": "test", "working_directory": temp_dir})
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
# Should work normally, not require model parameter
|
# Should work normally, not require model parameter
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ Tests for individual tool implementations
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -343,12 +345,17 @@ class TestAbsolutePathValidation:
|
|||||||
async def test_chat_tool_relative_path_rejected(self):
|
async def test_chat_tool_relative_path_rejected(self):
|
||||||
"""Test that chat tool rejects relative paths"""
|
"""Test that chat tool rejects relative paths"""
|
||||||
tool = ChatTool()
|
tool = ChatTool()
|
||||||
result = await tool.execute(
|
temp_dir = tempfile.mkdtemp()
|
||||||
{
|
try:
|
||||||
"prompt": "Explain this code",
|
result = await tool.execute(
|
||||||
"files": ["code.py"], # relative path without ./
|
{
|
||||||
}
|
"prompt": "Explain this code",
|
||||||
)
|
"files": ["code.py"], # relative path without ./
|
||||||
|
"working_directory": temp_dir,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
response = json.loads(result[0].text)
|
response = json.loads(result[0].text)
|
||||||
|
|||||||
186
tools/chat.py
186
tools/chat.py
@@ -6,15 +6,20 @@ brainstorming, problem-solving, and collaborative thinking. It supports file con
|
|||||||
images, and conversation continuation for seamless multi-turn interactions.
|
images, and conversation continuation for seamless multi-turn interactions.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Any, Optional
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from providers.shared import ModelCapabilities
|
||||||
from tools.models import ToolModelCategory
|
from tools.models import ToolModelCategory
|
||||||
|
|
||||||
from config import TEMPERATURE_BALANCED
|
from config import TEMPERATURE_BALANCED
|
||||||
from systemprompts import CHAT_PROMPT
|
from systemprompts import CHAT_PROMPT, GENERATE_CODE_PROMPT
|
||||||
from tools.shared.base_models import COMMON_FIELD_DESCRIPTIONS, ToolRequest
|
from tools.shared.base_models import COMMON_FIELD_DESCRIPTIONS, ToolRequest
|
||||||
|
|
||||||
from .simple.base import SimpleTool
|
from .simple.base import SimpleTool
|
||||||
@@ -27,6 +32,9 @@ CHAT_FIELD_DESCRIPTIONS = {
|
|||||||
),
|
),
|
||||||
"files": "absolute file or folder paths for code context (do NOT shorten).",
|
"files": "absolute file or folder paths for code context (do NOT shorten).",
|
||||||
"images": "Optional absolute image paths or base64 for visual context when helpful.",
|
"images": "Optional absolute image paths or base64 for visual context when helpful.",
|
||||||
|
"working_directory": (
|
||||||
|
"Absolute full directory path where the assistant AI can save generated code for implementation. The directory must already exist"
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -36,6 +44,7 @@ class ChatRequest(ToolRequest):
|
|||||||
prompt: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["prompt"])
|
prompt: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["prompt"])
|
||||||
files: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["files"])
|
files: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["files"])
|
||||||
images: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["images"])
|
images: Optional[list[str]] = Field(default_factory=list, description=CHAT_FIELD_DESCRIPTIONS["images"])
|
||||||
|
working_directory: str = Field(..., description=CHAT_FIELD_DESCRIPTIONS["working_directory"])
|
||||||
|
|
||||||
|
|
||||||
class ChatTool(SimpleTool):
|
class ChatTool(SimpleTool):
|
||||||
@@ -49,6 +58,10 @@ class ChatTool(SimpleTool):
|
|||||||
Chat tool with 100% behavioral compatibility.
|
Chat tool with 100% behavioral compatibility.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
super().__init__()
|
||||||
|
self._last_recordable_response: Optional[str] = None
|
||||||
|
|
||||||
def get_name(self) -> str:
|
def get_name(self) -> str:
|
||||||
return "chat"
|
return "chat"
|
||||||
|
|
||||||
@@ -58,9 +71,20 @@ class ChatTool(SimpleTool):
|
|||||||
"getting second opinions, and exploring ideas. Use for ideas, validations, questions, and thoughtful explanations."
|
"getting second opinions, and exploring ideas. Use for ideas, validations, questions, and thoughtful explanations."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_annotations(self) -> Optional[dict[str, Any]]:
|
||||||
|
"""Chat writes generated artifacts when code-generation is enabled."""
|
||||||
|
|
||||||
|
return {"readOnlyHint": False}
|
||||||
|
|
||||||
def get_system_prompt(self) -> str:
|
def get_system_prompt(self) -> str:
|
||||||
return CHAT_PROMPT
|
return CHAT_PROMPT
|
||||||
|
|
||||||
|
def get_capability_system_prompts(self, capabilities: Optional["ModelCapabilities"]) -> list[str]:
|
||||||
|
prompts = list(super().get_capability_system_prompts(capabilities))
|
||||||
|
if capabilities and capabilities.allow_code_generation:
|
||||||
|
prompts.append(GENERATE_CODE_PROMPT)
|
||||||
|
return prompts
|
||||||
|
|
||||||
def get_default_temperature(self) -> float:
|
def get_default_temperature(self) -> float:
|
||||||
return TEMPERATURE_BALANCED
|
return TEMPERATURE_BALANCED
|
||||||
|
|
||||||
@@ -85,7 +109,7 @@ class ChatTool(SimpleTool):
|
|||||||
the same schema generation approach while still benefiting from SimpleTool
|
the same schema generation approach while still benefiting from SimpleTool
|
||||||
convenience methods.
|
convenience methods.
|
||||||
"""
|
"""
|
||||||
required_fields = ["prompt"]
|
required_fields = ["prompt", "working_directory"]
|
||||||
if self.is_effective_auto_mode():
|
if self.is_effective_auto_mode():
|
||||||
required_fields.append("model")
|
required_fields.append("model")
|
||||||
|
|
||||||
@@ -106,6 +130,10 @@ class ChatTool(SimpleTool):
|
|||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
"description": CHAT_FIELD_DESCRIPTIONS["images"],
|
"description": CHAT_FIELD_DESCRIPTIONS["images"],
|
||||||
},
|
},
|
||||||
|
"working_directory": {
|
||||||
|
"type": "string",
|
||||||
|
"description": CHAT_FIELD_DESCRIPTIONS["working_directory"],
|
||||||
|
},
|
||||||
"model": self.get_model_field_schema(),
|
"model": self.get_model_field_schema(),
|
||||||
"temperature": {
|
"temperature": {
|
||||||
"type": "number",
|
"type": "number",
|
||||||
@@ -159,7 +187,7 @@ class ChatTool(SimpleTool):
|
|||||||
|
|
||||||
def get_required_fields(self) -> list[str]:
|
def get_required_fields(self) -> list[str]:
|
||||||
"""Required fields for ChatSimple tool"""
|
"""Required fields for ChatSimple tool"""
|
||||||
return ["prompt"]
|
return ["prompt", "working_directory"]
|
||||||
|
|
||||||
# === Hook Method Implementations ===
|
# === Hook Method Implementations ===
|
||||||
|
|
||||||
@@ -173,17 +201,165 @@ class ChatTool(SimpleTool):
|
|||||||
# Use SimpleTool's Chat-style prompt preparation
|
# Use SimpleTool's Chat-style prompt preparation
|
||||||
return self.prepare_chat_style_prompt(request)
|
return self.prepare_chat_style_prompt(request)
|
||||||
|
|
||||||
|
def _validate_file_paths(self, request) -> Optional[str]:
|
||||||
|
"""Extend validation to cover the working directory path."""
|
||||||
|
|
||||||
|
error = super()._validate_file_paths(request)
|
||||||
|
if error:
|
||||||
|
return error
|
||||||
|
|
||||||
|
working_directory = getattr(request, "working_directory", None)
|
||||||
|
if working_directory:
|
||||||
|
expanded = os.path.expanduser(working_directory)
|
||||||
|
if not os.path.isabs(expanded):
|
||||||
|
return (
|
||||||
|
"Error: 'working_directory' must be an absolute path (you may use '~' which will be expanded). "
|
||||||
|
f"Received: {working_directory}"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
def format_response(self, response: str, request: ChatRequest, model_info: Optional[dict] = None) -> str:
|
def format_response(self, response: str, request: ChatRequest, model_info: Optional[dict] = None) -> str:
|
||||||
"""
|
"""
|
||||||
Format the chat response to match the original Chat tool exactly.
|
Format the chat response to match the original Chat tool exactly.
|
||||||
"""
|
"""
|
||||||
return (
|
self._last_recordable_response = None
|
||||||
f"{response}\n\n---\n\nAGENT'S TURN: Evaluate this perspective alongside your analysis to "
|
body = response
|
||||||
|
recordable_override: Optional[str] = None
|
||||||
|
|
||||||
|
if self._model_supports_code_generation():
|
||||||
|
block, remainder = self._extract_generated_code_block(response)
|
||||||
|
if block:
|
||||||
|
sanitized_text = remainder.strip()
|
||||||
|
try:
|
||||||
|
artifact_path = self._persist_generated_code_block(block, request.working_directory)
|
||||||
|
except Exception as exc: # pragma: no cover - rare filesystem failures
|
||||||
|
logger.error("Failed to persist generated code block: %s", exc, exc_info=True)
|
||||||
|
warning = (
|
||||||
|
f"WARNING: Unable to write zen_generated.code inside '{request.working_directory}'. "
|
||||||
|
"Check the path permissions and re-run. The generated code block is included below for manual handling."
|
||||||
|
)
|
||||||
|
|
||||||
|
history_copy = self._join_sections(sanitized_text, warning) if sanitized_text else warning
|
||||||
|
recordable_override = history_copy
|
||||||
|
|
||||||
|
sanitized_warning = history_copy.strip()
|
||||||
|
body = f"{sanitized_warning}\n\n{block.strip()}".strip()
|
||||||
|
else:
|
||||||
|
if not sanitized_text:
|
||||||
|
sanitized_text = "Generated code saved to zen_generated.code. Follow the structured instructions in that file exactly before continuing."
|
||||||
|
|
||||||
|
instruction = self._build_agent_instruction(artifact_path)
|
||||||
|
body = self._join_sections(sanitized_text, instruction)
|
||||||
|
|
||||||
|
final_output = (
|
||||||
|
f"{body}\n\n---\n\nAGENT'S TURN: Evaluate this perspective alongside your analysis to "
|
||||||
"form a comprehensive solution and continue with the user's request and task at hand."
|
"form a comprehensive solution and continue with the user's request and task at hand."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if recordable_override is not None:
|
||||||
|
self._last_recordable_response = (
|
||||||
|
f"{recordable_override}\n\n---\n\nAGENT'S TURN: Evaluate this perspective alongside your analysis to "
|
||||||
|
"form a comprehensive solution and continue with the user's request and task at hand."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._last_recordable_response = final_output
|
||||||
|
|
||||||
|
return final_output
|
||||||
|
|
||||||
|
def _record_assistant_turn(
|
||||||
|
self, continuation_id: str, response_text: str, request, model_info: Optional[dict]
|
||||||
|
) -> None:
|
||||||
|
recordable = self._last_recordable_response if self._last_recordable_response is not None else response_text
|
||||||
|
try:
|
||||||
|
super()._record_assistant_turn(continuation_id, recordable, request, model_info)
|
||||||
|
finally:
|
||||||
|
self._last_recordable_response = None
|
||||||
|
|
||||||
|
def _model_supports_code_generation(self) -> bool:
|
||||||
|
context = getattr(self, "_model_context", None)
|
||||||
|
if not context:
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
capabilities = context.capabilities
|
||||||
|
except Exception: # pragma: no cover - defensive fallback
|
||||||
|
return False
|
||||||
|
|
||||||
|
return bool(capabilities.allow_code_generation)
|
||||||
|
|
||||||
|
def _extract_generated_code_block(self, text: str) -> tuple[Optional[str], str]:
|
||||||
|
match = re.search(r"<GENERATED-CODE>.*?</GENERATED-CODE>", text, flags=re.DOTALL | re.IGNORECASE)
|
||||||
|
if not match:
|
||||||
|
return None, text
|
||||||
|
|
||||||
|
block = match.group(0)
|
||||||
|
before = text[: match.start()].rstrip()
|
||||||
|
after = text[match.end() :].lstrip()
|
||||||
|
|
||||||
|
if before and after:
|
||||||
|
remainder = f"{before}\n\n{after}"
|
||||||
|
else:
|
||||||
|
remainder = before or after
|
||||||
|
|
||||||
|
return block, remainder or ""
|
||||||
|
|
||||||
|
def _persist_generated_code_block(self, block: str, working_directory: str) -> Path:
|
||||||
|
expanded = os.path.expanduser(working_directory)
|
||||||
|
target_dir = Path(expanded).resolve()
|
||||||
|
target_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
target_file = target_dir / "zen_generated.code"
|
||||||
|
if target_file.exists():
|
||||||
|
try:
|
||||||
|
target_file.unlink()
|
||||||
|
except OSError as exc:
|
||||||
|
logger.warning("Unable to remove existing zen_generated.code: %s", exc)
|
||||||
|
|
||||||
|
content = block if block.endswith("\n") else f"{block}\n"
|
||||||
|
target_file.write_text(content, encoding="utf-8")
|
||||||
|
logger.info("Generated code artifact written to %s", target_file)
|
||||||
|
return target_file
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_agent_instruction(artifact_path: Path) -> str:
|
||||||
|
return (
|
||||||
|
f"CONTINUING FROM PREVIOUS DISCUSSION: The coding assistant has analyzed our conversation context and generated "
|
||||||
|
f"a structured implementation plan at `{artifact_path}`. This is a direct continuation of our discussion—all previous "
|
||||||
|
"context, requirements, and shared code remain relevant.\n"
|
||||||
|
"\n"
|
||||||
|
f"MANDATORY NEXT STEP: Open `{artifact_path}` immediately and review the implementation plan:\n"
|
||||||
|
"1. Read the step-by-step instructions—they reference our previous discussion. You may need to read the file in parts if it's too long.\n"
|
||||||
|
"2. Review each <NEWFILE:…> or <UPDATED_EXISTING_FILE:…> section in the context of what we've discussed\n"
|
||||||
|
"3. Verify the proposed changes align with the requirements and code we've already shared\n"
|
||||||
|
"4. Check for syntax errors, missing imports, or incomplete implementations\n"
|
||||||
|
"\n"
|
||||||
|
"Then systematically apply the changes:\n"
|
||||||
|
"- Create new files or update existing ones as instructed, maintaining code style consistency\n"
|
||||||
|
"- If updating existing code we discussed earlier, carefully preserve unmodified sections\n"
|
||||||
|
"- Run syntax validation after each modification\n"
|
||||||
|
"- Execute relevant tests to confirm functionality\n"
|
||||||
|
"- Verify the implementation works end-to-end with existing code\n"
|
||||||
|
"\n"
|
||||||
|
"Remember: This builds upon our conversation. The generated code reflects the full context of what we've discussed, "
|
||||||
|
"including any files, requirements, or constraints mentioned earlier. Proceed with implementation immediately."
|
||||||
|
"Only after you finish applying ALL the changes completely: delete `zen_generated.code` so stale instructions do not linger."
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _join_sections(*sections: str) -> str:
|
||||||
|
chunks: list[str] = []
|
||||||
|
for section in sections:
|
||||||
|
if section:
|
||||||
|
trimmed = section.strip()
|
||||||
|
if trimmed:
|
||||||
|
chunks.append(trimmed)
|
||||||
|
return "\n\n".join(chunks)
|
||||||
|
|
||||||
def get_websearch_guidance(self) -> str:
|
def get_websearch_guidance(self) -> str:
|
||||||
"""
|
"""
|
||||||
Return Chat tool-style web search guidance.
|
Return Chat tool-style web search guidance.
|
||||||
"""
|
"""
|
||||||
return self.get_chat_style_websearch_guidance()
|
return self.get_chat_style_websearch_guidance()
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -140,6 +140,8 @@ class ListModelsTool(BaseTool):
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
description = "No description available"
|
description = "No description available"
|
||||||
lines = [header, f" - {context_str}", f" - {description}"]
|
lines = [header, f" - {context_str}", f" - {description}"]
|
||||||
|
if capabilities.allow_code_generation:
|
||||||
|
lines.append(" - Supports structured code generation")
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
# Check each native provider type
|
# Check each native provider type
|
||||||
@@ -187,6 +189,8 @@ class ListModelsTool(BaseTool):
|
|||||||
|
|
||||||
output_lines.append(f"- `{model_name}` - {context_str}")
|
output_lines.append(f"- `{model_name}` - {context_str}")
|
||||||
output_lines.append(f" - {description}")
|
output_lines.append(f" - {description}")
|
||||||
|
if capabilities.allow_code_generation:
|
||||||
|
output_lines.append(" - Supports structured code generation")
|
||||||
|
|
||||||
for alias in capabilities.aliases or []:
|
for alias in capabilities.aliases or []:
|
||||||
if alias != model_name:
|
if alias != model_name:
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Any, Optional
|
|||||||
from mcp.types import TextContent
|
from mcp.types import TextContent
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from providers.shared import ModelCapabilities
|
||||||
from tools.models import ToolModelCategory
|
from tools.models import ToolModelCategory
|
||||||
|
|
||||||
from config import MCP_PROMPT_SIZE_LIMIT
|
from config import MCP_PROMPT_SIZE_LIMIT
|
||||||
@@ -165,6 +166,42 @@ class BaseTool(ABC):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def get_capability_system_prompts(self, capabilities: Optional["ModelCapabilities"]) -> list[str]:
    """Hook for capability-gated system prompt additions.

    Subclasses override this to contribute extra prompt fragments when the
    active model advertises a capability (for example, enabling
    code-generation exports). This base implementation contributes nothing.

    Args:
        capabilities: The resolved capabilities for the active model.

    Returns:
        Prompt fragments to append after the base system prompt; empty by
        default.
    """
    return []
|
||||||
|
|
||||||
|
def _augment_system_prompt_with_capabilities(
    self, base_prompt: str, capabilities: Optional["ModelCapabilities"]
) -> str:
    """Merge capability-driven prompt addenda with the base system prompt.

    Collects fragments from ``get_capability_system_prompts`` (when model
    capabilities are available), joins them with blank-line separators, and
    appends them after ``base_prompt``.

    Args:
        base_prompt: The tool's base system prompt (may be empty).
        capabilities: Resolved capabilities of the active model, or ``None``
            when no model context is available.

    Returns:
        The base prompt, extended with any capability-specific instructions.
    """
    additions: list[str] = []
    if capabilities is not None:
        # Strip first, then filter: a non-empty but whitespace-only fragment
        # would otherwise survive as "" and inject trailing blank separators
        # into the joined addendum text.
        additions = [
            trimmed
            for fragment in self.get_capability_system_prompts(capabilities)
            if fragment and (trimmed := fragment.strip())
        ]

    if not additions:
        return base_prompt

    addition_text = "\n\n".join(additions)
    if not base_prompt:
        return addition_text

    # Avoid doubling the separator when the base prompt already ends with one.
    suffix = "" if base_prompt.endswith("\n\n") else "\n\n"
    return f"{base_prompt}{suffix}{addition_text}"
|
||||||
|
|
||||||
def get_annotations(self) -> Optional[dict[str, Any]]:
|
def get_annotations(self) -> Optional[dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Return optional annotations for this tool.
|
Return optional annotations for this tool.
|
||||||
@@ -413,13 +450,16 @@ class BaseTool(ABC):
|
|||||||
for rank, canonical_name, capabilities in filtered[:limit]:
|
for rank, canonical_name, capabilities in filtered[:limit]:
|
||||||
details: list[str] = []
|
details: list[str] = []
|
||||||
|
|
||||||
context_str = self._format_context_window(getattr(capabilities, "context_window", 0))
|
context_str = self._format_context_window(capabilities.context_window)
|
||||||
if context_str:
|
if context_str:
|
||||||
details.append(context_str)
|
details.append(context_str)
|
||||||
|
|
||||||
if getattr(capabilities, "supports_extended_thinking", False):
|
if capabilities.supports_extended_thinking:
|
||||||
details.append("thinking")
|
details.append("thinking")
|
||||||
|
|
||||||
|
if capabilities.allow_code_generation:
|
||||||
|
details.append("code-gen")
|
||||||
|
|
||||||
base = f"{canonical_name} (score {rank}"
|
base = f"{canonical_name} (score {rank}"
|
||||||
if details:
|
if details:
|
||||||
base = f"{base}, {', '.join(details)}"
|
base = f"{base}, {', '.join(details)}"
|
||||||
|
|||||||
@@ -404,11 +404,15 @@ class SimpleTool(BaseTool):
|
|||||||
|
|
||||||
# Get the provider from model context (clean OOP - no re-fetching)
|
# Get the provider from model context (clean OOP - no re-fetching)
|
||||||
provider = self._model_context.provider
|
provider = self._model_context.provider
|
||||||
|
capabilities = self._model_context.capabilities
|
||||||
|
|
||||||
# Get system prompt for this tool
|
# Get system prompt for this tool
|
||||||
base_system_prompt = self.get_system_prompt()
|
base_system_prompt = self.get_system_prompt()
|
||||||
|
capability_augmented_prompt = self._augment_system_prompt_with_capabilities(
|
||||||
|
base_system_prompt, capabilities
|
||||||
|
)
|
||||||
language_instruction = self.get_language_instruction()
|
language_instruction = self.get_language_instruction()
|
||||||
system_prompt = language_instruction + base_system_prompt
|
system_prompt = language_instruction + capability_augmented_prompt
|
||||||
|
|
||||||
# Generate AI response using the provider
|
# Generate AI response using the provider
|
||||||
logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.get_name()}")
|
logger.info(f"Sending request to {provider.get_provider_type().value} API for {self.get_name()}")
|
||||||
@@ -423,7 +427,6 @@ class SimpleTool(BaseTool):
|
|||||||
logger.debug(f"Prompt length: {len(prompt)} characters (~{estimated_tokens:,} tokens)")
|
logger.debug(f"Prompt length: {len(prompt)} characters (~{estimated_tokens:,} tokens)")
|
||||||
|
|
||||||
# Resolve model capabilities for feature gating
|
# Resolve model capabilities for feature gating
|
||||||
capabilities = self._model_context.capabilities
|
|
||||||
supports_thinking = capabilities.supports_extended_thinking
|
supports_thinking = capabilities.supports_extended_thinking
|
||||||
|
|
||||||
# Generate content with provider abstraction
|
# Generate content with provider abstraction
|
||||||
|
|||||||
@@ -1480,8 +1480,11 @@ class BaseWorkflowMixin(ABC):
|
|||||||
|
|
||||||
# Get system prompt for this tool with localization support
|
# Get system prompt for this tool with localization support
|
||||||
base_system_prompt = self.get_system_prompt()
|
base_system_prompt = self.get_system_prompt()
|
||||||
|
capability_augmented_prompt = self._augment_system_prompt_with_capabilities(
|
||||||
|
base_system_prompt, getattr(self._model_context, "capabilities", None)
|
||||||
|
)
|
||||||
language_instruction = self.get_language_instruction()
|
language_instruction = self.get_language_instruction()
|
||||||
system_prompt = language_instruction + base_system_prompt
|
system_prompt = language_instruction + capability_augmented_prompt
|
||||||
|
|
||||||
# Check if tool wants system prompt embedded in main prompt
|
# Check if tool wants system prompt embedded in main prompt
|
||||||
if self.should_embed_system_prompt():
|
if self.should_embed_system_prompt():
|
||||||
|
|||||||
Reference in New Issue
Block a user