test: fixed integration tests, removed magicmock
This commit is contained in:
@@ -5,9 +5,8 @@ This test verifies that SimpleTool correctly validates only the original user pr
|
|||||||
when conversation history is embedded, rather than validating the full enhanced prompt.
|
when conversation history is embedded, rather than validating the full enhanced prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
from tools.chat import ChatTool
|
from tools.chat import ChatTool
|
||||||
|
from tools.shared.base_models import ToolRequest
|
||||||
|
|
||||||
|
|
||||||
class TestPromptSizeLimitBugFix:
|
class TestPromptSizeLimitBugFix:
|
||||||
@@ -28,10 +27,6 @@ class TestPromptSizeLimitBugFix:
|
|||||||
# Simulate enhanced prompt with conversation history (what server.py creates)
|
# Simulate enhanced prompt with conversation history (what server.py creates)
|
||||||
enhanced_prompt = f"{conversation_history}\n\n=== NEW USER INPUT ===\n{short_user_prompt}"
|
enhanced_prompt = f"{conversation_history}\n\n=== NEW USER INPUT ===\n{short_user_prompt}"
|
||||||
|
|
||||||
# Create request object simulation
|
|
||||||
request = MagicMock()
|
|
||||||
request.prompt = enhanced_prompt # This is what get_request_prompt() would return
|
|
||||||
|
|
||||||
# Simulate server.py behavior: store original prompt in _current_arguments
|
# Simulate server.py behavior: store original prompt in _current_arguments
|
||||||
tool._current_arguments = {
|
tool._current_arguments = {
|
||||||
"prompt": enhanced_prompt, # Enhanced with history
|
"prompt": enhanced_prompt, # Enhanced with history
|
||||||
@@ -107,8 +102,8 @@ class TestPromptSizeLimitBugFix:
|
|||||||
def get_input_schema(self) -> dict:
|
def get_input_schema(self) -> dict:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_request_model(self, request) -> str:
|
def get_request_model(self):
|
||||||
return "flash"
|
return ToolRequest
|
||||||
|
|
||||||
def get_system_prompt(self) -> str:
|
def get_system_prompt(self) -> str:
|
||||||
return "Test system prompt"
|
return "Test system prompt"
|
||||||
|
|||||||
@@ -1,42 +1,79 @@
|
|||||||
"""
|
"""Integration tests for workflow step size validation.
|
||||||
Test for the simple workflow tool prompt size validation fix.
|
|
||||||
|
|
||||||
This test verifies that workflow tools now have basic size validation for the 'step' field
|
These tests exercise the debug workflow tool end-to-end to ensure that step size
|
||||||
to prevent oversized instructions. The fix is minimal - just prompts users to use shorter
|
validation operates on the real execution path rather than mocked helpers.
|
||||||
instructions and put detailed content in files.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from config import MCP_PROMPT_SIZE_LIMIT
|
from config import MCP_PROMPT_SIZE_LIMIT
|
||||||
|
from tools.debug import DebugIssueTool
|
||||||
|
|
||||||
|
|
||||||
class TestWorkflowPromptSizeValidationSimple:
|
def build_debug_arguments(**overrides) -> dict[str, object]:
|
||||||
"""Test that workflow tools have minimal size validation for step field"""
|
"""Create a minimal set of workflow arguments for DebugIssueTool."""
|
||||||
|
|
||||||
def test_workflow_tool_normal_step_content_works(self):
|
base_arguments: dict[str, object] = {
|
||||||
"""Test that normal step content works fine"""
|
"step": "Investigate the authentication issue in the login module",
|
||||||
|
"step_number": 1,
|
||||||
|
"total_steps": 3,
|
||||||
|
"next_step_required": True,
|
||||||
|
"findings": "Initial observations about the login failure",
|
||||||
|
"files_checked": [],
|
||||||
|
"relevant_files": [],
|
||||||
|
"relevant_context": [],
|
||||||
|
"issues_found": [],
|
||||||
|
"confidence": "low",
|
||||||
|
"use_assistant_model": False,
|
||||||
|
# WorkflowRequest accepts optional fields; leave hypothesis/continuation unset
|
||||||
|
}
|
||||||
|
|
||||||
# Normal step content should be fine
|
base_arguments.update(overrides)
|
||||||
normal_step = "Investigate the authentication issue in the login module"
|
return base_arguments
|
||||||
|
|
||||||
assert len(normal_step) < MCP_PROMPT_SIZE_LIMIT, "Normal step should be under limit"
|
|
||||||
|
|
||||||
def test_workflow_tool_large_step_content_exceeds_limit(self):
|
@pytest.mark.asyncio
|
||||||
"""Test that very large step content would exceed the limit"""
|
async def test_workflow_tool_accepts_normal_step_content() -> None:
|
||||||
|
"""Verify a typical step executes through the real workflow path."""
|
||||||
|
|
||||||
# Create very large step content
|
tool = DebugIssueTool()
|
||||||
large_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000))
|
arguments = build_debug_arguments()
|
||||||
|
|
||||||
assert len(large_step) > MCP_PROMPT_SIZE_LIMIT, "Large step should exceed limit"
|
responses = await tool.execute(arguments)
|
||||||
|
assert len(responses) == 1
|
||||||
|
|
||||||
def test_workflow_tool_size_validation_message(self):
|
payload = json.loads(responses[0].text)
|
||||||
"""Test that the size validation gives helpful guidance"""
|
assert payload["status"] == "pause_for_investigation"
|
||||||
|
assert payload["step_number"] == 1
|
||||||
|
assert "error" not in payload
|
||||||
|
|
||||||
# The validation should tell users to:
|
|
||||||
# 1. Use shorter instructions
|
|
||||||
# 2. Put detailed content in files
|
|
||||||
|
|
||||||
expected_guidance = "use shorter instructions and provide detailed context via file paths"
|
@pytest.mark.asyncio
|
||||||
|
async def test_workflow_tool_rejects_oversized_step_with_guidance() -> None:
|
||||||
|
"""Large step content should trigger the size safeguard with helpful guidance."""
|
||||||
|
|
||||||
# This is what the error message should contain
|
oversized_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000))
|
||||||
assert "shorter instructions" in expected_guidance.lower()
|
tool = DebugIssueTool()
|
||||||
assert "file paths" in expected_guidance.lower()
|
arguments = build_debug_arguments(step=oversized_step)
|
||||||
|
|
||||||
|
responses = await tool.execute(arguments)
|
||||||
|
assert len(responses) == 1
|
||||||
|
|
||||||
|
payload = json.loads(responses[0].text)
|
||||||
|
assert payload["status"] == "debug_failed"
|
||||||
|
assert "error" in payload
|
||||||
|
|
||||||
|
# Extract the serialized ToolOutput from the MCP_SIZE_CHECK marker
|
||||||
|
error_details = payload["error"].split("MCP_SIZE_CHECK:", 1)[1]
|
||||||
|
output_payload = json.loads(error_details)
|
||||||
|
|
||||||
|
assert output_payload["status"] == "resend_prompt"
|
||||||
|
assert output_payload["metadata"]["prompt_size"] > MCP_PROMPT_SIZE_LIMIT
|
||||||
|
|
||||||
|
guidance = output_payload["content"].lower()
|
||||||
|
assert "shorter instructions" in guidance
|
||||||
|
assert "file paths" in guidance
|
||||||
|
|||||||
Reference in New Issue
Block a user