From 87ccb6b25ba32a3cb9c4cc64fc0e96294f492c04 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Thu, 2 Oct 2025 23:47:44 +0400
Subject: [PATCH] test: fixed integration tests, removed magicmock

---
 tests/test_prompt_size_limit_bug_fix.py       | 11 +--
 ..._workflow_prompt_size_validation_simple.py | 89 +++++++++++++------
 2 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/tests/test_prompt_size_limit_bug_fix.py b/tests/test_prompt_size_limit_bug_fix.py
index 89a3e8f..ea63e39 100644
--- a/tests/test_prompt_size_limit_bug_fix.py
+++ b/tests/test_prompt_size_limit_bug_fix.py
@@ -5,9 +5,8 @@ This test verifies that SimpleTool correctly validates only the original user pr
 when conversation history is embedded, rather than validating the full enhanced prompt.
 """
 
-from unittest.mock import MagicMock
-
 from tools.chat import ChatTool
+from tools.shared.base_models import ToolRequest
 
 
 class TestPromptSizeLimitBugFix:
@@ -28,10 +27,6 @@ class TestPromptSizeLimitBugFix:
         # Simulate enhanced prompt with conversation history (what server.py creates)
         enhanced_prompt = f"{conversation_history}\n\n=== NEW USER INPUT ===\n{short_user_prompt}"
 
-        # Create request object simulation
-        request = MagicMock()
-        request.prompt = enhanced_prompt  # This is what get_request_prompt() would return
-
         # Simulate server.py behavior: store original prompt in _current_arguments
         tool._current_arguments = {
             "prompt": enhanced_prompt,  # Enhanced with history
@@ -107,8 +102,8 @@ class TestPromptSizeLimitBugFix:
     def get_input_schema(self) -> dict:
         return {}
 
-    def get_request_model(self, request) -> str:
-        return "flash"
+    def get_request_model(self):
+        return ToolRequest
 
     def get_system_prompt(self) -> str:
         return "Test system prompt"

diff --git a/tests/test_workflow_prompt_size_validation_simple.py b/tests/test_workflow_prompt_size_validation_simple.py
index c6392dd..4fd84a7 100644
--- a/tests/test_workflow_prompt_size_validation_simple.py
+++ b/tests/test_workflow_prompt_size_validation_simple.py
@@ -1,42 +1,79 @@
-"""
-Test for the simple workflow tool prompt size validation fix.
+"""Integration tests for workflow step size validation.
 
-This test verifies that workflow tools now have basic size validation for the 'step' field
-to prevent oversized instructions. The fix is minimal - just prompts users to use shorter
-instructions and put detailed content in files.
+These tests exercise the debug workflow tool end-to-end to ensure that step size
+validation operates on the real execution path rather than mocked helpers.
""" +from __future__ import annotations + +import json + +import pytest + from config import MCP_PROMPT_SIZE_LIMIT +from tools.debug import DebugIssueTool -class TestWorkflowPromptSizeValidationSimple: - """Test that workflow tools have minimal size validation for step field""" +def build_debug_arguments(**overrides) -> dict[str, object]: + """Create a minimal set of workflow arguments for DebugIssueTool.""" - def test_workflow_tool_normal_step_content_works(self): - """Test that normal step content works fine""" + base_arguments: dict[str, object] = { + "step": "Investigate the authentication issue in the login module", + "step_number": 1, + "total_steps": 3, + "next_step_required": True, + "findings": "Initial observations about the login failure", + "files_checked": [], + "relevant_files": [], + "relevant_context": [], + "issues_found": [], + "confidence": "low", + "use_assistant_model": False, + # WorkflowRequest accepts optional fields; leave hypothesis/continuation unset + } - # Normal step content should be fine - normal_step = "Investigate the authentication issue in the login module" + base_arguments.update(overrides) + return base_arguments - assert len(normal_step) < MCP_PROMPT_SIZE_LIMIT, "Normal step should be under limit" - def test_workflow_tool_large_step_content_exceeds_limit(self): - """Test that very large step content would exceed the limit""" +@pytest.mark.asyncio +async def test_workflow_tool_accepts_normal_step_content() -> None: + """Verify a typical step executes through the real workflow path.""" - # Create very large step content - large_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000)) + tool = DebugIssueTool() + arguments = build_debug_arguments() - assert len(large_step) > MCP_PROMPT_SIZE_LIMIT, "Large step should exceed limit" + responses = await tool.execute(arguments) + assert len(responses) == 1 - def test_workflow_tool_size_validation_message(self): - """Test that the size validation gives helpful guidance""" + payload = json.loads(responses[0].text) + assert payload["status"] == "pause_for_investigation" + assert payload["step_number"] == 1 + assert "error" not in payload - # The validation should tell users to: - # 1. Use shorter instructions - # 2. Put detailed content in files - expected_guidance = "use shorter instructions and provide detailed context via file paths" +@pytest.mark.asyncio +async def test_workflow_tool_rejects_oversized_step_with_guidance() -> None: + """Large step content should trigger the size safeguard with helpful guidance.""" - # This is what the error message should contain - assert "shorter instructions" in expected_guidance.lower() - assert "file paths" in expected_guidance.lower() + oversized_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000)) + tool = DebugIssueTool() + arguments = build_debug_arguments(step=oversized_step) + + responses = await tool.execute(arguments) + assert len(responses) == 1 + + payload = json.loads(responses[0].text) + assert payload["status"] == "debug_failed" + assert "error" in payload + + # Extract the serialized ToolOutput from the MCP_SIZE_CHECK marker + error_details = payload["error"].split("MCP_SIZE_CHECK:", 1)[1] + output_payload = json.loads(error_details) + + assert output_payload["status"] == "resend_prompt" + assert output_payload["metadata"]["prompt_size"] > MCP_PROMPT_SIZE_LIMIT + + guidance = output_payload["content"].lower() + assert "shorter instructions" in guidance + assert "file paths" in guidance