From 87ccb6b25ba32a3cb9c4cc64fc0e96294f492c04 Mon Sep 17 00:00:00 2001
From: Fahad
Date: Thu, 2 Oct 2025 23:47:44 +0400
Subject: [PATCH] test: fixed integration tests, removed magicmock

---
 tests/test_prompt_size_limit_bug_fix.py       | 11 +--
 ..._workflow_prompt_size_validation_simple.py | 89 +++++++++++++------
 2 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/tests/test_prompt_size_limit_bug_fix.py b/tests/test_prompt_size_limit_bug_fix.py
index 89a3e8f..ea63e39 100644
--- a/tests/test_prompt_size_limit_bug_fix.py
+++ b/tests/test_prompt_size_limit_bug_fix.py
@@ -5,9 +5,8 @@ This test verifies that SimpleTool correctly validates only the original user pr
 when conversation history is embedded, rather than validating the full enhanced prompt.
 """
 
-from unittest.mock import MagicMock
-
 from tools.chat import ChatTool
+from tools.shared.base_models import ToolRequest
 
 
 class TestPromptSizeLimitBugFix:
@@ -28,10 +27,6 @@ class TestPromptSizeLimitBugFix:
         # Simulate enhanced prompt with conversation history (what server.py creates)
         enhanced_prompt = f"{conversation_history}\n\n=== NEW USER INPUT ===\n{short_user_prompt}"
 
-        # Create request object simulation
-        request = MagicMock()
-        request.prompt = enhanced_prompt  # This is what get_request_prompt() would return
-
         # Simulate server.py behavior: store original prompt in _current_arguments
         tool._current_arguments = {
             "prompt": enhanced_prompt,  # Enhanced with history
@@ -107,8 +102,8 @@ class TestPromptSizeLimitBugFix:
     def get_input_schema(self) -> dict:
         return {}
 
-    def get_request_model(self, request) -> str:
-        return "flash"
+    def get_request_model(self):
+        return ToolRequest
 
     def get_system_prompt(self) -> str:
         return "Test system prompt"

diff --git a/tests/test_workflow_prompt_size_validation_simple.py b/tests/test_workflow_prompt_size_validation_simple.py
index c6392dd..4fd84a7 100644
--- a/tests/test_workflow_prompt_size_validation_simple.py
+++ b/tests/test_workflow_prompt_size_validation_simple.py
@@ -1,42 +1,79 @@
-"""
-Test for the simple workflow tool prompt size validation fix.
+"""Integration tests for workflow step size validation.
 
-This test verifies that workflow tools now have basic size validation for the 'step' field
-to prevent oversized instructions. The fix is minimal - just prompts users to use shorter
-instructions and put detailed content in files.
+These tests exercise the debug workflow tool end-to-end to ensure that step size
+validation operates on the real execution path rather than mocked helpers.
""" +from __future__ import annotations + +import json + +import pytest + from config import MCP_PROMPT_SIZE_LIMIT +from tools.debug import DebugIssueTool -class TestWorkflowPromptSizeValidationSimple: - """Test that workflow tools have minimal size validation for step field""" +def build_debug_arguments(**overrides) -> dict[str, object]: + """Create a minimal set of workflow arguments for DebugIssueTool.""" - def test_workflow_tool_normal_step_content_works(self): - """Test that normal step content works fine""" + base_arguments: dict[str, object] = { + "step": "Investigate the authentication issue in the login module", + "step_number": 1, + "total_steps": 3, + "next_step_required": True, + "findings": "Initial observations about the login failure", + "files_checked": [], + "relevant_files": [], + "relevant_context": [], + "issues_found": [], + "confidence": "low", + "use_assistant_model": False, + # WorkflowRequest accepts optional fields; leave hypothesis/continuation unset + } - # Normal step content should be fine - normal_step = "Investigate the authentication issue in the login module" + base_arguments.update(overrides) + return base_arguments - assert len(normal_step) < MCP_PROMPT_SIZE_LIMIT, "Normal step should be under limit" - def test_workflow_tool_large_step_content_exceeds_limit(self): - """Test that very large step content would exceed the limit""" +@pytest.mark.asyncio +async def test_workflow_tool_accepts_normal_step_content() -> None: + """Verify a typical step executes through the real workflow path.""" - # Create very large step content - large_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000)) + tool = DebugIssueTool() + arguments = build_debug_arguments() - assert len(large_step) > MCP_PROMPT_SIZE_LIMIT, "Large step should exceed limit" + responses = await tool.execute(arguments) + assert len(responses) == 1 - def test_workflow_tool_size_validation_message(self): - """Test that the size validation gives helpful guidance""" + payload = json.loads(responses[0].text) + assert payload["status"] == "pause_for_investigation" + assert payload["step_number"] == 1 + assert "error" not in payload - # The validation should tell users to: - # 1. Use shorter instructions - # 2. Put detailed content in files - expected_guidance = "use shorter instructions and provide detailed context via file paths" +@pytest.mark.asyncio +async def test_workflow_tool_rejects_oversized_step_with_guidance() -> None: + """Large step content should trigger the size safeguard with helpful guidance.""" - # This is what the error message should contain - assert "shorter instructions" in expected_guidance.lower() - assert "file paths" in expected_guidance.lower() + oversized_step = "Investigate this issue: " + ("A" * (MCP_PROMPT_SIZE_LIMIT + 1000)) + tool = DebugIssueTool() + arguments = build_debug_arguments(step=oversized_step) + + responses = await tool.execute(arguments) + assert len(responses) == 1 + + payload = json.loads(responses[0].text) + assert payload["status"] == "debug_failed" + assert "error" in payload + + # Extract the serialized ToolOutput from the MCP_SIZE_CHECK marker + error_details = payload["error"].split("MCP_SIZE_CHECK:", 1)[1] + output_payload = json.loads(error_details) + + assert output_payload["status"] == "resend_prompt" + assert output_payload["metadata"]["prompt_size"] > MCP_PROMPT_SIZE_LIMIT + + guidance = output_payload["content"].lower() + assert "shorter instructions" in guidance + assert "file paths" in guidance