Improved prompts to encourage better investigative flow

Improved abstraction. Fixed failing tests after refactor.
2025-06-19 13:02:07 +04:00
parent b8c8e6f91e
commit 883aa220a7
3 changed files with 26 additions and 21 deletions
--- a/tests/test_auto_model_planner_fix.py
+++ b/tests/test_auto_model_planner_fix.py
@@ -198,20 +198,17 @@ class TestAutoModelPlannerFix:
        Verify that other tools still properly require model resolution.

        This ensures our fix doesn't break existing functionality.
-        Note: Debug tool now manages its own model calls like planner.
+        Note: Debug tool requires model resolution for expert analysis phase.
        """
        from tools.analyze import AnalyzeTool
        from tools.chat import ChatTool
        from tools.debug import DebugIssueTool

        # Test various tools still require models
-        tools_requiring_models = [ChatTool(), AnalyzeTool()]
+        tools_requiring_models = [ChatTool(), AnalyzeTool(), DebugIssueTool()]

        for tool in tools_requiring_models:
            assert tool.requires_model() is True, f"{tool.get_name()} should require model resolution"

-        # Test tools that manage their own model calls
-        tools_managing_own_models = [DebugIssueTool()]
-
-        for tool in tools_managing_own_models:
-            assert tool.requires_model() is False, f"{tool.get_name()} should manage its own model calls"
+        # Note: Debug tool requires model resolution for expert analysis phase
+        # Only planner truly manages its own model calls and doesn't need resolution
--- a/tests/test_debug.py
+++ b/tests/test_debug.py
@@ -21,7 +21,7 @@ class TestDebugTool:
        assert "DEBUG & ROOT CAUSE ANALYSIS" in tool.get_description()
        assert tool.get_default_temperature() == 0.2  # TEMPERATURE_ANALYTICAL
        assert tool.get_model_category() == ToolModelCategory.EXTENDED_REASONING
-        assert tool.requires_model() is False  # Since it manages its own model calls
+        assert tool.requires_model() is True  # Requires model resolution for expert analysis

    def test_request_validation(self):
        """Test Pydantic request model validation."""
@@ -83,8 +83,9 @@ class TestDebugTool:
        assert "continuation_id" in schema["properties"]
        assert "images" in schema["properties"]  # Now supported for visual debugging

+        # Check model field is present (fixed from previous bug)
+        assert "model" in schema["properties"]
        # Check excluded fields are NOT present
-        assert "model" not in schema["properties"]
        assert "temperature" not in schema["properties"]
        assert "thinking_mode" not in schema["properties"]
        assert "use_websearch" not in schema["properties"]
--- a/tools/debug.py
+++ b/tools/debug.py
@@ -21,9 +21,14 @@ logger = logging.getLogger(__name__)
 # Field descriptions for the investigation steps
 DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
    "step": (
-        "Describe what you're currently investigating. In step 1, clearly state the issue to investigate and begin "
-        "thinking deeply about where the problem might originate. In all subsequent steps, continue uncovering relevant "
-        "code, examining patterns, and formulating hypotheses with deliberate attention to detail."
+        "Describe what you're currently investigating by beginning to think deeply about the issue, its root cause "
+        "and possible reasons. Prepare and learn about the related code first. In step 1, clearly state the issue to investigate and begin "
+        "thinking deeply about not just the described issue, but possible underlying causes, side-effects, or external "
+        "components that might contribute to it. Follow the code flow carefully—bugs may originate "
+        "in one part of the code-dependencies, or upstream logic may not be immediately visible. Bugs and issues can "
+        "arise due to poor logic, incorrect assumptions, bad input or failures elsewhere. "
+        "In all subsequent steps, continue uncovering relevant code, examining patterns, and formulating hypotheses "
+        "with deliberate attention to detail."
    ),
    "step_number": "Current step number in the investigation sequence (starts at 1).",
    "total_steps": "Estimate of total investigation steps expected (adjustable as the process evolves).",
@@ -33,10 +38,11 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
        "evidence collected, and any partial conclusions or leads."
    ),
    "files_checked": (
-        "List all files examined during the investigation so far. Include even files ruled out, as this tracks your exploration path."
+        "List all files (as absolute paths, do not clip or shrink file names) examined during the investigation so far. "
+        "Include even files ruled out, as this tracks your exploration path."
    ),
    "relevant_files": (
-        "Subset of files_checked that contain code directly relevant to the issue. Only list those that are directly tied to the root cause or its effects."
+        "Subset of files_checked (as full absolute paths) that contain code directly relevant to the issue. Only list those that are directly tied to the root cause or its effects."
    ),
    "relevant_methods": (
        "List specific methods/functions clearly tied to the issue. Use 'ClassName.methodName' or 'functionName' format."
@@ -46,7 +52,7 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
    ),
    "confidence": "How confident you are in the current hypothesis: 'low', 'medium', or 'high'.",
    "backtrack_from_step": "If a previous step needs revision, specify the step number to backtrack from.",
-    "continuation_id": "Continuation token used for linking multi-step investigations.",
+    "continuation_id": "Continuation token used for linking multi-step investigations and continuing conversations after discovery.",
    "images": (
        "Optional. Include full absolute paths to visual debugging images (UI issues, logs, error screens) that help clarify the issue."
    ),
@@ -100,8 +106,7 @@ class DebugInvestigationRequest(ToolRequest):
    # Optional images for visual debugging
    images: Optional[list[str]] = Field(default=None, description=DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["images"])

-    # Override inherited fields to exclude them
-    model: Optional[str] = Field(default=None, exclude=True)
+    # Override inherited fields to exclude them from schema (except model which needs to be available)
    temperature: Optional[float] = Field(default=None, exclude=True)
    thinking_mode: Optional[str] = Field(default=None, exclude=True)
    use_websearch: Optional[bool] = Field(default=None, exclude=True)
@@ -209,9 +214,12 @@ class DebugIssueTool(BaseTool):
                    "items": {"type": "string"},
                    "description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["images"],
                },
+                # Add model field for proper model selection
+                "model": self.get_model_field_schema(),
            },
            # Required fields for investigation
-            "required": ["step", "step_number", "total_steps", "next_step_required", "findings"],
+            "required": ["step", "step_number", "total_steps", "next_step_required", "findings"]
+            + (["model"] if self.is_effective_auto_mode() else []),
        }
        return schema

@@ -232,10 +240,9 @@ class DebugIssueTool(BaseTool):

    def requires_model(self) -> bool:
        """
-        Debug tool manages its own model interactions.
-        It doesn't need model during investigation steps, only for final analysis.
+        Debug tool requires a model for expert analysis after investigation.
        """
-        return False
+        return True

    async def execute(self, arguments: dict[str, Any]) -> list:
        """