fix: reduce token usage by removing schema parameters (e.g. `backtrack_from_step`) that CLIs never seem to use

2025-10-22 13:31:08 +04:00
parent 3efff6056e
commit 3e27319e60
31 changed files with 86 additions and 235 deletions
--- a/docs/tools/analyze.md
+++ b/docs/tools/analyze.md
@@ -60,7 +60,6 @@ This workflow ensures methodical analysis before expert insights, resulting in d
 - `relevant_context`: Methods/functions/classes central to analysis findings
 - `issues_found`: Issues or concerns identified with severity levels
 - `confidence`: Confidence level in analysis completeness (exploring/low/medium/high/certain)
- `backtrack_from_step`: Step number to backtrack from (for revisions)
 - `images`: Visual references for analysis context

 **Initial Configuration (used in step 1):**
--- a/docs/tools/codereview.md
+++ b/docs/tools/codereview.md
@@ -75,7 +75,6 @@ The above prompt will simultaneously run two separate `codereview` tools with tw
 - `relevant_context`: Methods/functions/classes central to review findings
 - `issues_found`: Issues identified with severity levels
 - `confidence`: Confidence level in review completeness (exploring/low/medium/high/certain)
- `backtrack_from_step`: Step number to backtrack from (for revisions)
 - `images`: Visual references for review context

 **Initial Review Configuration (used in step 1):**
--- a/docs/tools/debug.md
+++ b/docs/tools/debug.md
@@ -68,7 +68,6 @@ This structured approach ensures Claude performs methodical groundwork before ex
 - `relevant_methods`: Specific methods/functions involved in the issue
 - `hypothesis`: Current best guess about the underlying cause
 - `confidence`: Confidence level in current hypothesis (exploring/low/medium/high/certain)
- `backtrack_from_step`: Step number to backtrack from (for revisions)
 - `continuation_id`: Thread ID for continuing investigations across sessions
 - `images`: Visual debugging materials (error screenshots, logs, etc.)

--- a/docs/tools/precommit.md
+++ b/docs/tools/precommit.md
@@ -135,7 +135,6 @@ Use zen and perform a thorough precommit ensuring there aren't any new regressio
 - `relevant_context`: Methods/functions/classes affected by changes
 - `issues_found`: Issues identified with severity levels
 - `precommit_type`: Type of validation to perform (external/internal, default: external - ALWAYS use external unless explicitly told otherwise)
- `backtrack_from_step`: Step number to backtrack from (for revisions)
 - `images`: Screenshots of requirements, design mockups for validation

 **Initial Configuration (used in step 1):**
--- a/docs/tools/refactor.md
+++ b/docs/tools/refactor.md
@@ -97,7 +97,6 @@ This results in Claude first performing its own expert analysis, encouraging it
 - `relevant_context`: Methods/functions/classes requiring refactoring
 - `issues_found`: Refactoring opportunities with severity and type
 - `confidence`: Confidence level in analysis completeness (exploring/incomplete/partial/complete)
- `backtrack_from_step`: Step number to backtrack from (for revisions)
 - `hypothesis`: Current assessment of refactoring priorities

 **Initial Configuration (used in step 1):**
@@ -198,4 +197,4 @@ Analyzes multiple files together to understand:
 - **Use `refactor`** for: Structural improvements, decomposition, modernization, code organization
 - **Use `codereview`** for: Finding bugs and security issues with immediate fixes
 - **Use `analyze`** for: Understanding code without making change recommendations  
- **Use `debug`** for: Solving specific runtime issues rather than structural problems
+- **Use `debug`** for: Solving specific runtime issues rather than structural problems
--- a/docs/tools/secaudit.md
+++ b/docs/tools/secaudit.md
@@ -82,7 +82,6 @@ security remediation plan using planner
 - `relevant_context`: Methods/functions/classes central to security findings
 - `issues_found`: Security issues identified with severity levels
 - `confidence`: Confidence level in security assessment completeness (exploring/low/medium/high/certain)
- `backtrack_from_step`: Step number to backtrack from (for revisions)
 - `images`: Architecture diagrams, security documentation, or visual references

 **Initial Security Configuration (used in step 1):**
--- a/docs/tools/testgen.md
+++ b/docs/tools/testgen.md
@@ -66,7 +66,6 @@ Test generation excels with extended reasoning models like Gemini Pro or O3, whi
 - `relevant_files`: Files directly needing tests (required in step 1)
 - `relevant_context`: Methods/functions/classes requiring test coverage
 - `confidence`: Confidence level in test plan completeness (exploring/low/medium/high/certain)
- `backtrack_from_step`: Step number to backtrack from (for revisions)

 **Initial Configuration (used in step 1):**
 - `prompt`: Description of what to test, testing objectives, and specific scope/focus areas (required)
@@ -217,4 +216,4 @@ For UI components and visual elements:
 - **Use `testgen`** for: Creating comprehensive test suites, filling test coverage gaps, testing new features
 - **Use `debug`** for: Diagnosing specific test failures or runtime issues
 - **Use `codereview`** for: Reviewing existing test quality and coverage
- **Use `analyze`** for: Understanding existing test structure without generating new tests
+- **Use `analyze`** for: Understanding existing test structure without generating new tests
--- a/simulator_tests/test_analyze_validation.py
+++ b/simulator_tests/test_analyze_validation.py
@@ -39,8 +39,8 @@ class AnalyzeValidationTest(ConversationBaseTest):
            if not self._test_single_analysis_session():
                return False

-            # Test 2: Analysis with backtracking
-            if not self._test_analysis_with_backtracking():
+            # Test 2: Analysis flow that requires refocusing
+            if not self._test_analysis_refocus_flow():
                return False

            # Test 3: Complete analysis with expert validation
@@ -530,13 +530,13 @@ class PerformanceTimer:
            self.logger.error(f"Single analysis session test failed: {e}")
            return False

-    def _test_analysis_with_backtracking(self) -> bool:
-        """Test analysis with backtracking to revise findings"""
+    def _test_analysis_refocus_flow(self) -> bool:
+        """Test analysis flow that requires refocusing to revise findings"""
        try:
-            self.logger.info("  1.2: Testing analysis with backtracking")
+            self.logger.info("  1.2: Testing analysis refocus workflow")

-            # Start a new analysis for testing backtracking
-            self.logger.info("    1.2.1: Start analysis for backtracking test")
+            # Start a new analysis for testing refocus behaviour
+            self.logger.info("    1.2.1: Start analysis for refocus test")
            response1, continuation_id = self.call_mcp_tool(
                "analyze",
                {
@@ -553,7 +553,7 @@ class PerformanceTimer:
            )

            if not response1 or not continuation_id:
-                self.logger.error("Failed to start backtracking test analysis")
+                self.logger.error("Failed to start refocus test analysis")
                return False

            # Step 2: Wrong direction
@@ -579,12 +579,12 @@ class PerformanceTimer:
                self.logger.error("Failed to continue to step 2")
                return False

-            # Step 3: Backtrack from step 2
-            self.logger.info("    1.2.3: Step 3 - Backtrack and revise approach")
+            # Step 3: Adjust investigation path
+            self.logger.info("    1.2.3: Step 3 - Refocus the analysis")
            response3, _ = self.call_mcp_tool(
                "analyze",
                {
-                    "step": "Backtracking - the performance issue might not be database related. Let me examine the caching and serialization patterns instead.",
+                    "step": "Refocus - the performance issue might not be database related. Let me examine the caching and serialization patterns instead.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
@@ -597,20 +597,19 @@ class PerformanceTimer:
                        {"severity": "low", "description": "Cache key generation lacks proper escaping"},
                    ],
                    "confidence": "medium",
-                    "backtrack_from_step": 2,  # Backtrack from step 2
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
-                self.logger.error("Failed to backtrack")
+                self.logger.error("Failed to refocus analysis")
                return False

            response3_data = self._parse_analyze_response(response3)
            if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_analysis"):
                return False

-            self.logger.info("    ✅ Backtracking working correctly")
+            self.logger.info("    ✅ Analysis refocus flow working correctly")
            return True

        except Exception as e:
--- a/simulator_tests/test_codereview_validation.py
+++ b/simulator_tests/test_codereview_validation.py
@@ -39,8 +39,8 @@ class CodeReviewValidationTest(ConversationBaseTest):
            if not self._test_single_review_session():
                return False

-            # Test 2: Review with backtracking
-            if not self._test_review_with_backtracking():
+            # Test 2: Review flow that requires refocusing
+            if not self._test_review_refocus_flow():
                return False

            # Test 3: Complete review with expert analysis
@@ -336,13 +336,13 @@ class ConfigurationManager:
            self.logger.error(f"Single review session test failed: {e}")
            return False

-    def _test_review_with_backtracking(self) -> bool:
-        """Test code review with backtracking to revise findings"""
+    def _test_review_refocus_flow(self) -> bool:
+        """Test code review flow that revises findings by refocusing"""
        try:
-            self.logger.info("  1.2: Testing code review with backtracking")
+            self.logger.info("  1.2: Testing code review refocus workflow")

-            # Start a new review for testing backtracking
-            self.logger.info("    1.2.1: Start review for backtracking test")
+            # Start a new review for testing refocus behaviour
+            self.logger.info("    1.2.1: Start review for refocus test")
            response1, continuation_id = self.call_mcp_tool(
                "codereview",
                {
@@ -359,7 +359,7 @@ class ConfigurationManager:
            )

            if not response1 or not continuation_id:
-                self.logger.error("Failed to start backtracking test review")
+                self.logger.error("Failed to start refocus test review")
                return False

            # Step 2: Initial direction
@@ -386,12 +386,12 @@ class ConfigurationManager:
                self.logger.error("Failed to continue to step 2")
                return False

-            # Step 3: Backtrack and focus on security
-            self.logger.info("    1.2.3: Step 3 - Backtrack to focus on security issues")
+            # Step 3: Shift focus based on new evidence
+            self.logger.info("    1.2.3: Step 3 - Refocus on security issues")
            response3, _ = self.call_mcp_tool(
                "codereview",
                {
-                    "step": "Backtracking - need to focus on the critical security issues I initially missed. Found hardcoded secrets and credentials in plain text.",
+                    "step": "Refocusing - need to concentrate on the critical security issues I initially missed. Found hardcoded secrets and credentials in plain text.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
@@ -405,24 +405,23 @@ class ConfigurationManager:
                        {"severity": "high", "description": "Over-engineered configuration system"},
                    ],
                    "confidence": "high",
-                    "backtrack_from_step": 2,  # Backtrack from step 2
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
-                self.logger.error("Failed to backtrack")
+                self.logger.error("Failed to refocus")
                return False

            response3_data = self._parse_review_response(response3)
            if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_code_review"):
                return False

-            self.logger.info("    ✅ Backtracking working correctly")
+            self.logger.info("    ✅ Refocus flow working correctly")
            return True

        except Exception as e:
-            self.logger.error(f"Backtracking test failed: {e}")
+            self.logger.error(f"Refocus test failed: {e}")
            return False

    def _test_complete_review_with_analysis(self) -> bool:
--- a/simulator_tests/test_debug_validation.py
+++ b/simulator_tests/test_debug_validation.py
@@ -39,8 +39,8 @@ class DebugValidationTest(ConversationBaseTest):
            if not self._test_single_investigation_session():
                return False

-            # Test 2: Investigation with backtracking
-            if not self._test_investigation_with_backtracking():
+            # Test 2: Investigation flow that requires refinement
+            if not self._test_investigation_refine_flow():
                return False

            # Test 3: Complete investigation with expert analysis
@@ -230,13 +230,13 @@ RuntimeError: dictionary changed size during iteration
            self.logger.error(f"Single investigation session test failed: {e}")
            return False

-    def _test_investigation_with_backtracking(self) -> bool:
-        """Test investigation with backtracking to revise findings"""
+    def _test_investigation_refine_flow(self) -> bool:
+        """Test investigation flow that requires refining the approach"""
        try:
-            self.logger.info("  1.2: Testing investigation with backtracking")
+            self.logger.info("  1.2: Testing investigation refinement workflow")

-            # Start a new investigation for testing backtracking
-            self.logger.info("    1.2.1: Start investigation for backtracking test")
+            # Start a new investigation for testing refinement behaviour
+            self.logger.info("    1.2.1: Start investigation for refinement test")
            response1, continuation_id = self.call_mcp_tool(
                "debug",
                {
@@ -251,7 +251,7 @@ RuntimeError: dictionary changed size during iteration
            )

            if not response1 or not continuation_id:
-                self.logger.error("Failed to start backtracking test investigation")
+                self.logger.error("Failed to start refinement test investigation")
                return False

            # Step 2: Wrong direction
@@ -277,11 +277,11 @@ RuntimeError: dictionary changed size during iteration
                return False

-            # Step 3: Backtrack from step 2
+            # Step 3: Refine the investigation path
-            self.logger.info("    1.2.3: Step 3 - Backtrack and revise approach")
+            self.logger.info("    1.2.3: Step 3 - Refine investigation path")
            response3, _ = self.call_mcp_tool(
                "debug",
                {
-                    "step": "Backtracking - the issue might not be database related. Let me investigate the data processing algorithm instead.",
+                    "step": "Refocusing - the issue might not be database related. Let me investigate the data processing algorithm instead.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
@@ -291,24 +291,23 @@ RuntimeError: dictionary changed size during iteration
                    "relevant_context": ["DataProcessor.process_batch"],
                    "hypothesis": "Inefficient algorithm causing performance issues",
                    "confidence": "medium",
-                    "backtrack_from_step": 2,  # Backtrack from step 2
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
-                self.logger.error("Failed to backtrack")
+                self.logger.error("Failed to refine investigation")
                return False

            response3_data = self._parse_debug_response(response3)
            if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_investigation"):
                return False

-            self.logger.info("    ✅ Backtracking working correctly")
+            self.logger.info("    ✅ Investigation refinement working correctly")
            return True

        except Exception as e:
-            self.logger.error(f"Backtracking test failed: {e}")
+            self.logger.error(f"Investigation refinement test failed: {e}")
            return False

    def _test_complete_investigation_with_analysis(self) -> bool:
--- a/simulator_tests/test_precommitworkflow_validation.py
+++ b/simulator_tests/test_precommitworkflow_validation.py
@@ -39,8 +39,8 @@ class PrecommitWorkflowValidationTest(ConversationBaseTest):
            if not self._test_single_validation_session():
                return False

-            # Test 2: Validation with backtracking
-            if not self._test_validation_with_backtracking():
+            # Test 2: Validation flow that requires refocusing
+            if not self._test_validation_refocus_flow():
                return False

            # Test 3: Complete validation with expert analysis
@@ -263,13 +263,13 @@ REQUIREMENTS:
            self.logger.error(f"Single validation session test failed: {e}")
            return False

-    def _test_validation_with_backtracking(self) -> bool:
-        """Test validation with backtracking to revise findings"""
+    def _test_validation_refocus_flow(self) -> bool:
+        """Test validation workflow that requires refocusing to revise findings"""
        try:
-            self.logger.info("  1.2: Testing validation with backtracking")
+            self.logger.info("  1.2: Testing validation refocus workflow")

-            # Start a new validation for testing backtracking
-            self.logger.info("    1.2.1: Start validation for backtracking test")
+            # Start a new validation for testing refocus behaviour
+            self.logger.info("    1.2.1: Start validation for refocus test")
            response1, continuation_id = self.call_mcp_tool(
                "precommit",
                {
@@ -285,7 +285,7 @@ REQUIREMENTS:
            )

            if not response1 or not continuation_id:
-                self.logger.error("Failed to start backtracking test validation")
+                self.logger.error("Failed to start refocus test validation")
                return False

            # Step 2: Wrong direction
@@ -309,12 +309,12 @@ REQUIREMENTS:
                self.logger.error("Failed to continue to step 2")
                return False

-            # Step 3: Backtrack from step 2
-            self.logger.info("    1.2.3: Step 3 - Backtrack and revise approach")
+            # Step 3: Shift investigation focus
+            self.logger.info("    1.2.3: Step 3 - Refocus and revise approach")
            response3, _ = self.call_mcp_tool(
                "precommit",
                {
-                    "step": "Backtracking - the issue might not be database configuration. Let me examine the actual SQL queries and data access patterns instead.",
+                    "step": "Refocusing - the issue might not be database configuration. Let me examine the actual SQL queries and data access patterns instead.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
@@ -326,24 +326,23 @@ REQUIREMENTS:
                        {"severity": "medium", "description": "N+1 query pattern in user profile loading"}
                    ],
                    # Assessment fields removed - using precommit_type instead
-                    "backtrack_from_step": 2,  # Backtrack from step 2
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
-                self.logger.error("Failed to backtrack")
+                self.logger.error("Failed to refocus")
                return False

            response3_data = self._parse_precommit_response(response3)
            if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_validation"):
                return False

-            self.logger.info("    ✅ Backtracking working correctly")
+            self.logger.info("    ✅ Refocus flow working correctly")
            return True

        except Exception as e:
-            self.logger.error(f"Backtracking test failed: {e}")
+            self.logger.error(f"Refocus test failed: {e}")
            return False

    def _test_complete_validation_with_analysis(self) -> bool:
--- a/simulator_tests/test_refactor_validation.py
+++ b/simulator_tests/test_refactor_validation.py
@@ -38,8 +38,8 @@ class RefactorValidationTest(ConversationBaseTest):
            if not self._test_single_refactoring_session():
                return False

-            # Test 2: Refactoring analysis with backtracking
-            if not self._test_refactoring_with_backtracking():
+            # Test 2: Refactoring analysis requiring refocus
+            if not self._test_refactoring_refocus_flow():
                return False

            # Test 3: Complete refactoring analysis with expert analysis
@@ -389,13 +389,13 @@ class UserData:
            self.logger.error(f"Single refactoring session test failed: {e}")
            return False

-    def _test_refactoring_with_backtracking(self) -> bool:
-        """Test refactoring analysis with backtracking to revise findings"""
+    def _test_refactoring_refocus_flow(self) -> bool:
+        """Test refactoring analysis that shifts focus mid-investigation"""
        try:
-            self.logger.info("  1.2: Testing refactoring analysis with backtracking")
+            self.logger.info("  1.2: Testing refactoring analysis refocus workflow")

-            # Start a new refactoring analysis for testing backtracking
-            self.logger.info("    1.2.1: Start refactoring analysis for backtracking test")
+            # Start a new refactoring analysis for testing refocus behaviour
+            self.logger.info("    1.2.1: Start refactoring analysis for refocus test")
            response1, continuation_id = self.call_mcp_tool(
                "refactor",
                {
@@ -412,7 +412,7 @@ class UserData:
            )

            if not response1 or not continuation_id:
-                self.logger.error("Failed to start backtracking test refactoring analysis")
+                self.logger.error("Failed to start refocus test refactoring analysis")
                return False

            # Step 2: Wrong direction
@@ -437,11 +437,11 @@ class UserData:
                return False

-            # Step 3: Backtrack from step 2
+            # Step 3: Refocus on function decomposition
-            self.logger.info("    1.2.3: Step 3 - Backtrack and focus on function decomposition")
+            self.logger.info("    1.2.3: Step 3 - Refocus on function decomposition")
            response3, _ = self.call_mcp_tool(
                "refactor",
                {
-                    "step": "Backtracking - the real decomposition opportunity is the god function process_everything. Let me analyze function-level refactoring instead.",
+                    "step": "Refocusing - the real decomposition opportunity is the god function process_everything. Let me analyze function-level refactoring instead.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
@@ -462,13 +462,12 @@ class UserData:
                        },
                    ],
                    "confidence": "partial",
-                    "backtrack_from_step": 2,  # Backtrack from step 2
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
-                self.logger.error("Failed to backtrack")
+                self.logger.error("Failed to refocus")
                return False

            response3_data = self._parse_refactor_response(response3)
@@ -477,11 +476,11 @@ class UserData:
            ):
                return False

-            self.logger.info("    ✅ Backtracking working correctly for refactoring analysis")
+            self.logger.info("    ✅ Refocus working correctly for refactoring analysis")
            return True

        except Exception as e:
-            self.logger.error(f"Refactoring backtracking test failed: {e}")
+            self.logger.error(f"Refocusing test failed: {e}")
            return False

    def _test_complete_refactoring_with_analysis(self) -> bool:
--- a/simulator_tests/test_thinkdeep_validation.py
+++ b/simulator_tests/test_thinkdeep_validation.py
@@ -39,8 +39,8 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
            if not self._test_single_thinking_session():
                return False

-            # Test 2: Thinking with backtracking
-            if not self._test_thinking_with_backtracking():
+            # Test 2: Thinking flow that requires refocusing
+            if not self._test_thinking_refocus_flow():
                return False

            # Test 3: Complete thinking with expert analysis
@@ -243,13 +243,13 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
            self.logger.error(f"Single thinking session test failed: {e}")
            return False

-    def _test_thinking_with_backtracking(self) -> bool:
-        """Test thinking with backtracking to revise analysis"""
+    def _test_thinking_refocus_flow(self) -> bool:
+        """Test thinking workflow that shifts direction mid-analysis"""
        try:
-            self.logger.info("  1.2: Testing thinking with backtracking")
+            self.logger.info("  1.2: Testing thinking refocus workflow")

-            # Start a new thinking session for testing backtracking
-            self.logger.info("    1.2.1: Start thinking for backtracking test")
+            # Start a new thinking session for testing refocus behaviour
+            self.logger.info("    1.2.1: Start thinking session for refocus test")
            response1, continuation_id = self.call_mcp_tool(
                "thinkdeep",
                {
@@ -266,7 +266,7 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
            )

            if not response1 or not continuation_id:
-                self.logger.error("Failed to start backtracking test thinking")
+                self.logger.error("Failed to start refocus test thinking")
                return False

            # Step 2: Initial direction
@@ -300,7 +300,7 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
            response3, _ = self.call_mcp_tool(
                "thinkdeep",
                {
-                    "step": "Backtracking - maybe shared database with service-specific schemas is better initially. Then gradually extract databases as services mature.",
+                    "step": "Refocusing - maybe shared database with service-specific schemas is better initially. Then gradually extract databases as services mature.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
@@ -309,24 +309,23 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
                    "relevant_files": [self.architecture_file, self.requirements_file],
                    "relevant_context": ["shared_database", "bounded_contexts", "gradual_extraction"],
                    "confidence": "medium",
-                    "backtrack_from_step": 2,  # Backtrack from step 2
                    "continuation_id": continuation_id,
                },
            )

            if not response3:
-                self.logger.error("Failed to backtrack")
+                self.logger.error("Failed to refocus")
                return False

            response3_data = self._parse_thinkdeep_response(response3)
            if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_thinkdeep"):
                return False

-            self.logger.info("    ✅ Backtracking working correctly")
+            self.logger.info("    ✅ Refocus working correctly")
            return True

        except Exception as e:
-            self.logger.error(f"Backtracking test failed: {e}")
+            self.logger.error(f"Refocus test failed: {e}")
            return False

    def _test_complete_thinking_with_analysis(self) -> bool:
--- a/tests/test_precommit_workflow.py
+++ b/tests/test_precommit_workflow.py
@@ -108,20 +108,6 @@ class TestPrecommitWorkflowTool:
        assert len(request.issues_found) == 1
        assert len(request.images) == 1

-    def test_request_model_backtracking(self):
-        """Test backtracking functionality"""
-        request = PrecommitRequest(
-            step="Backtracking from previous step",
-            step_number=3,
-            total_steps=4,
-            next_step_required=True,
-            findings="Revised findings after backtracking",
-            backtrack_from_step=2,  # Backtrack from step 2
-        )
-
-        assert request.backtrack_from_step == 2
-        assert request.step_number == 3
-
    def test_precommit_specific_fields(self):
        """Test precommit-specific configuration fields"""
        request = PrecommitRequest(
--- a/tests/test_secaudit.py
+++ b/tests/test_secaudit.py
@@ -298,7 +298,6 @@ class TestSecauditTool:
            "relevant_context",
            "issues_found",
            "confidence",
-            "backtrack_from_step",
            "images",
            "security_scope",
            "threat_level",
--- a/tools/analyze.py
+++ b/tools/analyze.py
@@ -68,7 +68,6 @@ ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = {
        "List methods/functions central to analysis findings, in 'ClassName.methodName' or 'functionName' format. "
        "Prioritize those demonstrating key patterns, architectural decisions, or improvement opportunities."
    ),
-    "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."),
    "images": (
        "Optional absolute paths to architecture diagrams or visual references that help with analysis context."
    ),
@@ -108,11 +107,6 @@ class AnalyzeWorkflowRequest(WorkflowRequest):
        description="Issues or concerns identified during analysis, each with severity level (critical, high, medium, low)",
    )

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(
-        None, description=ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
-
    # Optional images for visual context
    images: Optional[list[str]] = Field(default=None, description=ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["images"])

@@ -223,11 +217,6 @@ class AnalyzeTool(WorkflowTool):
                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "images": {
                "type": "array",
                "items": {"type": "string"},
--- a/tools/codereview.py
+++ b/tools/codereview.py
@@ -53,7 +53,6 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
    "relevant_context": "Functions or methods central to findings (e.g. 'Class.method' or 'function_name').",
    "issues_found": "Issues with severity (critical/high/medium/low) and descriptions.",
    "review_validation_type": "Set 'external' (default) for expert follow-up or 'internal' for local-only review.",
-    "backtrack_from_step": "If revising earlier analysis, note the step number to revisit.",
    "images": "Optional diagram or screenshot paths that clarify review context.",
    "review_type": "Review focus: full, security, performance, or quick.",
    "focus_on": "Optional note on areas to emphasise (e.g. 'threading', 'auth flow').",
@@ -91,11 +90,6 @@ class CodeReviewRequest(WorkflowRequest):
        "external", description=CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS.get("review_validation_type", "")
    )

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(
-        None, description=CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
-
    # Optional images for visual context
    images: Optional[list[str]] = Field(default=None, description=CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["images"])

@@ -206,11 +200,6 @@ class CodeReviewTool(WorkflowTool):
                "default": "external",
                "description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS.get("review_validation_type", ""),
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "issues_found": {
                "type": "array",
                "items": {"type": "object"},
--- a/tools/consensus.py
+++ b/tools/consensus.py
@@ -101,7 +101,6 @@ class ConsensusRequest(WorkflowRequest):
    relevant_context: list[str] | None = Field(default_factory=list, exclude=True)
    issues_found: list[dict] | None = Field(default_factory=list, exclude=True)
    hypothesis: str | None = Field(None, exclude=True)
-    backtrack_from_step: int | None = Field(None, exclude=True)

    @model_validator(mode="after")
    def validate_step_one_requirements(self):
@@ -293,7 +292,6 @@ of the evidence, even when it strongly points in one direction.""",
            "relevant_context",  # Not used in consensus workflow
            "issues_found",  # Not used in consensus workflow
            "hypothesis",  # Not used in consensus workflow
-            "backtrack_from_step",  # Not used in consensus workflow
            "confidence",  # Not used in consensus workflow
        ]

--- a/tools/debug.py
+++ b/tools/debug.py
@@ -3,8 +3,8 @@ Debug tool - Systematic root cause analysis and debugging assistance

 This tool provides a structured workflow for investigating complex bugs and issues.
 It guides you through systematic investigation steps with forced pauses between each step
-to ensure thorough code examination before proceeding. The tool supports backtracking,
-hypothesis evolution, and expert analysis integration for comprehensive debugging.
+to ensure thorough code examination before proceeding. The tool supports hypothesis evolution
+and expert analysis integration for comprehensive debugging.

 Key features:
 - Step-by-step investigation workflow with progress tracking
@@ -65,7 +65,6 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
        "WARNING: Do NOT use 'certain' unless the issue can be fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. "
        "Using 'certain' means you have ABSOLUTE confidence locally and PREVENTS external model validation."
    ),
-    "backtrack_from_step": "Step number to backtrack from if revision needed.",
    "images": "Optional screenshots/visuals clarifying issue (absolute paths).",
 }

@@ -93,11 +92,6 @@ class DebugInvestigationRequest(WorkflowRequest):
    hypothesis: Optional[str] = Field(None, description=DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["hypothesis"])
    confidence: Optional[str] = Field("low", description=DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["confidence"])

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(
-        None, description=DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
-
    # Optional images for visual debugging
    images: Optional[list[str]] = Field(default=None, description=DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["images"])

@@ -193,11 +187,6 @@ class DebugIssueTool(WorkflowTool):
                "type": "string",
                "description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["hypothesis"],
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "images": {
                "type": "array",
                "items": {"type": "string"},
--- a/tools/docgen.py
+++ b/tools/docgen.py
@@ -197,7 +197,6 @@ class DocgenTool(WorkflowTool):
        excluded_workflow_fields = [
            "confidence",  # Documentation doesn't use confidence levels
            "hypothesis",  # Documentation doesn't use hypothesis
-            "backtrack_from_step",  # Documentation uses simpler error recovery
            "files_checked",  # Documentation uses doc_files and doc_methods instead for better tracking
        ]

--- a/tools/planner.py
+++ b/tools/planner.py
@@ -83,7 +83,6 @@ class PlannerRequest(WorkflowRequest):
    issues_found: list[dict] = Field(default_factory=list, exclude=True, description="Planning doesn't find issues")
    confidence: str = Field(default="planning", exclude=True, description="Planning uses different confidence model")
    hypothesis: str | None = Field(default=None, exclude=True, description="Planning doesn't use hypothesis")
-    backtrack_from_step: int | None = Field(default=None, exclude=True, description="Planning uses revision instead")

    # Exclude other non-planning fields
    temperature: float | None = Field(default=None, exclude=True)
@@ -211,7 +210,6 @@ class PlannerTool(WorkflowTool):
            "issues_found",  # Planning doesn't find issues
            "confidence",  # Planning uses different confidence model
            "hypothesis",  # Planning doesn't use hypothesis
-            "backtrack_from_step",  # Planning uses revision instead
        ]

        excluded_common_fields = [
--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -4,7 +4,7 @@ Precommit Workflow tool - Step-by-step pre-commit validation with expert analysi
 This tool provides a structured workflow for comprehensive pre-commit validation.
 It guides the CLI agent through systematic investigation steps with forced pauses between each step
 to ensure thorough code examination, git change analysis, and issue detection before proceeding.
-The tool supports backtracking, finding updates, and expert analysis integration.
+The tool supports updating findings and integrating expert analysis.

 Key features:
 - Step-by-step pre-commit investigation workflow with progress tracking
@@ -51,7 +51,6 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
    "relevant_context": "Key functions/methods touched by the change (e.g. 'Class.method', 'function_name').",
    "issues_found": "List issues with severity (critical/high/medium/low) plus descriptions (bugs, security, performance, coverage).",
    "precommit_type": "'external' (default, triggers expert model) or 'internal' (local-only validation).",
-    "backtrack_from_step": "Step number to revisit when revising earlier analysis.",
    "images": "Optional absolute paths to screenshots or diagrams that aid validation.",
    "path": "Absolute path to the repository root. Required in step 1.",
    "compare_to": "Optional git ref (branch/tag/commit) to diff against; falls back to staged/unstaged changes.",
@@ -89,11 +88,6 @@ class PrecommitRequest(WorkflowRequest):
        "external", description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["precommit_type"]
    )

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(
-        None, description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
-
    # Optional images for visual validation
    images: Optional[list[str]] = Field(default=None, description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["images"])

@@ -207,11 +201,6 @@ class PrecommitTool(WorkflowTool):
                "default": "external",
                "description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["precommit_type"],
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "issues_found": {
                "type": "array",
                "items": {"type": "object"},
--- a/tools/refactor.py
+++ b/tools/refactor.py
@@ -78,7 +78,6 @@ REFACTOR_FIELD_DESCRIPTIONS = {
        "WARNING: Use 'complete' ONLY when fully analyzed and can provide recommendations without expert help. "
        "'complete' PREVENTS expert validation. Use 'partial' for large files or uncertain analysis."
    ),
-    "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."),
    "images": (
        "Optional list of absolute paths to architecture diagrams, UI mockups, design documents, or visual references "
        "that help with refactoring context. Only include if they materially assist understanding or assessment."
@@ -113,9 +112,6 @@ class RefactorRequest(WorkflowRequest):
        "incomplete", description=REFACTOR_FIELD_DESCRIPTIONS["confidence"]
    )

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(None, description=REFACTOR_FIELD_DESCRIPTIONS["backtrack_from_step"])
-
    # Optional images for visual context
    images: Optional[list[str]] = Field(default=None, description=REFACTOR_FIELD_DESCRIPTIONS["images"])

@@ -228,11 +224,6 @@ class RefactorTool(WorkflowTool):
                "default": "incomplete",
                "description": REFACTOR_FIELD_DESCRIPTIONS["confidence"],
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": REFACTOR_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "issues_found": {
                "type": "array",
                "items": {"type": "object"},
--- a/tools/secaudit.py
+++ b/tools/secaudit.py
@@ -47,7 +47,6 @@ SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = {
    "relevant_context": "Security-critical classes/methods (e.g. 'AuthService.login', 'encryption_helper').",
    "issues_found": "Security issues with severity (critical/high/medium/low) and descriptions (vulns, auth flaws, injection, crypto, config).",
    "confidence": "exploring/low/medium/high/very_high/almost_certain/certain. 'certain' blocks external validation—use only when fully complete.",
-    "backtrack_from_step": "Step number to revisit when revising earlier audit work.",
    "images": "Optional absolute paths to diagrams or threat models that inform the audit.",
    "security_scope": "Security context (web, mobile, API, cloud, etc.) including stack, user types, data sensitivity, and threat landscape.",
    "threat_level": "Assess the threat level: low (internal/low-risk), medium (customer-facing/business data), high (regulated or sensitive), critical (financial/healthcare/PII).",
@@ -82,11 +81,6 @@ class SecauditRequest(WorkflowRequest):
    )
    confidence: Optional[str] = Field("low", description=SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"])

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(
-        None, description=SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
-
    # Optional images for visual context
    images: Optional[list[str]] = Field(default=None, description=SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["images"])

@@ -398,11 +392,6 @@ class SecauditTool(WorkflowTool):
                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "issues_found": {
                "type": "array",
                "items": {"type": "object"},
--- a/tools/shared/base_models.py
+++ b/tools/shared/base_models.py
@@ -49,7 +49,6 @@ WORKFLOW_FIELD_DESCRIPTIONS = {
        "almost_certain (near complete confidence), certain (100% confidence locally - no external validation needed)"
    ),
    "hypothesis": "Current theory about issue/goal based on work",
-    "backtrack_from_step": "Step number to backtrack from if work needs revision",
    "use_assistant_model": (
        "Use assistant model for expert analysis after workflow steps. "
        "False skips expert analysis, relies solely on your personal investigation. "
@@ -122,9 +121,6 @@ class WorkflowRequest(BaseWorkflowRequest):

    # Optional workflow fields
    hypothesis: Optional[str] = Field(None, description=WORKFLOW_FIELD_DESCRIPTIONS["hypothesis"])
-    backtrack_from_step: Optional[int] = Field(
-        None, ge=1, description=WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
    use_assistant_model: Optional[bool] = Field(True, description=WORKFLOW_FIELD_DESCRIPTIONS["use_assistant_model"])

    @field_validator("files_checked", "relevant_files", "relevant_context", mode="before")
--- a/tools/testgen.py
+++ b/tools/testgen.py
@@ -4,8 +4,7 @@ TestGen Workflow tool - Step-by-step test generation with expert validation
 This tool provides a structured workflow for comprehensive test generation.
 It guides the CLI agent through systematic investigation steps with forced pauses between each step
 to ensure thorough code examination, test planning, and pattern identification before proceeding.
-The tool supports backtracking, finding updates, and expert analysis integration for
-comprehensive test suite generation.
+The tool supports updating findings and integrating expert analysis for comprehensive test suite generation.

 Key features:
 - Step-by-step test generation workflow with progress tracking
@@ -52,7 +51,6 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
        "Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
        "Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
-    "backtrack_from_step": "Step number to revisit if earlier findings need revision.",
    "images": "Optional absolute paths to diagrams or visuals that clarify the system under test.",
 }

@@ -79,11 +77,6 @@ class TestGenRequest(WorkflowRequest):
    )
    confidence: Optional[str] = Field("low", description=TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"])

-    # Optional backtracking field
-    backtrack_from_step: Optional[int] = Field(
-        None, description=TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"]
-    )
-
    # Optional images for visual context
    images: Optional[list[str]] = Field(default=None, description=TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["images"])

@@ -184,11 +177,6 @@ class TestGenTool(WorkflowTool):
                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
-            "backtrack_from_step": {
-                "type": "integer",
-                "minimum": 1,
-                "description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"],
-            },
            "images": {
                "type": "array",
                "items": {"type": "string"},
--- a/tools/thinkdeep.py
+++ b/tools/thinkdeep.py
@@ -71,13 +71,6 @@ class ThinkDeepWorkflowRequest(WorkflowRequest):
        description="exploring/low/medium/high/very_high/almost_certain/certain. CRITICAL: 'certain' PREVENTS external validation.",
    )

-    # Advanced workflow features
-    backtrack_from_step: Optional[int] = Field(
-        default=None,
-        description="Step number to backtrack from if revision needed.",
-        ge=1,
-    )
-
    # Expert analysis configuration - keep these fields available for configuring the final assistant model
    # in expert analysis (commented out exclude=True)
    temperature: Optional[float] = Field(
--- a/tools/tracer.py
+++ b/tools/tracer.py
@@ -115,10 +115,6 @@ class TracerRequest(WorkflowRequest):
    # Exclude fields not relevant to tracing workflow
    issues_found: list[dict] = Field(default_factory=list, exclude=True, description="Tracing doesn't track issues")
    hypothesis: Optional[str] = Field(default=None, exclude=True, description="Tracing doesn't use hypothesis")
-    backtrack_from_step: Optional[int] = Field(
-        default=None, exclude=True, description="Tracing doesn't use backtracking"
-    )
-
    # Exclude other non-tracing fields
    temperature: Optional[float] = Field(default=None, exclude=True)
    thinking_mode: Optional[str] = Field(default=None, exclude=True)
@@ -220,7 +216,6 @@ class TracerTool(WorkflowTool):
        excluded_workflow_fields = [
            "issues_found",  # Tracing doesn't track issues
            "hypothesis",  # Tracing doesn't use hypothesis
-            "backtrack_from_step",  # Tracing doesn't use backtracking
        ]

        # Exclude common fields that tracing doesn't need
--- a/tools/workflow/base.py
+++ b/tools/workflow/base.py
@@ -76,7 +76,7 @@ class WorkflowTool(BaseTool, BaseWorkflowMixin):
        Workflow tools automatically get all standard workflow fields:
        - step, step_number, total_steps, next_step_required
        - findings, files_checked, relevant_files, relevant_context
-        - issues_found, confidence, hypothesis, backtrack_from_step
+        - issues_found, confidence, hypothesis
        - plus common fields (model, temperature, etc.)

        Override this method to add additional tool-specific fields.
--- a/tools/workflow/schema_builders.py
+++ b/tools/workflow/schema_builders.py
@@ -72,11 +72,6 @@ class WorkflowSchemaBuilder:
            "type": "string",
            "description": WORKFLOW_FIELD_DESCRIPTIONS["hypothesis"],
        },
-        "backtrack_from_step": {
-            "type": "integer",
-            "minimum": 1,
-            "description": WORKFLOW_FIELD_DESCRIPTIONS["backtrack_from_step"],
-        },
        "use_assistant_model": {
            "type": "boolean",
            "default": True,
--- a/tools/workflow/workflow_mixin.py
+++ b/tools/workflow/workflow_mixin.py
@@ -701,11 +701,6 @@ class BaseWorkflowMixin(ABC):
                # Allow tools to store initial description for expert analysis
                self.store_initial_issue(request.step)

-            # Handle backtracking if requested
-            backtrack_step = self.get_backtrack_step(request)
-            if backtrack_step:
-                self._handle_backtracking(backtrack_step)
-
            # Process work step - allow tools to customize field mapping
            step_data = self.prepare_step_data(request)

@@ -992,13 +987,6 @@ class BaseWorkflowMixin(ABC):
        except AttributeError:
            return {}

-    def get_backtrack_step(self, request) -> Optional[int]:
-        """Get backtrack step from request. Override for custom backtrack handling."""
-        try:
-            return request.backtrack_from_step
-        except AttributeError:
-            return None
-
    def store_initial_issue(self, step_description: str):
        """Store initial issue description. Override for custom storage."""
        # Default implementation - tools can override to store differently
@@ -1378,13 +1366,6 @@ class BaseWorkflowMixin(ABC):

        return response_data

-    def _handle_backtracking(self, backtrack_step: int):
-        """Handle backtracking to a previous step"""
-        # Remove findings after the backtrack point
-        self.work_history = [s for s in self.work_history if s["step_number"] < backtrack_step]
-        # Reprocess consolidated findings
-        self._reprocess_consolidated_findings()
-
    def _update_consolidated_findings(self, step_data: dict):
        """Update consolidated findings with new step data"""
        self.consolidated_findings.files_checked.update(step_data.get("files_checked", []))