From b9c2e4f5e674b75d5b4406d7387853b2d6572465 Mon Sep 17 00:00:00 2001
From: Fahad <fahad@2doapp.com>
Date: Sat, 28 Jun 2025 22:30:58 +0400
Subject: [PATCH] Tweaks to prompts to prevent Claude from becoming
 overconfident

---
 config.py           |  2 +-
 tools/codereview.py |  3 ++-
 tools/debug.py      | 38 +++++++++++++++++++++++++++++++++++---
 tools/precommit.py  |  3 ++-
 tools/refactor.py   |  3 ++-
 5 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/config.py b/config.py
index acdeade..5094181 100644
--- a/config.py
+++ b/config.py
@@ -16,7 +16,7 @@ import os
 # Semantic versioning: MAJOR.MINOR.PATCH
 __version__ = "5.7.5"
 # Last update date in ISO format
-__updated__ = "2025-06-27"
+__updated__ = "2025-06-28"
 # Primary maintainer
 __author__ = "Fahad Gilani"
 
diff --git a/tools/codereview.py b/tools/codereview.py
index 55cb6a2..1aa6416 100644
--- a/tools/codereview.py
+++ b/tools/codereview.py
@@ -96,7 +96,8 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
         "'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
         "code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
         "Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
-        "Using 'certain' means you have complete confidence locally and prevents external model validation."
+        "Using 'certain' means you have complete confidence locally and prevents external model validation. Also do "
+        "NOT set confidence to 'certain' if the user has strongly requested that external review must be performed."
     ),
     "backtrack_from_step": (
         "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
diff --git a/tools/debug.py b/tools/debug.py
index 456cc70..bfe755f 100644
--- a/tools/debug.py
+++ b/tools/debug.py
@@ -95,7 +95,8 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
         "'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
         "confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
         "fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
-        "means you have complete confidence locally and prevents external model validation."
+        "means you have complete confidence locally and prevents external model validation. Also do "
+        "NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
     ),
     "backtrack_from_step": (
         "If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
@@ -284,7 +285,7 @@ class DebugIssueTool(WorkflowTool):
                 "Check for edge cases, boundary conditions, and assumptions in the code",
                 "Look for related configuration, dependencies, or external factors",
             ]
-        elif confidence in ["medium", "high", "very_high", "almost_certain"]:
+        elif confidence in ["medium", "high", "very_high"]:
             # Close to root cause - need confirmation
             return [
                 "Examine the exact code sections where you believe the issue occurs",
@@ -292,6 +293,14 @@ class DebugIssueTool(WorkflowTool):
                 "Verify your hypothesis with concrete code evidence",
                 "Check for any similar patterns elsewhere in the codebase",
             ]
+        elif confidence == "almost_certain":
+            # Almost certain - final verification before conclusion
+            return [
+                "Finalize your root cause analysis with specific evidence",
+                "Document the complete chain of causation from symptom to root cause",
+                "Verify the minimal fix approach is correct",
+                "Consider if expert analysis would provide additional insights",
+            ]
         else:
             # General investigation needed
             return [
@@ -324,6 +333,19 @@ class DebugIssueTool(WorkflowTool):
             f"=== ISSUE DESCRIPTION ===\n{self.initial_issue or 'Investigation initiated'}\n=== END DESCRIPTION ==="
         ]
 
+        # Add special note if confidence is almost_certain
+        if consolidated_findings.confidence == "almost_certain":
+            context_parts.append(
+                "\n=== IMPORTANT: ALMOST CERTAIN CONFIDENCE ===\n"
+                "The agent has reached 'almost_certain' confidence but has NOT confirmed the bug with 100% certainty. "
+                "Your role is to:\n"
+                "1. Validate the agent's hypothesis and investigation\n"
+                "2. Identify any missing evidence or overlooked aspects\n"
+                "3. Provide additional insights that could confirm or refute the hypothesis\n"
+                "4. Help finalize the root cause analysis with complete certainty\n"
+                "=== END IMPORTANT ==="
+            )
+
         # Add investigation summary
         investigation_summary = self._build_investigation_summary(consolidated_findings)
         context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
@@ -421,7 +443,7 @@ class DebugIssueTool(WorkflowTool):
                 + f"\n\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
                 + "completing these investigations."
             )
-        elif confidence in ["medium", "high"]:
+        elif confidence in ["medium", "high", "very_high"]:
             next_steps = (
                 f"WAIT! Your hypothesis needs verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\n"
                 + "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
@@ -430,6 +452,16 @@ class DebugIssueTool(WorkflowTool):
                 f"or engineering assistant for clarification. Document findings with specific file:line references, "
                 f"then call {self.get_name()} with step_number: {step_number + 1}."
             )
+        elif confidence == "almost_certain":
+            next_steps = (
+                "ALMOST CERTAIN - Prepare for final analysis. REQUIRED ACTIONS:\n"
+                + "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
+                + "\n\nIMPORTANT: You're almost certain about the root cause. If you have NOT found the bug with "
+                "100% certainty, consider setting next_step_required=false to invoke expert analysis. The expert "
+                "can validate your hypotheses and provide additional insights. If you ARE 100% certain and have "
+                "identified the exact bug and fix, proceed to confidence='certain'. Otherwise, let expert analysis "
+                "help finalize the investigation."
+            )
         else:
             next_steps = (
                 f"PAUSE INVESTIGATION. Before calling {self.get_name()} step {step_number + 1}, you MUST examine code. "
diff --git a/tools/precommit.py b/tools/precommit.py
index 5b1cbf4..0b656b0 100644
--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -90,7 +90,8 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
         "'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
         "analysis is complete and all issues are identified with no need for external model validation). "
         "Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
-        "Using 'certain' means you have complete confidence locally and prevents external model validation."
+        "Using 'certain' means you have complete confidence locally and prevents external model validation. Also "
+        "do NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
     ),
     "backtrack_from_step": (
         "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
diff --git a/tools/refactor.py b/tools/refactor.py
index b9e3973..2045bbb 100644
--- a/tools/refactor.py
+++ b/tools/refactor.py
@@ -96,7 +96,8 @@ REFACTOR_FIELD_DESCRIPTIONS = {
         "Use 'complete' ONLY when you have fully analyzed all code, identified all significant refactoring "
         "opportunities, and can provide comprehensive recommendations without expert assistance. When files are "
         "too large to read fully or analysis is uncertain, use 'partial'. Using 'complete' prevents expert "
-        "analysis to save time and money."
+        "analysis to save time and money. Do NOT set confidence to 'complete' if the user has strongly requested that "
+        "external validation MUST be performed."
     ),
     "backtrack_from_step": (
         "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "