Tweaks to prompts to prevent Claude from becoming overconfident
This commit is contained in:
@@ -16,7 +16,7 @@ import os
|
||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||
__version__ = "5.7.5"
|
||||
# Last update date in ISO format
|
||||
__updated__ = "2025-06-27"
|
||||
__updated__ = "2025-06-28"
|
||||
# Primary maintainer
|
||||
__author__ = "Fahad Gilani"
|
||||
|
||||
|
||||
@@ -96,7 +96,8 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
|
||||
"'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
|
||||
"code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
|
||||
"Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
|
||||
"Using 'certain' means you have complete confidence locally and prevents external model validation."
|
||||
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also do "
|
||||
"NOT set confidence to 'certain' if the user has strongly requested that external review must be performed."
|
||||
),
|
||||
"backtrack_from_step": (
|
||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||
|
||||
@@ -95,7 +95,8 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
|
||||
"'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
|
||||
"confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
|
||||
"fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
|
||||
"means you have complete confidence locally and prevents external model validation."
|
||||
"means you have complete confidence locally and prevents external model validation. Also do "
|
||||
"NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
|
||||
),
|
||||
"backtrack_from_step": (
|
||||
"If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
|
||||
@@ -284,7 +285,7 @@ class DebugIssueTool(WorkflowTool):
|
||||
"Check for edge cases, boundary conditions, and assumptions in the code",
|
||||
"Look for related configuration, dependencies, or external factors",
|
||||
]
|
||||
elif confidence in ["medium", "high", "very_high", "almost_certain"]:
|
||||
elif confidence in ["medium", "high", "very_high"]:
|
||||
# Close to root cause - need confirmation
|
||||
return [
|
||||
"Examine the exact code sections where you believe the issue occurs",
|
||||
@@ -292,6 +293,14 @@ class DebugIssueTool(WorkflowTool):
|
||||
"Verify your hypothesis with concrete code evidence",
|
||||
"Check for any similar patterns elsewhere in the codebase",
|
||||
]
|
||||
elif confidence == "almost_certain":
|
||||
# Almost certain - final verification before conclusion
|
||||
return [
|
||||
"Finalize your root cause analysis with specific evidence",
|
||||
"Document the complete chain of causation from symptom to root cause",
|
||||
"Verify the minimal fix approach is correct",
|
||||
"Consider if expert analysis would provide additional insights",
|
||||
]
|
||||
else:
|
||||
# General investigation needed
|
||||
return [
|
||||
@@ -324,6 +333,19 @@ class DebugIssueTool(WorkflowTool):
|
||||
f"=== ISSUE DESCRIPTION ===\n{self.initial_issue or 'Investigation initiated'}\n=== END DESCRIPTION ==="
|
||||
]
|
||||
|
||||
# Add special note if confidence is almost_certain
|
||||
if consolidated_findings.confidence == "almost_certain":
|
||||
context_parts.append(
|
||||
"\n=== IMPORTANT: ALMOST CERTAIN CONFIDENCE ===\n"
|
||||
"The agent has reached 'almost_certain' confidence but has NOT confirmed the bug with 100% certainty. "
|
||||
"Your role is to:\n"
|
||||
"1. Validate the agent's hypothesis and investigation\n"
|
||||
"2. Identify any missing evidence or overlooked aspects\n"
|
||||
"3. Provide additional insights that could confirm or refute the hypothesis\n"
|
||||
"4. Help finalize the root cause analysis with complete certainty\n"
|
||||
"=== END IMPORTANT ==="
|
||||
)
|
||||
|
||||
# Add investigation summary
|
||||
investigation_summary = self._build_investigation_summary(consolidated_findings)
|
||||
context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
|
||||
@@ -421,7 +443,7 @@ class DebugIssueTool(WorkflowTool):
|
||||
+ f"\n\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
|
||||
+ "completing these investigations."
|
||||
)
|
||||
elif confidence in ["medium", "high"]:
|
||||
elif confidence in ["medium", "high", "very_high"]:
|
||||
next_steps = (
|
||||
f"WAIT! Your hypothesis needs verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\n"
|
||||
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
||||
@@ -430,6 +452,16 @@ class DebugIssueTool(WorkflowTool):
|
||||
f"or engineering assistant for clarification. Document findings with specific file:line references, "
|
||||
f"then call {self.get_name()} with step_number: {step_number + 1}."
|
||||
)
|
||||
elif confidence == "almost_certain":
|
||||
next_steps = (
|
||||
"ALMOST CERTAIN - Prepare for final analysis. REQUIRED ACTIONS:\n"
|
||||
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
||||
+ "\n\nIMPORTANT: You're almost certain about the root cause. If you have NOT found the bug with "
|
||||
"100% certainty, consider setting next_step_required=false to invoke expert analysis. The expert "
|
||||
"can validate your hypotheses and provide additional insights. If you ARE 100% certain and have "
|
||||
"identified the exact bug and fix, proceed to confidence='certain'. Otherwise, let expert analysis "
|
||||
"help finalize the investigation."
|
||||
)
|
||||
else:
|
||||
next_steps = (
|
||||
f"PAUSE INVESTIGATION. Before calling {self.get_name()} step {step_number + 1}, you MUST examine code. "
|
||||
|
||||
@@ -90,7 +90,8 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
|
||||
"'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
|
||||
"analysis is complete and all issues are identified with no need for external model validation). "
|
||||
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
|
||||
"Using 'certain' means you have complete confidence locally and prevents external model validation."
|
||||
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also "
|
||||
"do NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
|
||||
),
|
||||
"backtrack_from_step": (
|
||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||
|
||||
@@ -96,7 +96,8 @@ REFACTOR_FIELD_DESCRIPTIONS = {
|
||||
"Use 'complete' ONLY when you have fully analyzed all code, identified all significant refactoring "
|
||||
"opportunities, and can provide comprehensive recommendations without expert assistance. When files are "
|
||||
"too large to read fully or analysis is uncertain, use 'partial'. Using 'complete' prevents expert "
|
||||
"analysis to save time and money."
|
||||
"analysis to save time and money. Do NOT set confidence to 'certain' if the user has strongly requested that "
|
||||
"external validation MUST be performed."
|
||||
),
|
||||
"backtrack_from_step": (
|
||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||
|
||||
Reference in New Issue
Block a user