Tweaks to prompts to prevent Claude from becoming overconfident
This commit is contained in:
@@ -16,7 +16,7 @@ import os
|
|||||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||||
__version__ = "5.7.5"
|
__version__ = "5.7.5"
|
||||||
# Last update date in ISO format
|
# Last update date in ISO format
|
||||||
__updated__ = "2025-06-27"
|
__updated__ = "2025-06-28"
|
||||||
# Primary maintainer
|
# Primary maintainer
|
||||||
__author__ = "Fahad Gilani"
|
__author__ = "Fahad Gilani"
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,8 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
|
|||||||
"'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
|
"'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
|
||||||
"code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
|
"code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
|
||||||
"Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
|
"Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
|
||||||
"Using 'certain' means you have complete confidence locally and prevents external model validation."
|
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also do "
|
||||||
|
"NOT set confidence to 'certain' if the user has strongly requested that external review must be performed."
|
||||||
),
|
),
|
||||||
"backtrack_from_step": (
|
"backtrack_from_step": (
|
||||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||||
|
|||||||
@@ -95,7 +95,8 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
|
|||||||
"'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
|
"'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
|
||||||
"confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
|
"confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
|
||||||
"fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
|
"fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
|
||||||
"means you have complete confidence locally and prevents external model validation."
|
"means you have complete confidence locally and prevents external model validation. Also do "
|
||||||
|
"NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
|
||||||
),
|
),
|
||||||
"backtrack_from_step": (
|
"backtrack_from_step": (
|
||||||
"If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
|
"If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
|
||||||
@@ -284,7 +285,7 @@ class DebugIssueTool(WorkflowTool):
|
|||||||
"Check for edge cases, boundary conditions, and assumptions in the code",
|
"Check for edge cases, boundary conditions, and assumptions in the code",
|
||||||
"Look for related configuration, dependencies, or external factors",
|
"Look for related configuration, dependencies, or external factors",
|
||||||
]
|
]
|
||||||
elif confidence in ["medium", "high", "very_high", "almost_certain"]:
|
elif confidence in ["medium", "high", "very_high"]:
|
||||||
# Close to root cause - need confirmation
|
# Close to root cause - need confirmation
|
||||||
return [
|
return [
|
||||||
"Examine the exact code sections where you believe the issue occurs",
|
"Examine the exact code sections where you believe the issue occurs",
|
||||||
@@ -292,6 +293,14 @@ class DebugIssueTool(WorkflowTool):
|
|||||||
"Verify your hypothesis with concrete code evidence",
|
"Verify your hypothesis with concrete code evidence",
|
||||||
"Check for any similar patterns elsewhere in the codebase",
|
"Check for any similar patterns elsewhere in the codebase",
|
||||||
]
|
]
|
||||||
|
elif confidence == "almost_certain":
|
||||||
|
# Almost certain - final verification before conclusion
|
||||||
|
return [
|
||||||
|
"Finalize your root cause analysis with specific evidence",
|
||||||
|
"Document the complete chain of causation from symptom to root cause",
|
||||||
|
"Verify the minimal fix approach is correct",
|
||||||
|
"Consider if expert analysis would provide additional insights",
|
||||||
|
]
|
||||||
else:
|
else:
|
||||||
# General investigation needed
|
# General investigation needed
|
||||||
return [
|
return [
|
||||||
@@ -324,6 +333,19 @@ class DebugIssueTool(WorkflowTool):
|
|||||||
f"=== ISSUE DESCRIPTION ===\n{self.initial_issue or 'Investigation initiated'}\n=== END DESCRIPTION ==="
|
f"=== ISSUE DESCRIPTION ===\n{self.initial_issue or 'Investigation initiated'}\n=== END DESCRIPTION ==="
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Add special note if confidence is almost_certain
|
||||||
|
if consolidated_findings.confidence == "almost_certain":
|
||||||
|
context_parts.append(
|
||||||
|
"\n=== IMPORTANT: ALMOST CERTAIN CONFIDENCE ===\n"
|
||||||
|
"The agent has reached 'almost_certain' confidence but has NOT confirmed the bug with 100% certainty. "
|
||||||
|
"Your role is to:\n"
|
||||||
|
"1. Validate the agent's hypothesis and investigation\n"
|
||||||
|
"2. Identify any missing evidence or overlooked aspects\n"
|
||||||
|
"3. Provide additional insights that could confirm or refute the hypothesis\n"
|
||||||
|
"4. Help finalize the root cause analysis with complete certainty\n"
|
||||||
|
"=== END IMPORTANT ==="
|
||||||
|
)
|
||||||
|
|
||||||
# Add investigation summary
|
# Add investigation summary
|
||||||
investigation_summary = self._build_investigation_summary(consolidated_findings)
|
investigation_summary = self._build_investigation_summary(consolidated_findings)
|
||||||
context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
|
context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
|
||||||
@@ -421,7 +443,7 @@ class DebugIssueTool(WorkflowTool):
|
|||||||
+ f"\n\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
|
+ f"\n\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
|
||||||
+ "completing these investigations."
|
+ "completing these investigations."
|
||||||
)
|
)
|
||||||
elif confidence in ["medium", "high"]:
|
elif confidence in ["medium", "high", "very_high"]:
|
||||||
next_steps = (
|
next_steps = (
|
||||||
f"WAIT! Your hypothesis needs verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\n"
|
f"WAIT! Your hypothesis needs verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\n"
|
||||||
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
||||||
@@ -430,6 +452,16 @@ class DebugIssueTool(WorkflowTool):
|
|||||||
f"or engineering assistant for clarification. Document findings with specific file:line references, "
|
f"or engineering assistant for clarification. Document findings with specific file:line references, "
|
||||||
f"then call {self.get_name()} with step_number: {step_number + 1}."
|
f"then call {self.get_name()} with step_number: {step_number + 1}."
|
||||||
)
|
)
|
||||||
|
elif confidence == "almost_certain":
|
||||||
|
next_steps = (
|
||||||
|
"ALMOST CERTAIN - Prepare for final analysis. REQUIRED ACTIONS:\n"
|
||||||
|
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
||||||
|
+ "\n\nIMPORTANT: You're almost certain about the root cause. If you have NOT found the bug with "
|
||||||
|
"100% certainty, consider setting next_step_required=false to invoke expert analysis. The expert "
|
||||||
|
"can validate your hypotheses and provide additional insights. If you ARE 100% certain and have "
|
||||||
|
"identified the exact bug and fix, proceed to confidence='certain'. Otherwise, let expert analysis "
|
||||||
|
"help finalize the investigation."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
next_steps = (
|
next_steps = (
|
||||||
f"PAUSE INVESTIGATION. Before calling {self.get_name()} step {step_number + 1}, you MUST examine code. "
|
f"PAUSE INVESTIGATION. Before calling {self.get_name()} step {step_number + 1}, you MUST examine code. "
|
||||||
|
|||||||
@@ -90,7 +90,8 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
|
|||||||
"'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
|
"'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
|
||||||
"analysis is complete and all issues are identified with no need for external model validation). "
|
"analysis is complete and all issues are identified with no need for external model validation). "
|
||||||
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
|
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
|
||||||
"Using 'certain' means you have complete confidence locally and prevents external model validation."
|
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also "
|
||||||
|
"do NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
|
||||||
),
|
),
|
||||||
"backtrack_from_step": (
|
"backtrack_from_step": (
|
||||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||||
|
|||||||
@@ -96,7 +96,8 @@ REFACTOR_FIELD_DESCRIPTIONS = {
|
|||||||
"Use 'complete' ONLY when you have fully analyzed all code, identified all significant refactoring "
|
"Use 'complete' ONLY when you have fully analyzed all code, identified all significant refactoring "
|
||||||
"opportunities, and can provide comprehensive recommendations without expert assistance. When files are "
|
"opportunities, and can provide comprehensive recommendations without expert assistance. When files are "
|
||||||
"too large to read fully or analysis is uncertain, use 'partial'. Using 'complete' prevents expert "
|
"too large to read fully or analysis is uncertain, use 'partial'. Using 'complete' prevents expert "
|
||||||
"analysis to save time and money."
|
"analysis to save time and money. Do NOT set confidence to 'complete' if the user has strongly requested that "
|
||||||
|
"external validation MUST be performed."
|
||||||
),
|
),
|
||||||
"backtrack_from_step": (
|
"backtrack_from_step": (
|
||||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||||
|
|||||||
Reference in New Issue
Block a user