Update confidence enum values across workflow tools

Added new confidence values (very_high, almost_certain) to the workflow tools
to provide more granular confidence tracking:
- Updated enum declarations in analyze.py, codereview.py, debug.py,
  precommit.py, secaudit.py, and testgen.py
- Updated debug.py's get_required_actions to handle the new confidence values
- These tools now use a consistent 7-value confidence scale
  (exploring, low, medium, high, very_high, almost_certain, certain)
- refactor.py keeps its own scale (exploring/incomplete/partial/complete)

Also fixed model thinking configuration:
- Added very_high and almost_certain to MODEL_THINKING_PREFERENCES
- Set medium thinking for very_high, high thinking for almost_certain
- Updated prompts to clarify that 'certain' means 100% local confidence

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Fahad
2025-06-28 00:04:35 +04:00
parent bc447d4bcd
commit adbc4af4a9
14 changed files with 82 additions and 44 deletions

View File

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.7.4"
__version__ = "5.7.5"
# Last update date in ISO format
__updated__ = "2025-06-27"
# Primary maintainer

View File

@@ -39,7 +39,7 @@ Include context_start_text and context_end_text as backup references. Never incl
snippets.
WORKFLOW CONTEXT
Your task is to analyze the systematic investigation given to you and provide expert debugging analysis back to the
Your task is to analyze the systematic investigation given to you and provide expert debugging analysis back to the
agent, who will then present the findings to the user in a consolidated format.
STRUCTURED JSON OUTPUT FORMAT

View File

@@ -32,7 +32,7 @@ Include context_start_text and context_end_text as backup references. Never incl
snippets.
WORKFLOW CONTEXT
Your task is to analyze the agent's systematic security investigation and provide expert security analysis back to the
Your task is to analyze the agent's systematic security investigation and provide expert security analysis back to the
agent, who will then present the findings to the user in a consolidated format.
STRUCTURED JSON OUTPUT FORMAT

View File

@@ -119,7 +119,7 @@ class TestChatTool:
formatted = self.tool.format_response(response, request)
assert "Test response content" in formatted
assert "Claude's Turn:" in formatted
assert "AGENT'S TURN:" in formatted
assert "Evaluate this perspective" in formatted
def test_tool_name(self):

View File

@@ -91,7 +91,8 @@ ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = {
"confidence": (
"Your confidence level in the current analysis findings: exploring (early investigation), "
"low (some insights but more needed), medium (solid understanding), high (comprehensive insights), "
"certain (complete analysis ready for expert validation)"
"very_high (very comprehensive insights), almost_certain (nearly complete analysis), "
"certain (100% confidence - complete analysis ready for expert validation)"
),
"analysis_type": "Type of analysis to perform (architecture, performance, security, quality, general)",
"output_format": "How to format the output (summary, detailed, actionable)",
@@ -252,7 +253,7 @@ class AnalyzeTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {

View File

@@ -92,10 +92,11 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the code review assessment. Use: 'exploring' (starting analysis), 'low' "
"(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when "
"the code review is thoroughly complete and all significant issues are identified). Do NOT use 'certain' "
"unless the code review is comprehensively complete, use 'high' instead not 100% sure. Using 'certain' "
"prevents additional expert analysis."
"(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
"code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -263,7 +264,7 @@ class CodeReviewTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {

View File

@@ -91,10 +91,11 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the hypothesis. Use: 'exploring' (starting out), 'low' (early idea), "
"'medium' (some supporting evidence), 'high' (strong evidence), 'certain' (only when "
"the root cause and minimal "
"fix are both confirmed). Do NOT use 'certain' unless the issue can be fully resolved with a fix, use 'high' "
"instead when not 100% sure. Using 'certain' prevents you from taking assistance from another thought-partner."
"'medium' (some supporting evidence), 'high' (strong evidence), 'very_high' (very strong evidence), "
"'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
"confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
"fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
"means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
@@ -238,7 +239,7 @@ class DebugIssueTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["confidence"],
},
"hypothesis": {
@@ -283,7 +284,7 @@ class DebugIssueTool(WorkflowTool):
"Check for edge cases, boundary conditions, and assumptions in the code",
"Look for related configuration, dependencies, or external factors",
]
elif confidence in ["medium", "high"]:
elif confidence in ["medium", "high", "very_high", "almost_certain"]:
# Close to root cause - need confirmation
return [
"Examine the exact code sections where you believe the issue occurs",
@@ -325,9 +326,7 @@ class DebugIssueTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_investigation_summary(consolidated_findings)
context_parts.append(
f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ==="
)
context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
# Add error context if available
error_context = self._extract_error_context(consolidated_findings)

View File

@@ -86,9 +86,11 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the assessment. Use: 'exploring' (starting analysis), 'low' (early "
"investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when the "
"analysis is complete and all issues are identified). Do NOT use 'certain' unless the pre-commit validation "
"is thoroughly complete, use 'high' instead not 100% sure. Using 'certain' prevents additional expert analysis."
"investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
"analysis is complete and all issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -266,7 +268,7 @@ class PrecommitTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {

View File

@@ -97,10 +97,11 @@ SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the security audit assessment. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' "
"(only when the security audit is thoroughly complete and all significant security issues are identified). "
"Do NOT use 'certain' unless the security audit is comprehensively complete, use 'high' instead not 100% "
"sure. Using 'certain' prevents additional expert analysis."
"'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"'very_high' (very strong evidence), 'almost_certain' (nearly complete audit), 'certain' "
"(100% confidence - security audit is thoroughly complete and all significant security issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the security audit is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which "
@@ -480,7 +481,7 @@ class SecauditTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {

View File

@@ -67,7 +67,11 @@ WORKFLOW_FIELD_DESCRIPTIONS = {
"relevant_files": "Files identified as relevant to the issue/goal",
"relevant_context": "Methods/functions identified as involved in the issue",
"issues_found": "Issues identified with severity levels during work",
"confidence": "Confidence level in findings: exploring, low, medium, high, certain",
"confidence": (
"Confidence level in findings: exploring (just starting), low (early investigation), "
"medium (some evidence), high (strong evidence), very_high (comprehensive understanding), "
"almost_certain (near complete confidence), certain (100% confidence locally - no external validation needed)"
),
"hypothesis": "Current theory about the issue/goal based on work",
"backtrack_from_step": "Step number to backtrack from if work needs revision",
"use_assistant_model": (

View File

@@ -78,10 +78,11 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), 'certain' "
"(only when the test plan is thoroughly complete and all test scenarios are identified). Do NOT use 'certain' "
"unless the test generation analysis is comprehensively complete, use 'high' instead not 100% sure. Using "
"'certain' prevents additional expert analysis."
"'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), "
"'very_high' (very strong understanding), 'almost_certain' (nearly complete test plan), 'certain' "
"(100% confidence - test plan is thoroughly complete and all test scenarios are identified with no need for external model validation). "
"Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -228,7 +229,7 @@ class TestGenTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {

View File

@@ -84,9 +84,10 @@ class ThinkDeepWorkflowRequest(WorkflowRequest):
default="low",
description="Indicate your current confidence in the analysis. Use: 'exploring' (starting analysis), "
"'low' (early thinking), 'medium' (some insights gained), 'high' (strong understanding), "
"'certain' (only when the analysis is complete and conclusions are definitive). "
"Do NOT use 'certain' unless the thinking is comprehensively complete, use 'high' instead when in doubt. "
"Using 'certain' prevents additional expert analysis to save time and money.",
"'very_high' (very strong understanding), 'almost_certain' (nearly complete analysis), "
"'certain' (100% confidence - analysis is complete and conclusions are definitive with no need for external model validation). "
"Do NOT use 'certain' unless the thinking is comprehensively complete, use 'very_high' or 'almost_certain' instead when in doubt. "
"Using 'certain' means you have complete confidence locally and prevents external model validation.",
)
# Advanced workflow features
@@ -433,11 +434,27 @@ but also acknowledge strong insights and valid conclusions.
]
)
elif confidence == "high":
actions.extend(
[
"Refine and validate key findings",
"Explore edge cases and limitations",
"Document assumptions and trade-offs",
]
)
elif confidence == "very_high":
actions.extend(
[
"Synthesize findings into cohesive recommendations",
"Validate conclusions against evidence",
"Prepare for expert analysis",
"Validate conclusions against all evidence",
"Prepare comprehensive implementation guidance",
]
)
elif confidence == "almost_certain":
actions.extend(
[
"Finalize recommendations with high confidence",
"Document any remaining minor uncertainties",
"Prepare for expert analysis or implementation",
]
)
else: # certain
@@ -516,10 +533,20 @@ but also acknowledge strong insights and valid conclusions.
f"Your thinking analysis confidence is CERTAIN. Consider if you truly need step {next_step_number} "
f"or if you should complete the analysis now with expert validation."
)
elif request.confidence == "almost_certain":
guidance = (
f"Your thinking analysis confidence is ALMOST_CERTAIN. For step {next_step_number}, consider: "
f"finalizing recommendations, documenting any minor uncertainties, or preparing for implementation."
)
elif request.confidence == "very_high":
guidance = (
f"Your thinking analysis confidence is VERY_HIGH. For step {next_step_number}, consider: "
f"synthesis of all findings, comprehensive validation, or creating implementation roadmap."
)
elif request.confidence == "high":
guidance = (
f"Your thinking analysis confidence is HIGH. For step {next_step_number}, consider: "
f"validation of conclusions, stress-testing assumptions, or exploring edge cases."
f"exploring edge cases, documenting trade-offs, or stress-testing key assumptions."
)
elif request.confidence == "medium":
guidance = (

View File

@@ -86,8 +86,10 @@ TRACER_WORKFLOW_FIELD_DESCRIPTIONS = {
"confidence": (
"Indicate your current confidence in the tracing analysis completeness. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some patterns identified), 'high' (comprehensive understanding), "
"'complete' (tracing analysis finished and ready for output). Do NOT use 'complete' unless the tracing "
"analysis is thoroughly finished and you have a comprehensive understanding of the code relationships."
"'very_high' (very comprehensive understanding), 'almost_certain' (nearly complete tracing), "
"'certain' (100% confidence - tracing analysis is finished and ready for output with no need for external model validation). "
"Do NOT use 'certain' unless the tracing analysis is thoroughly finished and you have a comprehensive understanding "
"of the code relationships. Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"trace_mode": "Type of tracing: 'ask' (default - prompts user to choose mode), 'precision' (execution flow) or 'dependencies' (structural relationships)",
"target_description": (

View File

@@ -65,7 +65,7 @@ class WorkflowSchemaBuilder:
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"hypothesis": {