Update confidence enum values across workflow tools

Added new confidence values (very_high, almost_certain) to all workflow tools
to provide more granular confidence tracking. Changes:
- Updated enum declarations in analyze.py, codereview.py, debug.py,
  precommit.py, secaudit.py, testgen.py
- Updated debug.py's get_required_actions to handle the new confidence values
- All of these tools now use a consistent 7-value confidence scale
  (exploring, low, medium, high, very_high, almost_certain, certain)
- refactor.py kept its unique scale (exploring/incomplete/partial/complete)

Also fixed model thinking configuration:
- Added very_high and almost_certain to MODEL_THINKING_PREFERENCES
- Set medium thinking for very_high, high thinking for almost_certain
- Updated prompts to clarify that 'certain' means 100% local confidence
  (no external model validation needed)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Fahad
2025-06-28 00:04:35 +04:00
parent bc447d4bcd
commit adbc4af4a9
14 changed files with 82 additions and 44 deletions

View File

@@ -14,7 +14,7 @@ import os
# These values are used in server responses and for tracking releases # These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info # IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH # Semantic versioning: MAJOR.MINOR.PATCH
__version__ = "5.7.4" __version__ = "5.7.5"
# Last update date in ISO format # Last update date in ISO format
__updated__ = "2025-06-27" __updated__ = "2025-06-27"
# Primary maintainer # Primary maintainer

View File

@@ -119,7 +119,7 @@ class TestChatTool:
formatted = self.tool.format_response(response, request) formatted = self.tool.format_response(response, request)
assert "Test response content" in formatted assert "Test response content" in formatted
assert "Claude's Turn:" in formatted assert "AGENT'S TURN:" in formatted
assert "Evaluate this perspective" in formatted assert "Evaluate this perspective" in formatted
def test_tool_name(self): def test_tool_name(self):

View File

@@ -91,7 +91,8 @@ ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = {
"confidence": ( "confidence": (
"Your confidence level in the current analysis findings: exploring (early investigation), " "Your confidence level in the current analysis findings: exploring (early investigation), "
"low (some insights but more needed), medium (solid understanding), high (comprehensive insights), " "low (some insights but more needed), medium (solid understanding), high (comprehensive insights), "
"certain (complete analysis ready for expert validation)" "very_high (very comprehensive insights), almost_certain (nearly complete analysis), "
"certain (100% confidence - complete analysis ready for expert validation)"
), ),
"analysis_type": "Type of analysis to perform (architecture, performance, security, quality, general)", "analysis_type": "Type of analysis to perform (architecture, performance, security, quality, general)",
"output_format": "How to format the output (summary, detailed, actionable)", "output_format": "How to format the output (summary, detailed, actionable)",
@@ -252,7 +253,7 @@ class AnalyzeTool(WorkflowTool):
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["confidence"], "description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
}, },
"backtrack_from_step": { "backtrack_from_step": {

View File

@@ -92,10 +92,11 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
), ),
"confidence": ( "confidence": (
"Indicate your current confidence in the code review assessment. Use: 'exploring' (starting analysis), 'low' " "Indicate your current confidence in the code review assessment. Use: 'exploring' (starting analysis), 'low' "
"(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when " "(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"the code review is thoroughly complete and all significant issues are identified). Do NOT use 'certain' " "'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
"unless the code review is comprehensively complete, use 'high' instead not 100% sure. Using 'certain' " "code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
"prevents additional expert analysis." "Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
), ),
"backtrack_from_step": ( "backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -263,7 +264,7 @@ class CodeReviewTool(WorkflowTool):
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["confidence"], "description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
}, },
"backtrack_from_step": { "backtrack_from_step": {

View File

@@ -91,10 +91,11 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
), ),
"confidence": ( "confidence": (
"Indicate your current confidence in the hypothesis. Use: 'exploring' (starting out), 'low' (early idea), " "Indicate your current confidence in the hypothesis. Use: 'exploring' (starting out), 'low' (early idea), "
"'medium' (some supporting evidence), 'high' (strong evidence), 'certain' (only when " "'medium' (some supporting evidence), 'high' (strong evidence), 'very_high' (very strong evidence), "
"the root cause and minimal " "'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
"fix are both confirmed). Do NOT use 'certain' unless the issue can be fully resolved with a fix, use 'high' " "confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
"instead when not 100% sure. Using 'certain' prevents you from taking assistance from another thought-partner." "fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
"means you have complete confidence locally and prevents external model validation."
), ),
"backtrack_from_step": ( "backtrack_from_step": (
"If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to " "If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
@@ -238,7 +239,7 @@ class DebugIssueTool(WorkflowTool):
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["confidence"], "description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["confidence"],
}, },
"hypothesis": { "hypothesis": {
@@ -283,7 +284,7 @@ class DebugIssueTool(WorkflowTool):
"Check for edge cases, boundary conditions, and assumptions in the code", "Check for edge cases, boundary conditions, and assumptions in the code",
"Look for related configuration, dependencies, or external factors", "Look for related configuration, dependencies, or external factors",
] ]
elif confidence in ["medium", "high"]: elif confidence in ["medium", "high", "very_high", "almost_certain"]:
# Close to root cause - need confirmation # Close to root cause - need confirmation
return [ return [
"Examine the exact code sections where you believe the issue occurs", "Examine the exact code sections where you believe the issue occurs",
@@ -325,9 +326,7 @@ class DebugIssueTool(WorkflowTool):
# Add investigation summary # Add investigation summary
investigation_summary = self._build_investigation_summary(consolidated_findings) investigation_summary = self._build_investigation_summary(consolidated_findings)
context_parts.append( context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ==="
)
# Add error context if available # Add error context if available
error_context = self._extract_error_context(consolidated_findings) error_context = self._extract_error_context(consolidated_findings)

View File

@@ -86,9 +86,11 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
), ),
"confidence": ( "confidence": (
"Indicate your current confidence in the assessment. Use: 'exploring' (starting analysis), 'low' (early " "Indicate your current confidence in the assessment. Use: 'exploring' (starting analysis), 'low' (early "
"investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when the " "investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"analysis is complete and all issues are identified). Do NOT use 'certain' unless the pre-commit validation " "'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
"is thoroughly complete, use 'high' instead not 100% sure. Using 'certain' prevents additional expert analysis." "analysis is complete and all issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
), ),
"backtrack_from_step": ( "backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -266,7 +268,7 @@ class PrecommitTool(WorkflowTool):
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"], "description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
}, },
"backtrack_from_step": { "backtrack_from_step": {

View File

@@ -97,10 +97,11 @@ SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = {
), ),
"confidence": ( "confidence": (
"Indicate your current confidence in the security audit assessment. Use: 'exploring' (starting analysis), " "Indicate your current confidence in the security audit assessment. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' " "'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"(only when the security audit is thoroughly complete and all significant security issues are identified). " "'very_high' (very strong evidence), 'almost_certain' (nearly complete audit), 'certain' "
"Do NOT use 'certain' unless the security audit is comprehensively complete, use 'high' instead not 100% " "(100% confidence - security audit is thoroughly complete and all significant security issues are identified with no need for external model validation). "
"sure. Using 'certain' prevents additional expert analysis." "Do NOT use 'certain' unless the security audit is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
), ),
"backtrack_from_step": ( "backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which " "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which "
@@ -480,7 +481,7 @@ class SecauditTool(WorkflowTool):
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"], "description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
}, },
"backtrack_from_step": { "backtrack_from_step": {

View File

@@ -67,7 +67,11 @@ WORKFLOW_FIELD_DESCRIPTIONS = {
"relevant_files": "Files identified as relevant to the issue/goal", "relevant_files": "Files identified as relevant to the issue/goal",
"relevant_context": "Methods/functions identified as involved in the issue", "relevant_context": "Methods/functions identified as involved in the issue",
"issues_found": "Issues identified with severity levels during work", "issues_found": "Issues identified with severity levels during work",
"confidence": "Confidence level in findings: exploring, low, medium, high, certain", "confidence": (
"Confidence level in findings: exploring (just starting), low (early investigation), "
"medium (some evidence), high (strong evidence), very_high (comprehensive understanding), "
"almost_certain (near complete confidence), certain (100% confidence locally - no external validation needed)"
),
"hypothesis": "Current theory about the issue/goal based on work", "hypothesis": "Current theory about the issue/goal based on work",
"backtrack_from_step": "Step number to backtrack from if work needs revision", "backtrack_from_step": "Step number to backtrack from if work needs revision",
"use_assistant_model": ( "use_assistant_model": (

View File

@@ -78,10 +78,11 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
), ),
"confidence": ( "confidence": (
"Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), " "Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), 'certain' " "'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), "
"(only when the test plan is thoroughly complete and all test scenarios are identified). Do NOT use 'certain' " "'very_high' (very strong understanding), 'almost_certain' (nearly complete test plan), 'certain' "
"unless the test generation analysis is comprehensively complete, use 'high' instead not 100% sure. Using " "(100% confidence - test plan is thoroughly complete and all test scenarios are identified with no need for external model validation). "
"'certain' prevents additional expert analysis." "Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
), ),
"backtrack_from_step": ( "backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to " "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -228,7 +229,7 @@ class TestGenTool(WorkflowTool):
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"], "description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
}, },
"backtrack_from_step": { "backtrack_from_step": {

View File

@@ -84,9 +84,10 @@ class ThinkDeepWorkflowRequest(WorkflowRequest):
default="low", default="low",
description="Indicate your current confidence in the analysis. Use: 'exploring' (starting analysis), " description="Indicate your current confidence in the analysis. Use: 'exploring' (starting analysis), "
"'low' (early thinking), 'medium' (some insights gained), 'high' (strong understanding), " "'low' (early thinking), 'medium' (some insights gained), 'high' (strong understanding), "
"'certain' (only when the analysis is complete and conclusions are definitive). " "'very_high' (very strong understanding), 'almost_certain' (nearly complete analysis), "
"Do NOT use 'certain' unless the thinking is comprehensively complete, use 'high' instead when in doubt. " "'certain' (100% confidence - analysis is complete and conclusions are definitive with no need for external model validation). "
"Using 'certain' prevents additional expert analysis to save time and money.", "Do NOT use 'certain' unless the thinking is comprehensively complete, use 'very_high' or 'almost_certain' instead when in doubt. "
"Using 'certain' means you have complete confidence locally and prevents external model validation.",
) )
# Advanced workflow features # Advanced workflow features
@@ -433,11 +434,27 @@ but also acknowledge strong insights and valid conclusions.
] ]
) )
elif confidence == "high": elif confidence == "high":
actions.extend(
[
"Refine and validate key findings",
"Explore edge cases and limitations",
"Document assumptions and trade-offs",
]
)
elif confidence == "very_high":
actions.extend( actions.extend(
[ [
"Synthesize findings into cohesive recommendations", "Synthesize findings into cohesive recommendations",
"Validate conclusions against evidence", "Validate conclusions against all evidence",
"Prepare for expert analysis", "Prepare comprehensive implementation guidance",
]
)
elif confidence == "almost_certain":
actions.extend(
[
"Finalize recommendations with high confidence",
"Document any remaining minor uncertainties",
"Prepare for expert analysis or implementation",
] ]
) )
else: # certain else: # certain
@@ -516,10 +533,20 @@ but also acknowledge strong insights and valid conclusions.
f"Your thinking analysis confidence is CERTAIN. Consider if you truly need step {next_step_number} " f"Your thinking analysis confidence is CERTAIN. Consider if you truly need step {next_step_number} "
f"or if you should complete the analysis now with expert validation." f"or if you should complete the analysis now with expert validation."
) )
elif request.confidence == "almost_certain":
guidance = (
f"Your thinking analysis confidence is ALMOST_CERTAIN. For step {next_step_number}, consider: "
f"finalizing recommendations, documenting any minor uncertainties, or preparing for implementation."
)
elif request.confidence == "very_high":
guidance = (
f"Your thinking analysis confidence is VERY_HIGH. For step {next_step_number}, consider: "
f"synthesis of all findings, comprehensive validation, or creating implementation roadmap."
)
elif request.confidence == "high": elif request.confidence == "high":
guidance = ( guidance = (
f"Your thinking analysis confidence is HIGH. For step {next_step_number}, consider: " f"Your thinking analysis confidence is HIGH. For step {next_step_number}, consider: "
f"validation of conclusions, stress-testing assumptions, or exploring edge cases." f"exploring edge cases, documenting trade-offs, or stress-testing key assumptions."
) )
elif request.confidence == "medium": elif request.confidence == "medium":
guidance = ( guidance = (

View File

@@ -86,8 +86,10 @@ TRACER_WORKFLOW_FIELD_DESCRIPTIONS = {
"confidence": ( "confidence": (
"Indicate your current confidence in the tracing analysis completeness. Use: 'exploring' (starting analysis), " "Indicate your current confidence in the tracing analysis completeness. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some patterns identified), 'high' (comprehensive understanding), " "'low' (early investigation), 'medium' (some patterns identified), 'high' (comprehensive understanding), "
"'complete' (tracing analysis finished and ready for output). Do NOT use 'complete' unless the tracing " "'very_high' (very comprehensive understanding), 'almost_certain' (nearly complete tracing), "
"analysis is thoroughly finished and you have a comprehensive understanding of the code relationships." "'certain' (100% confidence - tracing analysis is finished and ready for output with no need for external model validation). "
"Do NOT use 'certain' unless the tracing analysis is thoroughly finished and you have a comprehensive understanding "
"of the code relationships. Using 'certain' means you have complete confidence locally and prevents external model validation."
), ),
"trace_mode": "Type of tracing: 'ask' (default - prompts user to choose mode), 'precision' (execution flow) or 'dependencies' (structural relationships)", "trace_mode": "Type of tracing: 'ask' (default - prompts user to choose mode), 'precision' (execution flow) or 'dependencies' (structural relationships)",
"target_description": ( "target_description": (

View File

@@ -65,7 +65,7 @@ class WorkflowSchemaBuilder:
}, },
"confidence": { "confidence": {
"type": "string", "type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"], "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": WORKFLOW_FIELD_DESCRIPTIONS["confidence"], "description": WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
}, },
"hypothesis": { "hypothesis": {