Update confidence enum values across workflow tools

Added new confidence values (very_high, almost_certain) to all workflow tools to provide more granular confidence tracking.

Updated enum declarations in:
- analyze.py, codereview.py, debug.py, precommit.py, secaudit.py, testgen.py
- Updated debug.py's get_required_actions to handle the new confidence values
- All tools now use a consistent 7-value confidence scale
- refactor.py kept its unique scale (exploring/incomplete/partial/complete)

Also fixed model thinking configuration:
- Added very_high and almost_certain to MODEL_THINKING_PREFERENCES
- Set medium thinking for very_high, high thinking for almost_certain
- Updated prompts to clarify that 'certain' means 100% local confidence

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-28 00:04:35 +04:00
parent bc447d4bcd
commit adbc4af4a9
14 changed files with 82 additions and 44 deletions
--- a/config.py
+++ b/config.py
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.7.4"
+__version__ = "5.7.5"
 # Last update date in ISO format
 __updated__ = "2025-06-27"
 # Primary maintainer
--- a/systemprompts/debug_prompt.py
+++ b/systemprompts/debug_prompt.py
@@ -39,7 +39,7 @@ Include context_start_text and context_end_text as backup references. Never incl
 snippets.

 WORKFLOW CONTEXT
-Your task is to analyze the systematic investigation given to you and provide expert debugging analysis back to the 
+Your task is to analyze the systematic investigation given to you and provide expert debugging analysis back to the
 agent, who will then present the findings to the user in a consolidated format.

 STRUCTURED JSON OUTPUT FORMAT
--- a/systemprompts/secaudit_prompt.py
+++ b/systemprompts/secaudit_prompt.py
@@ -32,7 +32,7 @@ Include context_start_text and context_end_text as backup references. Never incl
 snippets.

 WORKFLOW CONTEXT
-Your task is to analyze the agent's systematic security investigation and provide expert security analysis back to the 
+Your task is to analyze the agent's systematic security investigation and provide expert security analysis back to the
 agent, who will then present the findings to the user in a consolidated format.

 STRUCTURED JSON OUTPUT FORMAT
--- a/tests/test_chat_simple.py
+++ b/tests/test_chat_simple.py
@@ -119,7 +119,7 @@ class TestChatTool:
        formatted = self.tool.format_response(response, request)

        assert "Test response content" in formatted
-        assert "Claude's Turn:" in formatted
+        assert "AGENT'S TURN:" in formatted
        assert "Evaluate this perspective" in formatted

    def test_tool_name(self):
--- a/tools/analyze.py
+++ b/tools/analyze.py
@@ -91,7 +91,8 @@ ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = {
    "confidence": (
        "Your confidence level in the current analysis findings: exploring (early investigation), "
        "low (some insights but more needed), medium (solid understanding), high (comprehensive insights), "
-        "certain (complete analysis ready for expert validation)"
+        "very_high (very comprehensive insights), almost_certain (nearly complete analysis), "
+        "certain (100% confidence - complete analysis ready for expert validation)"
    ),
    "analysis_type": "Type of analysis to perform (architecture, performance, security, quality, general)",
    "output_format": "How to format the output (summary, detailed, actionable)",
@@ -252,7 +253,7 @@ class AnalyzeTool(WorkflowTool):
            },
            "confidence": {
                "type": "string",
-                "enum": ["exploring", "low", "medium", "high", "certain"],
+                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
            "backtrack_from_step": {
--- a/tools/codereview.py
+++ b/tools/codereview.py
@@ -92,10 +92,11 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
    ),
    "confidence": (
        "Indicate your current confidence in the code review assessment. Use: 'exploring' (starting analysis), 'low' "
-        "(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when "
-        "the code review is thoroughly complete and all significant issues are identified). Do NOT use 'certain' "
-        "unless the code review is comprehensively complete, use 'high' instead not 100% sure. Using 'certain' "
-        "prevents additional expert analysis."
+        "(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
+        "'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
+        "code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
+        "Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
+        "Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
    "backtrack_from_step": (
        "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -263,7 +264,7 @@ class CodeReviewTool(WorkflowTool):
            },
            "confidence": {
                "type": "string",
-                "enum": ["exploring", "low", "medium", "high", "certain"],
+                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
            "backtrack_from_step": {
--- a/tools/debug.py
+++ b/tools/debug.py
@@ -91,10 +91,11 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
    ),
    "confidence": (
        "Indicate your current confidence in the hypothesis. Use: 'exploring' (starting out), 'low' (early idea), "
-        "'medium' (some supporting evidence), 'high' (strong evidence), 'certain' (only when "
-        "the root cause and minimal "
-        "fix are both confirmed). Do NOT use 'certain' unless the issue can be fully resolved with a fix, use 'high' "
-        "instead when not 100% sure. Using 'certain' prevents you from taking assistance from another thought-partner."
+        "'medium' (some supporting evidence), 'high' (strong evidence), 'very_high' (very strong evidence), "
+        "'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
+        "confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
+        "fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
+        "means you have complete confidence locally and prevents external model validation."
    ),
    "backtrack_from_step": (
        "If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
@@ -238,7 +239,7 @@ class DebugIssueTool(WorkflowTool):
            },
            "confidence": {
                "type": "string",
-                "enum": ["exploring", "low", "medium", "high", "certain"],
+                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["confidence"],
            },
            "hypothesis": {
@@ -283,7 +284,7 @@ class DebugIssueTool(WorkflowTool):
                "Check for edge cases, boundary conditions, and assumptions in the code",
                "Look for related configuration, dependencies, or external factors",
            ]
-        elif confidence in ["medium", "high"]:
+        elif confidence in ["medium", "high", "very_high", "almost_certain"]:
            # Close to root cause - need confirmation
            return [
                "Examine the exact code sections where you believe the issue occurs",
@@ -325,9 +326,7 @@ class DebugIssueTool(WorkflowTool):

        # Add investigation summary
        investigation_summary = self._build_investigation_summary(consolidated_findings)
-        context_parts.append(
-            f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ==="
-        )
+        context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")

        # Add error context if available
        error_context = self._extract_error_context(consolidated_findings)
--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -86,9 +86,11 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
    ),
    "confidence": (
        "Indicate your current confidence in the assessment. Use: 'exploring' (starting analysis), 'low' (early "
-        "investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when the "
-        "analysis is complete and all issues are identified). Do NOT use 'certain' unless the pre-commit validation "
-        "is thoroughly complete, use 'high' instead not 100% sure. Using 'certain' prevents additional expert analysis."
+        "investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
+        "'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
+        "analysis is complete and all issues are identified with no need for external model validation). "
+        "Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
+        "Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
    "backtrack_from_step": (
        "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -266,7 +268,7 @@ class PrecommitTool(WorkflowTool):
            },
            "confidence": {
                "type": "string",
-                "enum": ["exploring", "low", "medium", "high", "certain"],
+                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
            "backtrack_from_step": {
--- a/tools/secaudit.py
+++ b/tools/secaudit.py
@@ -97,10 +97,11 @@ SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = {
    ),
    "confidence": (
        "Indicate your current confidence in the security audit assessment. Use: 'exploring' (starting analysis), "
-        "'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' "
-        "(only when the security audit is thoroughly complete and all significant security issues are identified). "
-        "Do NOT use 'certain' unless the security audit is comprehensively complete, use 'high' instead not 100% "
-        "sure. Using 'certain' prevents additional expert analysis."
+        "'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
+        "'very_high' (very strong evidence), 'almost_certain' (nearly complete audit), 'certain' "
+        "(100% confidence - security audit is thoroughly complete and all significant security issues are identified with no need for external model validation). "
+        "Do NOT use 'certain' unless the security audit is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
+        "Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
    "backtrack_from_step": (
        "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which "
@@ -480,7 +481,7 @@ class SecauditTool(WorkflowTool):
            },
            "confidence": {
                "type": "string",
-                "enum": ["exploring", "low", "medium", "high", "certain"],
+                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
            "backtrack_from_step": {
--- a/tools/shared/base_models.py
+++ b/tools/shared/base_models.py
@@ -67,7 +67,11 @@ WORKFLOW_FIELD_DESCRIPTIONS = {
    "relevant_files": "Files identified as relevant to the issue/goal",
    "relevant_context": "Methods/functions identified as involved in the issue",
    "issues_found": "Issues identified with severity levels during work",
-    "confidence": "Confidence level in findings: exploring, low, medium, high, certain",
+    "confidence": (
+        "Confidence level in findings: exploring (just starting), low (early investigation), "
+        "medium (some evidence), high (strong evidence), very_high (comprehensive understanding), "
+        "almost_certain (near complete confidence), certain (100% confidence locally - no external validation needed)"
+    ),
    "hypothesis": "Current theory about the issue/goal based on work",
    "backtrack_from_step": "Step number to backtrack from if work needs revision",
    "use_assistant_model": (
--- a/tools/testgen.py
+++ b/tools/testgen.py
@@ -78,10 +78,11 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
    ),
    "confidence": (
        "Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), "
-        "'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), 'certain' "
-        "(only when the test plan is thoroughly complete and all test scenarios are identified). Do NOT use 'certain' "
-        "unless the test generation analysis is comprehensively complete, use 'high' instead not 100% sure. Using "
-        "'certain' prevents additional expert analysis."
+        "'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), "
+        "'very_high' (very strong understanding), 'almost_certain' (nearly complete test plan), 'certain' "
+        "(100% confidence - test plan is thoroughly complete and all test scenarios are identified with no need for external model validation). "
+        "Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
+        "Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
    "backtrack_from_step": (
        "If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -228,7 +229,7 @@ class TestGenTool(WorkflowTool):
            },
            "confidence": {
                "type": "string",
-                "enum": ["exploring", "low", "medium", "high", "certain"],
+                "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
                "description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
            },
            "backtrack_from_step": {
--- a/tools/thinkdeep.py
+++ b/tools/thinkdeep.py
@@ -84,9 +84,10 @@ class ThinkDeepWorkflowRequest(WorkflowRequest):
        default="low",
        description="Indicate your current confidence in the analysis. Use: 'exploring' (starting analysis), "
        "'low' (early thinking), 'medium' (some insights gained), 'high' (strong understanding), "
-        "'certain' (only when the analysis is complete and conclusions are definitive). "
-        "Do NOT use 'certain' unless the thinking is comprehensively complete, use 'high' instead when in doubt. "
-        "Using 'certain' prevents additional expert analysis to save time and money.",
+        "'very_high' (very strong understanding), 'almost_certain' (nearly complete analysis), "
+        "'certain' (100% confidence - analysis is complete and conclusions are definitive with no need for external model validation). "
+        "Do NOT use 'certain' unless the thinking is comprehensively complete, use 'very_high' or 'almost_certain' instead when in doubt. "
+        "Using 'certain' means you have complete confidence locally and prevents external model validation.",
    )

    # Advanced workflow features
@@ -433,11 +434,27 @@ but also acknowledge strong insights and valid conclusions.
                ]
            )
        elif confidence == "high":
+            actions.extend(
+                [
+                    "Refine and validate key findings",
+                    "Explore edge cases and limitations",
+                    "Document assumptions and trade-offs",
+                ]
+            )
+        elif confidence == "very_high":
            actions.extend(
                [
                    "Synthesize findings into cohesive recommendations",
-                    "Validate conclusions against evidence",
-                    "Prepare for expert analysis",
+                    "Validate conclusions against all evidence",
+                    "Prepare comprehensive implementation guidance",
+                ]
+            )
+        elif confidence == "almost_certain":
+            actions.extend(
+                [
+                    "Finalize recommendations with high confidence",
+                    "Document any remaining minor uncertainties",
+                    "Prepare for expert analysis or implementation",
                ]
            )
        else:  # certain
@@ -516,10 +533,20 @@ but also acknowledge strong insights and valid conclusions.
                    f"Your thinking analysis confidence is CERTAIN. Consider if you truly need step {next_step_number} "
                    f"or if you should complete the analysis now with expert validation."
                )
+            elif request.confidence == "almost_certain":
+                guidance = (
+                    f"Your thinking analysis confidence is ALMOST_CERTAIN. For step {next_step_number}, consider: "
+                    f"finalizing recommendations, documenting any minor uncertainties, or preparing for implementation."
+                )
+            elif request.confidence == "very_high":
+                guidance = (
+                    f"Your thinking analysis confidence is VERY_HIGH. For step {next_step_number}, consider: "
+                    f"synthesis of all findings, comprehensive validation, or creating implementation roadmap."
+                )
            elif request.confidence == "high":
                guidance = (
                    f"Your thinking analysis confidence is HIGH. For step {next_step_number}, consider: "
-                    f"validation of conclusions, stress-testing assumptions, or exploring edge cases."
+                    f"exploring edge cases, documenting trade-offs, or stress-testing key assumptions."
                )
            elif request.confidence == "medium":
                guidance = (
--- a/tools/tracer.py
+++ b/tools/tracer.py
@@ -86,8 +86,10 @@ TRACER_WORKFLOW_FIELD_DESCRIPTIONS = {
    "confidence": (
        "Indicate your current confidence in the tracing analysis completeness. Use: 'exploring' (starting analysis), "
        "'low' (early investigation), 'medium' (some patterns identified), 'high' (comprehensive understanding), "
-        "'complete' (tracing analysis finished and ready for output). Do NOT use 'complete' unless the tracing "
-        "analysis is thoroughly finished and you have a comprehensive understanding of the code relationships."
+        "'very_high' (very comprehensive understanding), 'almost_certain' (nearly complete tracing), "
+        "'certain' (100% confidence - tracing analysis is finished and ready for output with no need for external model validation). "
+        "Do NOT use 'certain' unless the tracing analysis is thoroughly finished and you have a comprehensive understanding "
+        "of the code relationships. Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
    "trace_mode": "Type of tracing: 'ask' (default - prompts user to choose mode), 'precision' (execution flow) or 'dependencies' (structural relationships)",
    "target_description": (
--- a/tools/workflow/schema_builders.py
+++ b/tools/workflow/schema_builders.py
@@ -65,7 +65,7 @@ class WorkflowSchemaBuilder:
        },
        "confidence": {
            "type": "string",
-            "enum": ["exploring", "low", "medium", "high", "certain"],
+            "enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
            "description": WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
        },
        "hypothesis": {