Merge branch 'BeehiveInnovations:main' into feat-dockerisation

This commit is contained in:
OhMyApps
2025-06-29 02:07:06 +02:00
committed by GitHub
35 changed files with 256 additions and 183 deletions

View File

@@ -2,7 +2,7 @@
AnalyzeWorkflow tool - Step-by-step code analysis with systematic investigation
This tool provides a structured workflow for comprehensive code and file analysis.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides the CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough code examination, pattern identification, and architectural assessment before proceeding.
The tool supports complex analysis scenarios including architectural review, performance analysis,
security assessment, and maintainability evaluation.
@@ -91,7 +91,8 @@ ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS = {
"confidence": (
"Your confidence level in the current analysis findings: exploring (early investigation), "
"low (some insights but more needed), medium (solid understanding), high (comprehensive insights), "
"certain (complete analysis ready for expert validation)"
"very_high (very comprehensive insights), almost_certain (nearly complete analysis), "
"certain (100% confidence - complete analysis ready for expert validation)"
),
"analysis_type": "Type of analysis to perform (architecture, performance, security, quality, general)",
"output_format": "How to format the output (summary, detailed, actionable)",
@@ -252,7 +253,7 @@ class AnalyzeTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": ANALYZE_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {
@@ -348,7 +349,7 @@ class AnalyzeTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_analysis_summary(consolidated_findings)
context_parts.append(
f"\\n=== CLAUDE'S ANALYSIS INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
f"\\n=== AGENT'S ANALYSIS INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
)
# Add analysis configuration context if available
@@ -477,7 +478,7 @@ class AnalyzeTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Analyze-specific skip reason."""
return "Claude completed comprehensive analysis"
return "Completed comprehensive analysis locally"
def get_skip_expert_analysis_status(self) -> str:
"""Analyze-specific expert analysis skip status."""

View File

@@ -204,7 +204,7 @@ class ChatTool(SimpleTool):
Format the chat response to match the original Chat tool exactly.
"""
return (
f"{response}\n\n---\n\n**Claude's Turn:** Evaluate this perspective alongside your analysis to "
f"{response}\n\n---\n\nAGENT'S TURN: Evaluate this perspective alongside your analysis to "
"form a comprehensive solution and continue with the user's request and task at hand."
)

View File

@@ -2,7 +2,7 @@
CodeReview Workflow tool - Systematic code review with step-by-step analysis
This tool provides a structured workflow for comprehensive code review and analysis.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides the CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough code examination, issue identification, and quality assessment before proceeding.
The tool supports complex review scenarios including security analysis, performance evaluation,
and architectural assessment.
@@ -92,10 +92,12 @@ CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the code review assessment. Use: 'exploring' (starting analysis), 'low' "
"(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when "
"the code review is thoroughly complete and all significant issues are identified). Do NOT use 'certain' "
"unless the code review is comprehensively complete, use 'high' instead not 100% sure. Using 'certain' "
"prevents additional expert analysis."
"(early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"'very_high' (very strong evidence), 'almost_certain' (nearly complete review), 'certain' (100% confidence - "
"code review is thoroughly complete and all significant issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the code review is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also do "
"NOT set confidence to 'certain' if the user has strongly requested that external review must be performed."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -263,7 +265,7 @@ class CodeReviewTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {
@@ -358,7 +360,7 @@ class CodeReviewTool(WorkflowTool):
"""
Decide when to call external model based on investigation completeness.
Don't call expert analysis if Claude has certain confidence - trust their judgment.
Don't call expert analysis if the CLI agent has certain confidence - trust their judgment.
"""
# Check if user requested to skip assistant model
if request and not self.get_request_use_assistant_model(request):
@@ -380,7 +382,7 @@ class CodeReviewTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_code_review_summary(consolidated_findings)
context_parts.append(
f"\\n=== CLAUDE'S CODE REVIEW INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
f"\\n=== AGENT'S CODE REVIEW INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
)
# Add review configuration context if available
@@ -479,7 +481,7 @@ class CodeReviewTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
Code review workflow skips expert analysis when Claude has "certain" confidence.
Code review workflow skips expert analysis when the CLI agent has "certain" confidence.
"""
return request.confidence == "certain" and not request.next_step_required
@@ -516,7 +518,7 @@ class CodeReviewTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Code review-specific skip reason."""
return "Claude completed comprehensive code review with full confidence"
return "Completed comprehensive code review with full confidence locally"
def get_skip_expert_analysis_status(self) -> str:
"""Code review-specific expert analysis skip status."""

View File

@@ -2,12 +2,12 @@
Consensus tool - Step-by-step multi-model consensus with expert analysis
This tool provides a structured workflow for gathering consensus from multiple models.
It guides Claude through systematic steps where Claude first provides its own analysis,
It guides the CLI agent through systematic steps where the CLI agent first provides its own analysis,
then consults each requested model one by one, and finally synthesizes all perspectives.
Key features:
- Step-by-step consensus workflow with progress tracking
- Claude's initial neutral analysis followed by model-specific consultations
- The CLI agent's initial neutral analysis followed by model-specific consultations
- Context-aware file embedding
- Support for stance-based analysis (for/against/neutral)
- Final synthesis combining all perspectives
@@ -153,7 +153,7 @@ class ConsensusTool(WorkflowTool):
"""
Consensus workflow tool for step-by-step multi-model consensus gathering.
This tool implements a structured consensus workflow where Claude first provides
This tool implements a structured consensus workflow where the CLI agent first provides
its own neutral analysis, then consults each specified model individually,
and finally synthesizes all perspectives into a unified recommendation.
"""
@@ -189,7 +189,7 @@ class ConsensusTool(WorkflowTool):
)
def get_system_prompt(self) -> str:
# For Claude's initial analysis, use a neutral version of the consensus prompt
# For the CLI agent's initial analysis, use a neutral version of the consensus prompt
return CONSENSUS_PROMPT.replace(
"{stance_prompt}",
"""BALANCED PERSPECTIVE
@@ -325,7 +325,7 @@ of the evidence, even when it strongly points in one direction.""",
Note: confidence parameter is kept for compatibility with base class but not used.
"""
if step_number == 1:
# Claude's initial analysis
# CLI Agent's initial analysis
return [
"You've provided your initial analysis. The tool will now consult other models.",
"Wait for the next step to receive the first model's response.",
@@ -416,7 +416,7 @@ of the evidence, even when it strongly points in one direction.""",
current_idx = request.current_model_index or 0
if request.step_number == 1:
# After Claude's initial analysis, prepare to consult first model
# After CLI Agent's initial analysis, prepare to consult first model
response_data["status"] = "consulting_models"
response_data["next_model"] = self.models_to_consult[0] if self.models_to_consult else None
response_data["next_steps"] = (
@@ -475,9 +475,9 @@ of the evidence, even when it strongly points in one direction.""",
"next_step_required": request.step_number < request.total_steps,
}
# Add Claude's analysis to step 1
# Add CLI Agent's analysis to step 1
if request.step_number == 1:
response_data["claude_analysis"] = {
response_data["agent_analysis"] = {
"initial_analysis": request.step,
"findings": request.findings,
}
@@ -682,7 +682,7 @@ of the evidence, even when it strongly points in one direction.""",
"""
Customize metadata for consensus workflow to accurately reflect multi-model nature.
The default workflow metadata shows the model running Claude's analysis steps,
The default workflow metadata shows the model running Agent's analysis steps,
but consensus is a multi-model tool that consults different models. We need
to provide accurate metadata that reflects this.
"""
@@ -728,7 +728,7 @@ of the evidence, even when it strongly points in one direction.""",
}
)
# Remove the misleading single model metadata that shows Claude's execution model
# Remove the misleading single model metadata that shows Agent's execution model
# instead of the models being consulted
metadata.pop("model_used", None)
metadata.pop("provider_used", None)

View File

@@ -91,10 +91,12 @@ DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the hypothesis. Use: 'exploring' (starting out), 'low' (early idea), "
"'medium' (some supporting evidence), 'high' (strong evidence), 'certain' (only when "
"the root cause and minimal "
"fix are both confirmed). Do NOT use 'certain' unless the issue can be fully resolved with a fix, use 'high' "
"instead when not 100% sure. Using 'certain' prevents you from taking assistance from another thought-partner."
"'medium' (some supporting evidence), 'high' (strong evidence), 'very_high' (very strong evidence), "
"'almost_certain' (nearly confirmed), 'certain' (100% confidence - root cause and minimal fix are both "
"confirmed locally with no need for external model validation). Do NOT use 'certain' unless the issue can be "
"fully resolved with a fix, use 'very_high' or 'almost_certain' instead when not 100% sure. Using 'certain' "
"means you have complete confidence locally and prevents external model validation. Also do "
"NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
),
"backtrack_from_step": (
"If an earlier finding or hypothesis needs to be revised or discarded, specify the step number from which to "
@@ -238,7 +240,7 @@ class DebugIssueTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": DEBUG_INVESTIGATION_FIELD_DESCRIPTIONS["confidence"],
},
"hypothesis": {
@@ -283,7 +285,7 @@ class DebugIssueTool(WorkflowTool):
"Check for edge cases, boundary conditions, and assumptions in the code",
"Look for related configuration, dependencies, or external factors",
]
elif confidence in ["medium", "high"]:
elif confidence in ["medium", "high", "very_high"]:
# Close to root cause - need confirmation
return [
"Examine the exact code sections where you believe the issue occurs",
@@ -291,6 +293,14 @@ class DebugIssueTool(WorkflowTool):
"Verify your hypothesis with concrete code evidence",
"Check for any similar patterns elsewhere in the codebase",
]
elif confidence == "almost_certain":
# Almost certain - final verification before conclusion
return [
"Finalize your root cause analysis with specific evidence",
"Document the complete chain of causation from symptom to root cause",
"Verify the minimal fix approach is correct",
"Consider if expert analysis would provide additional insights",
]
else:
# General investigation needed
return [
@@ -304,7 +314,7 @@ class DebugIssueTool(WorkflowTool):
"""
Decide when to call external model based on investigation completeness.
Don't call expert analysis if Claude has certain confidence - trust their judgment.
Don't call expert analysis if the CLI agent has certain confidence - trust their judgment.
"""
# Check if user requested to skip assistant model
if request and not self.get_request_use_assistant_model(request):
@@ -323,11 +333,22 @@ class DebugIssueTool(WorkflowTool):
f"=== ISSUE DESCRIPTION ===\n{self.initial_issue or 'Investigation initiated'}\n=== END DESCRIPTION ==="
]
# Add special note if confidence is almost_certain
if consolidated_findings.confidence == "almost_certain":
context_parts.append(
"\n=== IMPORTANT: ALMOST CERTAIN CONFIDENCE ===\n"
"The agent has reached 'almost_certain' confidence but has NOT confirmed the bug with 100% certainty. "
"Your role is to:\n"
"1. Validate the agent's hypothesis and investigation\n"
"2. Identify any missing evidence or overlooked aspects\n"
"3. Provide additional insights that could confirm or refute the hypothesis\n"
"4. Help finalize the root cause analysis with complete certainty\n"
"=== END IMPORTANT ==="
)
# Add investigation summary
investigation_summary = self._build_investigation_summary(consolidated_findings)
context_parts.append(
f"\n=== CLAUDE'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ==="
)
context_parts.append(f"\n=== AGENT'S INVESTIGATION FINDINGS ===\n{investigation_summary}\n=== END FINDINGS ===")
# Add error context if available
error_context = self._extract_error_context(consolidated_findings)
@@ -422,7 +443,7 @@ class DebugIssueTool(WorkflowTool):
+ f"\n\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
+ "completing these investigations."
)
elif confidence in ["medium", "high"]:
elif confidence in ["medium", "high", "very_high"]:
next_steps = (
f"WAIT! Your hypothesis needs verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\n"
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
@@ -431,6 +452,16 @@ class DebugIssueTool(WorkflowTool):
f"or engineering assistant for clarification. Document findings with specific file:line references, "
f"then call {self.get_name()} with step_number: {step_number + 1}."
)
elif confidence == "almost_certain":
next_steps = (
"ALMOST CERTAIN - Prepare for final analysis. REQUIRED ACTIONS:\n"
+ "\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
+ "\n\nIMPORTANT: You're almost certain about the root cause. If you have NOT found the bug with "
"100% certainty, consider setting next_step_required=false to invoke expert analysis. The expert "
"can validate your hypotheses and provide additional insights. If you ARE 100% certain and have "
"identified the exact bug and fix, proceed to confidence='certain'. Otherwise, let expert analysis "
"help finalize the investigation."
)
else:
next_steps = (
f"PAUSE INVESTIGATION. Before calling {self.get_name()} step {step_number + 1}, you MUST examine code. "
@@ -468,7 +499,7 @@ class DebugIssueTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
Debug tool skips expert analysis when Claude has "certain" confidence.
Debug tool skips expert analysis when agent has "certain" confidence.
"""
return request.confidence == "certain" and not request.next_step_required
@@ -501,7 +532,7 @@ class DebugIssueTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Debug-specific skip reason."""
return "Claude identified exact root cause with minimal fix requirement"
return "Identified exact root cause with minimal fix requirement locally"
def get_request_relevant_context(self, request) -> list:
"""Get relevant_context for debug tool."""

View File

@@ -198,7 +198,7 @@ class DocgenTool(WorkflowTool):
"""
Docgen tool doesn't require model resolution at the MCP boundary.
The docgen tool is a self-contained workflow tool that guides Claude through
The docgen tool is a self-contained workflow tool that guides the CLI agent through
systematic documentation generation without calling external AI models.
Returns:
@@ -503,7 +503,7 @@ class DocgenTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
Docgen tool skips expert analysis when Claude has "certain" confidence.
Docgen tool skips expert analysis when the CLI agent has "certain" confidence.
"""
return request.confidence == "certain" and not request.next_step_required
@@ -536,7 +536,7 @@ class DocgenTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Docgen-specific skip reason."""
return "Claude completed comprehensive documentation analysis"
return "Completed comprehensive documentation analysis locally"
def get_request_relevant_context(self, request) -> list:
"""Get relevant_context for docgen tool."""

View File

@@ -225,7 +225,7 @@ class ListModelsTool(BaseTool):
output_lines.append(f"**Error loading models**: {str(e)}")
else:
output_lines.append("**Status**: Not configured (set OPENROUTER_API_KEY)")
output_lines.append("**Note**: Provides access to GPT-4, Claude, Mistral, and many more")
output_lines.append("**Note**: Provides access to GPT-4, O3, Mistral, and many more")
output_lines.append("")
@@ -296,7 +296,7 @@ class ListModelsTool(BaseTool):
# Add usage tips
output_lines.append("\n**Usage Tips**:")
output_lines.append("- Use model aliases (e.g., 'flash', 'o3', 'opus') for convenience")
output_lines.append("- In auto mode, Claude will select the best model for each task")
output_lines.append("- In auto mode, the CLI Agent will select the best model for each task")
output_lines.append("- Custom models are only available when CUSTOM_API_URL is set")
output_lines.append("- OpenRouter provides access to many cloud models with one API key")

View File

@@ -17,12 +17,12 @@ class ToolModelCategory(Enum):
class ContinuationOffer(BaseModel):
"""Offer for Claude to continue conversation when Gemini doesn't ask follow-up"""
"""Offer for CLI agent to continue conversation when Gemini doesn't ask follow-up"""
continuation_id: str = Field(
..., description="Thread continuation ID for multi-turn conversations across different tools"
)
note: str = Field(..., description="Message explaining continuation opportunity to Claude")
note: str = Field(..., description="Message explaining continuation opportunity to CLI agent")
remaining_turns: int = Field(..., description="Number of conversation turns remaining")
@@ -48,7 +48,7 @@ class ToolOutput(BaseModel):
content_type: Literal["text", "markdown", "json"] = "text"
metadata: Optional[dict[str, Any]] = Field(default_factory=dict)
continuation_offer: Optional[ContinuationOffer] = Field(
None, description="Optional offer for Claude to continue conversation"
None, description="Optional offer for Agent to continue conversation"
)
@@ -56,7 +56,7 @@ class FilesNeededRequest(BaseModel):
"""Request for missing files / code to continue"""
status: Literal["files_required_to_continue"] = "files_required_to_continue"
mandatory_instructions: str = Field(..., description="Critical instructions for Claude regarding required context")
mandatory_instructions: str = Field(..., description="Critical instructions for Agent regarding required context")
files_needed: Optional[list[str]] = Field(
default_factory=list, description="Specific files that are needed for analysis"
)
@@ -75,7 +75,7 @@ class FullCodereviewRequired(BaseModel):
class FocusedReviewRequired(BaseModel):
"""Request for Claude to provide smaller, focused subsets of code for review"""
"""Request for Agent to provide smaller, focused subsets of code for review"""
status: Literal["focused_review_required"] = "focused_review_required"
reason: str = Field(..., description="Why the current scope is too large for effective review")
@@ -122,14 +122,14 @@ class RefactorOpportunity(BaseModel):
class RefactorAction(BaseModel):
"""Next action for Claude to implement refactoring"""
"""Next action for Agent to implement refactoring"""
action_type: Literal["EXTRACT_METHOD", "SPLIT_CLASS", "MODERNIZE_SYNTAX", "REORGANIZE_CODE", "DECOMPOSE_FILE"] = (
Field(..., description="Type of action to perform")
)
target_file: str = Field(..., description="Absolute path to target file")
source_lines: str = Field(..., description="Line range (e.g., '45-67')")
description: str = Field(..., description="Step-by-step action description for Claude")
description: str = Field(..., description="Step-by-step action description for CLI Agent")
class RefactorAnalysisComplete(BaseModel):
@@ -138,7 +138,7 @@ class RefactorAnalysisComplete(BaseModel):
status: Literal["refactor_analysis_complete"] = "refactor_analysis_complete"
refactor_opportunities: list[RefactorOpportunity] = Field(..., description="List of refactoring opportunities")
priority_sequence: list[str] = Field(..., description="Recommended order of refactoring IDs")
next_actions_for_claude: list[RefactorAction] = Field(..., description="Specific actions for Claude to implement")
next_actions: list[RefactorAction] = Field(..., description="Specific actions for the agent to implement")
class CodeTooLargeRequest(BaseModel):

View File

@@ -2,7 +2,7 @@
Precommit Workflow tool - Step-by-step pre-commit validation with expert analysis
This tool provides a structured workflow for comprehensive pre-commit validation.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides the CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough code examination, git change analysis, and issue detection before proceeding.
The tool supports backtracking, finding updates, and expert analysis integration.
@@ -86,9 +86,12 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the assessment. Use: 'exploring' (starting analysis), 'low' (early "
"investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' (only when the "
"analysis is complete and all issues are identified). Do NOT use 'certain' unless the pre-commit validation "
"is thoroughly complete, use 'high' instead not 100% sure. Using 'certain' prevents additional expert analysis."
"investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (100% confidence - "
"analysis is complete and all issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also "
"do NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -266,7 +269,7 @@ class PrecommitTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {
@@ -365,7 +368,7 @@ class PrecommitTool(WorkflowTool):
"""
Decide when to call external model based on investigation completeness.
Don't call expert analysis if Claude has certain confidence - trust their judgment.
Don't call expert analysis if the CLI agent has certain confidence - trust their judgment.
"""
# Check if user requested to skip assistant model
if request and not self.get_request_use_assistant_model(request):
@@ -387,7 +390,7 @@ class PrecommitTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_precommit_summary(consolidated_findings)
context_parts.append(
f"\\n=== CLAUDE'S PRE-COMMIT INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
f"\\n=== AGENT'S PRE-COMMIT INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
)
# Add git configuration context if available
@@ -485,7 +488,7 @@ class PrecommitTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
Precommit workflow skips expert analysis when Claude has "certain" confidence.
Precommit workflow skips expert analysis when the CLI agent has "certain" confidence.
"""
return request.confidence == "certain" and not request.next_step_required
@@ -522,7 +525,7 @@ class PrecommitTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Precommit-specific skip reason."""
return "Claude completed comprehensive pre-commit validation with full confidence"
return "Completed comprehensive pre-commit validation with full confidence locally"
def get_skip_expert_analysis_status(self) -> str:
"""Precommit-specific expert analysis skip status."""

View File

@@ -2,7 +2,7 @@
Refactor tool - Step-by-step refactoring analysis with expert validation
This tool provides a structured workflow for comprehensive code refactoring analysis.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough code examination, refactoring opportunity identification, and quality
assessment before proceeding. The tool supports complex refactoring scenarios including
code smell detection, decomposition planning, modernization opportunities, and organization improvements.
@@ -92,11 +92,12 @@ REFACTOR_FIELD_DESCRIPTIONS = {
"Indicate your current confidence in the refactoring analysis completeness. Use: 'exploring' (starting "
"analysis), 'incomplete' (just started or significant work remaining), 'partial' (some refactoring "
"opportunities identified but more analysis needed), 'complete' (comprehensive refactoring analysis "
"finished with all major opportunities identified and Claude can handle 100% confidently without help). "
"finished with all major opportunities identified and the CLI agent can handle 100% confidently without help). "
"Use 'complete' ONLY when you have fully analyzed all code, identified all significant refactoring "
"opportunities, and can provide comprehensive recommendations without expert assistance. When files are "
"too large to read fully or analysis is uncertain, use 'partial'. Using 'complete' prevents expert "
"analysis to save time and money."
"analysis to save time and money. Do NOT set confidence to 'complete' if the user has strongly requested that "
"external validation MUST be performed."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -357,7 +358,7 @@ class RefactorTool(WorkflowTool):
"""
Decide when to call external model based on investigation completeness.
Don't call expert analysis if Claude has certain confidence and complete refactoring - trust their judgment.
Don't call expert analysis if the CLI agent has certain confidence and complete refactoring - trust their judgment.
"""
# Check if user requested to skip assistant model
if request and not self.get_request_use_assistant_model(request):
@@ -383,7 +384,7 @@ class RefactorTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_refactoring_summary(consolidated_findings)
context_parts.append(
f"\\n=== CLAUDE'S REFACTORING INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
f"\\n=== AGENT'S REFACTORING INVESTIGATION ===\\n{investigation_summary}\\n=== END INVESTIGATION ==="
)
# Add refactor configuration context if available
@@ -484,7 +485,7 @@ class RefactorTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
Refactor workflow skips expert analysis when Claude has "complete" confidence.
Refactor workflow skips expert analysis when the CLI agent has "complete" confidence.
"""
return request.confidence == "complete" and not request.next_step_required
@@ -524,7 +525,7 @@ class RefactorTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Refactor-specific skip reason."""
return "Claude completed comprehensive refactoring analysis with full confidence"
return "Completed comprehensive refactoring analysis with full confidence locally"
def get_skip_expert_analysis_status(self) -> str:
"""Refactor-specific expert analysis skip status."""

View File

@@ -2,7 +2,7 @@
SECAUDIT Workflow tool - Comprehensive security audit with systematic investigation
This tool provides a structured workflow for comprehensive security assessment and analysis.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides the CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough security examination, vulnerability identification, and compliance assessment
before proceeding. The tool supports complex security scenarios including OWASP Top 10 coverage,
compliance framework mapping, and technology-specific security patterns.
@@ -97,10 +97,11 @@ SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the security audit assessment. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), 'certain' "
"(only when the security audit is thoroughly complete and all significant security issues are identified). "
"Do NOT use 'certain' unless the security audit is comprehensively complete, use 'high' instead not 100% "
"sure. Using 'certain' prevents additional expert analysis."
"'low' (early investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
"'very_high' (very strong evidence), 'almost_certain' (nearly complete audit), 'certain' "
"(100% confidence - security audit is thoroughly complete and all significant security issues are identified with no need for external model validation). "
"Do NOT use 'certain' unless the security audit is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which "
@@ -351,7 +352,7 @@ class SecauditTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_security_audit_summary(consolidated_findings)
context_parts.append(
f"\n=== CLAUDE'S SECURITY INVESTIGATION ===\n{investigation_summary}\n=== END INVESTIGATION ==="
f"\n=== AGENT'S SECURITY INVESTIGATION ===\n{investigation_summary}\n=== END INVESTIGATION ==="
)
# Add security configuration context if available
@@ -480,7 +481,7 @@ class SecauditTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {
@@ -566,7 +567,7 @@ class SecauditTool(WorkflowTool):
return step_data
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""Security audit workflow skips expert analysis when Claude has "certain" confidence."""
"""Security audit workflow skips expert analysis when the CLI agent has "certain" confidence."""
return request.confidence == "certain" and not request.next_step_required
def store_initial_issue(self, step_description: str):
@@ -762,7 +763,7 @@ class SecauditTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Security audit-specific skip reason."""
return "Claude completed comprehensive security audit with full confidence"
return "Completed comprehensive security audit with full confidence locally"
def get_skip_expert_analysis_status(self) -> str:
"""Security audit-specific expert analysis skip status."""

View File

@@ -67,7 +67,11 @@ WORKFLOW_FIELD_DESCRIPTIONS = {
"relevant_files": "Files identified as relevant to the issue/goal",
"relevant_context": "Methods/functions identified as involved in the issue",
"issues_found": "Issues identified with severity levels during work",
"confidence": "Confidence level in findings: exploring, low, medium, high, certain",
"confidence": (
"Confidence level in findings: exploring (just starting), low (early investigation), "
"medium (some evidence), high (strong evidence), very_high (comprehensive understanding), "
"almost_certain (near complete confidence), certain (100% confidence locally - no external validation needed)"
),
"hypothesis": "Current theory about the issue/goal based on work",
"backtrack_from_step": "Step number to backtrack from if work needs revision",
"use_assistant_model": (

View File

@@ -2,7 +2,7 @@
TestGen Workflow tool - Step-by-step test generation with expert validation
This tool provides a structured workflow for comprehensive test generation.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides the CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough code examination, test planning, and pattern identification before proceeding.
The tool supports backtracking, finding updates, and expert analysis integration for
comprehensive test suite generation.
@@ -78,10 +78,11 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
),
"confidence": (
"Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), 'certain' "
"(only when the test plan is thoroughly complete and all test scenarios are identified). Do NOT use 'certain' "
"unless the test generation analysis is comprehensively complete, use 'high' instead not 100% sure. Using "
"'certain' prevents additional expert analysis."
"'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), "
"'very_high' (very strong understanding), 'almost_certain' (nearly complete test plan), 'certain' "
"(100% confidence - test plan is thoroughly complete and all test scenarios are identified with no need for external model validation). "
"Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
"Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"backtrack_from_step": (
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
@@ -228,7 +229,7 @@ class TestGenTool(WorkflowTool):
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"backtrack_from_step": {
@@ -315,7 +316,7 @@ class TestGenTool(WorkflowTool):
# Add investigation summary
investigation_summary = self._build_test_generation_summary(consolidated_findings)
context_parts.append(
f"\n=== CLAUDE'S TEST PLANNING INVESTIGATION ===\n{investigation_summary}\n=== END INVESTIGATION ==="
f"\n=== AGENT'S TEST PLANNING INVESTIGATION ===\n{investigation_summary}\n=== END INVESTIGATION ==="
)
# Add relevant code elements if available
@@ -388,7 +389,7 @@ class TestGenTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
Test generation workflow skips expert analysis when Claude has "certain" confidence.
Test generation workflow skips expert analysis when the CLI agent has "certain" confidence.
"""
return request.confidence == "certain" and not request.next_step_required
@@ -425,7 +426,7 @@ class TestGenTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Test generation-specific skip reason."""
return "Claude completed comprehensive test planning with full confidence"
return "Completed comprehensive test planning with full confidence locally"
def get_skip_expert_analysis_status(self) -> str:
"""Test generation-specific expert analysis skip status."""

View File

@@ -84,9 +84,10 @@ class ThinkDeepWorkflowRequest(WorkflowRequest):
default="low",
description="Indicate your current confidence in the analysis. Use: 'exploring' (starting analysis), "
"'low' (early thinking), 'medium' (some insights gained), 'high' (strong understanding), "
"'certain' (only when the analysis is complete and conclusions are definitive). "
"Do NOT use 'certain' unless the thinking is comprehensively complete, use 'high' instead when in doubt. "
"Using 'certain' prevents additional expert analysis to save time and money.",
"'very_high' (very strong understanding), 'almost_certain' (nearly complete analysis), "
"'certain' (100% confidence - analysis is complete and conclusions are definitive with no need for external model validation). "
"Do NOT use 'certain' unless the thinking is comprehensively complete, use 'very_high' or 'almost_certain' instead when in doubt. "
"Using 'certain' means you have complete confidence locally and prevents external model validation.",
)
# Advanced workflow features
@@ -277,7 +278,7 @@ class ThinkDeepTool(WorkflowTool):
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
"""
ThinkDeep tool skips expert analysis when Claude has "certain" confidence.
ThinkDeep tool skips expert analysis when the CLI agent has "certain" confidence.
"""
return request.confidence == "certain" and not request.next_step_required
@@ -299,7 +300,7 @@ class ThinkDeepTool(WorkflowTool):
def get_skip_reason(self) -> str:
"""Reason for skipping expert analysis."""
return "Claude expressed certain confidence in the deep thinking analysis - no additional validation needed"
return "Expressed 'certain' confidence in the deep thinking analysis - no additional validation needed"
def get_completion_message(self) -> str:
"""Message for completion without expert analysis."""
@@ -433,11 +434,27 @@ but also acknowledge strong insights and valid conclusions.
]
)
elif confidence == "high":
actions.extend(
[
"Refine and validate key findings",
"Explore edge cases and limitations",
"Document assumptions and trade-offs",
]
)
elif confidence == "very_high":
actions.extend(
[
"Synthesize findings into cohesive recommendations",
"Validate conclusions against evidence",
"Prepare for expert analysis",
"Validate conclusions against all evidence",
"Prepare comprehensive implementation guidance",
]
)
elif confidence == "almost_certain":
actions.extend(
[
"Finalize recommendations with high confidence",
"Document any remaining minor uncertainties",
"Prepare for expert analysis or implementation",
]
)
else: # certain
@@ -516,10 +533,20 @@ but also acknowledge strong insights and valid conclusions.
f"Your thinking analysis confidence is CERTAIN. Consider if you truly need step {next_step_number} "
f"or if you should complete the analysis now with expert validation."
)
elif request.confidence == "almost_certain":
guidance = (
f"Your thinking analysis confidence is ALMOST_CERTAIN. For step {next_step_number}, consider: "
f"finalizing recommendations, documenting any minor uncertainties, or preparing for implementation."
)
elif request.confidence == "very_high":
guidance = (
f"Your thinking analysis confidence is VERY_HIGH. For step {next_step_number}, consider: "
f"synthesis of all findings, comprehensive validation, or creating implementation roadmap."
)
elif request.confidence == "high":
guidance = (
f"Your thinking analysis confidence is HIGH. For step {next_step_number}, consider: "
f"validation of conclusions, stress-testing assumptions, or exploring edge cases."
f"exploring edge cases, documenting trade-offs, or stress-testing key assumptions."
)
elif request.confidence == "medium":
guidance = (

View File

@@ -2,7 +2,7 @@
Tracer Workflow tool - Step-by-step code tracing and dependency analysis
This tool provides a structured workflow for comprehensive code tracing and analysis.
It guides Claude through systematic investigation steps with forced pauses between each step
It guides the CLI agent through systematic investigation steps with forced pauses between each step
to ensure thorough code examination, dependency mapping, and execution flow analysis before proceeding.
The tracer guides users through sequential code analysis with full context awareness and
@@ -86,8 +86,10 @@ TRACER_WORKFLOW_FIELD_DESCRIPTIONS = {
"confidence": (
"Indicate your current confidence in the tracing analysis completeness. Use: 'exploring' (starting analysis), "
"'low' (early investigation), 'medium' (some patterns identified), 'high' (comprehensive understanding), "
"'complete' (tracing analysis finished and ready for output). Do NOT use 'complete' unless the tracing "
"analysis is thoroughly finished and you have a comprehensive understanding of the code relationships."
"'very_high' (very comprehensive understanding), 'almost_certain' (nearly complete tracing), "
"'certain' (100% confidence - tracing analysis is finished and ready for output with no need for external model validation). "
"Do NOT use 'certain' unless the tracing analysis is thoroughly finished and you have a comprehensive understanding "
"of the code relationships. Using 'certain' means you have complete confidence locally and prevents external model validation."
),
"trace_mode": "Type of tracing: 'ask' (default - prompts user to choose mode), 'precision' (execution flow) or 'dependencies' (structural relationships)",
"target_description": (
@@ -545,7 +547,7 @@ class TracerTool(WorkflowTool):
def _get_rendering_instructions(self, trace_mode: str) -> str:
"""
Get mode-specific rendering instructions for Claude.
Get mode-specific rendering instructions for the CLI agent.
Args:
trace_mode: Either "precision" or "dependencies"

View File

@@ -228,7 +228,7 @@ class VersionTool(BaseTool):
output_lines.append("git pull")
output_lines.append("```")
output_lines.append("")
output_lines.append("*Note: Restart your Claude session after updating to use the new version.*")
output_lines.append("*Note: Restart your session after updating to use the new version.*")
elif comparison == 0:
# Up to date
output_lines.append("")

View File

@@ -65,7 +65,7 @@ class WorkflowSchemaBuilder:
},
"confidence": {
"type": "string",
"enum": ["exploring", "low", "medium", "high", "certain"],
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
"description": WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
},
"hypothesis": {