From 57200a8a2e6ad5e3c544fe6ff10c09b19a819902 Mon Sep 17 00:00:00 2001 From: Fahad Date: Wed, 20 Aug 2025 15:19:01 +0400 Subject: [PATCH] Precommit updated to always perform external analysis (via _other_ model) unless specified not to. This prevents Claude from being overconfident and inadequately performing subpar precommit checks. Improved precommit continuations to be immediate Workflow state restoration added between stateless calls Fixed incorrect token limit check --- .gitignore | 3 +- tools/analyze.py | 4 +- tools/codereview.py | 4 +- tools/consensus.py | 3 +- tools/debug.py | 4 +- tools/docgen.py | 4 +- tools/planner.py | 4 +- tools/precommit.py | 155 ++++++++++++++++++++----------- tools/refactor.py | 4 +- tools/secaudit.py | 4 +- tools/shared/base_tool.py | 3 +- tools/simple/base.py | 5 +- tools/testgen.py | 4 +- tools/thinkdeep.py | 4 +- tools/tracer.py | 4 +- tools/workflow/base.py | 17 +++- tools/workflow/workflow_mixin.py | 33 ++++++- utils/file_types.py | 1 + 18 files changed, 187 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index 1be8ec3..e7cce2a 100644 --- a/.gitignore +++ b/.gitignore @@ -185,4 +185,5 @@ logs/ *.backup /.desktop_configured -/worktrees/ \ No newline at end of file +/worktrees/ +test_simulation_files/ diff --git a/tools/analyze.py b/tools/analyze.py index f959037..8da782d 100644 --- a/tools/analyze.py +++ b/tools/analyze.py @@ -294,7 +294,9 @@ class AnalyzeTool(WorkflowTool): excluded_workflow_fields=list(excluded_fields), ) - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each investigation phase.""" if step_number == 1: # Initial analysis investigation tasks diff --git a/tools/codereview.py b/tools/codereview.py index 363cc16..3d969aa 100644 --- a/tools/codereview.py +++ b/tools/codereview.py @@ -320,7 +320,9 @@ class CodeReviewTool(WorkflowTool): tool_name=self.get_name(), ) - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each investigation phase.""" if step_number == 1: # Initial code review investigation tasks diff --git a/tools/consensus.py b/tools/consensus.py index 58ac43e..35d08df 100644 --- a/tools/consensus.py +++ b/tools/consensus.py @@ -323,10 +323,11 @@ of the evidence, even when it strongly points in one direction.""", return schema def get_required_actions( - self, step_number: int, confidence: str, findings: str, total_steps: int + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None ) -> list[str]: # noqa: ARG002 """Define required actions for each consensus phase. + Now includes request parameter for continuation-aware decisions. Note: confidence parameter is kept for compatibility with base class but not used. """ if step_number == 1: diff --git a/tools/debug.py b/tools/debug.py index 7874d11..4a0cb22 100644 --- a/tools/debug.py +++ b/tools/debug.py @@ -272,7 +272,9 @@ class DebugIssueTool(WorkflowTool): tool_name=self.get_name(), ) - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each investigation phase.""" if step_number == 1: # Initial investigation tasks diff --git a/tools/docgen.py b/tools/docgen.py index 9fee75a..12fdf80 100644 --- a/tools/docgen.py +++ b/tools/docgen.py @@ -293,7 +293,9 @@ class DocgenTool(WorkflowTool): excluded_common_fields=excluded_common_fields, ) - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for comprehensive documentation analysis with step-by-step file focus.""" if step_number == 1: # Initial discovery ONLY - no documentation yet diff --git a/tools/planner.py b/tools/planner.py index 881a18c..b71ebc2 100644 --- a/tools/planner.py +++ b/tools/planner.py @@ -259,7 +259,9 @@ class PlannerTool(WorkflowTool): # Abstract Methods - Required Implementation from BaseWorkflowMixin # ================================================================================ - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each planning phase.""" if step_number == 1: # Initial planning tasks diff --git a/tools/precommit.py b/tools/precommit.py index 3cccd78..9efb8bf 100644 --- a/tools/precommit.py +++ b/tools/precommit.py @@ -52,13 +52,15 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = { ), "total_steps": ( "Your current estimate for how many steps will be needed to complete the pre-commit investigation. " - "Adjust as new findings emerge. IMPORTANT: When continuation_id is provided (continuing a previous " - "conversation), set this to 1 as we're not starting a new multi-step investigation." + "Adjust as new findings emerge. IMPORTANT: When continuation_id is provided with external validation, " + "set this to 2 maximum (step 1: gather git changes, step 2: proceed to expert). For internal validation " + "continuations, set to 1 as we're not starting a new multi-step investigation." ), "next_step_required": ( "Set to true if you plan to continue the investigation with another step. False means you believe the " - "pre-commit analysis is complete and ready for expert validation. IMPORTANT: When continuation_id is " - "provided (continuing a previous conversation), set this to False to immediately proceed with expert analysis." + "pre-commit analysis is complete and ready for expert validation. IMPORTANT: For external continuations, " + "set to True only on step 1 (while gathering changes), then False on step 2 to trigger expert analysis. " + "For internal continuations, set to False to complete immediately." ), "findings": ( "Summarize everything discovered in this step about the changes being committed. Include analysis of git diffs, " @@ -332,15 +334,39 @@ class PrecommitTool(WorkflowTool): ) def get_required_actions( - self, step_number: int, findings_count: int, issues_count: int, total_steps: int + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None ) -> list[str]: - """Define required actions for each investigation phase.""" + """Define required actions for each investigation phase. + + Now includes request parameter for continuation-aware decisions. + """ + # Check for continuation - fast track mode + if request: + continuation_id = self.get_request_continuation_id(request) + if continuation_id and hasattr(request, "precommit_type") and request.precommit_type == "external": + if step_number == 1: + return [ + "Execute git status to see all changes", + "Execute git diff --cached for staged changes", + "Execute git diff for unstaged changes", + "List any relevant untracked files as well.", + ] + else: + return ["Complete validation and proceed to expert analysis with changeset file"] + + # Extract counts for normal flow + findings_count = len(findings.split("\n")) if findings else 0 + issues_count = len(self.consolidated_findings.issues_found) if hasattr(self, "consolidated_findings") else 0 + if step_number == 1: # Initial pre-commit investigation tasks return [ "Search for all git repositories in the specified path using appropriate tools", "Check git status to identify staged, unstaged, and untracked changes as required", - "Examine the actual file changes using git diff or file reading tools", + "Execute git status to see all changes", + "Execute git diff --cached for staged changes", + "Execute git diff for unstaged changes", + "List any relevant untracked files as well.", "Understand what functionality was added, modified, or removed", "Identify the scope and intent of the changes being committed", ] @@ -564,9 +590,9 @@ class PrecommitTool(WorkflowTool): expert_analysis_used: True if expert analysis was successfully executed """ base_message = ( - "PRE-COMMIT VALIDATION IS COMPLETE. You MUST now summarize and present ALL validation results, " - "identified issues with their severity levels, and exact commit recommendations. Clearly state whether " - "the changes are ready for commit or require fixes first. Provide concrete, actionable guidance for " + "PRE-COMMIT VALIDATION IS COMPLETE. You may delete any `zen_precommit.changeset` created. You MUST now summarize " + "and present ALL validation results, identified issues with their severity levels, and exact commit recommendations. " + "Clearly state whether the changes are ready for commit or require fixes first. Provide concrete, actionable guidance for " "any issues that need resolution—make it easy for a developer to understand exactly what needs to be " "done before committing." ) @@ -604,54 +630,76 @@ class PrecommitTool(WorkflowTool): def get_precommit_step_guidance(self, step_number: int, request) -> dict[str, Any]: """ Provide step-specific guidance for precommit workflow. + Uses get_required_actions to determine what needs to be done, + then formats those actions into appropriate guidance messages. """ - # Check if this is a continuation - if so, skip workflow and go to expert analysis + # Get the required actions from the single source of truth + required_actions = self.get_required_actions( + step_number, + request.precommit_type or "external", # Using precommit_type as confidence proxy + request.findings or "", + request.total_steps, + request, # Pass request for continuation-aware decisions + ) + + # Check if this is a continuation to provide context-aware guidance continuation_id = self.get_request_continuation_id(request) - if continuation_id: - if request.precommit_type == "external": - return { - "next_steps": ( - "Continuing previous conversation with external validation. CRITICAL: You MUST first gather " - "the complete git changeset (git status, git diff --cached, git diff) to provide to the expert. " - "No minimum steps required - as soon as you provide the git changes in your response, " - "the expert analysis will be performed immediately. The expert needs the FULL context of " - "all changes to provide comprehensive validation. Include staged changes, unstaged changes, " - "and any untracked files that are part of this commit." - ) - } - else: - return { - "next_steps": ( - "Continuing previous conversation with internal validation only. The analysis will build " - "upon the prior findings without external model validation. Proceed with your own assessment " - "of the changes based on the accumulated context." - ) - } - - # Generate the next steps instruction based on required actions - findings_count = len(request.findings.split("\n")) if request.findings else 0 - issues_count = len(request.issues_found) if request.issues_found else 0 - required_actions = self.get_required_actions(step_number, findings_count, issues_count, request.total_steps) + is_external_continuation = continuation_id and request.precommit_type == "external" + is_internal_continuation = continuation_id and request.precommit_type == "internal" + # Format the guidance based on step number and continuation status if step_number == 1: - next_steps = ( - f"MANDATORY: DO NOT call the {self.get_name()} tool again immediately. You MUST first investigate " - f"the git repositories and changes using appropriate tools. CRITICAL AWARENESS: You need to discover " - f"all git repositories, examine staged/unstaged changes, understand what's being committed, and identify " - f"potential issues before proceeding. Use git status, git diff, file reading tools, and code analysis " - f"to gather comprehensive information. Only call {self.get_name()} again AFTER completing your investigation. " - f"When you call {self.get_name()} next time, use step_number: {step_number + 1} and report specific " - f"files examined, changes analyzed, and validation findings discovered." - ) + if is_external_continuation: + # Fast-track mode for external continuations + next_steps = ( + "You are on step 1 of MAXIMUM 2 steps. CRITICAL: Gather and save the complete git changeset NOW. " + "MANDATORY ACTIONS:\\n" + + "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions)) + + "\\n\\nMANDATORY: The changeset may be large. You MUST save the required changeset as a 'zen_precommit.changeset' file " + "(replacing any existing one) in your work directory and include the FULL absolute path in relevant_files. " + "Set next_step_required=True and step_number=2 for the next call." + ) + elif is_internal_continuation: + # Internal validation mode + next_steps = ( + "Continuing previous conversation with internal validation only. The analysis will build " + "upon the prior findings without external model validation. REQUIRED ACTIONS:\\n" + + "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions)) + ) + else: + # Normal flow for new validations + next_steps = ( + f"MANDATORY: DO NOT call the {self.get_name()} tool again immediately. You MUST first investigate " + f"the git repositories and changes using appropriate tools. CRITICAL AWARENESS: You need to:\\n" + + "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions)) + + f"\\n\\nOnly call {self.get_name()} again AFTER completing your investigation. " + f"When you call {self.get_name()} next time, use step_number: {step_number + 1} " + f"and report specific files examined, changes analyzed, and validation findings discovered." + ) + elif step_number == 2: - next_steps = ( - f"STOP! Do NOT call {self.get_name()} again yet. Based on your findings, you've identified areas that need " - f"deeper analysis. MANDATORY ACTIONS before calling {self.get_name()} step {step_number + 1}:\\n" - + "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions)) - + f"\\n\\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER " - + "completing these validations." - ) - elif step_number >= 2: + if is_external_continuation: + # Fast-track completion + next_steps = ( + "Continuation complete - proceeding immediately to expert analysis. " + f"MANDATORY: call {self.get_name()} tool immediately again, and set next_step_required=False to " + f"trigger external validation NOW. " + f"MANDATORY: Include the entire changeset! The changeset may be large. You MUST save the required " + f"changeset as a 'zen_precommit.changeset' file (replacing any existing one) in your work directory " + f"and include the FULL absolute path in relevant_files so the expert can access the complete changeset." + ) + else: + # Normal flow - deeper analysis needed + next_steps = ( + f"STOP! Do NOT call {self.get_name()} again yet. Based on your findings, you've identified areas that need " + f"deeper analysis. MANDATORY ACTIONS before calling {self.get_name()} step {step_number + 1}:\\n" + + "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions)) + + f"\\n\\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER " + + "completing these validations." + ) + + elif step_number >= 3: + # Later steps - final verification next_steps = ( f"WAIT! Your validation needs final verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\\n" + "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions)) @@ -660,6 +708,7 @@ class PrecommitTool(WorkflowTool): f"with step_number: {step_number + 1}." ) else: + # Fallback for any other case next_steps = ( f"PAUSE VALIDATION. Before calling {self.get_name()} step {step_number + 1}, you MUST examine more code and changes. " + "Required: " diff --git a/tools/refactor.py b/tools/refactor.py index 390002b..9a132ba 100644 --- a/tools/refactor.py +++ b/tools/refactor.py @@ -313,7 +313,9 @@ class RefactorTool(WorkflowTool): tool_name=self.get_name(), ) - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each investigation phase.""" if step_number == 1: # Initial refactoring investigation tasks diff --git a/tools/secaudit.py b/tools/secaudit.py index fb16499..ab20765 100644 --- a/tools/secaudit.py +++ b/tools/secaudit.py @@ -264,7 +264,9 @@ class SecauditTool(WorkflowTool): """ return SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """ Provide step-specific guidance for systematic security analysis. diff --git a/tools/shared/base_tool.py b/tools/shared/base_tool.py index d2ce7b5..499d724 100644 --- a/tools/shared/base_tool.py +++ b/tools/shared/base_tool.py @@ -983,7 +983,8 @@ class BaseTool(ABC): reserve_tokens=reserve_tokens, include_line_numbers=self.wants_line_numbers_by_default(), ) - self._validate_token_limit(file_content, context_description) + # Note: No need to validate against MCP_PROMPT_SIZE_LIMIT here + # read_files already handles token-aware truncation based on model's capabilities content_parts.append(file_content) # Track the expanded files as actually processed diff --git a/tools/simple/base.py b/tools/simple/base.py index 4dd95b3..ff6a8c4 100644 --- a/tools/simple/base.py +++ b/tools/simple/base.py @@ -695,8 +695,9 @@ class SimpleTool(BaseTool): if file_content: user_content = f"{user_content}\n\n=== {file_context_title} ===\n{file_content}\n=== END CONTEXT ====" - # Check token limits - self._validate_token_limit(user_content, "Content") + # Check token limits - only validate original user prompt, not conversation history + content_to_validate = self.get_prompt_content_for_size_validation(user_content) + self._validate_token_limit(content_to_validate, "Content") # Add web search instruction if enabled websearch_instruction = "" diff --git a/tools/testgen.py b/tools/testgen.py index 272107d..c0da7b6 100644 --- a/tools/testgen.py +++ b/tools/testgen.py @@ -252,7 +252,9 @@ class TestGenTool(WorkflowTool): tool_name=self.get_name(), ) - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each investigation phase.""" if step_number == 1: # Initial test generation investigation tasks diff --git a/tools/thinkdeep.py b/tools/thinkdeep.py index 99976b6..6911aa8 100644 --- a/tools/thinkdeep.py +++ b/tools/thinkdeep.py @@ -403,7 +403,9 @@ but also acknowledge strong insights and valid conclusions. except AttributeError: return ["comprehensive analysis"] - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """ Return required actions for the current thinking step. """ diff --git a/tools/tracer.py b/tools/tracer.py index 6fe1aa9..fc24c45 100644 --- a/tools/tracer.py +++ b/tools/tracer.py @@ -286,7 +286,9 @@ class TracerTool(WorkflowTool): # Abstract Methods - Required Implementation from BaseWorkflowMixin # ================================================================================ - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each tracing phase.""" if step_number == 1: # Check if we're in ask mode and need to prompt for mode selection diff --git a/tools/workflow/base.py b/tools/workflow/base.py index 0ff3593..fb085d4 100644 --- a/tools/workflow/base.py +++ b/tools/workflow/base.py @@ -411,8 +411,21 @@ class WorkflowTool(BaseTool, BaseWorkflowMixin): # (These are inherited from BaseWorkflowMixin and must be implemented) @abstractmethod - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: - """Define required actions for each work phase.""" + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: + """Define required actions for each work phase. + + Args: + step_number: Current step number + confidence: Current confidence level + findings: Current findings text + total_steps: Total estimated steps + request: Optional request object for continuation-aware decisions + + Returns: + List of required actions for the current step + """ pass @abstractmethod diff --git a/tools/workflow/workflow_mixin.py b/tools/workflow/workflow_mixin.py index 8ac9135..ceb88c6 100644 --- a/tools/workflow/workflow_mixin.py +++ b/tools/workflow/workflow_mixin.py @@ -134,7 +134,9 @@ class BaseWorkflowMixin(ABC): pass @abstractmethod - def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]: + def get_required_actions( + self, step_number: int, confidence: str, findings: str, total_steps: int, request=None + ) -> list[str]: """Define required actions for each work phase. Args: @@ -142,6 +144,7 @@ class BaseWorkflowMixin(ABC): confidence: Current confidence level (exploring, low, medium, high, certain) findings: Current findings text total_steps: Total estimated steps for this work + request: Optional request object for continuation-aware decisions Returns: List of specific actions Claude should take before calling tool again @@ -298,7 +301,7 @@ class BaseWorkflowMixin(ABC): Default implementation uses required actions. """ required_actions = self.get_required_actions( - request.step_number, self.get_request_confidence(request), request.findings, request.total_steps + request.step_number, self.get_request_confidence(request), request.findings, request.total_steps, request ) next_step_number = request.step_number + 1 @@ -673,6 +676,26 @@ class BaseWorkflowMixin(ABC): # Handle continuation continuation_id = request.continuation_id + # Restore workflow state on continuation + if continuation_id: + from utils.conversation_memory import get_thread + + thread = get_thread(continuation_id) + if thread and thread.turns: + # Find the most recent assistant turn from this tool with workflow state + for turn in reversed(thread.turns): + if turn.role == "assistant" and turn.tool_name == self.get_name() and turn.model_metadata: + state = turn.model_metadata + if isinstance(state, dict) and "work_history" in state: + self.work_history = state.get("work_history", []) + self.initial_request = state.get("initial_request") + # Rebuild consolidated findings from restored history + self._reprocess_consolidated_findings() + logger.debug( + f"[{self.get_name()}] Restored workflow state with {len(self.work_history)} history items" + ) + break # State restored, exit loop + # Adjust total steps if needed if request.step_number > request.total_steps: request.total_steps = request.step_number @@ -1109,6 +1132,9 @@ class BaseWorkflowMixin(ABC): # CRITICAL: Extract clean content for conversation history (exclude internal workflow metadata) clean_content = self._extract_clean_workflow_content_for_history(response_data) + # Serialize workflow state for persistence across stateless tool calls + workflow_state = {"work_history": self.work_history, "initial_request": getattr(self, "initial_request", None)} + add_turn( thread_id=continuation_id, role="assistant", @@ -1116,6 +1142,7 @@ class BaseWorkflowMixin(ABC): tool_name=self.get_name(), files=self.get_request_relevant_files(request), images=self.get_request_images(request), + model_metadata=workflow_state, # Persist the state ) def _add_workflow_metadata(self, response_data: dict, arguments: dict[str, Any]) -> None: @@ -1343,7 +1370,7 @@ class BaseWorkflowMixin(ABC): # Get tool-specific required actions required_actions = self.get_required_actions( - request.step_number, self.get_request_confidence(request), request.findings, request.total_steps + request.step_number, self.get_request_confidence(request), request.findings, request.total_steps, request ) response_data["required_actions"] = required_actions diff --git a/utils/file_types.py b/utils/file_types.py index a1662cd..87a1059 100644 --- a/utils/file_types.py +++ b/utils/file_types.py @@ -86,6 +86,7 @@ TEXT_DATA = { ".sbt", # SBT ".pom", # Maven POM ".lock", # Lock files + ".changeset", # Precommit changeset } # Image file extensions - limited to what AI models actually support