Add DocGen tool with comprehensive documentation generation capabilities (#109)

* WIP: new workflow architecture

* WIP: further improvements and cleanup

* WIP: cleanup and docs, replace old tool with new

* WIP: cleanup and docs, replace old tool with new

* WIP: new planner implementation using workflow

* WIP: precommit tool working as a workflow instead of a basic tool
Support passing use_assistant_model=False to skip external models entirely and use Claude only (see the sketch below)
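
A minimal sketch of what this enables, assuming a workflow request body with a use_assistant_model field (the field and values shown are illustrative, not the exact schema from this commit):

```python
# Hypothetical request illustrating use_assistant_model=False; names are assumptions.
precommit_request = {
    "step": "Review staged changes for regressions",
    "step_number": 1,
    "total_steps": 1,
    "next_step_required": False,
    "use_assistant_model": False,  # skip external models; Claude does the analysis alone
}
```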

* WIP: precommit workflow version swapped with old

* WIP: codereview

* WIP: replaced codereview

* WIP: replaced codereview

* WIP: replaced refactor

* WIP: workflow for thinkdeep

* WIP: ensure files get embedded correctly

* WIP: thinkdeep replaced with workflow version

* WIP: improved messaging when an external model's response is received

* WIP: analyze tool swapped

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: updated tests
* Extract only the content when building history
* Use "relevant_files" for workflow tools only

* WIP: fixed missing parameter in get_completion_next_steps_message

* Fixed tests
Request files consistently

* Fixed tests
Request files consistently

* Fixed tests

* New testgen workflow tool
Updated docs

* Swap testgen workflow

* Fix CI test failures by excluding API-dependent tests

- Update GitHub Actions workflow to exclude simulation tests that require API keys (see the sketch after this list)
- Fix collaboration tests to properly mock workflow tool expert analysis calls
- Update test assertions to handle new workflow tool response format
- Ensure unit tests run without external API dependencies in CI
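
A hedged sketch of one common way to keep API-dependent tests out of CI, assuming an environment-variable gate (the marker and variable names are illustrative, not necessarily what this commit uses):

```python
# Hypothetical pytest gate for API-dependent simulation tests; names are assumptions.
import os

import pytest

requires_api_key = pytest.mark.skipif(
    not os.environ.get("GEMINI_API_KEY"),
    reason="simulation test requires a real API key",
)


@requires_api_key
def test_end_to_end_workflow():
    ...  # would call a real provider, so it stays skipped in CI
```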

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* WIP - Update tests to match new tools

* WIP - Update tests to match new tools

* WIP - Update tests to match new tools

* Should help with https://github.com/BeehiveInnovations/zen-mcp-server/issues/97
Clear the Python cache when running the script (see the sketch below): https://github.com/BeehiveInnovations/zen-mcp-server/issues/96
Improved retry error logging
Cleanup
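
A minimal sketch of the cache-clearing idea from issue #96, assuming a recursive removal of __pycache__ directories before launch (illustrative, not the script's exact code):

```python
# Hypothetical illustration: clear stale bytecode caches before starting the server.
import shutil
from pathlib import Path


def clear_python_cache(root: str = ".") -> None:
    """Remove every __pycache__ directory under root."""
    for cache_dir in Path(root).rglob("__pycache__"):
        shutil.rmtree(cache_dir, ignore_errors=True)
```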

* WIP - chat tool using new architecture and improved code sharing

* Removed todo

* Removed todo

* Cleanup old name

* Tweak wording

* Tweak wording
Migrate old tests

* Support for Flash 2.0 and Flash Lite 2.0

* Support for Flash 2.0 and Flash Lite 2.0

* Support for Flash 2.0 and Flash Lite 2.0
Fixed test

* Improved consensus to use the workflow base class

* Improved consensus to use the workflow base class

* Allow images

* Allow images

* Replaced old consensus tool

* Cleanup tests

* Tests for prompt size

* New tool: docgen
Tests for prompt size
Fixes: https://github.com/BeehiveInnovations/zen-mcp-server/issues/107
Use available token size limits: https://github.com/BeehiveInnovations/zen-mcp-server/issues/105
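
The actual size guard lands in the diff below; as a standalone sketch of the pattern (the limit value here is illustrative, the real one comes from config):

```python
# Hypothetical standalone version of the prompt-size guard shown in the diff below.
MCP_PROMPT_SIZE_LIMIT = 50_000  # illustrative; the real value is defined in config


def check_step_size(step: str) -> None:
    if step and len(step) > MCP_PROMPT_SIZE_LIMIT:
        raise ValueError(
            f"Step instructions are too long ({len(step)} > {MCP_PROMPT_SIZE_LIMIT} chars); "
            "use shorter instructions and provide detail via file paths"
        )
```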

* Improved docgen prompt
Exclude TestGen from pytest collection

* Updated errors

* Lint

* DocGen instructed not to fix bugs but to surface them and stick to documentation

* WIP

* Stop Claude from being lazy and documenting only a small handful of functions

* More style rules

---------

Co-authored-by: Claude <noreply@anthropic.com>
Author: Beehive Innovations
Date: 2025-06-21 23:21:19 -07:00
Committed by: GitHub
Parent: 0655590a51
Commit: c960bcb720
58 changed files with 5492 additions and 5558 deletions


@@ -28,6 +28,7 @@ from typing import Any, Optional
 from mcp.types import TextContent

+from config import MCP_PROMPT_SIZE_LIMIT
 from utils.conversation_memory import add_turn, create_thread

 from ..shared.base_models import ConsolidatedFindings
@@ -111,6 +112,7 @@ class BaseWorkflowMixin(ABC):
         description: str,
         remaining_budget: Optional[int] = None,
         arguments: Optional[dict[str, Any]] = None,
+        model_context: Optional[Any] = None,
     ) -> tuple[str, list[str]]:
         """Prepare file content for prompts. Usually provided by BaseTool."""
         pass
@@ -230,6 +232,23 @@ class BaseWorkflowMixin(ABC):
         except AttributeError:
             return self.get_default_temperature()

+    def get_validated_temperature(self, request, model_context: Any) -> tuple[float, list[str]]:
+        """
+        Get temperature from request and validate it against model constraints.
+
+        This is a convenience method that combines temperature extraction and validation
+        for workflow tools. It ensures temperature is within valid range for the model.
+
+        Args:
+            request: The request object containing temperature
+            model_context: Model context object containing model info
+
+        Returns:
+            Tuple of (validated_temperature, warning_messages)
+        """
+        temperature = self.get_request_temperature(request)
+        return self.validate_and_correct_temperature(temperature, model_context)
+
     def get_request_thinking_mode(self, request) -> str:
         """Get thinking mode from request. Override for custom thinking mode handling."""
         try:
@@ -496,19 +515,22 @@ class BaseWorkflowMixin(ABC):
             return

         try:
             # Ensure model context is available - fall back to resolution if needed
+            # Model context should be available from early validation, but might be deferred for tests
             current_model_context = self.get_current_model_context()
             if not current_model_context:
+                # Try to resolve model context now (deferred from early validation)
                 try:
                     model_name, model_context = self._resolve_model_context(arguments, request)
                     self._model_context = model_context
                     self._current_model_name = model_name
                 except Exception as e:
                     logger.error(f"[WORKFLOW_FILES] {self.get_name()}: Failed to resolve model context: {e}")
-                    # Create fallback model context
+                    # Create fallback model context (preserves existing test behavior)
                     from utils.model_context import ModelContext

                     model_name = self.get_request_model_name(request)
                     self._model_context = ModelContext(model_name)
                     self._current_model_name = model_name

             # Use the same file preparation logic as BaseTool with token budgeting
             continuation_id = self.get_request_continuation_id(request)
@@ -520,6 +542,7 @@ class BaseWorkflowMixin(ABC):
"Workflow files for analysis",
remaining_budget=remaining_tokens,
arguments=arguments,
model_context=self._model_context,
)
# Store for use in expert analysis
@@ -595,6 +618,20 @@ class BaseWorkflowMixin(ABC):
             # Validate request using tool-specific model
             request = self.get_workflow_request_model()(**arguments)

+            # Validate step field size (basic validation for workflow instructions)
+            # If step is too large, user should use shorter instructions and put details in files
+            step_content = request.step
+            if step_content and len(step_content) > MCP_PROMPT_SIZE_LIMIT:
+                from tools.models import ToolOutput
+
+                error_output = ToolOutput(
+                    status="resend_prompt",
+                    content="Step instructions are too long. Please use shorter instructions and provide detailed context via file paths instead.",
+                    content_type="text",
+                    metadata={"prompt_size": len(step_content), "limit": MCP_PROMPT_SIZE_LIMIT},
+                )
+                raise ValueError(f"MCP_SIZE_CHECK:{error_output.model_dump_json()}")
+
             # Validate file paths for security (same as base tool)
             # Use try/except instead of hasattr as per coding standards
             try:
@@ -612,6 +649,20 @@ class BaseWorkflowMixin(ABC):
                 # validate_file_paths method not available - skip validation
                 pass

+            # Try to validate model availability early for production scenarios
+            # For tests, defer model validation to later to allow mocks to work
+            try:
+                model_name, model_context = self._resolve_model_context(arguments, request)
+                # Store for later use
+                self._current_model_name = model_name
+                self._model_context = model_context
+            except ValueError as e:
+                # Model resolution failed - in production this would be an error,
+                # but for tests we defer to allow mocks to handle model resolution
+                logger.debug(f"Early model validation failed, deferring to later: {e}")
+                self._current_model_name = None
+                self._model_context = None
+
             # Adjust total steps if needed
             if request.step_number > request.total_steps:
                 request.total_steps = request.step_number
@@ -1364,29 +1415,26 @@ class BaseWorkflowMixin(ABC):
     async def _call_expert_analysis(self, arguments: dict, request) -> dict:
         """Call external model for expert analysis"""
         try:
-            # Use the same model resolution logic as BaseTool
-            model_context = arguments.get("_model_context")
-            resolved_model_name = arguments.get("_resolved_model_name")
-            if model_context and resolved_model_name:
-                self._model_context = model_context
-                model_name = resolved_model_name
-            else:
-                # Fallback for direct calls - requires BaseTool methods
+            # Model context should be resolved from early validation, but handle fallback for tests
+            if not self._model_context:
+                # Try to resolve model context for expert analysis (deferred from early validation)
                 try:
                     model_name, model_context = self._resolve_model_context(arguments, request)
                     self._model_context = model_context
                     self._current_model_name = model_name
                 except Exception as e:
-                    logger.error(f"Failed to resolve model context: {e}")
-                    # Use request model as fallback
+                    logger.error(f"Failed to resolve model context for expert analysis: {e}")
+                    # Use request model as fallback (preserves existing test behavior)
                     model_name = self.get_request_model_name(request)
                     from utils.model_context import ModelContext
                     model_context = ModelContext(model_name)
                     self._model_context = model_context
                     self._current_model_name = model_name
+            else:
+                model_name = self._current_model_name

-            self._current_model_name = model_name
-            provider = self.get_model_provider(model_name)
+            provider = self._model_context.provider

             # Prepare expert analysis context
             expert_context = self.prepare_expert_analysis_context(self.consolidated_findings)
@@ -1407,12 +1455,19 @@ class BaseWorkflowMixin(ABC):
             else:
                 prompt = expert_context

+            # Validate temperature against model constraints
+            validated_temperature, temp_warnings = self.get_validated_temperature(request, self._model_context)
+
+            # Log any temperature corrections
+            for warning in temp_warnings:
+                logger.warning(warning)
+
             # Generate AI response - use request parameters if available
             model_response = provider.generate_content(
                 prompt=prompt,
                 model_name=model_name,
                 system_prompt=system_prompt,
-                temperature=self.get_request_temperature(request),
+                temperature=validated_temperature,
                 thinking_mode=self.get_request_thinking_mode(request),
                 use_websearch=self.get_request_use_websearch(request),
                 images=list(set(self.consolidated_findings.images)) if self.consolidated_findings.images else None,
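
For context, a hedged, self-contained sketch of the clamping behavior that get_validated_temperature relies on; the real validate_and_correct_temperature lives on the tool and consults the model's actual constraints, so the range below is an assumption:

```python
# Hypothetical illustration of temperature clamping; the bounds are assumptions.
def validate_temperature(requested: float, t_min: float = 0.0, t_max: float = 1.0) -> tuple[float, list[str]]:
    warnings: list[str] = []
    corrected = min(max(requested, t_min), t_max)
    if corrected != requested:
        warnings.append(f"Temperature {requested} outside [{t_min}, {t_max}]; corrected to {corrected}")
    return corrected, warnings


print(validate_temperature(1.5))  # (1.0, ['Temperature 1.5 outside [0.0, 1.0]; corrected to 1.0'])
```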