Overall savings should now be 50%+ of the tokens previously used

perf: tweaks to schema descriptions, aiming to reduce token usage without performance degradation
2025-10-01 22:39:12 +04:00
parent f69ff03c4d
commit cc8a4dfd21
9 changed files with 108 additions and 235 deletions
--- a/tools/challenge.py
+++ b/tools/challenge.py
@@ -23,9 +23,8 @@ from .simple.base import SimpleTool
 # Field descriptions for the Challenge tool
 CHALLENGE_FIELD_DESCRIPTIONS = {
    "prompt": (
-        "The user's message or statement to analyze critically. "
-        "When manually invoked with 'challenge', exclude that prefix - just pass the actual content. "
-        "For automatic invocations (see tool description for conditions), pass the user's complete message unchanged."
+        "Statement to scrutinize. If you invoke `challenge` manually, strip the word 'challenge' and pass just the statement. "
+        "Automatic invocations send the full user message as-is; do not modify it."
    ),
 }

@@ -54,9 +53,8 @@ class ChallengeTool(SimpleTool):

    def get_description(self) -> str:
        return (
-            "Prevents reflexive agreement when users challenge responses by forcing critical thinking and reasoned analysis. "
-            "Trigger automatically when users critically question, disagree with, or appear to challenge previous statements in ongoing conversations. "
-            "Promotes truth-seeking over compliance by ensuring thoughtful evaluation rather than automatic agreement."
+            "Prevents reflexive agreement by forcing critical thinking and reasoned analysis when a statement is challenged. "
+            "Trigger automatically when a user critically questions, disagrees or appears to push back on earlier answers, and use it manually to sanity-check contentious claims."
        )

    def get_system_prompt(self) -> str:
--- a/tools/codereview.py
+++ b/tools/codereview.py
@@ -35,36 +35,30 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for code review workflow
 CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = {
    "step": (
-        "Review plan. Step 1: State strategy. Later: Report findings. "
-        "MUST examine quality, security, performance, architecture. Use 'relevant_files' for code. NO large snippets."
+        "Review narrative. Step 1: outline the review strategy. Later steps: report findings. MUST cover quality, security, "
+        "performance, and architecture. Reference code via `relevant_files`; avoid dumping large snippets."
    ),
-    "step_number": "Current step index in review sequence (starts at 1). Build upon previous steps.",
+    "step_number": "Current review step (starts at 1) – each step should build on the last.",
    "total_steps": (
-        "Estimated steps needed to complete the review. "
-        "IMPORTANT: For external validation, max 2 steps. For internal validation, use 1 step. "
-        "When continuation_id is provided (continuing a previous conversation), set to 2 max for external, 1 for internal."
+        "Number of review steps planned. External validation: two steps (analysis + summary). Internal validation: one step. "
+        "Use the same limits when continuing an existing review via continuation_id."
    ),
    "next_step_required": (
-        "True to continue with another step, False when review is complete. "
-        "CRITICAL for external validation: Set to True on step 1, then False on step 2. "
-        "For internal validation: Set to False immediately. "
-        "When continuation_id is provided: Follow the same rules based on validation type."
+        "True when another review step follows. External validation: step 1 → True, step 2 → False. Internal validation: set False immediately. "
+        "Apply the same rule on continuation flows."
    ),
-    "findings": (
-        "Discoveries: quality, security, performance, architecture. "
-        "Document positive+negative. Update in later steps."
-    ),
-    "files_checked": "All examined files (absolute paths), including ruled-out ones.",
-    "relevant_files": "Step 1: All files/dirs for review. Final: Subset with key findings (issues, patterns, decisions).",
-    "relevant_context": "Methods/functions central to findings: 'Class.method' or 'function'. Focus on issues/patterns.",
-    "issues_found": "Issues with 'severity' (critical/high/medium/low) and 'description'. Vulnerabilities, performance, quality.",
-    "review_validation_type": "'external' (default, expert model) or 'internal' (no expert). Default external unless user specifies.",
-    "backtrack_from_step": "Step number to backtrack from if revision needed.",
-    "images": "Optional diagrams, mockups, visuals for review context (absolute paths). Include if materially helpful.",
-    "review_type": "Review type: full, security, performance, quick.",
-    "focus_on": "Specific aspects or context for areas of concern.",
-    "standards": "Coding standards to enforce.",
-    "severity_filter": "Minimum severity to report.",
+    "findings": "Capture findings (positive and negative) across quality, security, performance, and architecture; update each step.",
+    "files_checked": "Absolute paths of every file reviewed, including those ruled out.",
+    "relevant_files": "Step 1: list all files/dirs under review. Must be absolute full non-abbreviated paths. Final step: narrow to files tied to key findings.",
+    "relevant_context": "Functions or methods central to findings (e.g. 'Class.method' or 'function_name').",
+    "issues_found": "Issues with severity (critical/high/medium/low) and descriptions.",
+    "review_validation_type": "Set 'external' (default) for expert follow-up or 'internal' for local-only review.",
+    "backtrack_from_step": "If revising earlier analysis, note the step number to revisit.",
+    "images": "Optional diagram or screenshot paths that clarify review context.",
+    "review_type": "Review focus: full, security, performance, or quick.",
+    "focus_on": "Optional note on areas to emphasise (e.g. 'threading', 'auth flow').",
+    "standards": "Coding standards or style guides to enforce.",
+    "severity_filter": "Lowest severity to include when reporting issues (critical/high/medium/low/all).",
 }


--- a/tools/consensus.py
+++ b/tools/consensus.py
@@ -37,45 +37,23 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for consensus workflow
 CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = {
    "step": (
-        "The core question for consensus. Step 1: Provide the EXACT proposal for all models to evaluate. "
-        "CRITICAL: This text is sent to all models and must be a clear question, not a self-referential statement "
-        "(e.g., use 'Evaluate...' not 'I will evaluate...'). Steps 2+: Internal notes on the last model's response; this is NOT sent to other models."
+        "Consensus prompt. Step 1: write the exact proposal/question every model will see (use 'Evaluate…', not meta commentary). "
+        "Steps 2+: capture internal notes about the latest model response—these notes are NOT sent to other models."
    ),
-    "step_number": (
-        "The index of the current step in the consensus workflow, beginning at 1. Step 1 is your analysis, "
-        "steps 2+ are for processing individual model responses."
-    ),
-    "total_steps": (
-        "Total number of steps needed. This equals the number of models to consult. "
-        "Step 1 includes your analysis + first model consultation on return of the call. Final step includes "
-        "last model consultation + synthesis."
-    ),
-    "next_step_required": ("Set to true if more models need to be consulted. False when ready for final synthesis."),
+    "step_number": "Current step index (starts at 1). Step 1 is your analysis; steps 2+ handle each model response.",
+    "total_steps": "Total steps = number of models consulted plus the final synthesis step.",
+    "next_step_required": "True if more model consultations remain; set false when ready to synthesize.",
    "findings": (
-        "Your analysis of the consensus topic. Step 1: Your independent, comprehensive analysis of the proposal. "
-        "CRITICAL: This is for the final synthesis and is NOT sent to the other models. "
-        "Steps 2+: A summary of the key points from the most recent model's response."
-    ),
-    "relevant_files": (
-        "Files that are relevant to the consensus analysis. Include files that help understand the proposal, "
-        "provide context, or contain implementation details."
+        "Step 1: your independent analysis for later synthesis (not shared with other models). Steps 2+: summarize the newest model response."
    ),
+    "relevant_files": "Optional supporting files that help the consensus analysis. Must be absolute full, non-abbreviated paths.",
    "models": (
-        "List of model configurations to consult. Each can have a model name, stance (for/against/neutral), "
-        "and optional custom stance prompt. The same model can be used multiple times with different stances, "
-        "but each model + stance combination must be unique. "
-        "Example: [{'model': 'o3', 'stance': 'for'}, {'model': 'o3', 'stance': 'against'}, "
-        "{'model': 'flash', 'stance': 'neutral'}]"
-    ),
-    "current_model_index": (
-        "Internal tracking of which model is being consulted (0-based index). Used to determine which model "
-        "to call next."
-    ),
-    "model_responses": ("Accumulated responses from models consulted so far. Internal field for tracking progress."),
-    "images": (
-        "Optional list of image paths or base64 data URLs for visual context. Useful for UI/UX discussions, "
-        "architecture diagrams, mockups, or any visual references that help inform the consensus analysis."
+        "List of models to consult. Each entry may include model, stance (for/against/neutral), and stance_prompt. "
+        "Each (model, stance) pair must be unique, e.g. [{'model':'o3','stance':'for'}, {'model':'o3','stance':'against'}]."
    ),
+    "current_model_index": "0-based index of the next model to consult (managed internally).",
+    "model_responses": "Internal log of responses gathered so far.",
+    "images": "Optional absolute image paths or base64 references that add helpful visual context.",
 }


--- a/tools/docgen.py
+++ b/tools/docgen.py
@@ -37,58 +37,21 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for documentation generation
 DOCGEN_FIELD_DESCRIPTIONS = {
    "step": (
-        "Step 1 (DISCOVERY): Plan to discover ALL files needing documentation; count and list them clearly. DO NOT document yet. "
-        "Step 2+ (DOCUMENTATION): Document ONE file at a time. CRITICAL: DO NOT ALTER CODE LOGIC - only add documentation. "
-        "If you find bugs, TRACK them but DO NOT FIX. Report progress using counters."
-    ),
-    "step_number": (
-        "The index of the current step in the documentation generation sequence, beginning at 1. Each step should build upon or "
-        "revise the previous one."
-    ),
-    "total_steps": (
-        "Total steps needed to complete documentation: 1 (discovery) + number of files to document. "
-        "This is calculated dynamically based on total_files_to_document counter."
-    ),
-    "next_step_required": (
-        "Set to true if you plan to continue the documentation analysis with another step. False means you believe the "
-        "documentation plan is complete and ready for implementation."
-    ),
-    "findings": (
-        "Summary of documentation needs found in this step. Note missing docs, complexity, and call flows. "
-        "IMPORTANT: Document both well-documented areas and areas needing docs. "
-        "CRITICAL: If ANY bugs are found, STOP and report them immediately before continuing documentation."
-    ),
-    "relevant_files": (
-        "Current focus files (absolute paths) for this step. Focus on documenting ONE FILE completely per step."
-    ),
-    "relevant_context": (
-        "List methods/functions needing documentation, in 'ClassName.methodName' or 'functionName' format. "
-        "Prioritize complex logic, important interfaces, or missing documentation."
-    ),
-    "num_files_documented": (
-        "Counter for fully documented files. Starts at 0. Increment only when a file is 100% complete. "
-        "CRITICAL: Must equal 'total_files_to_document' to finish."
-    ),
-    "total_files_to_document": (
-        "Counter for total files needing documentation. Set in step 1 during discovery. "
-        "This is the completion target for the 'num_files_documented' counter."
-    ),
-    "document_complexity": (
-        "Whether to include algorithmic complexity (Big O) analysis in function/method documentation. "
-        "Default: true. When enabled, analyzes and documents the computational complexity of algorithms."
-    ),
-    "document_flow": (
-        "Whether to include call flow and dependency information in documentation. "
-        "Default: true. When enabled, documents which methods this function calls and which methods call this function."
-    ),
-    "update_existing": (
-        "Whether to update existing documentation when it's found to be incorrect or incomplete. "
-        "Default: true. When enabled, improves existing docs rather than just adding new ones."
-    ),
-    "comments_on_complex_logic": (
-        "Whether to add inline comments around complex logic within functions. "
-        "Default: true. When enabled, adds explanatory comments for non-obvious algorithmic steps."
+        "Step 1 (Discovery): list every file that needs documentation and record the total. Do not write docs yet. "
+        "Steps 2+: document exactly one file per step. Never change code logic; log bugs separately. Keep the counters accurate."
    ),
+    "step_number": "Current documentation step (starts at 1).",
+    "total_steps": "1 discovery step + one step per file documented (tracks via `total_files_to_document`).",
+    "next_step_required": "True while more files still need documentation; False once everything is complete.",
+    "findings": "Summarize documentation gaps, complexity, call flows, and well-documented areas. Stop and report immediately if you uncover a bug.",
+    "relevant_files": "Absolute paths for the file(s) you are documenting this step—stick to a single file per step.",
+    "relevant_context": "Functions or methods needing documentation (e.g. 'Class.method', 'function_name'), especially complex or user-facing areas.",
+    "num_files_documented": "Count of files finished so far. Increment only when a file is fully documented.",
+    "total_files_to_document": "Total files identified in discovery; completion requires matching this count.",
+    "document_complexity": "Include algorithmic complexity (Big O) analysis when True (default).",
+    "document_flow": "Include call flow/dependency notes when True (default).",
+    "update_existing": "True (default) to polish inaccurate or outdated docs instead of leaving them untouched.",
+    "comments_on_complex_logic": "True (default) to add inline comments around non-obvious logic.",
 }


--- a/tools/planner.py
+++ b/tools/planner.py
@@ -39,18 +39,18 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions matching original planner tool
 PLANNER_FIELD_DESCRIPTIONS = {
    "step": (
-        "Your current planning step content. Step 1: Describe the task/problem to plan in detail for breakdown. "
-        "Subsequent steps: Provide planning content (steps, revisions, questions, approach changes, etc.)."
+        "Planning content for this step. Step 1: describe the task, problem and scope. Later steps: capture updates, "
+        "revisions, branches, or open questions that shape the plan."
    ),
-    "step_number": "Current step number in the planning sequence (starts at 1)",
-    "total_steps": "Current estimate of total steps needed (can be adjusted up/down as planning progresses)",
-    "next_step_required": "Whether another planning step is required after this one",
-    "is_step_revision": "True if this step revises/replaces a previous step",
-    "revises_step_number": "If is_step_revision is true, which step number is being revised",
-    "is_branch_point": "True if this step branches from a previous step to explore alternatives",
-    "branch_from_step": "If is_branch_point is true, which step number is the branching point",
-    "branch_id": "Identifier for the current branch (e.g., 'approach-A', 'microservices-path')",
-    "more_steps_needed": "True if more steps are needed beyond the initial estimate",
+    "step_number": "Current planning step number (starts at 1).",
+    "total_steps": "Estimated number of planning steps; adjust as the plan evolves.",
+    "next_step_required": "Set true when another planning step will follow after this one.",
+    "is_step_revision": "Set true when you are replacing a previously recorded step.",
+    "revises_step_number": "Step number being replaced when revising.",
+    "is_branch_point": "True when this step creates a new branch to explore an alternative path.",
+    "branch_from_step": "If branching, the step number that this branch starts from.",
+    "branch_id": "Name for this branch (e.g. 'approach-A', 'migration-path').",
+    "more_steps_needed": "True when you now expect to add additional steps beyond the prior estimate.",
 }


--- a/tools/precommit.py
+++ b/tools/precommit.py
@@ -34,37 +34,31 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for precommit workflow
 PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
    "step": (
-        "Validation plan. Step 1: State strategy. Later: Report findings. "
-        "MUST examine git changes, analyze impacts. Use 'relevant_files' for code. NO large snippets."
+        "Step 1: outline how you'll validate the git changes. Later steps: report findings. Review diffs and impacts, use `relevant_files`, and avoid pasting large snippets."
    ),
-    "step_number": "Current step index in pre-commit sequence (starts at 1). Build upon previous steps.",
+    "step_number": "Current pre-commit step number (starts at 1).",
    "total_steps": (
-        "Estimated steps needed to complete validation. "
-        "IMPORTANT: For external validation, use max 3 steps. For internal validation, use 1 step. "
-        "When continuation_id is provided (continuing a previous conversation), set to 3 max for external, 1 for internal."
+        "Planned number of validation steps. External validation: use at most three (analysis → follow-ups → summary). Internal validation: a single step. Honour these limits when resuming via continuation_id."
    ),
    "next_step_required": (
        "True to continue with another step, False when validation is complete. "
-        "CRITICAL: If total_steps>=3, set to True until the final step. "
+        "CRITICAL: If total_steps>=3 or when `precommit_type = external`, set to True until the final step. "
        "When continuation_id is provided: Follow the same validation rules based on precommit_type."
    ),
-    "findings": (
-        "Discoveries: git diffs, modifications, issues (bugs, missing tests, security). "
-        "Document positive+concerns. Update in later steps."
-    ),
-    "files_checked": "All examined files (absolute paths), including ruled-out ones.",
-    "relevant_files": "Files with changes or relevant to validation (absolute paths). Modified files, config, tests, docs.",
-    "relevant_context": "Methods/functions central to changes: 'Class.method' or 'function'. Focus on modified/added.",
-    "issues_found": "Issues with 'severity' (critical/high/medium/low) and 'description'. Bugs, security, performance.",
-    "precommit_type": "'external' (default, expert review) or 'internal' (local only). Default external unless user specifies.",
-    "backtrack_from_step": "Step number to backtrack from if revision needed.",
-    "images": "Optional screenshots/visuals for validation (absolute paths).",
-    "path": "Starting path for git repos (FULL absolute path). REQUIRED step 1.",
-    "compare_to": "Optional git ref (branch/tag/commit) to compare. Checks remotes if needed. Without: checks staged/unstaged.",
-    "include_staged": "Analyze staged changes. Ignored if 'compare_to' provided.",
-    "include_unstaged": "Analyze unstaged changes. Ignored if 'compare_to' provided.",
-    "focus_on": "Focus aspects: security, performance, test coverage.",
-    "severity_filter": "Minimum severity to report.",
+    "findings": "Record git diff insights, risks, missing tests, security concerns, and positives; update previous notes as you go.",
+    "files_checked": "Absolute paths for every file examined, including ruled-out candidates.",
+    "relevant_files": "Absolute paths of files involved in the change or validation (code, configs, tests, docs). Must be absolute full non-abbreviated paths.",
+    "relevant_context": "Key functions/methods touched by the change (e.g. 'Class.method', 'function_name').",
+    "issues_found": "List issues with severity (critical/high/medium/low) plus descriptions (bugs, security, performance, coverage).",
+    "precommit_type": "'external' (default, triggers expert model) or 'internal' (local-only validation).",
+    "backtrack_from_step": "Step number to revisit when revising earlier analysis.",
+    "images": "Optional absolute paths to screenshots or diagrams that aid validation.",
+    "path": "Absolute path to the repository root. Required in step 1.",
+    "compare_to": "Optional git ref (branch/tag/commit) to diff against; falls back to staged/unstaged changes.",
+    "include_staged": "Whether to inspect staged changes (ignored when `compare_to` is set).",
+    "include_unstaged": "Whether to inspect unstaged changes (ignored when `compare_to` is set).",
+    "focus_on": "Optional emphasis areas such as security, performance, or test coverage.",
+    "severity_filter": "Lowest severity to include when reporting issues.",
 }


--- a/tools/secaudit.py
+++ b/tools/secaudit.py
@@ -36,53 +36,24 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for security audit workflow
 SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = {
    "step": (
-        "Audit plan. Step 1: State strategy. Later: Report findings. "
-        "MANDATORY: Systematic approach (OWASP Top 10, auth, validation). Use 'relevant_files'. NO large code."
+        "Step 1: outline the audit strategy (OWASP Top 10, auth, validation, etc.). Later steps: report findings. MANDATORY: use `relevant_files` for code references and avoid large snippets."
    ),
-    "step_number": "Current step in audit sequence (starts at 1).",
-    "total_steps": "Estimated steps for audit. Adjust as findings emerge.",
-    "next_step_required": ("True to continue. False when ALL threats uncovered, ready for validation."),
-    "findings": (
-        "Discoveries: vulnerabilities, auth issues, validation gaps, compliance. "
-        "Document positives and concerns. Update past findings."
-    ),
-    "files_checked": "All files examined (absolute paths). Include ruled-out files.",
-    "relevant_files": (
-        "Step 1: Files to audit (absolute paths). " "Final: Files with security issues, auth modules, config files."
-    ),
-    "relevant_context": (
-        "Security-critical methods/classes: 'ClassName.methodName'. "
-        "Focus on vulnerabilities, auth logic, security patterns."
-    ),
-    "issues_found": (
-        "Security issues as dict: 'severity' (critical/high/medium/low), 'description'. "
-        "Include vulnerabilities, auth flaws, injection, crypto weakness, config issues."
-    ),
-    "confidence": (
-        "exploring/low/medium/high/very_high/almost_certain/certain. "
-        "CRITICAL: 'certain' PREVENTS external validation."
-    ),
-    "backtrack_from_step": "Step number to backtrack from if revision needed.",
-    "images": (
-        "Optional: Architecture diagrams, security models, threat models (absolute paths). "
-        "Only if assists security assessment."
-    ),
-    "security_scope": (
-        "Security context (web/mobile/API/enterprise/cloud). "
-        "Include stack, user types, data sensitivity, threat landscape. "
-        "This helps focus the security assessment appropriately."
-    ),
-    "threat_level": (
-        "Assess the threat level based on application context: 'low' (internal tools, low-risk data), "
-        "'medium' (customer-facing, business data), 'high' (financial, healthcare, regulated industry), "
-        "'critical' (payment processing, sensitive personal data). This guides prioritization."
-    ),
-    "compliance_requirements": (
-        "List applicable compliance frameworks and security standards (SOC2, PCI DSS, HIPAA, GDPR, "
-        "ISO 27001, NIST). Include industry-specific requirements that affect security controls."
-    ),
-    "audit_focus": "Primary security focus areas for this audit (owasp, compliance, infrastructure, dependencies)",
-    "severity_filter": "Minimum severity level to report on the security issues found",
+    "step_number": "Current security-audit step number (starts at 1).",
+    "total_steps": "Expected number of audit steps; adjust as new risks surface.",
+    "next_step_required": "True while additional threat analysis remains; set False once you are ready to hand off for validation.",
+    "findings": "Summarize vulnerabilities, auth issues, validation gaps, compliance notes, and positives; update prior findings as needed.",
+    "files_checked": "Absolute paths for every file inspected, including rejected candidates.",
+    "relevant_files": "Absolute paths for security-relevant files (auth modules, configs, sensitive code).",
+    "relevant_context": "Security-critical classes/methods (e.g. 'AuthService.login', 'encryption_helper').",
+    "issues_found": "Security issues with severity (critical/high/medium/low) and descriptions (vulns, auth flaws, injection, crypto, config).",
+    "confidence": "exploring/low/medium/high/very_high/almost_certain/certain. 'certain' blocks external validation—use only when fully complete.",
+    "backtrack_from_step": "Step number to revisit when revising earlier audit work.",
+    "images": "Optional absolute paths to diagrams or threat models that inform the audit.",
+    "security_scope": "Security context (web, mobile, API, cloud, etc.) including stack, user types, data sensitivity, and threat landscape.",
+    "threat_level": "Assess the threat level: low (internal/low-risk), medium (customer-facing/business data), high (regulated or sensitive), critical (financial/healthcare/PII).",
+    "compliance_requirements": "Applicable compliance frameworks or standards (SOC2, PCI DSS, HIPAA, GDPR, ISO 27001, NIST, etc.).",
+    "audit_focus": "Primary focus area: owasp, compliance, infrastructure, dependencies, or comprehensive.",
+    "severity_filter": "Minimum severity to include when reporting security issues.",
 }


--- a/tools/testgen.py
+++ b/tools/testgen.py
@@ -35,37 +35,15 @@ logger = logging.getLogger(__name__)
 # Tool-specific field descriptions for test generation workflow
 TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
    "step": (
-        "The test plan for this step. Step 1: State strategy for analyzing code structure, business logic, critical paths, and edge cases. "
-        "Later steps: Report findings and adapt as new test scenarios are identified."
-    ),
-    "step_number": (
-        "The index of the current step in the test generation sequence, beginning at 1. Each step should build upon or "
-        "revise the previous one."
-    ),
-    "total_steps": (
-        "Your current estimate for how many steps will be needed to complete the test generation analysis. "
-        "Adjust as new findings emerge."
-    ),
-    "next_step_required": (
-        "Set to true if you plan to continue the investigation with another step. False means you believe the "
-        "test generation analysis is complete and ready for expert validation."
-    ),
-    "findings": (
-        "Summary of discoveries about the code being tested. Include analysis of functionality, critical paths, edge cases, "
-        "boundary conditions, and error handling. IMPORTANT: Document both happy paths and failure modes. "
-        "Identify existing test patterns. In later steps, confirm or update past findings."
-    ),
-    "files_checked": (
-        "List all files examined (absolute paths). Include even ruled-out files to track exploration path."
-    ),
-    "relevant_files": (
-        "Subset of files_checked containing code needing tests (absolute paths). Include implementation files, "
-        "interfaces, dependencies, or existing test examples."
-    ),
-    "relevant_context": (
-        "List methods/functions needing test coverage, in 'ClassName.methodName' or 'functionName' format. "
-        "Prioritize critical business logic, public APIs, and error-prone code paths."
+        "Test plan for this step. Step 1: outline how you'll analyse structure, business logic, critical paths, and edge cases. Later steps: record findings and new scenarios as they emerge."
    ),
+    "step_number": "Current test-generation step (starts at 1) — each step should build on prior work.",
+    "total_steps": "Estimated number of steps needed for test planning; adjust as new scenarios appear.",
+    "next_step_required": "True while more investigation or planning remains; set False when test planning is ready for expert validation.",
+    "findings": "Summarise functionality, critical paths, edge cases, boundary conditions, error handling, and existing test patterns. Cover both happy and failure paths.",
+    "files_checked": "Absolute paths of every file examined, including those ruled out.",
+    "relevant_files": "Absolute paths of code that requires new or updated tests (implementation, dependencies, existing test fixtures).",
+    "relevant_context": "Functions/methods needing coverage (e.g. 'Class.method', 'function_name'), with emphasis on critical paths and error-prone code.",
    "confidence": (
        "Indicate your current confidence in the test generation assessment. Use: 'exploring' (starting analysis), "
        "'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), "
@@ -74,11 +52,8 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = {
        "Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. "
        "Using 'certain' means you have complete confidence locally and prevents external model validation."
    ),
-    "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."),
-    "images": (
-        "Optional list of absolute paths to architecture diagrams, flow charts, or visual documentation that help "
-        "understand the code structure and test requirements. Only include if they materially assist test planning."
-    ),
+    "backtrack_from_step": "Step number to revisit if earlier findings need revision.",
+    "images": "Optional absolute paths to diagrams or visuals that clarify the system under test.",
 }