From cc8a4dfd21b6f3dae4972a833b619e53c964693b Mon Sep 17 00:00:00 2001 From: Fahad Date: Wed, 1 Oct 2025 22:39:12 +0400 Subject: [PATCH] Overall savings should now be 50%+ tokens used perf: tweaks to schema descriptions, aiming to reduce token usage without performance degradation --- tests/test_challenge.py | 6 ++-- tools/challenge.py | 10 +++---- tools/codereview.py | 44 ++++++++++++---------------- tools/consensus.py | 46 ++++++++--------------------- tools/docgen.py | 65 +++++++++-------------------------------- tools/planner.py | 22 +++++++------- tools/precommit.py | 42 ++++++++++++-------------- tools/secaudit.py | 63 +++++++++++---------------------------- tools/testgen.py | 45 +++++++--------------------- 9 files changed, 108 insertions(+), 235 deletions(-) diff --git a/tests/test_challenge.py b/tests/test_challenge.py index b81f2e4..a5240cf 100644 --- a/tests/test_challenge.py +++ b/tests/test_challenge.py @@ -26,7 +26,7 @@ class TestChallengeTool: assert self.tool.get_name() == "challenge" assert "reflexive agreement" in self.tool.get_description() assert "critical thinking" in self.tool.get_description() - assert "thoughtful evaluation" in self.tool.get_description() + assert "reasoned analysis" in self.tool.get_description() assert self.tool.get_default_temperature() == 0.2 # TEMPERATURE_ANALYTICAL def test_requires_model(self): @@ -154,8 +154,8 @@ class TestChallengeTool: assert "prompt" in fields assert fields["prompt"]["type"] == "string" - assert "message or statement" in fields["prompt"]["description"] - assert "analyze critically" in fields["prompt"]["description"] + assert "Statement to scrutinize" in fields["prompt"]["description"] + assert "strip the word 'challenge'" in fields["prompt"]["description"] def test_required_fields_list(self): """Test required fields list""" diff --git a/tools/challenge.py b/tools/challenge.py index 2873025..a217924 100644 --- a/tools/challenge.py +++ b/tools/challenge.py @@ -23,9 +23,8 @@ from .simple.base import 
SimpleTool # Field descriptions for the Challenge tool CHALLENGE_FIELD_DESCRIPTIONS = { "prompt": ( - "The user's message or statement to analyze critically. " - "When manually invoked with 'challenge', exclude that prefix - just pass the actual content. " - "For automatic invocations (see tool description for conditions), pass the user's complete message unchanged." + "Statement to scrutinize. If you invoke `challenge` manually, strip the word 'challenge' and pass just the statement. " + "Automatic invocations send the full user message as-is; do not modify it." ), } @@ -54,9 +53,8 @@ class ChallengeTool(SimpleTool): def get_description(self) -> str: return ( - "Prevents reflexive agreement when users challenge responses by forcing critical thinking and reasoned analysis. " - "Trigger automatically when users critically question, disagree with, or appear to challenge previous statements in ongoing conversations. " - "Promotes truth-seeking over compliance by ensuring thoughtful evaluation rather than automatic agreement." + "Prevents reflexive agreement by forcing critical thinking and reasoned analysis when a statement is challenged. " + "Trigger automatically when a user critically questions, disagrees or appears to push back on earlier answers, and use it manually to sanity-check contentious claims." ) def get_system_prompt(self) -> str: diff --git a/tools/codereview.py b/tools/codereview.py index 46c21a4..041e9e8 100644 --- a/tools/codereview.py +++ b/tools/codereview.py @@ -35,36 +35,30 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for code review workflow CODEREVIEW_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Review plan. Step 1: State strategy. Later: Report findings. " - "MUST examine quality, security, performance, architecture. Use 'relevant_files' for code. NO large snippets." + "Review narrative. Step 1: outline the review strategy. Later steps: report findings. 
MUST cover quality, security, " + "performance, and architecture. Reference code via `relevant_files`; avoid dumping large snippets." ), - "step_number": "Current step index in review sequence (starts at 1). Build upon previous steps.", + "step_number": "Current review step (starts at 1) – each step should build on the last.", "total_steps": ( - "Estimated steps needed to complete the review. " - "IMPORTANT: For external validation, max 2 steps. For internal validation, use 1 step. " - "When continuation_id is provided (continuing a previous conversation), set to 2 max for external, 1 for internal." + "Number of review steps planned. External validation: two steps (analysis + summary). Internal validation: one step. " + "Use the same limits when continuing an existing review via continuation_id." ), "next_step_required": ( - "True to continue with another step, False when review is complete. " - "CRITICAL for external validation: Set to True on step 1, then False on step 2. " - "For internal validation: Set to False immediately. " - "When continuation_id is provided: Follow the same rules based on validation type." + "True when another review step follows. External validation: step 1 → True, step 2 → False. Internal validation: set False immediately. " + "Apply the same rule on continuation flows." ), - "findings": ( - "Discoveries: quality, security, performance, architecture. " - "Document positive+negative. Update in later steps." - ), - "files_checked": "All examined files (absolute paths), including ruled-out ones.", - "relevant_files": "Step 1: All files/dirs for review. Final: Subset with key findings (issues, patterns, decisions).", - "relevant_context": "Methods/functions central to findings: 'Class.method' or 'function'. Focus on issues/patterns.", - "issues_found": "Issues with 'severity' (critical/high/medium/low) and 'description'. 
Vulnerabilities, performance, quality.", - "review_validation_type": "'external' (default, expert model) or 'internal' (no expert). Default external unless user specifies.", - "backtrack_from_step": "Step number to backtrack from if revision needed.", - "images": "Optional diagrams, mockups, visuals for review context (absolute paths). Include if materially helpful.", - "review_type": "Review type: full, security, performance, quick.", - "focus_on": "Specific aspects or context for areas of concern.", - "standards": "Coding standards to enforce.", - "severity_filter": "Minimum severity to report.", + "findings": "Capture findings (positive and negative) across quality, security, performance, and architecture; update each step.", + "files_checked": "Absolute paths of every file reviewed, including those ruled out.", + "relevant_files": "Step 1: list all files/dirs under review. Must be absolute full non-abbreviated paths. Final step: narrow to files tied to key findings.", + "relevant_context": "Functions or methods central to findings (e.g. 'Class.method' or 'function_name').", + "issues_found": "Issues with severity (critical/high/medium/low) and descriptions.", + "review_validation_type": "Set 'external' (default) for expert follow-up or 'internal' for local-only review.", + "backtrack_from_step": "If revising earlier analysis, note the step number to revisit.", + "images": "Optional diagram or screenshot paths that clarify review context.", + "review_type": "Review focus: full, security, performance, or quick.", + "focus_on": "Optional note on areas to emphasise (e.g. 
'threading', 'auth flow').", + "standards": "Coding standards or style guides to enforce.", + "severity_filter": "Lowest severity to include when reporting issues (critical/high/medium/low/all).", } diff --git a/tools/consensus.py b/tools/consensus.py index d76aa29..b86a878 100644 --- a/tools/consensus.py +++ b/tools/consensus.py @@ -37,45 +37,23 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for consensus workflow CONSENSUS_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "The core question for consensus. Step 1: Provide the EXACT proposal for all models to evaluate. " - "CRITICAL: This text is sent to all models and must be a clear question, not a self-referential statement " - "(e.g., use 'Evaluate...' not 'I will evaluate...'). Steps 2+: Internal notes on the last model's response; this is NOT sent to other models." + "Consensus prompt. Step 1: write the exact proposal/question every model will see (use 'Evaluate…', not meta commentary). " + "Steps 2+: capture internal notes about the latest model response—these notes are NOT sent to other models." ), - "step_number": ( - "The index of the current step in the consensus workflow, beginning at 1. Step 1 is your analysis, " - "steps 2+ are for processing individual model responses." - ), - "total_steps": ( - "Total number of steps needed. This equals the number of models to consult. " - "Step 1 includes your analysis + first model consultation on return of the call. Final step includes " - "last model consultation + synthesis." - ), - "next_step_required": ("Set to true if more models need to be consulted. False when ready for final synthesis."), + "step_number": "Current step index (starts at 1). 
Step 1 is your analysis; steps 2+ handle each model response.", + "total_steps": "Total steps = number of models to consult; the final step covers the last consultation plus synthesis.", + "next_step_required": "True if more model consultations remain; set false when ready to synthesize.", "findings": ( - "Your analysis of the consensus topic. Step 1: Your independent, comprehensive analysis of the proposal. " - "CRITICAL: This is for the final synthesis and is NOT sent to the other models. " - "Steps 2+: A summary of the key points from the most recent model's response." - ), - "relevant_files": ( - "Files that are relevant to the consensus analysis. Include files that help understand the proposal, " - "provide context, or contain implementation details." + "Step 1: your independent analysis for later synthesis (not shared with other models). Steps 2+: summarize the newest model response." ), + "relevant_files": "Optional supporting files that help the consensus analysis. Must be absolute full, non-abbreviated paths.", "models": ( - "List of model configurations to consult. Each can have a model name, stance (for/against/neutral), " - "and optional custom stance prompt. The same model can be used multiple times with different stances, " - "but each model + stance combination must be unique. " - "Example: [{'model': 'o3', 'stance': 'for'}, {'model': 'o3', 'stance': 'against'}, " - "{'model': 'flash', 'stance': 'neutral'}]" - ), - "current_model_index": ( - "Internal tracking of which model is being consulted (0-based index). Used to determine which model " - "to call next." - ), - "model_responses": ("Accumulated responses from models consulted so far. Internal field for tracking progress."), - "images": ( - "Optional list of image paths or base64 data URLs for visual context. Useful for UI/UX discussions, " - "architecture diagrams, mockups, or any visual references that help inform the consensus analysis." + "List of models to consult. 
Each entry may include model, stance (for/against/neutral), and stance_prompt. " + "Each (model, stance) pair must be unique, e.g. [{'model':'o3','stance':'for'}, {'model':'o3','stance':'against'}]." ), + "current_model_index": "0-based index of the next model to consult (managed internally).", + "model_responses": "Internal log of responses gathered so far.", + "images": "Optional absolute image paths or base64 references that add helpful visual context.", } diff --git a/tools/docgen.py b/tools/docgen.py index f1e6cb7..4bccd0f 100644 --- a/tools/docgen.py +++ b/tools/docgen.py @@ -37,58 +37,21 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for documentation generation DOCGEN_FIELD_DESCRIPTIONS = { "step": ( - "Step 1 (DISCOVERY): Plan to discover ALL files needing documentation; count and list them clearly. DO NOT document yet. " - "Step 2+ (DOCUMENTATION): Document ONE file at a time. CRITICAL: DO NOT ALTER CODE LOGIC - only add documentation. " - "If you find bugs, TRACK them but DO NOT FIX. Report progress using counters." - ), - "step_number": ( - "The index of the current step in the documentation generation sequence, beginning at 1. Each step should build upon or " - "revise the previous one." - ), - "total_steps": ( - "Total steps needed to complete documentation: 1 (discovery) + number of files to document. " - "This is calculated dynamically based on total_files_to_document counter." - ), - "next_step_required": ( - "Set to true if you plan to continue the documentation analysis with another step. False means you believe the " - "documentation plan is complete and ready for implementation." - ), - "findings": ( - "Summary of documentation needs found in this step. Note missing docs, complexity, and call flows. " - "IMPORTANT: Document both well-documented areas and areas needing docs. " - "CRITICAL: If ANY bugs are found, STOP and report them immediately before continuing documentation." 
- ), - "relevant_files": ( - "Current focus files (absolute paths) for this step. Focus on documenting ONE FILE completely per step." - ), - "relevant_context": ( - "List methods/functions needing documentation, in 'ClassName.methodName' or 'functionName' format. " - "Prioritize complex logic, important interfaces, or missing documentation." - ), - "num_files_documented": ( - "Counter for fully documented files. Starts at 0. Increment only when a file is 100% complete. " - "CRITICAL: Must equal 'total_files_to_document' to finish." - ), - "total_files_to_document": ( - "Counter for total files needing documentation. Set in step 1 during discovery. " - "This is the completion target for the 'num_files_documented' counter." - ), - "document_complexity": ( - "Whether to include algorithmic complexity (Big O) analysis in function/method documentation. " - "Default: true. When enabled, analyzes and documents the computational complexity of algorithms." - ), - "document_flow": ( - "Whether to include call flow and dependency information in documentation. " - "Default: true. When enabled, documents which methods this function calls and which methods call this function." - ), - "update_existing": ( - "Whether to update existing documentation when it's found to be incorrect or incomplete. " - "Default: true. When enabled, improves existing docs rather than just adding new ones." - ), - "comments_on_complex_logic": ( - "Whether to add inline comments around complex logic within functions. " - "Default: true. When enabled, adds explanatory comments for non-obvious algorithmic steps." + "Step 1 (Discovery): list every file that needs documentation and record the total. Do not write docs yet. " + "Steps 2+: document exactly one file per step. Never change code logic; log bugs separately. Keep the counters accurate." 
), + "step_number": "Current documentation step (starts at 1).", + "total_steps": "1 discovery step + one step per file documented (tracks via `total_files_to_document`).", + "next_step_required": "True while more files still need documentation; False once everything is complete.", + "findings": "Summarize documentation gaps, complexity, call flows, and well-documented areas. Stop and report immediately if you uncover a bug.", + "relevant_files": "Absolute paths for the file(s) you are documenting this step—stick to a single file per step.", + "relevant_context": "Functions or methods needing documentation (e.g. 'Class.method', 'function_name'), especially complex or user-facing areas.", + "num_files_documented": "Count of files finished so far. Increment only when a file is fully documented.", + "total_files_to_document": "Total files identified in discovery; completion requires matching this count.", + "document_complexity": "Include algorithmic complexity (Big O) analysis when True (default).", + "document_flow": "Include call flow/dependency notes when True (default).", + "update_existing": "True (default) to polish inaccurate or outdated docs instead of leaving them untouched.", + "comments_on_complex_logic": "True (default) to add inline comments around non-obvious logic.", } diff --git a/tools/planner.py b/tools/planner.py index 1f25eb3..927b262 100644 --- a/tools/planner.py +++ b/tools/planner.py @@ -39,18 +39,18 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions matching original planner tool PLANNER_FIELD_DESCRIPTIONS = { "step": ( - "Your current planning step content. Step 1: Describe the task/problem to plan in detail for breakdown. " - "Subsequent steps: Provide planning content (steps, revisions, questions, approach changes, etc.)." + "Planning content for this step. Step 1: describe the task, problem and scope. Later steps: capture updates, " + "revisions, branches, or open questions that shape the plan." 
), - "step_number": "Current step number in the planning sequence (starts at 1)", - "total_steps": "Current estimate of total steps needed (can be adjusted up/down as planning progresses)", - "next_step_required": "Whether another planning step is required after this one", - "is_step_revision": "True if this step revises/replaces a previous step", - "revises_step_number": "If is_step_revision is true, which step number is being revised", - "is_branch_point": "True if this step branches from a previous step to explore alternatives", - "branch_from_step": "If is_branch_point is true, which step number is the branching point", - "branch_id": "Identifier for the current branch (e.g., 'approach-A', 'microservices-path')", - "more_steps_needed": "True if more steps are needed beyond the initial estimate", + "step_number": "Current planning step number (starts at 1).", + "total_steps": "Estimated number of planning steps; adjust as the plan evolves.", + "next_step_required": "Set true when another planning step will follow after this one.", + "is_step_revision": "Set true when you are replacing a previously recorded step.", + "revises_step_number": "Step number being replaced when revising.", + "is_branch_point": "True when this step creates a new branch to explore an alternative path.", + "branch_from_step": "If branching, the step number that this branch starts from.", + "branch_id": "Name for this branch (e.g. 'approach-A', 'migration-path').", + "more_steps_needed": "True when you now expect to add additional steps beyond the prior estimate.", } diff --git a/tools/precommit.py b/tools/precommit.py index 59e208c..c3527a8 100644 --- a/tools/precommit.py +++ b/tools/precommit.py @@ -34,37 +34,31 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for precommit workflow PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Validation plan. Step 1: State strategy. Later: Report findings. " - "MUST examine git changes, analyze impacts. 
Use 'relevant_files' for code. NO large snippets." + "Step 1: outline how you'll validate the git changes. Later steps: report findings. Review diffs and impacts, use `relevant_files`, and avoid pasting large snippets." ), - "step_number": "Current step index in pre-commit sequence (starts at 1). Build upon previous steps.", + "step_number": "Current pre-commit step number (starts at 1).", "total_steps": ( - "Estimated steps needed to complete validation. " - "IMPORTANT: For external validation, use max 3 steps. For internal validation, use 1 step. " - "When continuation_id is provided (continuing a previous conversation), set to 3 max for external, 1 for internal." + "Planned number of validation steps. External validation: use at most three (analysis → follow-ups → summary). Internal validation: a single step. Honour these limits when resuming via continuation_id." ), "next_step_required": ( "True to continue with another step, False when validation is complete. " - "CRITICAL: If total_steps>=3, set to True until the final step. " + "CRITICAL: If total_steps>=3 or when `precommit_type = external`, set to True until the final step. " "When continuation_id is provided: Follow the same validation rules based on precommit_type." ), - "findings": ( - "Discoveries: git diffs, modifications, issues (bugs, missing tests, security). " - "Document positive+concerns. Update in later steps." - ), - "files_checked": "All examined files (absolute paths), including ruled-out ones.", - "relevant_files": "Files with changes or relevant to validation (absolute paths). Modified files, config, tests, docs.", - "relevant_context": "Methods/functions central to changes: 'Class.method' or 'function'. Focus on modified/added.", - "issues_found": "Issues with 'severity' (critical/high/medium/low) and 'description'. Bugs, security, performance.", - "precommit_type": "'external' (default, expert review) or 'internal' (local only). 
Default external unless user specifies.", - "backtrack_from_step": "Step number to backtrack from if revision needed.", - "images": "Optional screenshots/visuals for validation (absolute paths).", - "path": "Starting path for git repos (FULL absolute path). REQUIRED step 1.", - "compare_to": "Optional git ref (branch/tag/commit) to compare. Checks remotes if needed. Without: checks staged/unstaged.", - "include_staged": "Analyze staged changes. Ignored if 'compare_to' provided.", - "include_unstaged": "Analyze unstaged changes. Ignored if 'compare_to' provided.", - "focus_on": "Focus aspects: security, performance, test coverage.", - "severity_filter": "Minimum severity to report.", + "findings": "Record git diff insights, risks, missing tests, security concerns, and positives; update previous notes as you go.", + "files_checked": "Absolute paths for every file examined, including ruled-out candidates.", + "relevant_files": "Absolute paths of files involved in the change or validation (code, configs, tests, docs). Must be absolute full non-abbreviated paths.", + "relevant_context": "Key functions/methods touched by the change (e.g. 'Class.method', 'function_name').", + "issues_found": "List issues with severity (critical/high/medium/low) plus descriptions (bugs, security, performance, coverage).", + "precommit_type": "'external' (default, triggers expert model) or 'internal' (local-only validation).", + "backtrack_from_step": "Step number to revisit when revising earlier analysis.", + "images": "Optional absolute paths to screenshots or diagrams that aid validation.", + "path": "Absolute path to the repository root. 
Required in step 1.", + "compare_to": "Optional git ref (branch/tag/commit) to diff against; falls back to staged/unstaged changes.", + "include_staged": "Whether to inspect staged changes (ignored when `compare_to` is set).", + "include_unstaged": "Whether to inspect unstaged changes (ignored when `compare_to` is set).", + "focus_on": "Optional emphasis areas such as security, performance, or test coverage.", + "severity_filter": "Lowest severity to include when reporting issues.", } diff --git a/tools/secaudit.py b/tools/secaudit.py index 38d5244..fa34cea 100644 --- a/tools/secaudit.py +++ b/tools/secaudit.py @@ -36,53 +36,24 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for security audit workflow SECAUDIT_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "Audit plan. Step 1: State strategy. Later: Report findings. " - "MANDATORY: Systematic approach (OWASP Top 10, auth, validation). Use 'relevant_files'. NO large code." + "Step 1: outline the audit strategy (OWASP Top 10, auth, validation, etc.). Later steps: report findings. MANDATORY: use `relevant_files` for code references and avoid large snippets." ), - "step_number": "Current step in audit sequence (starts at 1).", - "total_steps": "Estimated steps for audit. Adjust as findings emerge.", - "next_step_required": ("True to continue. False when ALL threats uncovered, ready for validation."), - "findings": ( - "Discoveries: vulnerabilities, auth issues, validation gaps, compliance. " - "Document positives and concerns. Update past findings." - ), - "files_checked": "All files examined (absolute paths). Include ruled-out files.", - "relevant_files": ( - "Step 1: Files to audit (absolute paths). " "Final: Files with security issues, auth modules, config files." - ), - "relevant_context": ( - "Security-critical methods/classes: 'ClassName.methodName'. " - "Focus on vulnerabilities, auth logic, security patterns." 
- ), - "issues_found": ( - "Security issues as dict: 'severity' (critical/high/medium/low), 'description'. " - "Include vulnerabilities, auth flaws, injection, crypto weakness, config issues." - ), - "confidence": ( - "exploring/low/medium/high/very_high/almost_certain/certain. " - "CRITICAL: 'certain' PREVENTS external validation." - ), - "backtrack_from_step": "Step number to backtrack from if revision needed.", - "images": ( - "Optional: Architecture diagrams, security models, threat models (absolute paths). " - "Only if assists security assessment." - ), - "security_scope": ( - "Security context (web/mobile/API/enterprise/cloud). " - "Include stack, user types, data sensitivity, threat landscape. " - "This helps focus the security assessment appropriately." - ), - "threat_level": ( - "Assess the threat level based on application context: 'low' (internal tools, low-risk data), " - "'medium' (customer-facing, business data), 'high' (financial, healthcare, regulated industry), " - "'critical' (payment processing, sensitive personal data). This guides prioritization." - ), - "compliance_requirements": ( - "List applicable compliance frameworks and security standards (SOC2, PCI DSS, HIPAA, GDPR, " - "ISO 27001, NIST). Include industry-specific requirements that affect security controls." 
- ), - "audit_focus": "Primary security focus areas for this audit (owasp, compliance, infrastructure, dependencies)", - "severity_filter": "Minimum severity level to report on the security issues found", + "step_number": "Current security-audit step number (starts at 1).", + "total_steps": "Expected number of audit steps; adjust as new risks surface.", + "next_step_required": "True while additional threat analysis remains; set False once you are ready to hand off for validation.", + "findings": "Summarize vulnerabilities, auth issues, validation gaps, compliance notes, and positives; update prior findings as needed.", + "files_checked": "Absolute paths for every file inspected, including rejected candidates.", + "relevant_files": "Absolute paths for security-relevant files (auth modules, configs, sensitive code).", + "relevant_context": "Security-critical classes/methods (e.g. 'AuthService.login', 'encryption_helper').", + "issues_found": "Security issues with severity (critical/high/medium/low) and descriptions (vulns, auth flaws, injection, crypto, config).", + "confidence": "exploring/low/medium/high/very_high/almost_certain/certain. 'certain' blocks external validation—use only when fully complete.", + "backtrack_from_step": "Step number to revisit when revising earlier audit work.", + "images": "Optional absolute paths to diagrams or threat models that inform the audit.", + "security_scope": "Security context (web, mobile, API, cloud, etc.) 
including stack, user types, data sensitivity, and threat landscape.", + "threat_level": "Assess the threat level: low (internal/low-risk), medium (customer-facing/business data), high (financial/healthcare/regulated), critical (payment processing/sensitive personal data).", + "compliance_requirements": "Applicable compliance frameworks or standards (SOC2, PCI DSS, HIPAA, GDPR, ISO 27001, NIST, etc.).", + "audit_focus": "Primary focus area: owasp, compliance, infrastructure, dependencies, or comprehensive.", + "severity_filter": "Minimum severity to include when reporting security issues.", } diff --git a/tools/testgen.py b/tools/testgen.py index c3a58ed..721c268 100644 --- a/tools/testgen.py +++ b/tools/testgen.py @@ -35,37 +35,15 @@ logger = logging.getLogger(__name__) # Tool-specific field descriptions for test generation workflow TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = { "step": ( - "The test plan for this step. Step 1: State strategy for analyzing code structure, business logic, critical paths, and edge cases. " - "Later steps: Report findings and adapt as new test scenarios are identified." - ), - "step_number": ( - "The index of the current step in the test generation sequence, beginning at 1. Each step should build upon or " - "revise the previous one." - ), - "total_steps": ( - "Your current estimate for how many steps will be needed to complete the test generation analysis. " - "Adjust as new findings emerge." - ), - "next_step_required": ( - "Set to true if you plan to continue the investigation with another step. False means you believe the " - "test generation analysis is complete and ready for expert validation." - ), - "findings": ( - "Summary of discoveries about the code being tested. Include analysis of functionality, critical paths, edge cases, " - "boundary conditions, and error handling. IMPORTANT: Document both happy paths and failure modes. " - "Identify existing test patterns. In later steps, confirm or update past findings." 
- ), - "files_checked": ( - "List all files examined (absolute paths). Include even ruled-out files to track exploration path." - ), - "relevant_files": ( - "Subset of files_checked containing code needing tests (absolute paths). Include implementation files, " - "interfaces, dependencies, or existing test examples." - ), - "relevant_context": ( - "List methods/functions needing test coverage, in 'ClassName.methodName' or 'functionName' format. " - "Prioritize critical business logic, public APIs, and error-prone code paths." + "Test plan for this step. Step 1: outline how you'll analyse structure, business logic, critical paths, and edge cases. Later steps: record findings and new scenarios as they emerge." ), + "step_number": "Current test-generation step (starts at 1) — each step should build on prior work.", + "total_steps": "Estimated number of steps needed for test planning; adjust as new scenarios appear.", + "next_step_required": "True while more investigation or planning remains; set False when test planning is ready for expert validation.", + "findings": "Summarise functionality, critical paths, edge cases, boundary conditions, error handling, and existing test patterns. Cover both happy and failure paths.", + "files_checked": "Absolute paths of every file examined, including those ruled out.", + "relevant_files": "Absolute paths of code that requires new or updated tests (implementation, dependencies, existing test fixtures).", + "relevant_context": "Functions/methods needing coverage (e.g. 'Class.method', 'function_name'), with emphasis on critical paths and error-prone code.", "confidence": ( "Indicate your current confidence in the test generation assessment. 
Use: 'exploring' (starting analysis), " "'low' (early investigation), 'medium' (some patterns identified), 'high' (strong understanding), " @@ -74,11 +52,8 @@ TESTGEN_WORKFLOW_FIELD_DESCRIPTIONS = { "Do NOT use 'certain' unless the test generation analysis is comprehensively complete, use 'very_high' or 'almost_certain' instead if not 100% sure. " "Using 'certain' means you have complete confidence locally and prevents external model validation." ), - "backtrack_from_step": ("If an earlier finding needs revision, specify the step number to backtrack from."), - "images": ( - "Optional list of absolute paths to architecture diagrams, flow charts, or visual documentation that help " - "understand the code structure and test requirements. Only include if they materially assist test planning." - ), + "backtrack_from_step": "Step number to revisit if earlier findings need revision.", + "images": "Optional absolute paths to diagrams or visuals that clarify the system under test.", }