Precommit updated to take always prefer external analysis (via _other_ model) unless specified not to. This prevents Claude from being overconfident and inadequately performing subpar precommit checks.
This commit is contained in:
@@ -14,18 +14,20 @@ The precommit tool implements a **structured workflow** for comprehensive change
|
|||||||
|
|
||||||
**Investigation Phase (Claude-Led):**
|
**Investigation Phase (Claude-Led):**
|
||||||
1. **Step 1**: Claude describes the validation plan and begins analyzing git status across repositories
|
1. **Step 1**: Claude describes the validation plan and begins analyzing git status across repositories
|
||||||
2. **Step 2+**: Claude examines changes, diffs, dependencies, and potential impacts
|
2. **Step 2+**: Claude examines changes, diffs, dependencies, and potential impacts (minimum 2 steps)
|
||||||
3. **Throughout**: Claude tracks findings, relevant files, issues, and confidence levels
|
3. **Throughout**: Claude tracks findings, relevant files, and issues
|
||||||
4. **Completion**: Once investigation is thorough, Claude signals completion
|
4. **Completion**: Once investigation is thorough, Claude signals completion
|
||||||
|
|
||||||
|
**For Continuations**: When using `continuation_id` with external validation, Claude will immediately gather git changes and proceed to expert analysis without minimum step requirements.
|
||||||
|
|
||||||
**Expert Validation Phase:**
|
**Expert Validation Phase:**
|
||||||
After Claude completes the investigation (unless confidence is **certain**):
|
After Claude completes the investigation (unless precommit_type is **internal**):
|
||||||
- Complete summary of all changes and their context
|
- Complete summary of all changes and their context
|
||||||
- Potential issues and regressions identified
|
- Potential issues and regressions identified
|
||||||
- Requirement compliance assessment
|
- Requirement compliance assessment
|
||||||
- Final recommendations for safe commit
|
- Final recommendations for safe commit
|
||||||
|
|
||||||
**Special Note**: If you want Claude to perform the entire pre-commit validation without calling another model, you can include "don't use any other model" in your prompt, and Claude will complete the full workflow independently.
|
**Special Note**: If you want Claude to perform the entire pre-commit validation without calling another model, you can include "don't use any other model" in your prompt, or set the precommit_type to "internal", and Claude will complete the full workflow independently.
|
||||||
|
|
||||||
## Model Recommendation
|
## Model Recommendation
|
||||||
|
|
||||||
@@ -127,9 +129,8 @@ Use zen and perform a thorough precommit ensuring there aren't any new regressio
|
|||||||
- `relevant_files`: Files directly relevant to the changes
|
- `relevant_files`: Files directly relevant to the changes
|
||||||
- `relevant_context`: Methods/functions/classes affected by changes
|
- `relevant_context`: Methods/functions/classes affected by changes
|
||||||
- `issues_found`: Issues identified with severity levels
|
- `issues_found`: Issues identified with severity levels
|
||||||
- `confidence`: Confidence level in validation completeness (exploring/low/medium/high/certain)
|
- `precommit_type`: Type of validation to perform (external/internal, default: external)
|
||||||
- `backtrack_from_step`: Step number to backtrack from (for revisions)
|
- `backtrack_from_step`: Step number to backtrack from (for revisions)
|
||||||
- `hypothesis`: Current assessment of change safety and completeness
|
|
||||||
- `images`: Screenshots of requirements, design mockups for validation
|
- `images`: Screenshots of requirements, design mockups for validation
|
||||||
|
|
||||||
**Initial Configuration (used in step 1):**
|
**Initial Configuration (used in step 1):**
|
||||||
|
|||||||
@@ -225,8 +225,8 @@ REQUIREMENTS:
|
|||||||
{"severity": "high", "description": "Password hash exposed in API response"},
|
{"severity": "high", "description": "Password hash exposed in API response"},
|
||||||
{"severity": "medium", "description": "Missing authentication on admin endpoint"},
|
{"severity": "medium", "description": "Missing authentication on admin endpoint"},
|
||||||
],
|
],
|
||||||
"assessment": "Multiple critical security vulnerabilities found requiring immediate fixes",
|
# Assessment field removed - using precommit_type instead
|
||||||
"confidence": "high",
|
# Confidence field removed - using precommit_type instead
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -249,8 +249,8 @@ REQUIREMENTS:
|
|||||||
self.logger.error("Issues found not properly tracked")
|
self.logger.error("Issues found not properly tracked")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if validation_status.get("assessment_confidence") != "high":
|
if validation_status.get("precommit_type") != "external":
|
||||||
self.logger.error("Confidence level not properly tracked")
|
self.logger.error("Precommit type not properly tracked")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info(" ✅ Step 2 successful with proper tracking")
|
self.logger.info(" ✅ Step 2 successful with proper tracking")
|
||||||
@@ -300,8 +300,7 @@ REQUIREMENTS:
|
|||||||
"findings": "Connection pool configuration seems reasonable, might be looking in wrong place",
|
"findings": "Connection pool configuration seems reasonable, might be looking in wrong place",
|
||||||
"files_checked": ["/db/connection.py", "/config/database.py"],
|
"files_checked": ["/db/connection.py", "/config/database.py"],
|
||||||
"relevant_files": [],
|
"relevant_files": [],
|
||||||
"assessment": "Database configuration appears correct",
|
# Assessment fields removed - using precommit_type instead
|
||||||
"confidence": "low",
|
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -326,8 +325,7 @@ REQUIREMENTS:
|
|||||||
"issues_found": [
|
"issues_found": [
|
||||||
{"severity": "medium", "description": "N+1 query pattern in user profile loading"}
|
{"severity": "medium", "description": "N+1 query pattern in user profile loading"}
|
||||||
],
|
],
|
||||||
"assessment": "Query pattern optimization needed for performance",
|
# Assessment fields removed - using precommit_type instead
|
||||||
"confidence": "medium",
|
|
||||||
"backtrack_from_step": 2, # Backtrack from step 2
|
"backtrack_from_step": 2, # Backtrack from step 2
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
},
|
},
|
||||||
@@ -397,7 +395,7 @@ REQUIREMENTS:
|
|||||||
{"severity": "medium", "description": "Missing authentication on admin endpoint"},
|
{"severity": "medium", "description": "Missing authentication on admin endpoint"},
|
||||||
{"severity": "medium", "description": "Debug mode enabled in production configuration"},
|
{"severity": "medium", "description": "Debug mode enabled in production configuration"},
|
||||||
],
|
],
|
||||||
"confidence": "high",
|
# Confidence field removed - using precommit_type instead
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
"model": "flash", # Use flash for expert analysis
|
"model": "flash", # Use flash for expert analysis
|
||||||
},
|
},
|
||||||
@@ -490,8 +488,7 @@ REQUIREMENTS:
|
|||||||
{"severity": "high", "description": "Hardcoded secret - use environment variables"},
|
{"severity": "high", "description": "Hardcoded secret - use environment variables"},
|
||||||
{"severity": "medium", "description": "Missing authentication - add middleware"},
|
{"severity": "medium", "description": "Missing authentication - add middleware"},
|
||||||
],
|
],
|
||||||
"assessment": "Critical security vulnerabilities identified with clear fixes - changes must not be committed until resolved",
|
"precommit_type": "internal", # This should skip expert analysis
|
||||||
"confidence": "certain", # This should skip expert analysis
|
|
||||||
"path": self.test_dir,
|
"path": self.test_dir,
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
@@ -517,7 +514,7 @@ REQUIREMENTS:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
expert_analysis = response_certain_data.get("expert_analysis", {})
|
expert_analysis = response_certain_data.get("expert_analysis", {})
|
||||||
if expert_analysis.get("status") != "skipped_due_to_certain_validation_confidence":
|
if expert_analysis.get("status") != "skipped_due_to_internal_analysis_type":
|
||||||
self.logger.error("Expert analysis should be skipped for certain confidence")
|
self.logger.error("Expert analysis should be skipped for certain confidence")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -680,8 +677,7 @@ def rate_limiting_middleware(app):
|
|||||||
"files_checked": [auth_file, middleware_file],
|
"files_checked": [auth_file, middleware_file],
|
||||||
"relevant_files": [auth_file], # This should be referenced, not embedded
|
"relevant_files": [auth_file], # This should be referenced, not embedded
|
||||||
"relevant_context": ["require_auth"],
|
"relevant_context": ["require_auth"],
|
||||||
"assessment": "Investigating security implementation",
|
# Assessment fields removed - using precommit_type instead
|
||||||
"confidence": "low",
|
|
||||||
"path": self.test_dir,
|
"path": self.test_dir,
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
@@ -724,8 +720,7 @@ def rate_limiting_middleware(app):
|
|||||||
"issues_found": [
|
"issues_found": [
|
||||||
{"severity": "medium", "description": "Basic token validation might be insufficient"}
|
{"severity": "medium", "description": "Basic token validation might be insufficient"}
|
||||||
],
|
],
|
||||||
"assessment": "Security implementation needs improvement",
|
# Assessment fields removed - using precommit_type instead
|
||||||
"confidence": "medium",
|
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -775,8 +770,8 @@ def rate_limiting_middleware(app):
|
|||||||
{"severity": "low", "description": "Missing CSRF protection"},
|
{"severity": "low", "description": "Missing CSRF protection"},
|
||||||
{"severity": "low", "description": "Rate limiting not implemented"},
|
{"severity": "low", "description": "Rate limiting not implemented"},
|
||||||
],
|
],
|
||||||
"assessment": "Security implementation needs improvements but is acceptable for commit with follow-up tasks",
|
# Assessment field removed - using precommit_type instead
|
||||||
"confidence": "high",
|
# Confidence field removed - using precommit_type instead
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -915,8 +910,7 @@ if __name__ == '__main__':
|
|||||||
"files_checked": [db_file],
|
"files_checked": [db_file],
|
||||||
"relevant_files": [db_file],
|
"relevant_files": [db_file],
|
||||||
"relevant_context": [],
|
"relevant_context": [],
|
||||||
"assessment": "Examining database implementation for best practices",
|
# Assessment fields removed - using precommit_type instead
|
||||||
"confidence": "low",
|
|
||||||
"path": self.test_dir,
|
"path": self.test_dir,
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
@@ -950,8 +944,7 @@ if __name__ == '__main__':
|
|||||||
"files_checked": [db_file, test_file],
|
"files_checked": [db_file, test_file],
|
||||||
"relevant_files": [db_file, test_file],
|
"relevant_files": [db_file, test_file],
|
||||||
"relevant_context": ["DatabaseManager.create_user", "TestDatabaseManager.test_create_user"],
|
"relevant_context": ["DatabaseManager.create_user", "TestDatabaseManager.test_create_user"],
|
||||||
"assessment": "Implementation looks solid with proper testing",
|
# Assessment fields removed - using precommit_type instead
|
||||||
"confidence": "medium",
|
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -991,8 +984,8 @@ if __name__ == '__main__':
|
|||||||
"relevant_files": [db_file, test_file],
|
"relevant_files": [db_file, test_file],
|
||||||
"relevant_context": ["DatabaseManager.get_connection", "DatabaseManager.create_user"],
|
"relevant_context": ["DatabaseManager.get_connection", "DatabaseManager.create_user"],
|
||||||
"issues_found": [], # No issues found
|
"issues_found": [], # No issues found
|
||||||
"assessment": "High quality implementation with proper security measures and testing",
|
# Assessment field removed - using precommit_type instead
|
||||||
"confidence": "high",
|
# Confidence field removed - using precommit_type instead
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -1026,8 +1019,8 @@ if __name__ == '__main__':
|
|||||||
"relevant_files": [db_file, test_file],
|
"relevant_files": [db_file, test_file],
|
||||||
"relevant_context": ["DatabaseManager", "TestDatabaseManager"],
|
"relevant_context": ["DatabaseManager", "TestDatabaseManager"],
|
||||||
"issues_found": [],
|
"issues_found": [],
|
||||||
"assessment": "Code meets all security and quality standards - approved for commit",
|
# Assessment field removed - using precommit_type instead
|
||||||
"confidence": "high",
|
# Confidence field removed - using precommit_type instead
|
||||||
"model": "flash",
|
"model": "flash",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,75 +1,54 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
import sqlite3
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from flask import Flask, jsonify, request
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
# A05: Security Misconfiguration - Debug mode enabled
|
|
||||||
app.config["DEBUG"] = True
|
@app.route("/api/user/<user_id>", methods=["GET"])
|
||||||
app.config["SECRET_KEY"] = "dev-secret-key" # Hardcoded secret
|
def get_user(user_id):
|
||||||
|
"""Get user information by ID"""
|
||||||
|
# Potential SQL injection vulnerability
|
||||||
|
conn = sqlite3.connect("users.db")
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# BUG: Direct string interpolation creates SQL injection risk
|
||||||
|
query = f"SELECT * FROM users WHERE id = {user_id}"
|
||||||
|
cursor.execute(query)
|
||||||
|
|
||||||
|
result = cursor.fetchone()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
if result:
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"id": result[0],
|
||||||
|
"username": result[1],
|
||||||
|
"email": result[2],
|
||||||
|
"password_hash": result[3], # Security issue: exposing password hash
|
||||||
|
}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return jsonify({"error": "User not found"}), 404
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/search", methods=["GET"])
|
@app.route("/api/admin/users", methods=["GET"])
|
||||||
def search():
|
def list_all_users():
|
||||||
"""Search endpoint with multiple vulnerabilities"""
|
"""Admin endpoint to list all users"""
|
||||||
# A03: Injection - XSS vulnerability, no input sanitization
|
# Missing authentication check
|
||||||
query = request.args.get("q", "")
|
conn = sqlite3.connect("users.db")
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute("SELECT id, username, email FROM users")
|
||||||
|
|
||||||
# A03: Injection - Command injection vulnerability
|
users = []
|
||||||
if "file:" in query:
|
for row in cursor.fetchall():
|
||||||
filename = query.split("file:")[1]
|
users.append({"id": row[0], "username": row[1], "email": row[2]})
|
||||||
# Direct command execution
|
|
||||||
result = subprocess.run(f"cat {filename}", shell=True, capture_output=True, text=True)
|
|
||||||
return jsonify({"result": result.stdout})
|
|
||||||
|
|
||||||
# A10: Server-Side Request Forgery (SSRF)
|
conn.close()
|
||||||
if query.startswith("http"):
|
return jsonify(users)
|
||||||
# No validation of URL, allows internal network access
|
|
||||||
response = requests.get(query)
|
|
||||||
return jsonify({"content": response.text})
|
|
||||||
|
|
||||||
# Return search results without output encoding
|
|
||||||
return f"<h1>Search Results for: {query}</h1>"
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/admin", methods=["GET"])
|
|
||||||
def admin_panel():
|
|
||||||
"""Admin panel with broken access control"""
|
|
||||||
# A01: Broken Access Control - No authentication check
|
|
||||||
# Anyone can access admin functionality
|
|
||||||
action = request.args.get("action")
|
|
||||||
|
|
||||||
if action == "delete_user":
|
|
||||||
user_id = request.args.get("user_id")
|
|
||||||
# Performs privileged action without authorization
|
|
||||||
return jsonify({"status": "User deleted", "user_id": user_id})
|
|
||||||
|
|
||||||
return jsonify({"status": "Admin panel"})
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/upload", methods=["POST"])
|
|
||||||
def upload_file():
|
|
||||||
"""File upload with security issues"""
|
|
||||||
# A05: Security Misconfiguration - No file type validation
|
|
||||||
file = request.files.get("file")
|
|
||||||
if file:
|
|
||||||
# Saves any file type to server
|
|
||||||
filename = file.filename
|
|
||||||
file.save(os.path.join("/tmp", filename))
|
|
||||||
|
|
||||||
# A03: Path traversal vulnerability
|
|
||||||
return jsonify({"status": "File uploaded", "path": f"/tmp/{filename}"})
|
|
||||||
|
|
||||||
return jsonify({"error": "No file provided"})
|
|
||||||
|
|
||||||
|
|
||||||
# A06: Vulnerable and Outdated Components
|
|
||||||
# Using old Flask version with known vulnerabilities (hypothetical)
|
|
||||||
# requirements.txt: Flask==0.12.2 (known security issues)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# A05: Security Misconfiguration - Running on all interfaces
|
# Debug mode in production is a security risk
|
||||||
app.run(host="0.0.0.0", port=5000, debug=True)
|
app.run(debug=True, host="0.0.0.0")
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ class TestPrecommitWorkflowTool:
|
|||||||
next_step_required=False,
|
next_step_required=False,
|
||||||
findings="Comprehensive findings",
|
findings="Comprehensive findings",
|
||||||
path="/test/repo",
|
path="/test/repo",
|
||||||
confidence="high",
|
precommit_type="external",
|
||||||
files_checked=["/file1.py", "/file2.py"],
|
files_checked=["/file1.py", "/file2.py"],
|
||||||
relevant_files=["/file1.py"],
|
relevant_files=["/file1.py"],
|
||||||
relevant_context=["function_name", "class_name"],
|
relevant_context=["function_name", "class_name"],
|
||||||
@@ -101,7 +101,7 @@ class TestPrecommitWorkflowTool:
|
|||||||
images=["/screenshot.png"],
|
images=["/screenshot.png"],
|
||||||
)
|
)
|
||||||
|
|
||||||
assert request.confidence == "high"
|
assert request.precommit_type == "external"
|
||||||
assert len(request.files_checked) == 2
|
assert len(request.files_checked) == 2
|
||||||
assert len(request.relevant_files) == 1
|
assert len(request.relevant_files) == 1
|
||||||
assert len(request.relevant_context) == 2
|
assert len(request.relevant_context) == 2
|
||||||
@@ -144,21 +144,32 @@ class TestPrecommitWorkflowTool:
|
|||||||
assert request.focus_on == "security issues"
|
assert request.focus_on == "security issues"
|
||||||
assert request.severity_filter == "high"
|
assert request.severity_filter == "high"
|
||||||
|
|
||||||
def test_confidence_levels(self):
|
def test_precommit_type_validation(self):
|
||||||
"""Test confidence level validation"""
|
"""Test precommit type validation"""
|
||||||
valid_confidence_levels = ["exploring", "low", "medium", "high", "certain"]
|
valid_types = ["external", "internal"]
|
||||||
|
|
||||||
for confidence in valid_confidence_levels:
|
for precommit_type in valid_types:
|
||||||
request = PrecommitRequest(
|
request = PrecommitRequest(
|
||||||
step="Test confidence level",
|
step="Test precommit type",
|
||||||
step_number=1,
|
step_number=1,
|
||||||
total_steps=1,
|
total_steps=1,
|
||||||
next_step_required=False,
|
next_step_required=False,
|
||||||
findings="Test findings",
|
findings="Test findings",
|
||||||
path="/repo",
|
path="/repo",
|
||||||
confidence=confidence,
|
precommit_type=precommit_type,
|
||||||
)
|
)
|
||||||
assert request.confidence == confidence
|
assert request.precommit_type == precommit_type
|
||||||
|
|
||||||
|
# Test default is external
|
||||||
|
request = PrecommitRequest(
|
||||||
|
step="Test default type",
|
||||||
|
step_number=1,
|
||||||
|
total_steps=1,
|
||||||
|
next_step_required=False,
|
||||||
|
findings="Test findings",
|
||||||
|
path="/repo",
|
||||||
|
)
|
||||||
|
assert request.precommit_type == "external"
|
||||||
|
|
||||||
def test_severity_filter_options(self):
|
def test_severity_filter_options(self):
|
||||||
"""Test severity filter validation"""
|
"""Test severity filter validation"""
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ Key features:
|
|||||||
- Step-by-step pre-commit investigation workflow with progress tracking
|
- Step-by-step pre-commit investigation workflow with progress tracking
|
||||||
- Context-aware file embedding (references during investigation, full content for analysis)
|
- Context-aware file embedding (references during investigation, full content for analysis)
|
||||||
- Automatic git repository discovery and change analysis
|
- Automatic git repository discovery and change analysis
|
||||||
- Expert analysis integration with external models
|
- Expert analysis integration with external models (default)
|
||||||
- Support for multiple repositories and change types
|
- Support for multiple repositories and change types
|
||||||
- Confidence-based workflow optimization
|
- Configurable validation type (external with expert model or internal only)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
@@ -89,15 +89,11 @@ PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS = {
|
|||||||
"(critical, high, medium, low) and 'description' fields. Include potential bugs, security concerns, "
|
"(critical, high, medium, low) and 'description' fields. Include potential bugs, security concerns, "
|
||||||
"performance issues, missing tests, incomplete implementations, etc."
|
"performance issues, missing tests, incomplete implementations, etc."
|
||||||
),
|
),
|
||||||
"confidence": (
|
"precommit_type": (
|
||||||
"Indicate your current confidence in the assessment. Use: 'exploring' (starting analysis), 'low' (early "
|
"Type of pre-commit validation to perform: 'external' (default - uses external model for validation) or 'internal' "
|
||||||
"investigation), 'medium' (some evidence gathered), 'high' (strong evidence), "
|
"(performs validation without external model review). IMPORTANT: Always default to 'external' unless the "
|
||||||
"'very_high' (very strong evidence), 'almost_certain' (nearly complete validation), 'certain' (200% confidence - "
|
"user explicitly requests internal-only validation or asks you not to use another model. External validation "
|
||||||
"analysis is complete and all issues are identified with no need for external model validation). "
|
"provides additional expert review and should be the standard approach for comprehensive pre-commit validation."
|
||||||
"Do NOT use 'certain' unless the pre-commit validation is thoroughly complete, use 'very_high' or 'almost_certain' "
|
|
||||||
"instead if not 200% sure. "
|
|
||||||
"Using 'certain' means you have complete confidence locally and prevents external model validation. Also "
|
|
||||||
"do NOT set confidence to 'certain' if the user has strongly requested that external validation MUST be performed."
|
|
||||||
),
|
),
|
||||||
"backtrack_from_step": (
|
"backtrack_from_step": (
|
||||||
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
"If an earlier finding or assessment needs to be revised or discarded, specify the step number from which to "
|
||||||
@@ -145,7 +141,9 @@ class PrecommitRequest(WorkflowRequest):
|
|||||||
issues_found: list[dict] = Field(
|
issues_found: list[dict] = Field(
|
||||||
default_factory=list, description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["issues_found"]
|
default_factory=list, description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["issues_found"]
|
||||||
)
|
)
|
||||||
confidence: Optional[str] = Field("low", description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"])
|
precommit_type: Optional[Literal["external", "internal"]] = Field(
|
||||||
|
"external", description=PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["precommit_type"]
|
||||||
|
)
|
||||||
|
|
||||||
# Optional backtracking field
|
# Optional backtracking field
|
||||||
backtrack_from_step: Optional[int] = Field(
|
backtrack_from_step: Optional[int] = Field(
|
||||||
@@ -273,10 +271,11 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["relevant_files"],
|
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["relevant_files"],
|
||||||
},
|
},
|
||||||
"confidence": {
|
"precommit_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["exploring", "low", "medium", "high", "very_high", "almost_certain", "certain"],
|
"enum": ["external", "internal"],
|
||||||
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["confidence"],
|
"default": "external",
|
||||||
|
"description": PRECOMMIT_WORKFLOW_FIELD_DESCRIPTIONS["precommit_type"],
|
||||||
},
|
},
|
||||||
"backtrack_from_step": {
|
"backtrack_from_step": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
@@ -332,7 +331,9 @@ class PrecommitTool(WorkflowTool):
|
|||||||
tool_name=self.get_name(),
|
tool_name=self.get_name(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_required_actions(self, step_number: int, confidence: str, findings: str, total_steps: int) -> list[str]:
|
def get_required_actions(
|
||||||
|
self, step_number: int, findings_count: int, issues_count: int, total_steps: int
|
||||||
|
) -> list[str]:
|
||||||
"""Define required actions for each investigation phase."""
|
"""Define required actions for each investigation phase."""
|
||||||
if step_number == 1:
|
if step_number == 1:
|
||||||
# Initial pre-commit investigation tasks
|
# Initial pre-commit investigation tasks
|
||||||
@@ -343,7 +344,7 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"Understand what functionality was added, modified, or removed",
|
"Understand what functionality was added, modified, or removed",
|
||||||
"Identify the scope and intent of the changes being committed",
|
"Identify the scope and intent of the changes being committed",
|
||||||
]
|
]
|
||||||
elif confidence in ["exploring", "low"]:
|
elif step_number == 2:
|
||||||
# Need deeper investigation
|
# Need deeper investigation
|
||||||
return [
|
return [
|
||||||
"Examine the specific files you've identified as changed or relevant",
|
"Examine the specific files you've identified as changed or relevant",
|
||||||
@@ -352,7 +353,7 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"Verify that changes align with good coding practices and patterns",
|
"Verify that changes align with good coding practices and patterns",
|
||||||
"Look for missing tests, documentation, or configuration updates",
|
"Look for missing tests, documentation, or configuration updates",
|
||||||
]
|
]
|
||||||
elif confidence in ["medium", "high"]:
|
elif step_number >= 2 and (findings_count > 2 or issues_count > 0):
|
||||||
# Close to completion - need final verification
|
# Close to completion - need final verification
|
||||||
return [
|
return [
|
||||||
"Verify all identified issues have been properly documented",
|
"Verify all identified issues have been properly documented",
|
||||||
@@ -374,12 +375,17 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"""
|
"""
|
||||||
Decide when to call external model based on investigation completeness.
|
Decide when to call external model based on investigation completeness.
|
||||||
|
|
||||||
Don't call expert analysis if the CLI agent has certain confidence - trust their judgment.
|
For continuations with external type, always proceed with expert analysis.
|
||||||
"""
|
"""
|
||||||
# Check if user requested to skip assistant model
|
# Check if user requested to skip assistant model
|
||||||
if request and not self.get_request_use_assistant_model(request):
|
if request and not self.get_request_use_assistant_model(request):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# For continuations with external type, always proceed with expert analysis
|
||||||
|
continuation_id = self.get_request_continuation_id(request)
|
||||||
|
if continuation_id and request.precommit_type == "external":
|
||||||
|
return True # Always perform expert analysis for external continuations
|
||||||
|
|
||||||
# Check if we have meaningful investigation data
|
# Check if we have meaningful investigation data
|
||||||
return (
|
return (
|
||||||
len(consolidated_findings.relevant_files) > 0
|
len(consolidated_findings.relevant_files) > 0
|
||||||
@@ -420,8 +426,7 @@ class PrecommitTool(WorkflowTool):
|
|||||||
# Add assessment evolution if available
|
# Add assessment evolution if available
|
||||||
if consolidated_findings.hypotheses:
|
if consolidated_findings.hypotheses:
|
||||||
assessments_text = "\\n".join(
|
assessments_text = "\\n".join(
|
||||||
f"Step {h['step']} ({h['confidence']} confidence): {h['hypothesis']}"
|
f"Step {h['step']}: {h['hypothesis']}" for h in consolidated_findings.hypotheses
|
||||||
for h in consolidated_findings.hypotheses
|
|
||||||
)
|
)
|
||||||
context_parts.append(f"\\n=== ASSESSMENT EVOLUTION ===\\n{assessments_text}\\n=== END ASSESSMENTS ===")
|
context_parts.append(f"\\n=== ASSESSMENT EVOLUTION ===\\n{assessments_text}\\n=== END ASSESSMENTS ===")
|
||||||
|
|
||||||
@@ -486,17 +491,25 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"relevant_files": request.relevant_files,
|
"relevant_files": request.relevant_files,
|
||||||
"relevant_context": request.relevant_context,
|
"relevant_context": request.relevant_context,
|
||||||
"issues_found": request.issues_found,
|
"issues_found": request.issues_found,
|
||||||
"confidence": request.confidence,
|
"precommit_type": request.precommit_type,
|
||||||
"hypothesis": request.findings, # Map findings to hypothesis for compatibility
|
"hypothesis": request.findings, # Map findings to hypothesis for compatibility
|
||||||
"images": request.images or [],
|
"images": request.images or [],
|
||||||
|
"confidence": "high", # Dummy value for workflow_mixin compatibility
|
||||||
}
|
}
|
||||||
return step_data
|
return step_data
|
||||||
|
|
||||||
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
|
def should_skip_expert_analysis(self, request, consolidated_findings) -> bool:
|
||||||
"""
|
"""
|
||||||
Precommit workflow skips expert analysis when the CLI agent has "certain" confidence.
|
Precommit workflow skips expert analysis only when precommit_type is "internal".
|
||||||
|
Default is always to use expert analysis (external).
|
||||||
|
For continuations with external type, always perform expert analysis immediately.
|
||||||
"""
|
"""
|
||||||
return request.confidence == "certain" and not request.next_step_required
|
# If it's a continuation and precommit_type is external, don't skip
|
||||||
|
continuation_id = self.get_request_continuation_id(request)
|
||||||
|
if continuation_id and request.precommit_type != "internal":
|
||||||
|
return False # Always do expert analysis for external continuations
|
||||||
|
|
||||||
|
return request.precommit_type == "internal" and not request.next_step_required
|
||||||
|
|
||||||
def store_initial_issue(self, step_description: str):
|
def store_initial_issue(self, step_description: str):
|
||||||
"""Store initial request for expert analysis."""
|
"""Store initial request for expert analysis."""
|
||||||
@@ -516,14 +529,14 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"""Precommit tools use 'findings' field."""
|
"""Precommit tools use 'findings' field."""
|
||||||
return request.findings
|
return request.findings
|
||||||
|
|
||||||
def get_confidence_level(self, request) -> str:
|
def get_precommit_type(self, request) -> str:
|
||||||
"""Precommit tools use 'certain' for high confidence."""
|
"""Precommit tools use precommit_type field."""
|
||||||
return "certain"
|
return request.precommit_type or "external"
|
||||||
|
|
||||||
def get_completion_message(self) -> str:
|
def get_completion_message(self) -> str:
|
||||||
"""Precommit-specific completion message."""
|
"""Precommit-specific completion message."""
|
||||||
return (
|
return (
|
||||||
"Pre-commit validation complete with CERTAIN confidence. You have identified all issues "
|
"Pre-commit validation complete. You have identified all issues "
|
||||||
"and verified commit readiness. MANDATORY: Present the user with the complete validation results "
|
"and verified commit readiness. MANDATORY: Present the user with the complete validation results "
|
||||||
"and IMMEDIATELY proceed with commit if no critical issues found, or provide specific fix guidance "
|
"and IMMEDIATELY proceed with commit if no critical issues found, or provide specific fix guidance "
|
||||||
"if issues need resolution. Focus on actionable next steps."
|
"if issues need resolution. Focus on actionable next steps."
|
||||||
@@ -531,11 +544,13 @@ class PrecommitTool(WorkflowTool):
|
|||||||
|
|
||||||
def get_skip_reason(self) -> str:
|
def get_skip_reason(self) -> str:
|
||||||
"""Precommit-specific skip reason."""
|
"""Precommit-specific skip reason."""
|
||||||
return "Completed comprehensive pre-commit validation with full confidence locally"
|
return (
|
||||||
|
"Completed comprehensive pre-commit validation with internal analysis only (no external model validation)"
|
||||||
|
)
|
||||||
|
|
||||||
def get_skip_expert_analysis_status(self) -> str:
|
def get_skip_expert_analysis_status(self) -> str:
|
||||||
"""Precommit-specific expert analysis skip status."""
|
"""Precommit-specific expert analysis skip status."""
|
||||||
return "skipped_due_to_certain_validation_confidence"
|
return "skipped_due_to_internal_analysis_type"
|
||||||
|
|
||||||
def prepare_work_summary(self) -> str:
|
def prepare_work_summary(self) -> str:
|
||||||
"""Precommit-specific work summary."""
|
"""Precommit-specific work summary."""
|
||||||
@@ -583,26 +598,40 @@ class PrecommitTool(WorkflowTool):
|
|||||||
"""
|
"""
|
||||||
Precommit-specific step guidance with detailed investigation instructions.
|
Precommit-specific step guidance with detailed investigation instructions.
|
||||||
"""
|
"""
|
||||||
step_guidance = self.get_precommit_step_guidance(request.step_number, request.confidence, request)
|
step_guidance = self.get_precommit_step_guidance(request.step_number, request)
|
||||||
return step_guidance["next_steps"]
|
return step_guidance["next_steps"]
|
||||||
|
|
||||||
def get_precommit_step_guidance(self, step_number: int, confidence: str, request) -> dict[str, Any]:
|
def get_precommit_step_guidance(self, step_number: int, request) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Provide step-specific guidance for precommit workflow.
|
Provide step-specific guidance for precommit workflow.
|
||||||
"""
|
"""
|
||||||
# Check if this is a continuation - if so, skip workflow and go to expert analysis
|
# Check if this is a continuation - if so, skip workflow and go to expert analysis
|
||||||
continuation_id = self.get_request_continuation_id(request)
|
continuation_id = self.get_request_continuation_id(request)
|
||||||
if continuation_id:
|
if continuation_id:
|
||||||
return {
|
if request.precommit_type == "external":
|
||||||
"next_steps": (
|
return {
|
||||||
"Continuing previous conversation. The expert analysis will now be performed based on the "
|
"next_steps": (
|
||||||
"accumulated context from the previous conversation. The analysis will build upon the prior "
|
"Continuing previous conversation with external validation. CRITICAL: You MUST first gather "
|
||||||
"findings without repeating the investigation steps."
|
"the complete git changeset (git status, git diff --cached, git diff) to provide to the expert. "
|
||||||
)
|
"No minimum steps required - as soon as you provide the git changes in your response, "
|
||||||
}
|
"the expert analysis will be performed immediately. The expert needs the FULL context of "
|
||||||
|
"all changes to provide comprehensive validation. Include staged changes, unstaged changes, "
|
||||||
|
"and any untracked files that are part of this commit."
|
||||||
|
)
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"next_steps": (
|
||||||
|
"Continuing previous conversation with internal validation only. The analysis will build "
|
||||||
|
"upon the prior findings without external model validation. Proceed with your own assessment "
|
||||||
|
"of the changes based on the accumulated context."
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
# Generate the next steps instruction based on required actions
|
# Generate the next steps instruction based on required actions
|
||||||
required_actions = self.get_required_actions(step_number, confidence, request.findings, request.total_steps)
|
findings_count = len(request.findings.split("\n")) if request.findings else 0
|
||||||
|
issues_count = len(request.issues_found) if request.issues_found else 0
|
||||||
|
required_actions = self.get_required_actions(step_number, findings_count, issues_count, request.total_steps)
|
||||||
|
|
||||||
if step_number == 1:
|
if step_number == 1:
|
||||||
next_steps = (
|
next_steps = (
|
||||||
@@ -614,7 +643,7 @@ class PrecommitTool(WorkflowTool):
|
|||||||
f"When you call {self.get_name()} next time, use step_number: {step_number + 1} and report specific "
|
f"When you call {self.get_name()} next time, use step_number: {step_number + 1} and report specific "
|
||||||
f"files examined, changes analyzed, and validation findings discovered."
|
f"files examined, changes analyzed, and validation findings discovered."
|
||||||
)
|
)
|
||||||
elif confidence in ["exploring", "low"]:
|
elif step_number == 2:
|
||||||
next_steps = (
|
next_steps = (
|
||||||
f"STOP! Do NOT call {self.get_name()} again yet. Based on your findings, you've identified areas that need "
|
f"STOP! Do NOT call {self.get_name()} again yet. Based on your findings, you've identified areas that need "
|
||||||
f"deeper analysis. MANDATORY ACTIONS before calling {self.get_name()} step {step_number + 1}:\\n"
|
f"deeper analysis. MANDATORY ACTIONS before calling {self.get_name()} step {step_number + 1}:\\n"
|
||||||
@@ -622,7 +651,7 @@ class PrecommitTool(WorkflowTool):
|
|||||||
+ f"\\n\\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
|
+ f"\\n\\nOnly call {self.get_name()} again with step_number: {step_number + 1} AFTER "
|
||||||
+ "completing these validations."
|
+ "completing these validations."
|
||||||
)
|
)
|
||||||
elif confidence in ["medium", "high"]:
|
elif step_number >= 2:
|
||||||
next_steps = (
|
next_steps = (
|
||||||
f"WAIT! Your validation needs final verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\\n"
|
f"WAIT! Your validation needs final verification. DO NOT call {self.get_name()} immediately. REQUIRED ACTIONS:\\n"
|
||||||
+ "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
+ "\\n".join(f"{i+1}. {action}" for i, action in enumerate(required_actions))
|
||||||
@@ -677,7 +706,7 @@ class PrecommitTool(WorkflowTool):
|
|||||||
response_data["validation_status"] = response_data.pop(f"{tool_name}_status")
|
response_data["validation_status"] = response_data.pop(f"{tool_name}_status")
|
||||||
# Add precommit-specific status fields
|
# Add precommit-specific status fields
|
||||||
response_data["validation_status"]["issues_identified"] = len(self.consolidated_findings.issues_found)
|
response_data["validation_status"]["issues_identified"] = len(self.consolidated_findings.issues_found)
|
||||||
response_data["validation_status"]["assessment_confidence"] = self.get_request_confidence(request)
|
response_data["validation_status"]["precommit_type"] = request.precommit_type or "external"
|
||||||
|
|
||||||
# Map complete_precommitworkflow to complete_validation
|
# Map complete_precommitworkflow to complete_validation
|
||||||
if f"complete_{tool_name}" in response_data:
|
if f"complete_{tool_name}" in response_data:
|
||||||
|
|||||||
Reference in New Issue
Block a user