fix: reduced token usage, removed parameters from schema that CLIs never seem to use
This commit is contained in:
@@ -39,8 +39,8 @@ class AnalyzeValidationTest(ConversationBaseTest):
|
||||
if not self._test_single_analysis_session():
|
||||
return False
|
||||
|
||||
# Test 2: Analysis with backtracking
|
||||
if not self._test_analysis_with_backtracking():
|
||||
# Test 2: Analysis flow that requires refocusing
|
||||
if not self._test_analysis_refocus_flow():
|
||||
return False
|
||||
|
||||
# Test 3: Complete analysis with expert validation
|
||||
@@ -530,13 +530,13 @@ class PerformanceTimer:
|
||||
self.logger.error(f"Single analysis session test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_analysis_with_backtracking(self) -> bool:
|
||||
"""Test analysis with backtracking to revise findings"""
|
||||
def _test_analysis_refocus_flow(self) -> bool:
|
||||
"""Test analysis flow that requires refocusing to revise findings"""
|
||||
try:
|
||||
self.logger.info(" 1.2: Testing analysis with backtracking")
|
||||
self.logger.info(" 1.2: Testing analysis refocus workflow")
|
||||
|
||||
# Start a new analysis for testing backtracking
|
||||
self.logger.info(" 1.2.1: Start analysis for backtracking test")
|
||||
# Start a new analysis for testing refocus behaviour
|
||||
self.logger.info(" 1.2.1: Start analysis for refocus test")
|
||||
response1, continuation_id = self.call_mcp_tool(
|
||||
"analyze",
|
||||
{
|
||||
@@ -553,7 +553,7 @@ class PerformanceTimer:
|
||||
)
|
||||
|
||||
if not response1 or not continuation_id:
|
||||
self.logger.error("Failed to start backtracking test analysis")
|
||||
self.logger.error("Failed to start refocus test analysis")
|
||||
return False
|
||||
|
||||
# Step 2: Wrong direction
|
||||
@@ -579,12 +579,12 @@ class PerformanceTimer:
|
||||
self.logger.error("Failed to continue to step 2")
|
||||
return False
|
||||
|
||||
# Step 3: Backtrack from step 2
|
||||
self.logger.info(" 1.2.3: Step 3 - Backtrack and revise approach")
|
||||
# Step 3: Adjust investigation path
|
||||
self.logger.info(" 1.2.3: Step 3 - Refocus the analysis")
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"analyze",
|
||||
{
|
||||
"step": "Backtracking - the performance issue might not be database related. Let me examine the caching and serialization patterns instead.",
|
||||
"step": "Refocus - the performance issue might not be database related. Let me examine the caching and serialization patterns instead.",
|
||||
"step_number": 3,
|
||||
"total_steps": 4,
|
||||
"next_step_required": True,
|
||||
@@ -597,20 +597,19 @@ class PerformanceTimer:
|
||||
{"severity": "low", "description": "Cache key generation lacks proper escaping"},
|
||||
],
|
||||
"confidence": "medium",
|
||||
"backtrack_from_step": 2, # Backtrack from step 2
|
||||
"continuation_id": continuation_id,
|
||||
},
|
||||
)
|
||||
|
||||
if not response3:
|
||||
self.logger.error("Failed to backtrack")
|
||||
self.logger.error("Failed to refocus analysis")
|
||||
return False
|
||||
|
||||
response3_data = self._parse_analyze_response(response3)
|
||||
if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_analysis"):
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Backtracking working correctly")
|
||||
self.logger.info(" ✅ Analysis refocus flow working correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -39,8 +39,8 @@ class CodeReviewValidationTest(ConversationBaseTest):
|
||||
if not self._test_single_review_session():
|
||||
return False
|
||||
|
||||
# Test 2: Review with backtracking
|
||||
if not self._test_review_with_backtracking():
|
||||
# Test 2: Review flow that requires refocusing
|
||||
if not self._test_review_refocus_flow():
|
||||
return False
|
||||
|
||||
# Test 3: Complete review with expert analysis
|
||||
@@ -336,13 +336,13 @@ class ConfigurationManager:
|
||||
self.logger.error(f"Single review session test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_review_with_backtracking(self) -> bool:
|
||||
"""Test code review with backtracking to revise findings"""
|
||||
def _test_review_refocus_flow(self) -> bool:
|
||||
"""Test code review flow that revises findings by refocusing"""
|
||||
try:
|
||||
self.logger.info(" 1.2: Testing code review with backtracking")
|
||||
self.logger.info(" 1.2: Testing code review refocus workflow")
|
||||
|
||||
# Start a new review for testing backtracking
|
||||
self.logger.info(" 1.2.1: Start review for backtracking test")
|
||||
# Start a new review for testing refocus behaviour
|
||||
self.logger.info(" 1.2.1: Start review for refocus test")
|
||||
response1, continuation_id = self.call_mcp_tool(
|
||||
"codereview",
|
||||
{
|
||||
@@ -359,7 +359,7 @@ class ConfigurationManager:
|
||||
)
|
||||
|
||||
if not response1 or not continuation_id:
|
||||
self.logger.error("Failed to start backtracking test review")
|
||||
self.logger.error("Failed to start refocus test review")
|
||||
return False
|
||||
|
||||
# Step 2: Initial direction
|
||||
@@ -386,12 +386,12 @@ class ConfigurationManager:
|
||||
self.logger.error("Failed to continue to step 2")
|
||||
return False
|
||||
|
||||
# Step 3: Backtrack and focus on security
|
||||
self.logger.info(" 1.2.3: Step 3 - Backtrack to focus on security issues")
|
||||
# Step 3: Shift focus based on new evidence
|
||||
self.logger.info(" 1.2.3: Step 3 - Refocus on security issues")
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"codereview",
|
||||
{
|
||||
"step": "Backtracking - need to focus on the critical security issues I initially missed. Found hardcoded secrets and credentials in plain text.",
|
||||
"step": "Refocusing - need to concentrate on the critical security issues I initially missed. Found hardcoded secrets and credentials in plain text.",
|
||||
"step_number": 3,
|
||||
"total_steps": 4,
|
||||
"next_step_required": True,
|
||||
@@ -405,24 +405,23 @@ class ConfigurationManager:
|
||||
{"severity": "high", "description": "Over-engineered configuration system"},
|
||||
],
|
||||
"confidence": "high",
|
||||
"backtrack_from_step": 2, # Backtrack from step 2
|
||||
"continuation_id": continuation_id,
|
||||
},
|
||||
)
|
||||
|
||||
if not response3:
|
||||
self.logger.error("Failed to backtrack")
|
||||
self.logger.error("Failed to refocus")
|
||||
return False
|
||||
|
||||
response3_data = self._parse_review_response(response3)
|
||||
if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_code_review"):
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Backtracking working correctly")
|
||||
self.logger.info(" ✅ Refocus flow working correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Backtracking test failed: {e}")
|
||||
self.logger.error(f"Refocus test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_complete_review_with_analysis(self) -> bool:
|
||||
|
||||
@@ -39,8 +39,8 @@ class DebugValidationTest(ConversationBaseTest):
|
||||
if not self._test_single_investigation_session():
|
||||
return False
|
||||
|
||||
# Test 2: Investigation with backtracking
|
||||
if not self._test_investigation_with_backtracking():
|
||||
# Test 2: Investigation flow that requires refinement
|
||||
if not self._test_investigation_refine_flow():
|
||||
return False
|
||||
|
||||
# Test 3: Complete investigation with expert analysis
|
||||
@@ -230,13 +230,13 @@ RuntimeError: dictionary changed size during iteration
|
||||
self.logger.error(f"Single investigation session test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_investigation_with_backtracking(self) -> bool:
|
||||
"""Test investigation with backtracking to revise findings"""
|
||||
def _test_investigation_refine_flow(self) -> bool:
|
||||
"""Test investigation flow that requires refining the approach"""
|
||||
try:
|
||||
self.logger.info(" 1.2: Testing investigation with backtracking")
|
||||
self.logger.info(" 1.2: Testing investigation refinement workflow")
|
||||
|
||||
# Start a new investigation for testing backtracking
|
||||
self.logger.info(" 1.2.1: Start investigation for backtracking test")
|
||||
# Start a new investigation for testing refinement behaviour
|
||||
self.logger.info(" 1.2.1: Start investigation for refinement test")
|
||||
response1, continuation_id = self.call_mcp_tool(
|
||||
"debug",
|
||||
{
|
||||
@@ -251,7 +251,7 @@ RuntimeError: dictionary changed size during iteration
|
||||
)
|
||||
|
||||
if not response1 or not continuation_id:
|
||||
self.logger.error("Failed to start backtracking test investigation")
|
||||
self.logger.error("Failed to start refinement test investigation")
|
||||
return False
|
||||
|
||||
# Step 2: Wrong direction
|
||||
@@ -277,11 +277,11 @@ RuntimeError: dictionary changed size during iteration
|
||||
return False
|
||||
|
||||
# Step 3: Backtrack from step 2
|
||||
self.logger.info(" 1.2.3: Step 3 - Backtrack and revise approach")
|
||||
self.logger.info(" 1.2.3: Step 3 - Refine investigation path")
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"debug",
|
||||
{
|
||||
"step": "Backtracking - the issue might not be database related. Let me investigate the data processing algorithm instead.",
|
||||
"step": "Refocusing - the issue might not be database related. Let me investigate the data processing algorithm instead.",
|
||||
"step_number": 3,
|
||||
"total_steps": 4,
|
||||
"next_step_required": True,
|
||||
@@ -291,24 +291,23 @@ RuntimeError: dictionary changed size during iteration
|
||||
"relevant_context": ["DataProcessor.process_batch"],
|
||||
"hypothesis": "Inefficient algorithm causing performance issues",
|
||||
"confidence": "medium",
|
||||
"backtrack_from_step": 2, # Backtrack from step 2
|
||||
"continuation_id": continuation_id,
|
||||
},
|
||||
)
|
||||
|
||||
if not response3:
|
||||
self.logger.error("Failed to backtrack")
|
||||
self.logger.error("Failed to refine investigation")
|
||||
return False
|
||||
|
||||
response3_data = self._parse_debug_response(response3)
|
||||
if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_investigation"):
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Backtracking working correctly")
|
||||
self.logger.info(" ✅ Investigation refinement working correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Backtracking test failed: {e}")
|
||||
self.logger.error(f"Investigation refinement test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_complete_investigation_with_analysis(self) -> bool:
|
||||
|
||||
@@ -39,8 +39,8 @@ class PrecommitWorkflowValidationTest(ConversationBaseTest):
|
||||
if not self._test_single_validation_session():
|
||||
return False
|
||||
|
||||
# Test 2: Validation with backtracking
|
||||
if not self._test_validation_with_backtracking():
|
||||
# Test 2: Validation flow that requires refocusing
|
||||
if not self._test_validation_refocus_flow():
|
||||
return False
|
||||
|
||||
# Test 3: Complete validation with expert analysis
|
||||
@@ -263,13 +263,13 @@ REQUIREMENTS:
|
||||
self.logger.error(f"Single validation session test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_validation_with_backtracking(self) -> bool:
|
||||
"""Test validation with backtracking to revise findings"""
|
||||
def _test_validation_refocus_flow(self) -> bool:
|
||||
"""Test validation workflow that requires refocusing to revise findings"""
|
||||
try:
|
||||
self.logger.info(" 1.2: Testing validation with backtracking")
|
||||
self.logger.info(" 1.2: Testing validation refocus workflow")
|
||||
|
||||
# Start a new validation for testing backtracking
|
||||
self.logger.info(" 1.2.1: Start validation for backtracking test")
|
||||
# Start a new validation for testing refocus behaviour
|
||||
self.logger.info(" 1.2.1: Start validation for refocus test")
|
||||
response1, continuation_id = self.call_mcp_tool(
|
||||
"precommit",
|
||||
{
|
||||
@@ -285,7 +285,7 @@ REQUIREMENTS:
|
||||
)
|
||||
|
||||
if not response1 or not continuation_id:
|
||||
self.logger.error("Failed to start backtracking test validation")
|
||||
self.logger.error("Failed to start refocus test validation")
|
||||
return False
|
||||
|
||||
# Step 2: Wrong direction
|
||||
@@ -309,12 +309,12 @@ REQUIREMENTS:
|
||||
self.logger.error("Failed to continue to step 2")
|
||||
return False
|
||||
|
||||
# Step 3: Backtrack from step 2
|
||||
self.logger.info(" 1.2.3: Step 3 - Backtrack and revise approach")
|
||||
# Step 3: Shift investigation focus
|
||||
self.logger.info(" 1.2.3: Step 3 - Refocus and revise approach")
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"precommit",
|
||||
{
|
||||
"step": "Backtracking - the issue might not be database configuration. Let me examine the actual SQL queries and data access patterns instead.",
|
||||
"step": "Refocusing - the issue might not be database configuration. Let me examine the actual SQL queries and data access patterns instead.",
|
||||
"step_number": 3,
|
||||
"total_steps": 4,
|
||||
"next_step_required": True,
|
||||
@@ -326,24 +326,23 @@ REQUIREMENTS:
|
||||
{"severity": "medium", "description": "N+1 query pattern in user profile loading"}
|
||||
],
|
||||
# Assessment fields removed - using precommit_type instead
|
||||
"backtrack_from_step": 2, # Backtrack from step 2
|
||||
"continuation_id": continuation_id,
|
||||
},
|
||||
)
|
||||
|
||||
if not response3:
|
||||
self.logger.error("Failed to backtrack")
|
||||
self.logger.error("Failed to refocus")
|
||||
return False
|
||||
|
||||
response3_data = self._parse_precommit_response(response3)
|
||||
if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_validation"):
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Backtracking working correctly")
|
||||
self.logger.info(" ✅ Refocus flow working correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Backtracking test failed: {e}")
|
||||
self.logger.error(f"Refocus test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_complete_validation_with_analysis(self) -> bool:
|
||||
|
||||
@@ -38,8 +38,8 @@ class RefactorValidationTest(ConversationBaseTest):
|
||||
if not self._test_single_refactoring_session():
|
||||
return False
|
||||
|
||||
# Test 2: Refactoring analysis with backtracking
|
||||
if not self._test_refactoring_with_backtracking():
|
||||
# Test 2: Refactoring analysis requiring refocus
|
||||
if not self._test_refactoring_refocus_flow():
|
||||
return False
|
||||
|
||||
# Test 3: Complete refactoring analysis with expert analysis
|
||||
@@ -389,13 +389,13 @@ class UserData:
|
||||
self.logger.error(f"Single refactoring session test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_refactoring_with_backtracking(self) -> bool:
|
||||
"""Test refactoring analysis with backtracking to revise findings"""
|
||||
def _test_refactoring_refocus_flow(self) -> bool:
|
||||
"""Test refactoring analysis that shifts focus mid-investigation"""
|
||||
try:
|
||||
self.logger.info(" 1.2: Testing refactoring analysis with backtracking")
|
||||
self.logger.info(" 1.2: Testing refactoring analysis refocus workflow")
|
||||
|
||||
# Start a new refactoring analysis for testing backtracking
|
||||
self.logger.info(" 1.2.1: Start refactoring analysis for backtracking test")
|
||||
# Start a new refactoring analysis for testing refocus behaviour
|
||||
self.logger.info(" 1.2.1: Start refactoring analysis for refocus test")
|
||||
response1, continuation_id = self.call_mcp_tool(
|
||||
"refactor",
|
||||
{
|
||||
@@ -412,7 +412,7 @@ class UserData:
|
||||
)
|
||||
|
||||
if not response1 or not continuation_id:
|
||||
self.logger.error("Failed to start backtracking test refactoring analysis")
|
||||
self.logger.error("Failed to start refocus test refactoring analysis")
|
||||
return False
|
||||
|
||||
# Step 2: Wrong direction
|
||||
@@ -437,11 +437,11 @@ class UserData:
|
||||
return False
|
||||
|
||||
# Step 3: Backtrack from step 2
|
||||
self.logger.info(" 1.2.3: Step 3 - Backtrack and focus on function decomposition")
|
||||
self.logger.info(" 1.2.3: Step 3 - Refocus on function decomposition")
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"refactor",
|
||||
{
|
||||
"step": "Backtracking - the real decomposition opportunity is the god function process_everything. Let me analyze function-level refactoring instead.",
|
||||
"step": "Refocusing - the real decomposition opportunity is the god function process_everything. Let me analyze function-level refactoring instead.",
|
||||
"step_number": 3,
|
||||
"total_steps": 4,
|
||||
"next_step_required": True,
|
||||
@@ -462,13 +462,12 @@ class UserData:
|
||||
},
|
||||
],
|
||||
"confidence": "partial",
|
||||
"backtrack_from_step": 2, # Backtrack from step 2
|
||||
"continuation_id": continuation_id,
|
||||
},
|
||||
)
|
||||
|
||||
if not response3:
|
||||
self.logger.error("Failed to backtrack")
|
||||
self.logger.error("Failed to refocus")
|
||||
return False
|
||||
|
||||
response3_data = self._parse_refactor_response(response3)
|
||||
@@ -477,11 +476,11 @@ class UserData:
|
||||
):
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Backtracking working correctly for refactoring analysis")
|
||||
self.logger.info(" ✅ Refocus working correctly for refactoring analysis")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Refactoring backtracking test failed: {e}")
|
||||
self.logger.error(f"Refocusing test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_complete_refactoring_with_analysis(self) -> bool:
|
||||
|
||||
@@ -39,8 +39,8 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
|
||||
if not self._test_single_thinking_session():
|
||||
return False
|
||||
|
||||
# Test 2: Thinking with backtracking
|
||||
if not self._test_thinking_with_backtracking():
|
||||
# Test 2: Thinking flow that requires refocusing
|
||||
if not self._test_thinking_refocus_flow():
|
||||
return False
|
||||
|
||||
# Test 3: Complete thinking with expert analysis
|
||||
@@ -243,13 +243,13 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
|
||||
self.logger.error(f"Single thinking session test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_thinking_with_backtracking(self) -> bool:
|
||||
"""Test thinking with backtracking to revise analysis"""
|
||||
def _test_thinking_refocus_flow(self) -> bool:
|
||||
"""Test thinking workflow that shifts direction mid-analysis"""
|
||||
try:
|
||||
self.logger.info(" 1.2: Testing thinking with backtracking")
|
||||
self.logger.info(" 1.2: Testing thinking refocus workflow")
|
||||
|
||||
# Start a new thinking session for testing backtracking
|
||||
self.logger.info(" 1.2.1: Start thinking for backtracking test")
|
||||
# Start a new thinking session for testing refocus behaviour
|
||||
self.logger.info(" 1.2.1: Start thinking session for refocus test")
|
||||
response1, continuation_id = self.call_mcp_tool(
|
||||
"thinkdeep",
|
||||
{
|
||||
@@ -266,7 +266,7 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
|
||||
)
|
||||
|
||||
if not response1 or not continuation_id:
|
||||
self.logger.error("Failed to start backtracking test thinking")
|
||||
self.logger.error("Failed to start refocus test thinking")
|
||||
return False
|
||||
|
||||
# Step 2: Initial direction
|
||||
@@ -300,7 +300,7 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
|
||||
response3, _ = self.call_mcp_tool(
|
||||
"thinkdeep",
|
||||
{
|
||||
"step": "Backtracking - maybe shared database with service-specific schemas is better initially. Then gradually extract databases as services mature.",
|
||||
"step": "Refocusing - maybe shared database with service-specific schemas is better initially. Then gradually extract databases as services mature.",
|
||||
"step_number": 3,
|
||||
"total_steps": 4,
|
||||
"next_step_required": True,
|
||||
@@ -309,24 +309,23 @@ class ThinkDeepWorkflowValidationTest(ConversationBaseTest):
|
||||
"relevant_files": [self.architecture_file, self.requirements_file],
|
||||
"relevant_context": ["shared_database", "bounded_contexts", "gradual_extraction"],
|
||||
"confidence": "medium",
|
||||
"backtrack_from_step": 2, # Backtrack from step 2
|
||||
"continuation_id": continuation_id,
|
||||
},
|
||||
)
|
||||
|
||||
if not response3:
|
||||
self.logger.error("Failed to backtrack")
|
||||
self.logger.error("Failed to refocus")
|
||||
return False
|
||||
|
||||
response3_data = self._parse_thinkdeep_response(response3)
|
||||
if not self._validate_step_response(response3_data, 3, 4, True, "pause_for_thinkdeep"):
|
||||
return False
|
||||
|
||||
self.logger.info(" ✅ Backtracking working correctly")
|
||||
self.logger.info(" ✅ Refocus working correctly")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Backtracking test failed: {e}")
|
||||
self.logger.error(f"Refocus test failed: {e}")
|
||||
return False
|
||||
|
||||
def _test_complete_thinking_with_analysis(self) -> bool:
|
||||
|
||||
Reference in New Issue
Block a user