Major new addition: refactor tool
Supports decomposing large components and files, finding code smells, and finding modernization opportunities as well as code organization opportunities. Fix those mega-classes today! Line numbers are now added to embedded code for better references from model -> claude
This commit is contained in:
283
simulator_tests/test_refactor_validation.py
Normal file
283
simulator_tests/test_refactor_validation.py
Normal file
@@ -0,0 +1,283 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Refactor Tool Validation Test
|
||||
|
||||
Tests the refactor tool with a simple code smell example to validate:
|
||||
- Proper execution with flash model
|
||||
- Correct line number references in response
|
||||
- Log validation for tool execution
|
||||
"""
|
||||
|
||||
import json
|
||||
from .base_test import BaseSimulatorTest
|
||||
|
||||
|
||||
class RefactorValidationTest(BaseSimulatorTest):
    """Simulator test that runs the refactor tool in ``codesmells`` mode.

    The test writes a small, deliberately smelly Python file, asks the
    refactor tool (flash model) to analyse it, and then checks that the
    reported line ranges fall inside the two functions of that file.
    Server log inspection is advisory only: it emits warnings but never
    fails the test.
    """

    # 1-based, inclusive line spans of the two functions inside _SMELLY_CODE.
    # Flash model typically reports issues at:
    # - Lines 4-18: process_data function (magic number, nested loops, duplicate code)
    # - Lines 11-14: duplicate code blocks
    # - Lines 21-40: handle_everything god function
    _PROCESS_DATA_LINES = (2, 18)
    _GOD_FUNCTION_LINES = (21, 40)

    # Keys under which the refactoring list may appear in the tool output.
    _REFACTORING_KEYS = ("refactorings", "refactor_opportunities")

    # Substrings looked for in the server log; at least _MIN_LOG_PATTERNS are
    # expected, otherwise a warning is logged.
    _LOG_PATTERNS = (
        "[REFACTOR]",
        "refactor tool",
        "codesmells",
        "Token budget",
        "Code files embedded successfully",
    )
    _MIN_LOG_PATTERNS = 3

    # Fixture source handed to the refactor tool. The line positions matter:
    # _PROCESS_DATA_LINES and _GOD_FUNCTION_LINES refer to this exact layout.
    _SMELLY_CODE = '''# Code with obvious smells for testing
def process_data(data):
    # Code smell: Magic number
    if len(data) > 42:
        result = []
        # Code smell: Nested loops with poor variable names
        for i in range(len(data)):
            for j in range(len(data[i])):
                x = data[i][j]
                # Code smell: Duplicate code
                if x > 0:
                    result.append(x * 2)
                elif x < 0:
                    result.append(x * 2)
        return result
    else:
        # Code smell: Return inconsistent type
        return None

# Code smell: God function doing too many things
def handle_everything(user_input, config, database):
    # Validation
    if not user_input:
        print("Error: No input")  # Code smell: print instead of logging
        return

    # Processing
    processed = user_input.strip().lower()

    # Database operation
    connection = database.connect()
    data = connection.query("SELECT * FROM users")  # Code smell: SQL in code

    # Business logic mixed with data access
    valid_users = []
    for row in data:
        if row[2] == processed:  # Code smell: Magic index
            valid_users.append(row)

    return valid_users
'''

    @property
    def test_name(self) -> str:
        """Short identifier of this test."""
        return "refactor_validation"

    @property
    def test_description(self) -> str:
        """Human-readable one-line description of this test."""
        return "Refactor tool validation with codesmells"

    def run_test(self) -> bool:
        """Test refactor tool with a simple code smell example.

        Returns:
            True when the tool responded with parseable JSON whose
            refactorings cover both fixture functions; False on any
            failure, including unexpected exceptions (which are logged).
            Test files are cleaned up in every case.
        """
        try:
            self.logger.info("Test: Refactor tool validation")

            # Setup test files directory first
            self.setup_test_files()

            test_file = self.create_additional_test_file("smelly_code.py", self._SMELLY_CODE)
            self.logger.info(f" ✅ Created test file with code smells: {test_file}")

            self.logger.info(" 📝 Calling refactor tool with codesmells type...")
            response, _ = self.call_mcp_tool(
                "refactor",
                {
                    "files": [test_file],
                    "prompt": "Find and suggest fixes for code smells in this file",
                    "refactor_type": "codesmells",
                    "model": "flash",
                    "thinking_mode": "low",  # Keep it fast for testing
                },
            )

            if not response:
                self.logger.error("Failed to get refactor response")
                return False

            self.logger.info(" ✅ Got refactor response")

            # Only the JSON decoding step can raise JSONDecodeError, so the
            # handler is scoped to it rather than to the whole validation.
            try:
                refactorings = self._extract_refactorings(response)
            except json.JSONDecodeError as e:
                self.logger.error(f"Failed to parse refactor response as JSON: {e}")
                return False

            if refactorings is None:
                return False

            if not self._validate_line_ranges(refactorings):
                return False

            self._validate_logs()

            self.logger.info(" ✅ Refactor tool validation completed successfully")
            return True

        except Exception as e:
            # Top-level boundary of the test: report and fail instead of crashing the runner.
            self.logger.error(f"Refactor validation test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()

    @staticmethod
    def _isolate_json_object(content: str) -> str:
        """Return the first balanced ``{...}`` object found in *content*.

        Markdown ```` ```json ```` fences are removed first. Braces inside
        JSON string literals are ignored. When no balanced object is found
        (for example because the response was truncated), the fence-stripped
        text is returned unchanged so that the caller's ``json.loads``
        reports the problem.
        """
        # Strip before looking for fences so surrounding whitespace cannot hide them.
        content = content.strip()
        if content.startswith("```json"):
            content = content[7:]  # Remove ```json
        if content.endswith("```"):
            content = content[:-3]  # Remove ```
        content = content.strip()

        depth = 0
        start = -1
        in_string = False
        escaped = False

        for index, char in enumerate(content):
            if in_string:
                # Backslash escapes only have meaning inside a string literal.
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == '"':
                    in_string = False
                continue

            if char == '"':
                in_string = True
            elif char == "{":
                if depth == 0:
                    start = index
                depth += 1
            elif char == "}" and depth > 0:
                depth -= 1
                if depth == 0:
                    # Slice from the opening brace so leading text does not break parsing.
                    return content[start : index + 1]

        return content

    @staticmethod
    def _as_line_number(value):
        """Coerce *value* to an int line number, or return None if it is not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _extract_refactorings(self, response):
        """Decode the tool response and return its non-empty refactoring list.

        Args:
            response: JSON text returned by the refactor tool call.

        Returns:
            The list stored under one of ``_REFACTORING_KEYS``, or None
            (after logging an error) when the key is missing or the list
            is empty.

        Raises:
            json.JSONDecodeError: if the response or its wrapped
                ``content`` payload is not valid JSON.
        """
        response_data = json.loads(response)

        # Debug: log the response structure
        self.logger.debug(f"Response keys: {list(response_data.keys())}")

        if "content" in response_data:
            # The actual refactoring data is in the content field, possibly
            # wrapped in a markdown code fence and/or truncated.
            content = self._isolate_json_object(response_data["content"])
            inner_data = json.loads(content)
            self.logger.debug(f"Inner data keys: {list(inner_data.keys())}")
        else:
            inner_data = response_data

        refactorings_key = next((key for key in self._REFACTORING_KEYS if key in inner_data), None)
        if refactorings_key is None:
            self.logger.error("No refactorings found in response")
            self.logger.error(f"Response structure: {json.dumps(inner_data, indent=2)[:500]}...")
            return None

        refactorings = inner_data[refactorings_key]
        if not isinstance(refactorings, list) or len(refactorings) == 0:
            self.logger.error("Empty refactorings list")
            return None

        return refactorings

    def _validate_line_ranges(self, refactorings) -> bool:
        """Check that refactorings were reported inside both fixture functions.

        A refactoring counts for an area when its ``start_line`` and
        ``end_line`` both lie within that area's span. Fewer than three
        refactorings in total only produces a warning.

        Returns:
            True when each of the two areas has at least one refactoring.
        """
        import re  # local import, done once instead of on every loop iteration

        line_pattern = re.compile(r"line[s]?\s+(\d+)")

        self.logger.debug(f"Refactorings found: {len(refactorings)}")
        for i, ref in enumerate(refactorings[:3]):  # Log first 3
            self.logger.debug(
                f"Refactoring {i}: start_line={ref.get('start_line')}, "
                f"end_line={ref.get('end_line')}, type={ref.get('type')}"
            )

        found_references = []
        found_ranges = []
        for refactoring in refactorings:
            start_line = refactoring.get("start_line")
            end_line = refactoring.get("end_line")
            # A null or non-string location must not abort the validation.
            location = str(refactoring.get("location") or "")

            # Collect every line mention for the informational log below.
            if start_line:
                found_references.append(f"line {start_line}")
            if end_line and end_line != start_line:
                found_references.append(f"line {end_line}")
            found_references.extend(f"line {num}" for num in line_pattern.findall(location.lower()))

            # Numeric strings such as "12" are accepted as line numbers.
            start = self._as_line_number(start_line)
            end = self._as_line_number(end_line)
            if start and end:
                found_ranges.append((start, end))

        self.logger.info(f" 📍 Found line references: {found_references}")
        self.logger.info(f" 📍 Found refactoring ranges: {found_ranges}")

        # Verify we found issues in the main problem areas
        pd_first, pd_last = self._PROCESS_DATA_LINES
        god_first, god_last = self._GOD_FUNCTION_LINES
        process_data_issues = [r for r in found_ranges if r[0] >= pd_first and r[1] <= pd_last]
        god_function_issues = [r for r in found_ranges if r[0] >= god_first and r[1] <= god_last]

        self.logger.info(
            f" 📍 Issues in process_data area (lines {pd_first}-{pd_last}): {len(process_data_issues)}"
        )
        self.logger.info(
            f" 📍 Issues in handle_everything area (lines {god_first}-{god_last}): {len(god_function_issues)}"
        )

        if not process_data_issues or not god_function_issues:
            self.logger.error(" ❌ Flash didn't find enough issues in expected areas")
            self.logger.error(f" - process_data area: found {len(process_data_issues)}, expected >= 1")
            self.logger.error(f" - handle_everything area: found {len(god_function_issues)}, expected >= 1")
            return False

        self.logger.info(" ✅ Flash correctly identified code smells in both major areas")
        self.logger.info(f" ✅ Found {len(refactorings)} total refactoring opportunities")

        # Verify we have reasonable number of total issues
        if len(refactorings) >= 3:
            self.logger.info(" ✅ Refactoring analysis validation passed")
        else:
            self.logger.warning(f" ⚠️ Only {len(refactorings)} refactorings found (expected >= 3)")

        return True

    def _validate_logs(self) -> None:
        """Look for refactor-related markers in the server log.

        Advisory only: the outcome is logged (info or warning) and never
        affects the test result.
        """
        self.logger.info(" 📋 Validating execution logs...")

        # Get server logs from the actual log file inside the container
        result = self.run_command(
            ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"],
            capture_output=True,
        )

        if result.returncode != 0:
            self.logger.warning(" ⚠️ Could not retrieve Docker logs")
            return

        logs = result.stdout.decode() + result.stderr.decode()

        # Look for refactor tool execution patterns
        matched = [pattern for pattern in self._LOG_PATTERNS if pattern in logs]
        for pattern in matched:
            self.logger.debug(f" ✅ Found log pattern: {pattern}")

        patterns_found = len(matched)
        total = len(self._LOG_PATTERNS)
        if patterns_found >= self._MIN_LOG_PATTERNS:
            self.logger.info(f" ✅ Log validation passed ({patterns_found}/{total} patterns)")
        else:
            self.logger.warning(f" ⚠️ Only found {patterns_found}/{total} log patterns")
|
||||
Reference in New Issue
Block a user