Major new addition: refactor tool

Supports decomposing large components and files, finding code smells, and finding modernization opportunities as well as code organization opportunities. Fix those mega-classes today! Line numbers are now added to embedded code so the model can give better references back to Claude.
2025-06-15 06:00:01 +04:00
parent 70f1356e3e
commit b5004b91fc
28 changed files with 2633 additions and 310 deletions
--- a/simulator_tests/test_refactor_validation.py
+++ b/simulator_tests/test_refactor_validation.py
@@ -0,0 +1,283 @@
+#!/usr/bin/env python3
+"""
+Refactor Tool Validation Test
+
+Tests the refactor tool with a simple code smell example to validate:
+- Proper execution with flash model
+- Correct line number references in response
+- Log validation for tool execution
+"""
+
+import json
+from .base_test import BaseSimulatorTest
+
+
+class RefactorValidationTest(BaseSimulatorTest):
+    """Test refactor tool with codesmells detection"""
+
+    @property
+    def test_name(self) -> str:
+        return "refactor_validation"
+
+    @property
+    def test_description(self) -> str:
+        return "Refactor tool validation with codesmells"
+
+    def run_test(self) -> bool:
+        """Test refactor tool with a simple code smell example"""
+        try:
+            self.logger.info("Test: Refactor tool validation")
+
+            # Setup test files directory first
+            self.setup_test_files()
+
+            # Create a simple Python file with obvious code smells
+            code_with_smells = '''# Code with obvious smells for testing
+def process_data(data):
+    # Code smell: Magic number
+    if len(data) > 42:
+        result = []
+        # Code smell: Nested loops with poor variable names
+        for i in range(len(data)):
+            for j in range(len(data[i])):
+                x = data[i][j]
+                # Code smell: Duplicate code
+                if x > 0:
+                    result.append(x * 2)
+                elif x < 0:
+                    result.append(x * 2)
+        return result
+    else:
+        # Code smell: Return inconsistent type
+        return None
+
+# Code smell: God function doing too many things
+def handle_everything(user_input, config, database):
+    # Validation
+    if not user_input:
+        print("Error: No input")  # Code smell: print instead of logging
+        return
+    
+    # Processing
+    processed = user_input.strip().lower()
+    
+    # Database operation
+    connection = database.connect()
+    data = connection.query("SELECT * FROM users")  # Code smell: SQL in code
+    
+    # Business logic mixed with data access
+    valid_users = []
+    for row in data:
+        if row[2] == processed:  # Code smell: Magic index
+            valid_users.append(row)
+    
+    return valid_users
+'''
+
+            # Create test file
+            test_file = self.create_additional_test_file("smelly_code.py", code_with_smells)
+            self.logger.info(f"  ✅ Created test file with code smells: {test_file}")
+
+            # Call refactor tool with codesmells type
+            self.logger.info("  📝 Calling refactor tool with codesmells type...")
+            response, _ = self.call_mcp_tool(
+                "refactor",
+                {
+                    "files": [test_file],
+                    "prompt": "Find and suggest fixes for code smells in this file",
+                    "refactor_type": "codesmells",
+                    "model": "flash",
+                    "thinking_mode": "low",  # Keep it fast for testing
+                }
+            )
+
+            if not response:
+                self.logger.error("Failed to get refactor response")
+                return False
+
+            self.logger.info("  ✅ Got refactor response")
+            
+            # Parse response to check for line references
+            try:
+                response_data = json.loads(response)
+                
+                # Debug: log the response structure
+                self.logger.debug(f"Response keys: {list(response_data.keys())}")
+                
+                # Extract the actual content if it's wrapped
+                if "content" in response_data:
+                    # The actual refactoring data is in the content field
+                    content = response_data["content"]
+                    # Remove markdown code block markers if present
+                    if content.startswith("```json"):
+                        content = content[7:]  # Remove ```json
+                    if content.endswith("```"):
+                        content = content[:-3]  # Remove ```
+                    content = content.strip()
+                    
+                    # Find the end of the JSON object - handle truncated responses
+                    # Count braces to find where the JSON ends
+                    brace_count = 0
+                    json_end = -1
+                    in_string = False
+                    escape_next = False
+                    
+                    for i, char in enumerate(content):
+                        if escape_next:
+                            escape_next = False
+                            continue
+                        if char == '\\':
+                            escape_next = True
+                            continue
+                        if char == '"' and not escape_next:
+                            in_string = not in_string
+                        if not in_string:
+                            if char == '{':
+                                brace_count += 1
+                            elif char == '}':
+                                brace_count -= 1
+                                if brace_count == 0:
+                                    json_end = i + 1
+                                    break
+                    
+                    if json_end > 0:
+                        content = content[:json_end]
+                    
+                    # Parse the inner JSON
+                    inner_data = json.loads(content)
+                    self.logger.debug(f"Inner data keys: {list(inner_data.keys())}")
+                else:
+                    inner_data = response_data
+                
+                # Check that we got refactoring suggestions (might be called refactor_opportunities)
+                refactorings_key = None
+                for key in ["refactorings", "refactor_opportunities"]:
+                    if key in inner_data:
+                        refactorings_key = key
+                        break
+                
+                if not refactorings_key:
+                    self.logger.error("No refactorings found in response")
+                    self.logger.error(f"Response structure: {json.dumps(inner_data, indent=2)[:500]}...")
+                    return False
+                
+                refactorings = inner_data[refactorings_key]
+                if not isinstance(refactorings, list) or len(refactorings) == 0:
+                    self.logger.error("Empty refactorings list")
+                    return False
+                
+                # Validate that we have line references for code smells
+                # Flash model typically detects these issues:
+                # - Lines 4-18: process_data function (magic number, nested loops, duplicate code)
+                # - Lines 11-14: duplicate code blocks
+                # - Lines 21-40: handle_everything god function
+                expected_line_ranges = [
+                    (4, 18),   # process_data function issues
+                    (11, 14),  # duplicate code
+                    (21, 40),  # god function
+                ]
+                
+                self.logger.debug(f"Refactorings found: {len(refactorings)}")
+                for i, ref in enumerate(refactorings[:3]):  # Log first 3
+                    self.logger.debug(f"Refactoring {i}: start_line={ref.get('start_line')}, end_line={ref.get('end_line')}, type={ref.get('type')}")
+                
+                found_references = []
+                for refactoring in refactorings:
+                    # Check for line numbers in various fields
+                    start_line = refactoring.get("start_line")
+                    end_line = refactoring.get("end_line")
+                    location = refactoring.get("location", "")
+                    
+                    # Add found line numbers
+                    if start_line:
+                        found_references.append(f"line {start_line}")
+                    if end_line and end_line != start_line:
+                        found_references.append(f"line {end_line}")
+                    
+                    # Also extract from location string
+                    import re
+                    line_matches = re.findall(r'line[s]?\s+(\d+)', location.lower())
+                    found_references.extend([f"line {num}" for num in line_matches])
+                
+                self.logger.info(f"  📍 Found line references: {found_references}")
+                
+                # Check that flash found the expected refactoring areas
+                found_ranges = []
+                for refactoring in refactorings:
+                    start = refactoring.get("start_line")
+                    end = refactoring.get("end_line")
+                    if start and end:
+                        found_ranges.append((start, end))
+                
+                self.logger.info(f"  📍 Found refactoring ranges: {found_ranges}")
+                
+                # Verify we found issues in the main problem areas
+                # Check if we have issues detected in process_data function area (lines 2-18)
+                process_data_issues = [r for r in found_ranges if r[0] >= 2 and r[1] <= 18]
+                # Check if we have issues detected in handle_everything function area (lines 21-40)
+                god_function_issues = [r for r in found_ranges if r[0] >= 21 and r[1] <= 40]
+                
+                self.logger.info(f"  📍 Issues in process_data area (lines 2-18): {len(process_data_issues)}")
+                self.logger.info(f"  📍 Issues in handle_everything area (lines 21-40): {len(god_function_issues)}")
+                
+                if len(process_data_issues) >= 1 and len(god_function_issues) >= 1:
+                    self.logger.info(f"  ✅ Flash correctly identified code smells in both major areas")
+                    self.logger.info(f"  ✅ Found {len(refactorings)} total refactoring opportunities")
+                    
+                    # Verify we have reasonable number of total issues
+                    if len(refactorings) >= 3:
+                        self.logger.info(f"  ✅ Refactoring analysis validation passed")
+                    else:
+                        self.logger.warning(f"  ⚠️ Only {len(refactorings)} refactorings found (expected >= 3)")
+                else:
+                    self.logger.error(f"  ❌ Flash didn't find enough issues in expected areas")
+                    self.logger.error(f"     - process_data area: found {len(process_data_issues)}, expected >= 1")
+                    self.logger.error(f"     - handle_everything area: found {len(god_function_issues)}, expected >= 1")
+                    return False
+                
+            except json.JSONDecodeError as e:
+                self.logger.error(f"Failed to parse refactor response as JSON: {e}")
+                return False
+            
+            # Validate logs
+            self.logger.info("  📋 Validating execution logs...")
+            
+            # Get server logs from the actual log file inside the container
+            result = self.run_command(
+                ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], 
+                capture_output=True
+            )
+            
+            if result.returncode == 0:
+                logs = result.stdout.decode() + result.stderr.decode()
+                
+                # Look for refactor tool execution patterns
+                refactor_patterns = [
+                    "[REFACTOR]",
+                    "refactor tool",
+                    "codesmells",
+                    "Token budget",
+                    "Code files embedded successfully"
+                ]
+                
+                patterns_found = 0
+                for pattern in refactor_patterns:
+                    if pattern in logs:
+                        patterns_found += 1
+                        self.logger.debug(f"  ✅ Found log pattern: {pattern}")
+                
+                if patterns_found >= 3:
+                    self.logger.info(f"  ✅ Log validation passed ({patterns_found}/{len(refactor_patterns)} patterns)")
+                else:
+                    self.logger.warning(f"  ⚠️ Only found {patterns_found}/{len(refactor_patterns)} log patterns")
+            else:
+                self.logger.warning("  ⚠️ Could not retrieve Docker logs")
+            
+            self.logger.info("  ✅ Refactor tool validation completed successfully")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Refactor validation test failed: {e}")
+            return False
+        finally:
+            self.cleanup_test_files()