Docs added to show how a new provider is added

Docs added to show how a new tool is created. All tools should add line numbers to code so that models can reference specific lines if needed. Enabled line numbering for code for all tools to use. Additional tests to validate that line numbering is not added to git diffs.
2025-06-15 07:02:27 +04:00
parent b5004b91fc
commit 99fab3e83d
27 changed files with 2511 additions and 143 deletions
--- a/simulator_tests/test_refactor_validation.py
+++ b/simulator_tests/test_refactor_validation.py
@@ -9,6 +9,7 @@ Tests the refactor tool with a simple code smell example to validate:
 """

 import json
+
 from .base_test import BaseSimulatorTest


@@ -32,7 +33,7 @@ class RefactorValidationTest(BaseSimulatorTest):
            self.setup_test_files()

            # Create a simple Python file with obvious code smells
-            code_with_smells = '''# Code with obvious smells for testing
+            code_with_smells = """# Code with obvious smells for testing
 def process_data(data):
    # Code smell: Magic number
    if len(data) > 42:
@@ -57,22 +58,22 @@ def handle_everything(user_input, config, database):
    if not user_input:
        print("Error: No input")  # Code smell: print instead of logging
        return
-    
+
    # Processing
    processed = user_input.strip().lower()
-    
+
    # Database operation
    connection = database.connect()
    data = connection.query("SELECT * FROM users")  # Code smell: SQL in code
-    
+
    # Business logic mixed with data access
    valid_users = []
    for row in data:
        if row[2] == processed:  # Code smell: Magic index
            valid_users.append(row)
-    
+
    return valid_users
-'''
+"""

            # Create test file
            test_file = self.create_additional_test_file("smelly_code.py", code_with_smells)
@@ -88,7 +89,7 @@ def handle_everything(user_input, config, database):
                    "refactor_type": "codesmells",
                    "model": "flash",
                    "thinking_mode": "low",  # Keep it fast for testing
-                }
+                },
            )

            if not response:
@@ -96,14 +97,14 @@ def handle_everything(user_input, config, database):
                return False

            self.logger.info("  ✅ Got refactor response")
-            
+
            # Parse response to check for line references
            try:
                response_data = json.loads(response)
-                
+
                # Debug: log the response structure
                self.logger.debug(f"Response keys: {list(response_data.keys())}")
-                
+
                # Extract the actual content if it's wrapped
                if "content" in response_data:
                    # The actual refactoring data is in the content field
@@ -114,93 +115,91 @@ def handle_everything(user_input, config, database):
                    if content.endswith("```"):
                        content = content[:-3]  # Remove ```
                    content = content.strip()
-                    
+
                    # Find the end of the JSON object - handle truncated responses
                    # Count braces to find where the JSON ends
                    brace_count = 0
                    json_end = -1
                    in_string = False
                    escape_next = False
-                    
+
                    for i, char in enumerate(content):
                        if escape_next:
                            escape_next = False
                            continue
-                        if char == '\\':
+                        if char == "\\":
                            escape_next = True
                            continue
                        if char == '"' and not escape_next:
                            in_string = not in_string
                        if not in_string:
-                            if char == '{':
+                            if char == "{":
                                brace_count += 1
-                            elif char == '}':
+                            elif char == "}":
                                brace_count -= 1
                                if brace_count == 0:
                                    json_end = i + 1
                                    break
-                    
+
                    if json_end > 0:
                        content = content[:json_end]
-                    
+
                    # Parse the inner JSON
                    inner_data = json.loads(content)
                    self.logger.debug(f"Inner data keys: {list(inner_data.keys())}")
                else:
                    inner_data = response_data
-                
+
                # Check that we got refactoring suggestions (might be called refactor_opportunities)
                refactorings_key = None
                for key in ["refactorings", "refactor_opportunities"]:
                    if key in inner_data:
                        refactorings_key = key
                        break
-                
+
                if not refactorings_key:
                    self.logger.error("No refactorings found in response")
                    self.logger.error(f"Response structure: {json.dumps(inner_data, indent=2)[:500]}...")
                    return False
-                
+
                refactorings = inner_data[refactorings_key]
                if not isinstance(refactorings, list) or len(refactorings) == 0:
                    self.logger.error("Empty refactorings list")
                    return False
-                
+
                # Validate that we have line references for code smells
                # Flash model typically detects these issues:
                # - Lines 4-18: process_data function (magic number, nested loops, duplicate code)
                # - Lines 11-14: duplicate code blocks
                # - Lines 21-40: handle_everything god function
-                expected_line_ranges = [
-                    (4, 18),   # process_data function issues
-                    (11, 14),  # duplicate code
-                    (21, 40),  # god function
-                ]
-                
+
                self.logger.debug(f"Refactorings found: {len(refactorings)}")
                for i, ref in enumerate(refactorings[:3]):  # Log first 3
-                    self.logger.debug(f"Refactoring {i}: start_line={ref.get('start_line')}, end_line={ref.get('end_line')}, type={ref.get('type')}")
-                
+                    self.logger.debug(
+                        f"Refactoring {i}: start_line={ref.get('start_line')}, end_line={ref.get('end_line')}, type={ref.get('type')}"
+                    )
+
                found_references = []
                for refactoring in refactorings:
                    # Check for line numbers in various fields
                    start_line = refactoring.get("start_line")
                    end_line = refactoring.get("end_line")
                    location = refactoring.get("location", "")
-                    
+
                    # Add found line numbers
                    if start_line:
                        found_references.append(f"line {start_line}")
                    if end_line and end_line != start_line:
                        found_references.append(f"line {end_line}")
-                    
+
                    # Also extract from location string
                    import re
-                    line_matches = re.findall(r'line[s]?\s+(\d+)', location.lower())
+
+                    line_matches = re.findall(r"line[s]?\s+(\d+)", location.lower())
                    found_references.extend([f"line {num}" for num in line_matches])
-                
+
                self.logger.info(f"  📍 Found line references: {found_references}")
-                
+
                # Check that flash found the expected refactoring areas
                found_ranges = []
                for refactoring in refactorings:
@@ -208,71 +207,70 @@ def handle_everything(user_input, config, database):
                    end = refactoring.get("end_line")
                    if start and end:
                        found_ranges.append((start, end))
-                
+
                self.logger.info(f"  📍 Found refactoring ranges: {found_ranges}")
-                
+
                # Verify we found issues in the main problem areas
                # Check if we have issues detected in process_data function area (lines 2-18)
                process_data_issues = [r for r in found_ranges if r[0] >= 2 and r[1] <= 18]
                # Check if we have issues detected in handle_everything function area (lines 21-40)
                god_function_issues = [r for r in found_ranges if r[0] >= 21 and r[1] <= 40]
-                
+
                self.logger.info(f"  📍 Issues in process_data area (lines 2-18): {len(process_data_issues)}")
                self.logger.info(f"  📍 Issues in handle_everything area (lines 21-40): {len(god_function_issues)}")
-                
+
                if len(process_data_issues) >= 1 and len(god_function_issues) >= 1:
-                    self.logger.info(f"  ✅ Flash correctly identified code smells in both major areas")
+                    self.logger.info("  ✅ Flash correctly identified code smells in both major areas")
                    self.logger.info(f"  ✅ Found {len(refactorings)} total refactoring opportunities")
-                    
+
                    # Verify we have reasonable number of total issues
                    if len(refactorings) >= 3:
-                        self.logger.info(f"  ✅ Refactoring analysis validation passed")
+                        self.logger.info("  ✅ Refactoring analysis validation passed")
                    else:
                        self.logger.warning(f"  ⚠️ Only {len(refactorings)} refactorings found (expected >= 3)")
                else:
-                    self.logger.error(f"  ❌ Flash didn't find enough issues in expected areas")
+                    self.logger.error("  ❌ Flash didn't find enough issues in expected areas")
                    self.logger.error(f"     - process_data area: found {len(process_data_issues)}, expected >= 1")
                    self.logger.error(f"     - handle_everything area: found {len(god_function_issues)}, expected >= 1")
                    return False
-                
+
            except json.JSONDecodeError as e:
                self.logger.error(f"Failed to parse refactor response as JSON: {e}")
                return False
-            
+
            # Validate logs
            self.logger.info("  📋 Validating execution logs...")
-            
+
            # Get server logs from the actual log file inside the container
            result = self.run_command(
-                ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], 
-                capture_output=True
+                ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True
            )
-            
+
            if result.returncode == 0:
                logs = result.stdout.decode() + result.stderr.decode()
-                
+
                # Look for refactor tool execution patterns
                refactor_patterns = [
                    "[REFACTOR]",
                    "refactor tool",
                    "codesmells",
                    "Token budget",
-                    "Code files embedded successfully"
+                    "Code files embedded successfully",
                ]
-                
+
                patterns_found = 0
                for pattern in refactor_patterns:
                    if pattern in logs:
                        patterns_found += 1
                        self.logger.debug(f"  ✅ Found log pattern: {pattern}")
-                
+
                if patterns_found >= 3:
                    self.logger.info(f"  ✅ Log validation passed ({patterns_found}/{len(refactor_patterns)} patterns)")
                else:
                    self.logger.warning(f"  ⚠️ Only found {patterns_found}/{len(refactor_patterns)} log patterns")
            else:
                self.logger.warning("  ⚠️ Could not retrieve Docker logs")
-            
+
            self.logger.info("  ✅ Refactor tool validation completed successfully")
            return True

@@ -280,4 +278,4 @@ def handle_everything(user_input, config, database):
            self.logger.error(f"Refactor validation test failed: {e}")
            return False
        finally:
-            self.cleanup_test_files()
+            self.cleanup_test_files()