Ensure duplicate file references are gracefully handled
Improved prompt to encourage immediate action
@@ -282,11 +282,13 @@ class TestComprehensive(unittest.TestCase):
         raw_response = "Generated test cases with edge cases"
         formatted = tool.format_response(raw_response, request)
 
-        # Check formatting includes next steps
+        # Check formatting includes new action-oriented next steps
         assert raw_response in formatted
-        assert "**Next Steps:**" in formatted
-        assert "Create and save the test files" in formatted
-        assert "Run the tests" in formatted
+        assert "IMMEDIATE ACTION REQUIRED" in formatted
+        assert "ULTRATHINK" in formatted
+        assert "CREATE" in formatted
+        assert "VALIDATE BY EXECUTION" in formatted
+        assert "MANDATORY" in formatted
 
     @pytest.mark.asyncio
     async def test_error_handling_invalid_files(self, tool):
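These assertions track markers from the action-oriented footer that `format_response` gains later in this commit (final hunk below). As a rough, hypothetical sketch of why they pass — not the real `TestGenTool.format_response`, which also receives the request object — the wrapper boils down to appending a fixed footer to the raw model output:

```python
# Toy stand-in for the formatting step exercised by the assertions above.
# The real method lives on TestGenTool and also takes the request; only the
# "keep raw output, append action footer" shape is shown here.
ACTION_FOOTER = """
---

# IMMEDIATE ACTION REQUIRED

Claude, you are now in EXECUTION MODE. Take immediate action:
"""


def format_response(raw_response: str) -> str:
    # Preserve the generated tests verbatim and append the execution checklist.
    return raw_response + ACTION_FOOTER


formatted = format_response("Generated test cases with edge cases")
assert "Generated test cases with edge cases" in formatted
assert "IMMEDIATE ACTION REQUIRED" in formatted
```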
@@ -379,3 +381,98 @@ class TestComprehensive(unittest.TestCase):
         # Should not contain web search instructions
         assert "WEB SEARCH CAPABILITY" not in prompt
         assert "web search" not in prompt.lower()
+
+    @pytest.mark.asyncio
+    async def test_duplicate_file_deduplication(self, tool, temp_files):
+        """Test that duplicate files are removed from code files when they appear in test_examples"""
+        # Create a scenario where the same file appears in both files and test_examples
+        duplicate_file = temp_files["code_file"]
+
+        request = TestGenRequest(
+            files=[duplicate_file, temp_files["large_test"]],  # code_file appears in both
+            prompt="Generate tests",
+            test_examples=[temp_files["small_test"], duplicate_file],  # code_file also here
+        )
+
+        # Track the actual files passed to _prepare_file_content_for_prompt
+        captured_calls = []
+
+        def capture_prepare_calls(files, *args, **kwargs):
+            captured_calls.append(("prepare", files))
+            return "mocked content"
+
+        with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls):
+            await tool.prepare_prompt(request)
+
+        # Should have been called twice: once for test examples, once for code files
+        assert len(captured_calls) == 2
+
+        # First call should be for test examples processing (via _process_test_examples)
+        captured_calls[0][1]
+        # Second call should be for deduplicated code files
+        code_files = captured_calls[1][1]
+
+        # duplicate_file should NOT be in code files (removed due to duplication)
+        assert duplicate_file not in code_files
+        # temp_files["large_test"] should still be there (not duplicated)
+        assert temp_files["large_test"] in code_files
+
+    @pytest.mark.asyncio
+    async def test_no_deduplication_when_no_test_examples(self, tool, temp_files):
+        """Test that no deduplication occurs when test_examples is None/empty"""
+        request = TestGenRequest(
+            files=[temp_files["code_file"], temp_files["large_test"]],
+            prompt="Generate tests",
+            # No test_examples
+        )
+
+        with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare:
+            mock_prepare.return_value = "mocked content"
+
+            await tool.prepare_prompt(request)
+
+        # Should only be called once (for code files, no test examples)
+        assert mock_prepare.call_count == 1
+
+        # All original files should be passed through
+        code_files_call = mock_prepare.call_args_list[0]
+        code_files = code_files_call[0][0]
+        assert temp_files["code_file"] in code_files
+        assert temp_files["large_test"] in code_files
+
+    @pytest.mark.asyncio
+    async def test_path_normalization_in_deduplication(self, tool, temp_files):
+        """Test that path normalization works correctly for deduplication"""
+        import os
+
+        # Create variants of the same path (with and without normalization)
+        base_file = temp_files["code_file"]
+        # Add some path variations that should normalize to the same file
+        variant_path = os.path.join(os.path.dirname(base_file), ".", os.path.basename(base_file))
+
+        request = TestGenRequest(
+            files=[variant_path, temp_files["large_test"]],  # variant path in files
+            prompt="Generate tests",
+            test_examples=[base_file],  # base path in test_examples
+        )
+
+        # Track the actual files passed to _prepare_file_content_for_prompt
+        captured_calls = []
+
+        def capture_prepare_calls(files, *args, **kwargs):
+            captured_calls.append(("prepare", files))
+            return "mocked content"
+
+        with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls):
+            await tool.prepare_prompt(request)
+
+        # Should have been called twice: once for test examples, once for code files
+        assert len(captured_calls) == 2
+
+        # Second call should be for code files
+        code_files = captured_calls[1][1]
+
+        # variant_path should be removed due to normalization matching base_file
+        assert variant_path not in code_files
+        # large_test should still be there
+        assert temp_files["large_test"] in code_files
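The three new tests above share one capture idiom: patch the tool's `_prepare_file_content_for_prompt` with a `side_effect` callable that records the file list it was handed and returns a stub string. A minimal, self-contained sketch of that idiom (`FakeTool` is a made-up stand-in, not the project's real tool class):

```python
# Minimal sketch of the patch.object(..., side_effect=...) capture pattern
# used by the new deduplication tests. FakeTool is a hypothetical stand-in;
# only the mocking idiom itself mirrors the tests above.
from unittest.mock import patch


class FakeTool:
    def prepare_prompt(self, files):
        # In the real tool this is where the (deduplicated) file list is embedded.
        return self._prepare_file_content_for_prompt(files)

    def _prepare_file_content_for_prompt(self, files):
        return "real file content"


captured_calls = []


def capture_prepare_calls(files, *args, **kwargs):
    # Record exactly what the tool passed, then return a stub so the caller
    # keeps running without reading any files.
    captured_calls.append(("prepare", files))
    return "mocked content"


tool = FakeTool()
with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls):
    tool.prepare_prompt(["a.py", "b.py"])

assert captured_calls == [("prepare", ["a.py", "b.py"])]
```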
@@ -325,6 +325,25 @@ class TestGenTool(BaseTool):
         else:
             logger.info("[TESTGEN] No test examples content after processing")
 
+        # Remove files that appear in both 'files' and 'test_examples' to avoid duplicate embedding
+        # Files in test_examples take precedence as they're used for pattern reference
+        code_files_to_process = request.files.copy()
+        if request.test_examples:
+            # Normalize paths for comparison (resolve any relative paths, handle case sensitivity)
+            test_example_set = {os.path.normpath(os.path.abspath(f)) for f in request.test_examples}
+            original_count = len(code_files_to_process)
+
+            code_files_to_process = [
+                f for f in code_files_to_process if os.path.normpath(os.path.abspath(f)) not in test_example_set
+            ]
+
+            duplicates_removed = original_count - len(code_files_to_process)
+            if duplicates_removed > 0:
+                logger.info(
+                    f"[TESTGEN] Removed {duplicates_removed} duplicate files from code files list "
+                    f"(already included in test examples for pattern reference)"
+                )
+
         # Calculate remaining tokens for main code after test examples
         if test_examples_content and available_tokens:
             from utils.token_utils import estimate_tokens
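Pulled out of `prepare_prompt`, the deduplication added above reduces to one path normalization plus one set lookup. A standalone sketch under those assumptions (`dedupe_code_files` is a hypothetical helper name; the committed code runs this inline and logs how many duplicates were dropped):

```python
# Standalone sketch of the deduplication logic added in the hunk above.
# dedupe_code_files is a hypothetical helper; the committed code performs the
# same filtering inline inside prepare_prompt.
import os
from typing import Optional


def dedupe_code_files(files: list[str], test_examples: Optional[list[str]]) -> list[str]:
    """Drop entries from `files` that already appear in `test_examples`."""
    if not test_examples:
        return list(files)
    # abspath + normpath collapses "./", "..", and relative prefixes so that
    # different spellings of the same path compare equal.
    example_set = {os.path.normpath(os.path.abspath(f)) for f in test_examples}
    return [f for f in files if os.path.normpath(os.path.abspath(f)) not in example_set]


# "./src/utils.py" normalizes to the same path as "src/utils.py" and is removed.
print(dedupe_code_files(["./src/utils.py", "src/main.py"], ["src/utils.py"]))
# -> ['src/main.py']
```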
@@ -341,10 +360,10 @@ class TestGenTool(BaseTool):
                 f"[TESTGEN] Token allocation: {remaining_tokens:,} tokens available for code files (no test examples)"
             )
 
-        # Use centralized file processing logic for main code files
-        logger.debug(f"[TESTGEN] Preparing {len(request.files)} code files for analysis")
+        # Use centralized file processing logic for main code files (after deduplication)
+        logger.debug(f"[TESTGEN] Preparing {len(code_files_to_process)} code files for analysis")
         code_content = self._prepare_file_content_for_prompt(
-            request.files, continuation_id, "Code to test", max_tokens=remaining_tokens, reserve_tokens=2000
+            code_files_to_process, continuation_id, "Code to test", max_tokens=remaining_tokens, reserve_tokens=2000
         )
 
         if code_content:
@@ -417,18 +436,41 @@ class TestGenTool(BaseTool):
 
 ---
 
-**Next Steps:**
+# IMMEDIATE ACTION REQUIRED
 
-Claude must now:
+Claude, you are now in EXECUTION MODE. Take immediate action:
 
-1. **Create and save the test files** - Write the generated tests to appropriate test files in your project structure
+## Step 1: ULTRATHINK & CREATE TESTS
+ULTRATHINK while creating these tests. Verify EVERY code reference, import, function name, and logic path is
+100% accurate before saving.
 
-2. **Display to the user** - Show each new test file/function created with a brief line explaining what it covers
+- **CREATE** all test files in the correct project structure
+- **SAVE** each test with proper naming conventions
+- **VALIDATE** all imports, references, and dependencies are correct as required by the current framework
 
-3. **Install any missing test dependencies** - Set up required testing frameworks if not already available
+## Step 2: DISPLAY RESULTS TO USER
+After creating each test file, show the user:
+```
+✅ Created: path/to/test_file.py
+- test_function_name(): Brief description of what it tests
+- test_another_function(): Brief description
+- [Total: X test functions]
+```
 
-4. **Run the tests** - Execute the test suite to verify functionality and fix any issues
+## Step 3: VALIDATE BY EXECUTION
+**MANDATORY**: Run the tests immediately to confirm they work:
+- Install any missing dependencies first
+- Execute the test suite
+- Fix any failures or errors
+- Confirm 100% pass rate
 
-5. **Integrate the tests** - Ensure tests are properly connected to your existing test infrastructure
+## Step 4: INTEGRATION VERIFICATION
+- Verify tests integrate with existing test infrastructure
+- Confirm test discovery works
+- Validate test naming and organization
 
-The tests are ready for immediate implementation and integration into your codebase."""
+## Step 5: MOVE TO NEXT ACTION
+Once tests are confirmed working, immediately proceed to the next logical step for the project.
+
+**CRITICAL**: Do NOT stop after generating - you MUST create, validate, run, and confirm the tests work.
+Take full ownership of the testing implementation and move to your next work."""