Ensure duplicate file references are gracefully handled

Improved prompt to encourage immediate action
Fahad
2025-06-14 16:37:02 +04:00
parent ec707e021a
commit d0d0a171dc
2 changed files with 154 additions and 15 deletions

View File

@@ -282,11 +282,13 @@ class TestComprehensive(unittest.TestCase):
         raw_response = "Generated test cases with edge cases"
         formatted = tool.format_response(raw_response, request)
-        # Check formatting includes next steps
+        # Check formatting includes new action-oriented next steps
         assert raw_response in formatted
-        assert "**Next Steps:**" in formatted
-        assert "Create and save the test files" in formatted
-        assert "Run the tests" in formatted
+        assert "IMMEDIATE ACTION REQUIRED" in formatted
+        assert "ULTRATHINK" in formatted
+        assert "CREATE" in formatted
+        assert "VALIDATE BY EXECUTION" in formatted
+        assert "MANDATORY" in formatted

     @pytest.mark.asyncio
     async def test_error_handling_invalid_files(self, tool):
@@ -379,3 +381,98 @@ class TestComprehensive(unittest.TestCase):
         # Should not contain web search instructions
         assert "WEB SEARCH CAPABILITY" not in prompt
         assert "web search" not in prompt.lower()

+    @pytest.mark.asyncio
+    async def test_duplicate_file_deduplication(self, tool, temp_files):
+        """Test that duplicate files are removed from code files when they appear in test_examples"""
+        # Create a scenario where the same file appears in both files and test_examples
+        duplicate_file = temp_files["code_file"]
+        request = TestGenRequest(
+            files=[duplicate_file, temp_files["large_test"]],  # code_file appears in both
+            prompt="Generate tests",
+            test_examples=[temp_files["small_test"], duplicate_file],  # code_file also here
+        )
+
+        # Track the actual files passed to _prepare_file_content_for_prompt
+        captured_calls = []
+
+        def capture_prepare_calls(files, *args, **kwargs):
+            captured_calls.append(("prepare", files))
+            return "mocked content"
+
+        with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls):
+            await tool.prepare_prompt(request)
+
+        # Should have been called twice: once for test examples, once for code files
+        assert len(captured_calls) == 2
+        # First call should be for test examples processing (via _process_test_examples)
+        test_example_files = captured_calls[0][1]
+        assert duplicate_file in test_example_files
+        # Second call should be for deduplicated code files
+        code_files = captured_calls[1][1]
+        # duplicate_file should NOT be in code files (removed due to duplication)
+        assert duplicate_file not in code_files
+        # temp_files["large_test"] should still be there (not duplicated)
+        assert temp_files["large_test"] in code_files
+
+    @pytest.mark.asyncio
+    async def test_no_deduplication_when_no_test_examples(self, tool, temp_files):
+        """Test that no deduplication occurs when test_examples is None/empty"""
+        request = TestGenRequest(
+            files=[temp_files["code_file"], temp_files["large_test"]],
+            prompt="Generate tests",
+            # No test_examples
+        )
+
+        with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare:
+            mock_prepare.return_value = "mocked content"
+            await tool.prepare_prompt(request)
+
+        # Should only be called once (for code files, no test examples)
+        assert mock_prepare.call_count == 1
+
+        # All original files should be passed through
+        code_files_call = mock_prepare.call_args_list[0]
+        code_files = code_files_call[0][0]
+        assert temp_files["code_file"] in code_files
+        assert temp_files["large_test"] in code_files
+    @pytest.mark.asyncio
+    async def test_path_normalization_in_deduplication(self, tool, temp_files):
+        """Test that path normalization works correctly for deduplication"""
+        import os
+
+        # Create a variant of the same path that should normalize to the same file
+        base_file = temp_files["code_file"]
+        variant_path = os.path.join(os.path.dirname(base_file), ".", os.path.basename(base_file))
+
+        request = TestGenRequest(
+            files=[variant_path, temp_files["large_test"]],  # variant path in files
+            prompt="Generate tests",
+            test_examples=[base_file],  # base path in test_examples
+        )
+
+        # Track the actual files passed to _prepare_file_content_for_prompt
+        captured_calls = []
+
+        def capture_prepare_calls(files, *args, **kwargs):
+            captured_calls.append(("prepare", files))
+            return "mocked content"
+
+        with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls):
+            await tool.prepare_prompt(request)
+
+        # Should have been called twice: once for test examples, once for code files
+        assert len(captured_calls) == 2
+
+        # Second call should be for code files
+        code_files = captured_calls[1][1]
+        # variant_path should be removed due to normalization matching base_file
+        assert variant_path not in code_files
+        # large_test should still be there
+        assert temp_files["large_test"] in code_files
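
For reference (outside the diff), here is a minimal sketch of the path equivalence that `test_path_normalization_in_deduplication` relies on. The paths are hypothetical; the point is that `os.path.abspath` plus `os.path.normpath` collapse the redundant `.` segment, so the variant and base forms compare equal:

```python
import os

# Hypothetical POSIX-style paths, for illustration only
base = "/tmp/project/module.py"
variant = os.path.join("/tmp/project", ".", "module.py")  # -> "/tmp/project/./module.py"

# abspath + normpath collapse the "." segment, so both forms normalize identically
assert os.path.normpath(os.path.abspath(variant)) == os.path.normpath(os.path.abspath(base))
print("normalized:", os.path.normpath(os.path.abspath(variant)))
```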

View File

@@ -325,6 +325,25 @@ class TestGenTool(BaseTool):
         else:
             logger.info("[TESTGEN] No test examples content after processing")

+        # Remove files that appear in both 'files' and 'test_examples' to avoid duplicate embedding.
+        # Files in test_examples take precedence, as they are used for pattern reference.
+        code_files_to_process = request.files.copy()
+        if request.test_examples:
+            # Normalize paths for comparison (make paths absolute, collapse redundant separators and "." segments)
+            test_example_set = {os.path.normpath(os.path.abspath(f)) for f in request.test_examples}
+            original_count = len(code_files_to_process)
+            code_files_to_process = [
+                f for f in code_files_to_process if os.path.normpath(os.path.abspath(f)) not in test_example_set
+            ]
+            duplicates_removed = original_count - len(code_files_to_process)
+            if duplicates_removed > 0:
+                logger.info(
+                    f"[TESTGEN] Removed {duplicates_removed} duplicate file(s) from the code files list "
+                    f"(already included in test examples for pattern reference)"
+                )

         # Calculate remaining tokens for main code after test examples
         if test_examples_content and available_tokens:
             from utils.token_utils import estimate_tokens
@@ -341,10 +360,10 @@ class TestGenTool(BaseTool):
f"[TESTGEN] Token allocation: {remaining_tokens:,} tokens available for code files (no test examples)"
)
# Use centralized file processing logic for main code files
logger.debug(f"[TESTGEN] Preparing {len(request.files)} code files for analysis")
# Use centralized file processing logic for main code files (after deduplication)
logger.debug(f"[TESTGEN] Preparing {len(code_files_to_process)} code files for analysis")
code_content = self._prepare_file_content_for_prompt(
request.files, continuation_id, "Code to test", max_tokens=remaining_tokens, reserve_tokens=2000
code_files_to_process, continuation_id, "Code to test", max_tokens=remaining_tokens, reserve_tokens=2000
)
if code_content:
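
Read in isolation, the deduplication above amounts to a small pure function. A minimal sketch follows; the standalone helper name and signature are ours for illustration, not part of the commit, but the normalization matches the diff above:

```python
import os
from typing import Optional

def dedupe_code_files(code_files: list[str], test_examples: Optional[list[str]]) -> list[str]:
    """Drop code files that already appear in test_examples; test examples win."""
    if not test_examples:
        return list(code_files)
    # Compare fully normalized absolute paths so "/p/./a.py" and "/p/a.py" match
    example_set = {os.path.normpath(os.path.abspath(f)) for f in test_examples}
    return [f for f in code_files if os.path.normpath(os.path.abspath(f)) not in example_set]

# Example (POSIX-style paths): the variant "/p/./a.py" matches "/p/a.py" and is dropped
print(dedupe_code_files(["/p/./a.py", "/p/b.py"], ["/p/a.py"]))  # -> ["/p/b.py"]
```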
@@ -417,18 +436,41 @@ class TestGenTool(BaseTool):
---
-**Next Steps:**
+# IMMEDIATE ACTION REQUIRED

-Claude must now:
+Claude, you are now in EXECUTION MODE. Take immediate action:

-1. **Create and save the test files** - Write the generated tests to appropriate test files in your project structure
+## Step 1: ULTRATHINK & CREATE TESTS
+ULTRATHINK while creating these tests. Verify EVERY code reference, import, function name, and logic path is
+100% accurate before saving.

-2. **Display to the user** - Show each new test file/function created with a brief line explaining what it covers
+- **CREATE** all test files in the correct project structure
+- **SAVE** each test with proper naming conventions
+- **VALIDATE** all imports, references, and dependencies are correct as required by the current framework

-3. **Install any missing test dependencies** - Set up required testing frameworks if not already available
+## Step 2: DISPLAY RESULTS TO USER
+After creating each test file, show the user:
+```
+✅ Created: path/to/test_file.py
+   - test_function_name(): Brief description of what it tests
+   - test_another_function(): Brief description
+   - [Total: X test functions]
+```

-4. **Run the tests** - Execute the test suite to verify functionality and fix any issues
+## Step 3: VALIDATE BY EXECUTION
+**MANDATORY**: Run the tests immediately to confirm they work:
+- Install any missing dependencies first
+- Execute the test suite
+- Fix any failures or errors
+- Confirm 100% pass rate

-5. **Integrate the tests** - Ensure tests are properly connected to your existing test infrastructure
+## Step 4: INTEGRATION VERIFICATION
+- Verify tests integrate with existing test infrastructure
+- Confirm test discovery works
+- Validate test naming and organization

-The tests are ready for immediate implementation and integration into your codebase."""
+## Step 5: MOVE TO NEXT ACTION
+Once tests are confirmed working, immediately proceed to the next logical step for the project.
+
+**CRITICAL**: Do NOT stop after generating - you MUST create, validate, run, and confirm the tests work.
+Take full ownership of the testing implementation and move to your next work."""