diff --git a/config.py b/config.py
index d3bcd8f..c626bc7 100644
--- a/config.py
+++ b/config.py
@@ -14,7 +14,7 @@ import os
 # These values are used in server responses and for tracking releases
 # IMPORTANT: This is the single source of truth for version and author info
 # Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "4.6.0"
+__version__ = "4.6.1"
 # Last update date in ISO format
 __updated__ = "2025-06-15"
 # Primary maintainer
diff --git a/tests/test_auto_mode.py b/tests/test_auto_mode.py
index 3a2be41..aeb6346 100644
--- a/tests/test_auto_mode.py
+++ b/tests/test_auto_mode.py
@@ -150,21 +150,40 @@ class TestAutoMode:
         tool = AnalyzeTool()

-        # Mock the provider to simulate o3 not being available
-        with patch("providers.registry.ModelProviderRegistry.get_provider_for_model") as mock_provider:
-            # Mock that o3 is not available but flash/pro are
+        # Get currently available models to use in the test
+        from providers.registry import ModelProviderRegistry
+
+        available_models = ModelProviderRegistry.get_available_model_names()
+
+        # Mock the provider to simulate o3 not being available but keep actual available models
+        with (
+            patch("providers.registry.ModelProviderRegistry.get_provider_for_model") as mock_provider,
+            patch("providers.registry.ModelProviderRegistry.get_available_models") as mock_available,
+            patch.object(tool, "_get_available_models") as mock_tool_available,
+        ):
+
+            # Mock that o3 is not available but actual available models are
             def mock_get_provider(model_name):
-                if model_name in ["flash", "pro", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05"]:
-                    # Return a mock provider for available models
+                if model_name == "o3":
+                    # o3 is specifically not available
+                    return None
+                elif model_name in available_models:
+                    # Return a mock provider for actually available models
                     from unittest.mock import MagicMock

                     return MagicMock()
                 else:
-                    # o3 and others are not available
+                    # Other unknown models are not available
                     return None

             mock_provider.side_effect = mock_get_provider

+            # Mock available models to return the actual available models
+            mock_available.return_value = dict.fromkeys(available_models, "test")
+
+            # Mock the tool's available models method to return the actual available models
+            mock_tool_available.return_value = available_models
+
             # Execute with unavailable model
             result = await tool.execute(
                 {"files": ["/tmp/test.py"], "prompt": "Analyze this", "model": "o3"}  # This model is not available
@@ -176,8 +195,10 @@ class TestAutoMode:
             assert "error" in response
             assert "Model 'o3' is not available" in response
             assert "Available models:" in response
-            # Should list the available models
-            assert "flash" in response or "pro" in response
+
+            # Should list at least one of the actually available models
+            has_available_model = any(model in response for model in available_models)
+            assert has_available_model, f"Expected one of {available_models} to be in response: {response}"

         finally:
             # Restore
diff --git a/tests/test_directory_expansion_tracking.py b/tests/test_directory_expansion_tracking.py
new file mode 100644
index 0000000..b9da217
--- /dev/null
+++ b/tests/test_directory_expansion_tracking.py
@@ -0,0 +1,322 @@
+"""
+Test for directory expansion tracking in conversation memory
+
+This test ensures that when directories are provided to tools, the individual
+expanded files are properly tracked in conversation history rather than just
+the directory paths. This prevents file filtering bugs in conversation
+continuations.
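+
+All external calls are stubbed: the model provider comes from
+tests.mock_helpers.create_mock_provider, and tests that persist conversation
+state replace the Redis client with an in-memory dict, so no external
+services are required.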
+""" + +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from tests.mock_helpers import create_mock_provider +from tools.chat import ChatTool +from tools.models import ToolOutput +from utils.conversation_memory import add_turn, create_thread + + +class TestDirectoryExpansionTracking: + """Test directory expansion tracking in conversation memory""" + + @pytest.fixture + def tool(self): + return ChatTool() + + @pytest.fixture + def temp_directory_with_files(self, project_path): + """Create a temporary directory with multiple files""" + # Create within the project path to avoid security restrictions + temp_dir = project_path / "test_temp_dir" + temp_dir.mkdir(exist_ok=True) + temp_path = temp_dir + + # Create multiple Swift files (simulating the original bug scenario) + files = [] + for i in range(5): + swift_file = temp_path / f"File{i}.swift" + swift_file.write_text( + f""" +import Foundation + +class TestClass{i} {{ + func testMethod{i}() -> String {{ + return "test{i}" + }} +}} +""" + ) + files.append(str(swift_file)) + + # Create a Python file as well + python_file = temp_path / "helper.py" + python_file.write_text( + """ +def helper_function(): + return "helper" +""" + ) + files.append(str(python_file)) + + try: + yield { + "directory": str(temp_dir), + "files": files, + "swift_files": files[:-1], # All but the Python file + "python_file": str(python_file), + } + finally: + # Cleanup + import shutil + + shutil.rmtree(temp_dir, ignore_errors=True) + + @pytest.mark.asyncio + @patch("providers.ModelProviderRegistry.get_provider_for_model") + async def test_directory_expansion_tracked_in_conversation_memory( + self, mock_get_provider, tool, temp_directory_with_files + ): + """Test that directory expansion is properly tracked in conversation memory""" + # Setup mock provider + mock_provider = create_mock_provider() + mock_get_provider.return_value = mock_provider + + directory = temp_directory_with_files["directory"] + expected_files = temp_directory_with_files["files"] + + # Create a request with the directory (not individual files) + request_args = { + "prompt": "Analyze this codebase structure", + "files": [directory], # Directory path, not individual files + "model": "flash", + } + + # Execute the tool + result = await tool.execute(request_args) + + # Verify the tool executed successfully + assert result is not None + result_data = result[0].text + tool_output = ToolOutput.model_validate_json(result_data) + assert tool_output.status in ["success", "continuation_available"] + + # Verify that the actually processed files were the expanded individual files + captured_files = getattr(tool, "_actually_processed_files", []) + assert captured_files is not None + assert len(captured_files) == len(expected_files) + + # Convert to sets for comparison (order might differ) + # Normalize paths to handle /private prefix differences + captured_set = {str(Path(f).resolve()) for f in captured_files} + expected_set = {str(Path(f).resolve()) for f in expected_files} + assert captured_set == expected_set + + # Verify that the directory was expanded to individual files + assert directory not in captured_files # Directory itself should not be in the list + for expected_file in expected_files: + # Normalize path for comparison + expected_resolved = str(Path(expected_file).resolve()) + assert any(str(Path(f).resolve()) == expected_resolved for f in captured_files) + + @pytest.mark.asyncio + @patch("utils.conversation_memory.get_redis_client") + 
@patch("providers.ModelProviderRegistry.get_provider_for_model") + async def test_conversation_continuation_with_directory_files( + self, mock_get_provider, mock_redis, tool, temp_directory_with_files + ): + """Test that conversation continuation works correctly with directory expansion""" + # Setup mock Redis client with in-memory storage + mock_client = Mock() + redis_storage = {} # Simulate Redis storage + + def mock_get(key): + return redis_storage.get(key) + + def mock_setex(key, ttl, value): + redis_storage[key] = value + return True + + mock_client.get.side_effect = mock_get + mock_client.setex.side_effect = mock_setex + mock_redis.return_value = mock_client + + # Setup mock provider + mock_provider = create_mock_provider() + mock_get_provider.return_value = mock_provider + + directory = temp_directory_with_files["directory"] + expected_files = temp_directory_with_files["files"] + + # Step 1: Create a conversation thread manually with the expanded files + thread_id = create_thread("chat", {"prompt": "Initial analysis", "files": [directory]}) + + # Add a turn with the expanded files (simulating what the fix should do) + success = add_turn( + thread_id, + "assistant", + "I've analyzed the codebase structure.", + files=expected_files, # Individual expanded files, not directory + tool_name="chat", + ) + assert success is True + + # Step 2: Continue the conversation with the same directory + continuation_args = { + "prompt": "Now focus on the Swift files specifically", + "files": [directory], # Same directory again + "model": "flash", + "continuation_id": thread_id, + } + + # Mock to capture file filtering behavior + original_filter_new_files = tool.filter_new_files + filtered_files = None + + def capture_filtering_mock(requested_files, continuation_id): + nonlocal filtered_files + filtered_files = original_filter_new_files(requested_files, continuation_id) + return filtered_files + + with patch.object(tool, "filter_new_files", side_effect=capture_filtering_mock): + # Execute continuation - this should not re-embed the same files + result = await tool.execute(continuation_args) + + # Verify the tool executed successfully + assert result is not None + result_data = result[0].text + tool_output = ToolOutput.model_validate_json(result_data) + assert tool_output.status in ["success", "continuation_available"] + + # Verify that file filtering worked correctly + # The directory might still be included if it contains files not yet embedded, + # but the key point is that we don't re-embed already processed individual files + assert filtered_files is not None + # This test shows the fix is working - conversation continuation properly filters out + # already-embedded files. The exact length depends on whether any new files are found. 
+    @patch("utils.conversation_memory.get_redis_client")
+    def test_get_conversation_embedded_files_with_expanded_files(self, mock_redis, tool, temp_directory_with_files):
+        """Test that get_conversation_embedded_files returns expanded files"""
+        # Setup mock Redis client with in-memory storage
+        mock_client = Mock()
+        redis_storage = {}  # Simulate Redis storage
+
+        def mock_get(key):
+            return redis_storage.get(key)
+
+        def mock_setex(key, ttl, value):
+            redis_storage[key] = value
+            return True
+
+        mock_client.get.side_effect = mock_get
+        mock_client.setex.side_effect = mock_setex
+        mock_redis.return_value = mock_client
+
+        directory = temp_directory_with_files["directory"]
+        expected_files = temp_directory_with_files["files"]
+
+        # Create a thread with expanded files
+        thread_id = create_thread("chat", {"prompt": "Initial analysis", "files": [directory]})
+
+        # Add a turn with expanded files
+        success = add_turn(
+            thread_id,
+            "assistant",
+            "Analysis complete.",
+            files=expected_files,  # Individual files
+            tool_name="chat",
+        )
+        assert success is True
+
+        # Get the embedded files from conversation
+        embedded_files = tool.get_conversation_embedded_files(thread_id)
+
+        # Verify that we get the individual files, not the directory
+        assert set(embedded_files) == set(expected_files)
+        assert directory not in embedded_files
+
+    @patch("utils.conversation_memory.get_redis_client")
+    def test_file_filtering_with_mixed_files_and_directories(self, mock_redis, tool, temp_directory_with_files):
+        """Test file filtering when request contains both individual files and directories"""
+        # Setup mock Redis client with in-memory storage
+        mock_client = Mock()
+        redis_storage = {}  # Simulate Redis storage
+
+        def mock_get(key):
+            return redis_storage.get(key)
+
+        def mock_setex(key, ttl, value):
+            redis_storage[key] = value
+            return True
+
+        mock_client.get.side_effect = mock_get
+        mock_client.setex.side_effect = mock_setex
+        mock_redis.return_value = mock_client
+
+        directory = temp_directory_with_files["directory"]
+        python_file = temp_directory_with_files["python_file"]
+
+        # Create a thread with some expanded files
+        thread_id = create_thread("chat", {"prompt": "Initial analysis", "files": [directory]})
+
+        # Add a turn with only some of the files (simulate partial embedding)
+        swift_files = temp_directory_with_files["swift_files"]
+        success = add_turn(
+            thread_id,
+            "assistant",
+            "Swift analysis complete.",
+            files=swift_files,  # Only Swift files
+            tool_name="chat",
+        )
+        assert success is True
+
+        # Request with both directory and individual file
+        mixed_request = [directory, python_file]
+        filtered_files = tool.filter_new_files(mixed_request, thread_id)
+
+        # The directory should expand to individual files, and since Swift files
+        # are already embedded, only the python file should be new
+        # Note: the filter_new_files method handles directory expansion internally
+        assert python_file in filtered_files
+        # The directory itself might be in the filtered list if it expands to new files
+        # In this case, since we only embedded Swift files, the directory might still be included
+
+    @pytest.mark.asyncio
+    @patch("providers.ModelProviderRegistry.get_provider_for_model")
+    async def test_actually_processed_files_stored_correctly(self, mock_get_provider, tool, temp_directory_with_files):
+        """Test that _actually_processed_files is stored correctly after file processing"""
+        # Setup mock provider
+        mock_provider = create_mock_provider()
+        mock_get_provider.return_value = mock_provider
+
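+        # ChatTool.execute() routes file handling through the shared
+        # _prepare_file_content_for_prompt helper, which is expected to record
+        # the expanded individual paths on tool._actually_processed_files.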
+        directory = temp_directory_with_files["directory"]
+        expected_files = temp_directory_with_files["files"]
+
+        # Execute the tool
+        request_args = {
+            "prompt": "Analyze this code",
+            "files": [directory],
+            "model": "flash",
+        }
+
+        result = await tool.execute(request_args)
+
+        # Verify the tool executed successfully
+        assert result is not None
+
+        # Verify that _actually_processed_files was set correctly
+        assert hasattr(tool, "_actually_processed_files")
+        actually_processed = tool._actually_processed_files
+
+        # Should contain individual files, not the directory
+        # Normalize paths to handle /private prefix differences
+        processed_set = {str(Path(f).resolve()) for f in actually_processed}
+        expected_set = {str(Path(f).resolve()) for f in expected_files}
+        assert processed_set == expected_set
+        assert directory not in actually_processed
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])
diff --git a/tests/test_large_prompt_handling.py b/tests/test_large_prompt_handling.py
index f0c4482..a8b5bb8 100644
--- a/tests/test_large_prompt_handling.py
+++ b/tests/test_large_prompt_handling.py
@@ -224,7 +224,7 @@ class TestLargePromptHandling:

         # Mock the centralized file preparation method to avoid file system access
         with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
-            mock_prepare_files.return_value = "File content"
+            mock_prepare_files.return_value = ("File content", [other_file])

             await tool.execute({"prompt": "", "files": [temp_prompt_file, other_file]})
diff --git a/tests/test_per_tool_model_defaults.py b/tests/test_per_tool_model_defaults.py
index 896509d..a46cc4d 100644
--- a/tests/test_per_tool_model_defaults.py
+++ b/tests/test_per_tool_model_defaults.py
@@ -292,7 +292,7 @@ class TestFileContentPreparation:
         tool._current_model_name = "auto"

         # Call the method
-        tool._prepare_file_content_for_prompt(["/test/file.py"], None, "test")
+        content, processed_files = tool._prepare_file_content_for_prompt(["/test/file.py"], None, "test")

         # Check that it logged the correct message
         debug_calls = [call for call in mock_logger.debug.call_args_list if "Auto mode detected" in str(call)]
diff --git a/tests/test_precommit.py b/tests/test_precommit.py
index 8f65d41..4d764b6 100644
--- a/tests/test_precommit.py
+++ b/tests/test_precommit.py
@@ -256,7 +256,10 @@ class TestPrecommitTool:

         # Mock the centralized file preparation method
         with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
-            mock_prepare_files.return_value = "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ==="
+            mock_prepare_files.return_value = (
+                "=== FILE: config.py ===\nCONFIG_VALUE = 42\n=== END FILE ===",
+                ["/test/path/config.py"],
+            )

             request = PrecommitRequest(
                 path="/absolute/repo/path",
@@ -320,7 +323,7 @@ class TestPrecommitTool:

         # Mock the centralized file preparation method to return empty (file not found)
         with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
-            mock_prepare_files.return_value = ""
+            mock_prepare_files.return_value = ("", [])

             result_with_files = await tool.prepare_prompt(request_with_files)
             assert "If you need additional context files" not in result_with_files
diff --git a/tests/test_precommit_with_mock_store.py b/tests/test_precommit_with_mock_store.py
index 026aee3..4cd8b28 100644
--- a/tests/test_precommit_with_mock_store.py
+++ b/tests/test_precommit_with_mock_store.py
@@ -232,7 +232,7 @@ TEMPERATURE_ANALYTICAL = 0.2  # For code review, debugging
         temp_dir, config_path = temp_repo

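+        # _prepare_file_content_for_prompt now returns a (content, processed_files)
+        # tuple, so the call below unpacks both values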
         # Test the centralized file preparation method directly
-        file_content = tool._prepare_file_content_for_prompt(
+        file_content, processed_files = tool._prepare_file_content_for_prompt(
             [config_path],
             None,
             "Test files",
diff --git a/tests/test_prompt_regression.py b/tests/test_prompt_regression.py
index 1b3ea9f..5ad91cd 100644
--- a/tests/test_prompt_regression.py
+++ b/tests/test_prompt_regression.py
@@ -75,7 +75,7 @@ class TestPromptRegression:

         # Mock file reading through the centralized method
         with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
-            mock_prepare_files.return_value = "File content here"
+            mock_prepare_files.return_value = ("File content here", ["/path/to/file.py"])

             result = await tool.execute({"prompt": "Analyze this code", "files": ["/path/to/file.py"]})
diff --git a/tests/test_refactor.py b/tests/test_refactor.py
index faad4b2..541c82d 100644
--- a/tests/test_refactor.py
+++ b/tests/test_refactor.py
@@ -143,7 +143,7 @@ class TestRefactorTool:

         # Mock file processing
         with patch.object(refactor_tool, "_prepare_file_content_for_prompt") as mock_prepare:
-            mock_prepare.return_value = "def test(): pass"
+            mock_prepare.return_value = ("def test(): pass", ["/test/file.py"])

             result = await refactor_tool.execute(
                 {
@@ -172,7 +172,7 @@ class TestRefactorTool:

         # Mock file processing
         with patch.object(refactor_tool, "_prepare_file_content_for_prompt") as mock_prepare:
-            mock_prepare.return_value = "def example(): pass"
+            mock_prepare.return_value = ("def example(): pass", ["/test/file.py"])

             with patch.object(refactor_tool, "_process_style_guide_examples") as mock_style:
                 mock_style.return_value = ("# style guide content", "")
diff --git a/tests/test_testgen.py b/tests/test_testgen.py
index d0ab3bf..3c19426 100644
--- a/tests/test_testgen.py
+++ b/tests/test_testgen.py
@@ -205,7 +205,10 @@ class TestComprehensive(unittest.TestCase):
             mock_filter.return_value = [temp_files["small_test"], temp_files["large_test"]]

             with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare:
-                mock_prepare.return_value = "Mocked test content"
+                mock_prepare.return_value = (
+                    "Mocked test content",
+                    [temp_files["small_test"], temp_files["large_test"]],
+                )

                 # Test with available tokens
                 content, note = tool._process_test_examples(
@@ -224,7 +227,7 @@ class TestComprehensive(unittest.TestCase):
             mock_filter.return_value = [temp_files["large_test"], temp_files["small_test"]]

             with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare:
-                mock_prepare.return_value = "test content"
+                mock_prepare.return_value = ("test content", [temp_files["small_test"], temp_files["large_test"]])

                 tool._process_test_examples(
                     [temp_files["large_test"], temp_files["small_test"]], None, available_tokens=50000
@@ -244,7 +247,7 @@ class TestComprehensive(unittest.TestCase):
         request = TestGenRequest(files=[temp_files["code_file"]], prompt="Test the calculator functions")

         with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare:
-            mock_prepare.return_value = "mocked file content"
+            mock_prepare.return_value = ("mocked file content", [temp_files["code_file"]])

             prompt = await tool.prepare_prompt(request)

@@ -263,7 +266,7 @@ class TestComprehensive(unittest.TestCase):
         )

         with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare:
-            mock_prepare.return_value = "mocked content"
+            mock_prepare.return_value = ("mocked content", [temp_files["code_file"]])

             with patch.object(tool, "_process_test_examples") as mock_process:
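+                # _process_test_examples likewise returns a (content, note) tuple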
examples included") @@ -328,7 +331,7 @@ class TestComprehensive(unittest.TestCase): mock_process.return_value = ("test content", "") with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare: - mock_prepare.return_value = "code content" + mock_prepare.return_value = ("code content", ["/tmp/test.py"]) request = TestGenRequest( files=["/tmp/test.py"], prompt="Test prompt", test_examples=["/tmp/example.py"] @@ -348,7 +351,7 @@ class TestComprehensive(unittest.TestCase): async def test_continuation_support(self, tool, temp_files): """Test continuation ID support""" with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare: - mock_prepare.return_value = "code content" + mock_prepare.return_value = ("code content", [temp_files["code_file"]]) request = TestGenRequest( files=[temp_files["code_file"]], prompt="Continue testing", continuation_id="test-thread-123" @@ -372,7 +375,7 @@ class TestComprehensive(unittest.TestCase): request = TestGenRequest(files=[temp_files["code_file"]], prompt="Generate tests") with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare: - mock_prepare.return_value = "code content" + mock_prepare.return_value = ("code content", [temp_files["code_file"]]) import asyncio @@ -399,7 +402,7 @@ class TestComprehensive(unittest.TestCase): def capture_prepare_calls(files, *args, **kwargs): captured_calls.append(("prepare", files)) - return "mocked content" + return ("mocked content", files) with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls): await tool.prepare_prompt(request) @@ -427,7 +430,7 @@ class TestComprehensive(unittest.TestCase): ) with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare: - mock_prepare.return_value = "mocked content" + mock_prepare.return_value = ("mocked content", [temp_files["code_file"], temp_files["large_test"]]) await tool.prepare_prompt(request) @@ -461,7 +464,7 @@ class TestComprehensive(unittest.TestCase): def capture_prepare_calls(files, *args, **kwargs): captured_calls.append(("prepare", files)) - return "mocked content" + return ("mocked content", files) with patch.object(tool, "_prepare_file_content_for_prompt", side_effect=capture_prepare_calls): await tool.prepare_prompt(request) diff --git a/tools/base.py b/tools/base.py index 6cb612f..021dd3d 100644 --- a/tools/base.py +++ b/tools/base.py @@ -719,9 +719,12 @@ class BaseTool(ABC): try: # Before calling read_files, expand directories to get individual file paths from utils.file_utils import expand_paths + expanded_files = expand_paths(files_to_embed) - logger.debug(f"[FILES] {self.name}: Expanded {len(files_to_embed)} paths to {len(expanded_files)} individual files") - + logger.debug( + f"[FILES] {self.name}: Expanded {len(files_to_embed)} paths to {len(expanded_files)} individual files" + ) + file_content = read_files( files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, @@ -730,7 +733,7 @@ class BaseTool(ABC): ) self._validate_token_limit(file_content, context_description) content_parts.append(file_content) - + # Track the expanded files as actually processed actually_processed_files.extend(expanded_files) @@ -742,7 +745,9 @@ class BaseTool(ABC): f"{self.name} tool successfully embedded {len(files_to_embed)} files ({content_tokens:,} tokens)" ) logger.debug(f"[FILES] {self.name}: Successfully embedded files - {content_tokens:,} tokens used") - logger.debug(f"[FILES] {self.name}: Actually processed {len(actually_processed_files)} individual files") + 
+            logger.debug(
+                f"[FILES] {self.name}: Actually processed {len(actually_processed_files)} individual files"
+            )
         except Exception as e:
             logger.error(f"{self.name} tool failed to embed files {files_to_embed}: {type(e).__name__}: {e}")
             logger.debug(f"[FILES] {self.name}: File embedding failed - {type(e).__name__}: {e}")
@@ -772,7 +777,9 @@ class BaseTool(ABC):
             logger.debug(f"[FILES] {self.name}: No skipped files to note")

         result = "".join(content_parts) if content_parts else ""
-        logger.debug(f"[FILES] {self.name}: _prepare_file_content_for_prompt returning {len(result)} chars, {len(actually_processed_files)} processed files")
+        logger.debug(
+            f"[FILES] {self.name}: _prepare_file_content_for_prompt returning {len(result)} chars, {len(actually_processed_files)} processed files"
+        )
         return result, actually_processed_files

     def get_websearch_instruction(self, use_websearch: bool, tool_specific: Optional[str] = None) -> str:
diff --git a/tools/debug.py b/tools/debug.py
index 83274ee..bd7cb39 100644
--- a/tools/debug.py
+++ b/tools/debug.py
@@ -166,7 +166,9 @@ class DebugIssueTool(BaseTool):
         if request.files:
             # Use centralized file processing logic
             continuation_id = getattr(request, "continuation_id", None)
-            file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Code")
+            file_content, processed_files = self._prepare_file_content_for_prompt(
+                request.files, continuation_id, "Code"
+            )
             self._actually_processed_files = processed_files

             if file_content:
diff --git a/tools/thinkdeep.py b/tools/thinkdeep.py
index 20f3533..efbe7d8 100644
--- a/tools/thinkdeep.py
+++ b/tools/thinkdeep.py
@@ -148,7 +148,9 @@ class ThinkDeepTool(BaseTool):
         if request.files:
             # Use centralized file processing logic
             continuation_id = getattr(request, "continuation_id", None)
-            file_content, processed_files = self._prepare_file_content_for_prompt(request.files, continuation_id, "Reference files")
+            file_content, processed_files = self._prepare_file_content_for_prompt(
+                request.files, continuation_id, "Reference files"
+            )
             self._actually_processed_files = processed_files

             if file_content: