WIP major refactor and features

2025-06-12 07:14:59 +04:00
parent e06a6fd1fc
commit 2a067a7f4e
46 changed files with 2960 additions and 1011 deletions
--- a/simulator_tests/test_basic_conversation.py
+++ b/simulator_tests/test_basic_conversation.py
@@ -37,6 +37,7 @@ class BasicConversationTest(BaseSimulatorTest):
                {
                    "prompt": "Please use low thinking mode. Analyze this Python code and explain what it does",
                    "files": [self.test_files["python"]],
+                    "model": "flash",
                },
            )

@@ -54,6 +55,7 @@ class BasicConversationTest(BaseSimulatorTest):
                    "prompt": "Please use low thinking mode. Now focus on the Calculator class specifically. Are there any improvements you'd suggest?",
                    "files": [self.test_files["python"]],  # Same file - should be deduplicated
                    "continuation_id": continuation_id,
+                    "model": "flash",
                },
            )

@@ -69,6 +71,7 @@ class BasicConversationTest(BaseSimulatorTest):
                    "prompt": "Please use low thinking mode. Now also analyze this configuration file and see how it might relate to the Python code",
                    "files": [self.test_files["python"], self.test_files["config"]],
                    "continuation_id": continuation_id,
+                    "model": "flash",
                },
            )

--- a/simulator_tests/test_content_validation.py
+++ b/simulator_tests/test_content_validation.py
@@ -66,7 +66,7 @@ DATABASE_CONFIG = {
                {
                    "path": os.getcwd(),
                    "files": [validation_file],
-                    "original_request": "Test for content duplication in precommit tool",
+                    "prompt": "Test for content duplication in precommit tool",
                },
            )

@@ -116,16 +116,18 @@ DATABASE_CONFIG = {
                    {
                        "prompt": "Please use low thinking mode. Analyze this config file",
                        "files": [validation_file],
+                        "model": "flash",
                    },  # Using absolute path
                ),
                (
                    "codereview",
                    {
                        "files": [validation_file],
-                        "context": "Please use low thinking mode. Review this configuration",
+                        "prompt": "Please use low thinking mode. Review this configuration",
+                        "model": "flash",
                    },  # Using absolute path
                ),
-                ("analyze", {"files": [validation_file], "analysis_type": "code_quality"}),  # Using absolute path
+                ("analyze", {"files": [validation_file], "analysis_type": "code_quality", "model": "flash"}),  # Using absolute path
            ]

            for tool_name, params in tools_to_test:
@@ -163,6 +165,7 @@ DATABASE_CONFIG = {
                        "prompt": "Please use low thinking mode. Continue analyzing this configuration file",
                        "files": [validation_file],  # Same file should be deduplicated
                        "continuation_id": thread_id,
+                        "model": "flash",
                    },
                )

--- a/simulator_tests/test_cross_tool_comprehensive.py
+++ b/simulator_tests/test_cross_tool_comprehensive.py
@@ -91,6 +91,7 @@ def hash_pwd(pwd):
                "prompt": "Please give me a quick one line reply. I have an authentication module that needs review. Can you help me understand potential issues?",
                "files": [auth_file],
                "thinking_mode": "low",
+                "model": "flash",
            }

            response1, continuation_id1 = self.call_mcp_tool("chat", chat_params)
@@ -106,8 +107,9 @@ def hash_pwd(pwd):
            self.logger.info("  Step 2: analyze tool - Deep code analysis (fresh)")
            analyze_params = {
                "files": [auth_file],
-                "question": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
+                "prompt": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
                "thinking_mode": "low",
+                "model": "flash",
            }

            response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params)
@@ -127,6 +129,7 @@ def hash_pwd(pwd):
                "prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?",
                "files": [auth_file, config_file_path],  # Old + new file
                "thinking_mode": "low",
+                "model": "flash",
            }

            response3, _ = self.call_mcp_tool("chat", chat_continue_params)
@@ -141,8 +144,9 @@ def hash_pwd(pwd):
            self.logger.info("  Step 4: debug tool - Identify specific problems")
            debug_params = {
                "files": [auth_file, config_file_path],
-                "error_description": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
+                "prompt": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
                "thinking_mode": "low",
+                "model": "flash",
            }

            response4, continuation_id4 = self.call_mcp_tool("debug", debug_params)
@@ -161,8 +165,9 @@ def hash_pwd(pwd):
                debug_continue_params = {
                    "continuation_id": continuation_id4,
                    "files": [auth_file, config_file_path],
-                    "error_description": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
+                    "prompt": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
                    "thinking_mode": "low",
+                    "model": "flash",
                }

                response5, _ = self.call_mcp_tool("debug", debug_continue_params)
@@ -174,8 +179,9 @@ def hash_pwd(pwd):
            self.logger.info("  Step 6: codereview tool - Comprehensive code review")
            codereview_params = {
                "files": [auth_file, config_file_path],
-                "context": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
+                "prompt": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
                "thinking_mode": "low",
+                "model": "flash",
            }

            response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params)
@@ -207,7 +213,7 @@ def secure_login(user, pwd):
            precommit_params = {
                "path": self.test_dir,
                "files": [auth_file, config_file_path, improved_file],
-                "original_request": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
+                "prompt": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
                "thinking_mode": "low",
            }

--- a/simulator_tests/test_cross_tool_continuation.py
+++ b/simulator_tests/test_cross_tool_continuation.py
@@ -67,6 +67,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                {
                    "prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
                    "files": [self.test_files["python"]],
+                    "model": "flash",
                },
            )

@@ -81,6 +82,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                    "prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": chat_id,
+                    "model": "flash",
                },
            )

@@ -93,8 +95,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                "codereview",
                {
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
-                    "context": "Building on our previous analysis, provide a comprehensive code review",
+                    "prompt": "Building on our previous analysis, provide a comprehensive code review",
                    "continuation_id": chat_id,
+                    "model": "flash",
                },
            )

@@ -116,7 +119,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):

            # Start with analyze
            analyze_response, analyze_id = self.call_mcp_tool(
-                "analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality"}
+                "analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality", "model": "flash"}
            )

            if not analyze_response or not analyze_id:
@@ -128,8 +131,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                "debug",
                {
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
-                    "issue_description": "Based on our analysis, help debug the performance issue in fibonacci",
+                    "prompt": "Based on our analysis, help debug the performance issue in fibonacci",
                    "continuation_id": analyze_id,
+                    "model": "flash",
                },
            )

@@ -144,6 +148,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                    "prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": analyze_id,
+                    "model": "flash",
                },
            )

@@ -169,6 +174,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                {
                    "prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
                    "files": [self.test_files["python"], self.test_files["config"]],
+                    "model": "flash",
                },
            )

@@ -181,8 +187,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
                "codereview",
                {
                    "files": [self.test_files["python"], self.test_files["config"]],  # Same files
-                    "context": "Review both files in the context of our previous discussion",
+                    "prompt": "Review both files in the context of our previous discussion",
                    "continuation_id": multi_id,
+                    "model": "flash",
                },
            )

--- a/simulator_tests/test_per_tool_deduplication.py
+++ b/simulator_tests/test_per_tool_deduplication.py
@@ -100,8 +100,9 @@ def divide(x, y):
            precommit_params = {
                "path": self.test_dir,  # Required path parameter
                "files": [dummy_file_path],
-                "original_request": "Please give me a quick one line reply. Review this code for commit readiness",
+                "prompt": "Please give me a quick one line reply. Review this code for commit readiness",
                "thinking_mode": "low",
+                "model": "flash",
            }

            response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
@@ -124,8 +125,9 @@ def divide(x, y):
            self.logger.info("  Step 2: codereview tool with same file (fresh conversation)")
            codereview_params = {
                "files": [dummy_file_path],
-                "context": "Please give me a quick one line reply. General code review for quality and best practices",
+                "prompt": "Please give me a quick one line reply. General code review for quality and best practices",
                "thinking_mode": "low",
+                "model": "flash",
            }

            response2, _ = self.call_mcp_tool("codereview", codereview_params)
@@ -150,8 +152,9 @@ def subtract(a, b):
                "continuation_id": continuation_id,
                "path": self.test_dir,  # Required path parameter
                "files": [dummy_file_path, new_file_path],  # Old + new file
-                "original_request": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
+                "prompt": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
                "thinking_mode": "low",
+                "model": "flash",
            }

            response3, _ = self.call_mcp_tool("precommit", continue_params)