WIP major refactor and features

This commit is contained in:
Fahad
2025-06-12 07:14:59 +04:00
parent e06a6fd1fc
commit 2a067a7f4e
46 changed files with 2960 additions and 1011 deletions

View File

@@ -37,6 +37,7 @@ class BasicConversationTest(BaseSimulatorTest):
{
"prompt": "Please use low thinking mode. Analyze this Python code and explain what it does",
"files": [self.test_files["python"]],
"model": "flash",
},
)
@@ -54,6 +55,7 @@ class BasicConversationTest(BaseSimulatorTest):
"prompt": "Please use low thinking mode. Now focus on the Calculator class specifically. Are there any improvements you'd suggest?",
"files": [self.test_files["python"]], # Same file - should be deduplicated
"continuation_id": continuation_id,
"model": "flash",
},
)
@@ -69,6 +71,7 @@ class BasicConversationTest(BaseSimulatorTest):
"prompt": "Please use low thinking mode. Now also analyze this configuration file and see how it might relate to the Python code",
"files": [self.test_files["python"], self.test_files["config"]],
"continuation_id": continuation_id,
"model": "flash",
},
)

View File

@@ -66,7 +66,7 @@ DATABASE_CONFIG = {
{
"path": os.getcwd(),
"files": [validation_file],
"original_request": "Test for content duplication in precommit tool",
"prompt": "Test for content duplication in precommit tool",
},
)
@@ -116,16 +116,18 @@ DATABASE_CONFIG = {
{
"prompt": "Please use low thinking mode. Analyze this config file",
"files": [validation_file],
"model": "flash",
}, # Using absolute path
),
(
"codereview",
{
"files": [validation_file],
"context": "Please use low thinking mode. Review this configuration",
"prompt": "Please use low thinking mode. Review this configuration",
"model": "flash",
}, # Using absolute path
),
("analyze", {"files": [validation_file], "analysis_type": "code_quality"}), # Using absolute path
("analyze", {"files": [validation_file], "analysis_type": "code_quality", "model": "flash"}), # Using absolute path
]
for tool_name, params in tools_to_test:
@@ -163,6 +165,7 @@ DATABASE_CONFIG = {
"prompt": "Please use low thinking mode. Continue analyzing this configuration file",
"files": [validation_file], # Same file should be deduplicated
"continuation_id": thread_id,
"model": "flash",
},
)

View File

@@ -91,6 +91,7 @@ def hash_pwd(pwd):
"prompt": "Please give me a quick one line reply. I have an authentication module that needs review. Can you help me understand potential issues?",
"files": [auth_file],
"thinking_mode": "low",
"model": "flash",
}
response1, continuation_id1 = self.call_mcp_tool("chat", chat_params)
@@ -106,8 +107,9 @@ def hash_pwd(pwd):
self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)")
analyze_params = {
"files": [auth_file],
"question": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
"prompt": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
"thinking_mode": "low",
"model": "flash",
}
response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params)
@@ -127,6 +129,7 @@ def hash_pwd(pwd):
"prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?",
"files": [auth_file, config_file_path], # Old + new file
"thinking_mode": "low",
"model": "flash",
}
response3, _ = self.call_mcp_tool("chat", chat_continue_params)
@@ -141,8 +144,9 @@ def hash_pwd(pwd):
self.logger.info(" Step 4: debug tool - Identify specific problems")
debug_params = {
"files": [auth_file, config_file_path],
"error_description": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
"prompt": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
"thinking_mode": "low",
"model": "flash",
}
response4, continuation_id4 = self.call_mcp_tool("debug", debug_params)
@@ -161,8 +165,9 @@ def hash_pwd(pwd):
debug_continue_params = {
"continuation_id": continuation_id4,
"files": [auth_file, config_file_path],
"error_description": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
"prompt": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
"thinking_mode": "low",
"model": "flash",
}
response5, _ = self.call_mcp_tool("debug", debug_continue_params)
@@ -174,8 +179,9 @@ def hash_pwd(pwd):
self.logger.info(" Step 6: codereview tool - Comprehensive code review")
codereview_params = {
"files": [auth_file, config_file_path],
"context": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
"prompt": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
"thinking_mode": "low",
"model": "flash",
}
response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params)
@@ -207,7 +213,7 @@ def secure_login(user, pwd):
precommit_params = {
"path": self.test_dir,
"files": [auth_file, config_file_path, improved_file],
"original_request": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
"prompt": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
"thinking_mode": "low",
}

View File

@@ -67,6 +67,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
{
"prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
"files": [self.test_files["python"]],
"model": "flash",
},
)
@@ -81,6 +82,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
"prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
"files": [self.test_files["python"]], # Same file should be deduplicated
"continuation_id": chat_id,
"model": "flash",
},
)
@@ -93,8 +95,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
"codereview",
{
"files": [self.test_files["python"]], # Same file should be deduplicated
"context": "Building on our previous analysis, provide a comprehensive code review",
"prompt": "Building on our previous analysis, provide a comprehensive code review",
"continuation_id": chat_id,
"model": "flash",
},
)
@@ -116,7 +119,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
# Start with analyze
analyze_response, analyze_id = self.call_mcp_tool(
"analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality"}
"analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality", "model": "flash"}
)
if not analyze_response or not analyze_id:
@@ -128,8 +131,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
"debug",
{
"files": [self.test_files["python"]], # Same file should be deduplicated
"issue_description": "Based on our analysis, help debug the performance issue in fibonacci",
"prompt": "Based on our analysis, help debug the performance issue in fibonacci",
"continuation_id": analyze_id,
"model": "flash",
},
)
@@ -144,6 +148,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
"prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
"files": [self.test_files["python"]], # Same file should be deduplicated
"continuation_id": analyze_id,
"model": "flash",
},
)
@@ -169,6 +174,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
{
"prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
"files": [self.test_files["python"], self.test_files["config"]],
"model": "flash",
},
)
@@ -181,8 +187,9 @@ class CrossToolContinuationTest(BaseSimulatorTest):
"codereview",
{
"files": [self.test_files["python"], self.test_files["config"]], # Same files
"context": "Review both files in the context of our previous discussion",
"prompt": "Review both files in the context of our previous discussion",
"continuation_id": multi_id,
"model": "flash",
},
)

View File

@@ -100,8 +100,9 @@ def divide(x, y):
precommit_params = {
"path": self.test_dir, # Required path parameter
"files": [dummy_file_path],
"original_request": "Please give me a quick one line reply. Review this code for commit readiness",
"prompt": "Please give me a quick one line reply. Review this code for commit readiness",
"thinking_mode": "low",
"model": "flash",
}
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
@@ -124,8 +125,9 @@ def divide(x, y):
self.logger.info(" Step 2: codereview tool with same file (fresh conversation)")
codereview_params = {
"files": [dummy_file_path],
"context": "Please give me a quick one line reply. General code review for quality and best practices",
"prompt": "Please give me a quick one line reply. General code review for quality and best practices",
"thinking_mode": "low",
"model": "flash",
}
response2, _ = self.call_mcp_tool("codereview", codereview_params)
@@ -150,8 +152,9 @@ def subtract(a, b):
"continuation_id": continuation_id,
"path": self.test_dir, # Required path parameter
"files": [dummy_file_path, new_file_path], # Old + new file
"original_request": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
"prompt": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
"thinking_mode": "low",
"model": "flash",
}
response3, _ = self.call_mcp_tool("precommit", continue_params)