More tests
This commit is contained in:
@@ -366,7 +366,10 @@ def parse_arguments():
|
|||||||
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
|
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
|
||||||
parser.add_argument("--individual", "-i", help="Run a single test individually")
|
parser.add_argument("--individual", "-i", help="Run a single test individually")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--skip-docker", action="store_true", default=True, help="Skip Docker setup (assumes containers are already running) - DEFAULT"
|
"--skip-docker",
|
||||||
|
action="store_true",
|
||||||
|
default=True,
|
||||||
|
help="Skip Docker setup (assumes containers are already running) - DEFAULT",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--rebuild-docker", action="store_true", help="Force rebuild Docker environment (overrides --skip-docker)"
|
"--rebuild-docker", action="store_true", help="Force rebuild Docker environment (overrides --skip-docker)"
|
||||||
@@ -447,7 +450,7 @@ def main():
|
|||||||
# Determine execution mode and run
|
# Determine execution mode and run
|
||||||
# Override skip_docker if rebuild_docker is specified
|
# Override skip_docker if rebuild_docker is specified
|
||||||
skip_docker = args.skip_docker and not args.rebuild_docker
|
skip_docker = args.skip_docker and not args.rebuild_docker
|
||||||
|
|
||||||
if args.individual:
|
if args.individual:
|
||||||
exit_code = run_individual_test(simulator, args.individual, skip_docker)
|
exit_code = run_individual_test(simulator, args.individual, skip_docker)
|
||||||
else:
|
else:
|
||||||
|
|||||||
21
server.py
21
server.py
@@ -22,6 +22,7 @@ import asyncio
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -52,7 +53,8 @@ from tools.models import ToolOutput
|
|||||||
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
|
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
|
||||||
|
|
||||||
# Create timezone-aware formatter
|
# Create timezone-aware formatter
|
||||||
import time
|
|
||||||
|
|
||||||
class LocalTimeFormatter(logging.Formatter):
|
class LocalTimeFormatter(logging.Formatter):
|
||||||
def formatTime(self, record, datefmt=None):
|
def formatTime(self, record, datefmt=None):
|
||||||
"""Override to use local timezone instead of UTC"""
|
"""Override to use local timezone instead of UTC"""
|
||||||
@@ -61,9 +63,10 @@ class LocalTimeFormatter(logging.Formatter):
|
|||||||
s = time.strftime(datefmt, ct)
|
s = time.strftime(datefmt, ct)
|
||||||
else:
|
else:
|
||||||
t = time.strftime("%Y-%m-%d %H:%M:%S", ct)
|
t = time.strftime("%Y-%m-%d %H:%M:%S", ct)
|
||||||
s = "%s,%03d" % (t, record.msecs)
|
s = f"{t},{record.msecs:03.0f}"
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
# Configure both console and file logging
|
# Configure both console and file logging
|
||||||
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
@@ -213,7 +216,9 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
|
|||||||
if "continuation_id" in arguments and arguments["continuation_id"]:
|
if "continuation_id" in arguments and arguments["continuation_id"]:
|
||||||
continuation_id = arguments["continuation_id"]
|
continuation_id = arguments["continuation_id"]
|
||||||
logger.debug(f"Resuming conversation thread: {continuation_id}")
|
logger.debug(f"Resuming conversation thread: {continuation_id}")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Tool '{name}' resuming thread {continuation_id} with {len(arguments)} arguments")
|
logger.debug(
|
||||||
|
f"[CONVERSATION_DEBUG] Tool '{name}' resuming thread {continuation_id} with {len(arguments)} arguments"
|
||||||
|
)
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Original arguments keys: {list(arguments.keys())}")
|
logger.debug(f"[CONVERSATION_DEBUG] Original arguments keys: {list(arguments.keys())}")
|
||||||
|
|
||||||
# Log to activity file for monitoring
|
# Log to activity file for monitoring
|
||||||
@@ -225,7 +230,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
|
|||||||
|
|
||||||
arguments = await reconstruct_thread_context(arguments)
|
arguments = await reconstruct_thread_context(arguments)
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] After thread reconstruction, arguments keys: {list(arguments.keys())}")
|
logger.debug(f"[CONVERSATION_DEBUG] After thread reconstruction, arguments keys: {list(arguments.keys())}")
|
||||||
if '_remaining_tokens' in arguments:
|
if "_remaining_tokens" in arguments:
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Remaining token budget: {arguments['_remaining_tokens']:,}")
|
logger.debug(f"[CONVERSATION_DEBUG] Remaining token budget: {arguments['_remaining_tokens']:,}")
|
||||||
|
|
||||||
# Route to AI-powered tools that require Gemini API calls
|
# Route to AI-powered tools that require Gemini API calls
|
||||||
@@ -354,7 +359,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
success = add_turn(continuation_id, "user", user_prompt, files=user_files)
|
success = add_turn(continuation_id, "user", user_prompt, files=user_files)
|
||||||
if not success:
|
if not success:
|
||||||
logger.warning(f"Failed to add user turn to thread {continuation_id}")
|
logger.warning(f"Failed to add user turn to thread {continuation_id}")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Failed to add user turn - thread may be at turn limit or expired")
|
logger.debug("[CONVERSATION_DEBUG] Failed to add user turn - thread may be at turn limit or expired")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Successfully added user turn to thread {continuation_id}")
|
logger.debug(f"[CONVERSATION_DEBUG] Successfully added user turn to thread {continuation_id}")
|
||||||
|
|
||||||
@@ -387,7 +392,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
|
|
||||||
remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens
|
remaining_tokens = MAX_CONTENT_TOKENS - conversation_tokens
|
||||||
enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens) # Ensure non-negative
|
enhanced_arguments["_remaining_tokens"] = max(0, remaining_tokens) # Ensure non-negative
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Token budget calculation:")
|
logger.debug("[CONVERSATION_DEBUG] Token budget calculation:")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] MAX_CONTENT_TOKENS: {MAX_CONTENT_TOKENS:,}")
|
logger.debug(f"[CONVERSATION_DEBUG] MAX_CONTENT_TOKENS: {MAX_CONTENT_TOKENS:,}")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Conversation tokens: {conversation_tokens:,}")
|
logger.debug(f"[CONVERSATION_DEBUG] Conversation tokens: {conversation_tokens:,}")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Remaining tokens: {remaining_tokens:,}")
|
logger.debug(f"[CONVERSATION_DEBUG] Remaining tokens: {remaining_tokens:,}")
|
||||||
@@ -402,9 +407,9 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
|
|
||||||
logger.info(f"Reconstructed context for thread {continuation_id} (turn {len(context.turns)})")
|
logger.info(f"Reconstructed context for thread {continuation_id} (turn {len(context.turns)})")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Final enhanced arguments keys: {list(enhanced_arguments.keys())}")
|
logger.debug(f"[CONVERSATION_DEBUG] Final enhanced arguments keys: {list(enhanced_arguments.keys())}")
|
||||||
|
|
||||||
# Debug log files in the enhanced arguments for file tracking
|
# Debug log files in the enhanced arguments for file tracking
|
||||||
if 'files' in enhanced_arguments:
|
if "files" in enhanced_arguments:
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Final files in enhanced arguments: {enhanced_arguments['files']}")
|
logger.debug(f"[CONVERSATION_DEBUG] Final files in enhanced arguments: {enhanced_arguments['files']}")
|
||||||
|
|
||||||
# Log to activity file for monitoring
|
# Log to activity file for monitoring
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ Each test is in its own file for better organization and maintainability.
|
|||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
from .test_basic_conversation import BasicConversationTest
|
from .test_basic_conversation import BasicConversationTest
|
||||||
from .test_content_validation import ContentValidationTest
|
from .test_content_validation import ContentValidationTest
|
||||||
|
from .test_cross_tool_comprehensive import CrossToolComprehensiveTest
|
||||||
from .test_cross_tool_continuation import CrossToolContinuationTest
|
from .test_cross_tool_continuation import CrossToolContinuationTest
|
||||||
from .test_logs_validation import LogsValidationTest
|
from .test_logs_validation import LogsValidationTest
|
||||||
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
||||||
@@ -19,6 +20,7 @@ TEST_REGISTRY = {
|
|||||||
"content_validation": ContentValidationTest,
|
"content_validation": ContentValidationTest,
|
||||||
"per_tool_deduplication": PerToolDeduplicationTest,
|
"per_tool_deduplication": PerToolDeduplicationTest,
|
||||||
"cross_tool_continuation": CrossToolContinuationTest,
|
"cross_tool_continuation": CrossToolContinuationTest,
|
||||||
|
"cross_tool_comprehensive": CrossToolComprehensiveTest,
|
||||||
"logs_validation": LogsValidationTest,
|
"logs_validation": LogsValidationTest,
|
||||||
"redis_validation": RedisValidationTest,
|
"redis_validation": RedisValidationTest,
|
||||||
}
|
}
|
||||||
@@ -29,6 +31,7 @@ __all__ = [
|
|||||||
"ContentValidationTest",
|
"ContentValidationTest",
|
||||||
"PerToolDeduplicationTest",
|
"PerToolDeduplicationTest",
|
||||||
"CrossToolContinuationTest",
|
"CrossToolContinuationTest",
|
||||||
|
"CrossToolComprehensiveTest",
|
||||||
"LogsValidationTest",
|
"LogsValidationTest",
|
||||||
"RedisValidationTest",
|
"RedisValidationTest",
|
||||||
"TEST_REGISTRY",
|
"TEST_REGISTRY",
|
||||||
|
|||||||
@@ -96,10 +96,7 @@ class Calculator:
|
|||||||
f.write(config_content)
|
f.write(config_content)
|
||||||
|
|
||||||
# Ensure absolute paths for MCP server compatibility
|
# Ensure absolute paths for MCP server compatibility
|
||||||
self.test_files = {
|
self.test_files = {"python": os.path.abspath(test_py), "config": os.path.abspath(test_config)}
|
||||||
"python": os.path.abspath(test_py),
|
|
||||||
"config": os.path.abspath(test_config)
|
|
||||||
}
|
|
||||||
self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}")
|
self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}")
|
||||||
|
|
||||||
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
||||||
@@ -237,9 +234,9 @@ class Calculator:
|
|||||||
|
|
||||||
def create_additional_test_file(self, filename: str, content: str) -> str:
|
def create_additional_test_file(self, filename: str, content: str) -> str:
|
||||||
"""Create an additional test file for mixed scenario testing"""
|
"""Create an additional test file for mixed scenario testing"""
|
||||||
if not hasattr(self, 'test_dir') or not self.test_dir:
|
if not hasattr(self, "test_dir") or not self.test_dir:
|
||||||
raise RuntimeError("Test directory not initialized. Call setup_test_files() first.")
|
raise RuntimeError("Test directory not initialized. Call setup_test_files() first.")
|
||||||
|
|
||||||
file_path = os.path.join(self.test_dir, filename)
|
file_path = os.path.join(self.test_dir, filename)
|
||||||
with open(file_path, "w") as f:
|
with open(file_path, "w") as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ DATABASE_CONFIG = {
|
|||||||
validation_file = os.path.join(self.test_dir, "validation_config.py")
|
validation_file = os.path.join(self.test_dir, "validation_config.py")
|
||||||
with open(validation_file, "w") as f:
|
with open(validation_file, "w") as f:
|
||||||
f.write(validation_content)
|
f.write(validation_content)
|
||||||
|
|
||||||
# Ensure absolute path for MCP server compatibility
|
# Ensure absolute path for MCP server compatibility
|
||||||
validation_file = os.path.abspath(validation_file)
|
validation_file = os.path.abspath(validation_file)
|
||||||
|
|
||||||
@@ -113,11 +113,17 @@ DATABASE_CONFIG = {
|
|||||||
tools_to_test = [
|
tools_to_test = [
|
||||||
(
|
(
|
||||||
"chat",
|
"chat",
|
||||||
{"prompt": "Please use low thinking mode. Analyze this config file", "files": [validation_file]}, # Using absolute path
|
{
|
||||||
|
"prompt": "Please use low thinking mode. Analyze this config file",
|
||||||
|
"files": [validation_file],
|
||||||
|
}, # Using absolute path
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"codereview",
|
"codereview",
|
||||||
{"files": [validation_file], "context": "Please use low thinking mode. Review this configuration"}, # Using absolute path
|
{
|
||||||
|
"files": [validation_file],
|
||||||
|
"context": "Please use low thinking mode. Review this configuration",
|
||||||
|
}, # Using absolute path
|
||||||
),
|
),
|
||||||
("analyze", {"files": [validation_file], "analysis_type": "code_quality"}), # Using absolute path
|
("analyze", {"files": [validation_file], "analysis_type": "code_quality"}), # Using absolute path
|
||||||
]
|
]
|
||||||
|
|||||||
306
simulator_tests/test_cross_tool_comprehensive.py
Normal file
306
simulator_tests/test_cross_tool_comprehensive.py
Normal file
@@ -0,0 +1,306 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Comprehensive Cross-Tool Test
|
||||||
|
|
||||||
|
Tests file deduplication, conversation continuation, and file handling
|
||||||
|
across all available MCP tools using realistic workflows with low thinking mode.
|
||||||
|
Validates:
|
||||||
|
1. Cross-tool conversation continuation
|
||||||
|
2. File deduplication across different tools
|
||||||
|
3. Mixed file scenarios (old + new files)
|
||||||
|
4. Conversation history preservation
|
||||||
|
5. Proper tool chaining with context
|
||||||
|
"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
|
|
||||||
|
class CrossToolComprehensiveTest(BaseSimulatorTest):
|
||||||
|
"""Comprehensive test across all MCP tools"""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def test_name(self) -> str:
|
||||||
|
return "cross_tool_comprehensive"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def test_description(self) -> str:
|
||||||
|
return "Comprehensive cross-tool file deduplication and continuation"
|
||||||
|
|
||||||
|
def get_docker_logs_since(self, since_time: str) -> str:
|
||||||
|
"""Get docker logs since a specific timestamp"""
|
||||||
|
try:
|
||||||
|
# Check both main server and log monitor for comprehensive logs
|
||||||
|
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
||||||
|
cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"]
|
||||||
|
|
||||||
|
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
||||||
|
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
||||||
|
|
||||||
|
# Combine logs from both containers
|
||||||
|
combined_logs = result_server.stdout + "\n" + result_monitor.stdout
|
||||||
|
return combined_logs
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to get docker logs: {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def run_test(self) -> bool:
|
||||||
|
"""Comprehensive cross-tool test with all MCP tools"""
|
||||||
|
try:
|
||||||
|
self.logger.info("📄 Test: Comprehensive cross-tool file deduplication and continuation")
|
||||||
|
|
||||||
|
# Setup test files
|
||||||
|
self.setup_test_files()
|
||||||
|
|
||||||
|
# Create short test files for quick testing
|
||||||
|
python_code = '''def login(user, pwd):
|
||||||
|
# Security issue: plain text password
|
||||||
|
if user == "admin" and pwd == "123":
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def hash_pwd(pwd):
|
||||||
|
# Weak hashing
|
||||||
|
return str(hash(pwd))
|
||||||
|
'''
|
||||||
|
|
||||||
|
config_file = """{
|
||||||
|
"db_password": "weak123",
|
||||||
|
"debug": true,
|
||||||
|
"secret_key": "test"
|
||||||
|
}"""
|
||||||
|
|
||||||
|
auth_file = self.create_additional_test_file("auth.py", python_code)
|
||||||
|
config_file_path = self.create_additional_test_file("config.json", config_file)
|
||||||
|
|
||||||
|
# Get timestamp for log filtering
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
|
||||||
|
|
||||||
|
# Tool chain: chat → analyze → debug → codereview → precommit
|
||||||
|
# Each step builds on the previous with cross-tool continuation
|
||||||
|
|
||||||
|
current_continuation_id = None
|
||||||
|
responses = []
|
||||||
|
|
||||||
|
# Step 1: Start with chat tool to understand the codebase
|
||||||
|
self.logger.info(" Step 1: chat tool - Initial codebase exploration")
|
||||||
|
chat_params = {
|
||||||
|
"prompt": "Please give me a quick one line reply. I have an authentication module that needs review. Can you help me understand potential issues?",
|
||||||
|
"files": [auth_file],
|
||||||
|
"thinking_mode": "low"
|
||||||
|
}
|
||||||
|
|
||||||
|
response1, continuation_id1 = self.call_mcp_tool("chat", chat_params)
|
||||||
|
if not response1 or not continuation_id1:
|
||||||
|
self.logger.error(" ❌ Step 1: chat tool failed")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info(f" ✅ Step 1: chat completed with continuation_id: {continuation_id1[:8]}...")
|
||||||
|
responses.append(("chat", response1, continuation_id1))
|
||||||
|
current_continuation_id = continuation_id1
|
||||||
|
|
||||||
|
# Step 2: Use analyze tool to do deeper analysis (fresh conversation)
|
||||||
|
self.logger.info(" Step 2: analyze tool - Deep code analysis (fresh)")
|
||||||
|
analyze_params = {
|
||||||
|
"files": [auth_file],
|
||||||
|
"question": "Please give me a quick one line reply. What are the security vulnerabilities and architectural issues in this authentication code?",
|
||||||
|
"thinking_mode": "low"
|
||||||
|
}
|
||||||
|
|
||||||
|
response2, continuation_id2 = self.call_mcp_tool("analyze", analyze_params)
|
||||||
|
if not response2:
|
||||||
|
self.logger.error(" ❌ Step 2: analyze tool failed")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f" ✅ Step 2: analyze completed with continuation_id: {continuation_id2[:8] if continuation_id2 else 'None'}..."
|
||||||
|
)
|
||||||
|
responses.append(("analyze", response2, continuation_id2))
|
||||||
|
|
||||||
|
# Step 3: Continue chat conversation with config file
|
||||||
|
self.logger.info(" Step 3: chat continuation - Add config file context")
|
||||||
|
chat_continue_params = {
|
||||||
|
"continuation_id": current_continuation_id,
|
||||||
|
"prompt": "Please give me a quick one line reply. I also have this configuration file. Can you analyze it alongside the authentication code?",
|
||||||
|
"files": [auth_file, config_file_path], # Old + new file
|
||||||
|
"thinking_mode": "low"
|
||||||
|
}
|
||||||
|
|
||||||
|
response3, _ = self.call_mcp_tool("chat", chat_continue_params)
|
||||||
|
if not response3:
|
||||||
|
self.logger.error(" ❌ Step 3: chat continuation failed")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info(" ✅ Step 3: chat continuation completed")
|
||||||
|
responses.append(("chat_continue", response3, current_continuation_id))
|
||||||
|
|
||||||
|
# Step 4: Use debug tool to identify specific issues
|
||||||
|
self.logger.info(" Step 4: debug tool - Identify specific problems")
|
||||||
|
debug_params = {
|
||||||
|
"files": [auth_file, config_file_path],
|
||||||
|
"error_description": "Please give me a quick one line reply. The authentication system has security vulnerabilities. Help me identify and fix the main issues.",
|
||||||
|
"thinking_mode": "low"
|
||||||
|
}
|
||||||
|
|
||||||
|
response4, continuation_id4 = self.call_mcp_tool("debug", debug_params)
|
||||||
|
if not response4:
|
||||||
|
self.logger.error(" ❌ Step 4: debug tool failed")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f" ✅ Step 4: debug completed with continuation_id: {continuation_id4[:8] if continuation_id4 else 'None'}..."
|
||||||
|
)
|
||||||
|
responses.append(("debug", response4, continuation_id4))
|
||||||
|
|
||||||
|
# Step 5: Cross-tool continuation - continue debug with chat context
|
||||||
|
if continuation_id4:
|
||||||
|
self.logger.info(" Step 5: debug continuation - Additional analysis")
|
||||||
|
debug_continue_params = {
|
||||||
|
"continuation_id": continuation_id4,
|
||||||
|
"files": [auth_file, config_file_path],
|
||||||
|
"error_description": "Please give me a quick one line reply. What specific code changes would you recommend to fix the password hashing vulnerability?",
|
||||||
|
"thinking_mode": "low"
|
||||||
|
}
|
||||||
|
|
||||||
|
response5, _ = self.call_mcp_tool("debug", debug_continue_params)
|
||||||
|
if response5:
|
||||||
|
self.logger.info(" ✅ Step 5: debug continuation completed")
|
||||||
|
responses.append(("debug_continue", response5, continuation_id4))
|
||||||
|
|
||||||
|
# Step 6: Use codereview for comprehensive review
|
||||||
|
self.logger.info(" Step 6: codereview tool - Comprehensive code review")
|
||||||
|
codereview_params = {
|
||||||
|
"files": [auth_file, config_file_path],
|
||||||
|
"context": "Please give me a quick one line reply. Comprehensive security-focused code review for production readiness",
|
||||||
|
"thinking_mode": "low"
|
||||||
|
}
|
||||||
|
|
||||||
|
response6, continuation_id6 = self.call_mcp_tool("codereview", codereview_params)
|
||||||
|
if not response6:
|
||||||
|
self.logger.error(" ❌ Step 6: codereview tool failed")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f" ✅ Step 6: codereview completed with continuation_id: {continuation_id6[:8] if continuation_id6 else 'None'}..."
|
||||||
|
)
|
||||||
|
responses.append(("codereview", response6, continuation_id6))
|
||||||
|
|
||||||
|
# Step 7: Create improved version and use precommit
|
||||||
|
self.logger.info(" Step 7: precommit tool - Pre-commit validation")
|
||||||
|
|
||||||
|
# Create a short improved version
|
||||||
|
improved_code = '''import hashlib
|
||||||
|
|
||||||
|
def secure_login(user, pwd):
|
||||||
|
# Better: hashed password check
|
||||||
|
hashed = hashlib.sha256(pwd.encode()).hexdigest()
|
||||||
|
if user == "admin" and hashed == "expected_hash":
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
'''
|
||||||
|
|
||||||
|
improved_file = self.create_additional_test_file("auth_improved.py", improved_code)
|
||||||
|
|
||||||
|
precommit_params = {
|
||||||
|
"path": self.test_dir,
|
||||||
|
"files": [auth_file, config_file_path, improved_file],
|
||||||
|
"original_request": "Please give me a quick one line reply. Ready to commit security improvements to authentication module",
|
||||||
|
"thinking_mode": "low",
|
||||||
|
}
|
||||||
|
|
||||||
|
response7, continuation_id7 = self.call_mcp_tool("precommit", precommit_params)
|
||||||
|
if not response7:
|
||||||
|
self.logger.error(" ❌ Step 7: precommit tool failed")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f" ✅ Step 7: precommit completed with continuation_id: {continuation_id7[:8] if continuation_id7 else 'None'}..."
|
||||||
|
)
|
||||||
|
responses.append(("precommit", response7, continuation_id7))
|
||||||
|
|
||||||
|
# Validate comprehensive results
|
||||||
|
self.logger.info(" 📋 Validating comprehensive cross-tool results...")
|
||||||
|
logs = self.get_docker_logs_since(start_time)
|
||||||
|
|
||||||
|
# Validation criteria
|
||||||
|
tools_used = [r[0] for r in responses]
|
||||||
|
continuation_ids_created = [r[2] for r in responses if r[2]]
|
||||||
|
|
||||||
|
# Check for various log patterns
|
||||||
|
conversation_logs = [
|
||||||
|
line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower()
|
||||||
|
]
|
||||||
|
embedding_logs = [
|
||||||
|
line
|
||||||
|
for line in logs.split("\n")
|
||||||
|
if "📁" in line or "embedding" in line.lower() or "file" in line.lower()
|
||||||
|
]
|
||||||
|
continuation_logs = [
|
||||||
|
line for line in logs.split("\n") if "continuation" in line.lower() or "resuming" in line.lower()
|
||||||
|
]
|
||||||
|
cross_tool_logs = [
|
||||||
|
line
|
||||||
|
for line in logs.split("\n")
|
||||||
|
if any(tool in line.lower() for tool in ["chat", "analyze", "debug", "codereview", "precommit"])
|
||||||
|
]
|
||||||
|
|
||||||
|
# File mentions
|
||||||
|
auth_file_mentioned = any("auth.py" in line for line in logs.split("\n"))
|
||||||
|
config_file_mentioned = any("config.json" in line for line in logs.split("\n"))
|
||||||
|
improved_file_mentioned = any("auth_improved.py" in line for line in logs.split("\n"))
|
||||||
|
|
||||||
|
# Print comprehensive diagnostics
|
||||||
|
self.logger.info(f" 📊 Tools used: {len(tools_used)} ({', '.join(tools_used)})")
|
||||||
|
self.logger.info(f" 📊 Continuation IDs created: {len(continuation_ids_created)}")
|
||||||
|
self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}")
|
||||||
|
self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}")
|
||||||
|
self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}")
|
||||||
|
self.logger.info(f" 📊 Cross-tool activity logs: {len(cross_tool_logs)}")
|
||||||
|
self.logger.info(f" 📊 Auth file mentioned: {auth_file_mentioned}")
|
||||||
|
self.logger.info(f" 📊 Config file mentioned: {config_file_mentioned}")
|
||||||
|
self.logger.info(f" 📊 Improved file mentioned: {improved_file_mentioned}")
|
||||||
|
|
||||||
|
if self.verbose:
|
||||||
|
self.logger.debug(" 📋 Sample tool activity logs:")
|
||||||
|
for log in cross_tool_logs[:10]: # Show first 10
|
||||||
|
if log.strip():
|
||||||
|
self.logger.debug(f" {log.strip()}")
|
||||||
|
|
||||||
|
self.logger.debug(" 📋 Sample continuation logs:")
|
||||||
|
for log in continuation_logs[:5]: # Show first 5
|
||||||
|
if log.strip():
|
||||||
|
self.logger.debug(f" {log.strip()}")
|
||||||
|
|
||||||
|
# Comprehensive success criteria
|
||||||
|
success_criteria = [
|
||||||
|
len(tools_used) >= 5, # Used multiple tools
|
||||||
|
len(continuation_ids_created) >= 3, # Created multiple continuation threads
|
||||||
|
len(embedding_logs) > 10, # Significant file embedding activity
|
||||||
|
len(continuation_logs) > 0, # Evidence of continuation
|
||||||
|
auth_file_mentioned, # Original file processed
|
||||||
|
config_file_mentioned, # Additional file processed
|
||||||
|
improved_file_mentioned, # New file processed
|
||||||
|
len(conversation_logs) > 5, # Conversation history activity
|
||||||
|
]
|
||||||
|
|
||||||
|
passed_criteria = sum(success_criteria)
|
||||||
|
total_criteria = len(success_criteria)
|
||||||
|
|
||||||
|
self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}")
|
||||||
|
|
||||||
|
if passed_criteria >= 6: # At least 6 out of 8 criteria
|
||||||
|
self.logger.info(" ✅ Comprehensive cross-tool test: PASSED")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self.logger.warning(" ⚠️ Comprehensive cross-tool test: FAILED")
|
||||||
|
self.logger.warning(" 💡 Check logs for detailed cross-tool activity")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Comprehensive cross-tool test failed: {e}")
|
||||||
|
return False
|
||||||
|
finally:
|
||||||
|
self.cleanup_test_files()
|
||||||
@@ -11,10 +11,8 @@ Validates that:
|
|||||||
4. Docker logs show deduplication behavior
|
4. Docker logs show deduplication behavior
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
|
|
||||||
@@ -35,10 +33,10 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
# Check both main server and log monitor for comprehensive logs
|
# Check both main server and log monitor for comprehensive logs
|
||||||
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
||||||
cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"]
|
cmd_monitor = ["docker", "logs", "--since", since_time, "gemini-mcp-log-monitor"]
|
||||||
|
|
||||||
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
||||||
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
||||||
|
|
||||||
# Combine logs from both containers
|
# Combine logs from both containers
|
||||||
combined_logs = result_server.stdout + "\n" + result_monitor.stdout
|
combined_logs = result_server.stdout + "\n" + result_monitor.stdout
|
||||||
return combined_logs
|
return combined_logs
|
||||||
@@ -51,14 +49,20 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
|
def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
|
||||||
"""Validate that logs show file deduplication behavior"""
|
"""Validate that logs show file deduplication behavior"""
|
||||||
# Look for file embedding messages
|
# Look for file embedding messages
|
||||||
embedding_messages = [line for line in logs.split('\n') if '📁' in line and 'embedding' in line and tool_name in line]
|
embedding_messages = [
|
||||||
|
line for line in logs.split("\n") if "📁" in line and "embedding" in line and tool_name in line
|
||||||
# Look for deduplication/filtering messages
|
]
|
||||||
filtering_messages = [line for line in logs.split('\n') if '📁' in line and 'Filtering' in line and tool_name in line]
|
|
||||||
skipping_messages = [line for line in logs.split('\n') if '📁' in line and 'skipping' in line and tool_name in line]
|
# Look for deduplication/filtering messages
|
||||||
|
filtering_messages = [
|
||||||
|
line for line in logs.split("\n") if "📁" in line and "Filtering" in line and tool_name in line
|
||||||
|
]
|
||||||
|
skipping_messages = [
|
||||||
|
line for line in logs.split("\n") if "📁" in line and "skipping" in line and tool_name in line
|
||||||
|
]
|
||||||
|
|
||||||
deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0
|
deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0
|
||||||
|
|
||||||
if deduplication_found:
|
if deduplication_found:
|
||||||
self.logger.info(f" ✅ {tool_name}: Found deduplication evidence in logs")
|
self.logger.info(f" ✅ {tool_name}: Found deduplication evidence in logs")
|
||||||
for msg in filtering_messages + skipping_messages:
|
for msg in filtering_messages + skipping_messages:
|
||||||
@@ -66,7 +70,7 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
else:
|
else:
|
||||||
self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs")
|
self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs")
|
||||||
self.logger.debug(f" 📁 All embedding messages: {embedding_messages}")
|
self.logger.debug(f" 📁 All embedding messages: {embedding_messages}")
|
||||||
|
|
||||||
return deduplication_found
|
return deduplication_found
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
@@ -76,21 +80,19 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
|
|
||||||
# Setup test files
|
# Setup test files
|
||||||
self.setup_test_files()
|
self.setup_test_files()
|
||||||
|
|
||||||
# Create a dummy file for precommit testing
|
|
||||||
dummy_content = '''def hello_world():
|
|
||||||
"""A simple hello world function with a bug"""
|
|
||||||
print("Hello world!")
|
|
||||||
return "hello"
|
|
||||||
|
|
||||||
# TODO: Fix the inconsistent return type
|
# Create a short dummy file for quick testing
|
||||||
def calculate_sum(a, b):
|
dummy_content = '''def add(a, b):
|
||||||
return a + b # Missing type hints
|
return a + b # Missing type hints
|
||||||
|
|
||||||
|
def divide(x, y):
|
||||||
|
return x / y # No zero check
|
||||||
'''
|
'''
|
||||||
dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content)
|
dummy_file_path = self.create_additional_test_file("dummy_code.py", dummy_content)
|
||||||
|
|
||||||
# Get timestamp for log filtering
|
# Get timestamp for log filtering
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
|
start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
|
||||||
|
|
||||||
# Step 1: precommit tool with dummy file (low thinking mode)
|
# Step 1: precommit tool with dummy file (low thinking mode)
|
||||||
@@ -98,98 +100,105 @@ def calculate_sum(a, b):
|
|||||||
precommit_params = {
|
precommit_params = {
|
||||||
"path": self.test_dir, # Required path parameter
|
"path": self.test_dir, # Required path parameter
|
||||||
"files": [dummy_file_path],
|
"files": [dummy_file_path],
|
||||||
"original_request": "Please use low thinking mode. Review this code for commit readiness",
|
"original_request": "Please give me a quick one line reply. Review this code for commit readiness",
|
||||||
"thinking_mode": "low"
|
"thinking_mode": "low",
|
||||||
}
|
}
|
||||||
|
|
||||||
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
|
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
|
||||||
if not response1:
|
if not response1:
|
||||||
self.logger.error(" ❌ Step 1: precommit tool failed")
|
self.logger.error(" ❌ Step 1: precommit tool failed")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not continuation_id:
|
if not continuation_id:
|
||||||
self.logger.error(" ❌ Step 1: precommit tool didn't provide continuation_id")
|
self.logger.error(" ❌ Step 1: precommit tool didn't provide continuation_id")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Validate continuation_id format (should be UUID)
|
||||||
|
if len(continuation_id) < 32:
|
||||||
|
self.logger.error(f" ❌ Step 1: Invalid continuation_id format: {continuation_id}")
|
||||||
|
return False
|
||||||
|
|
||||||
self.logger.info(f" ✅ Step 1: precommit completed with continuation_id: {continuation_id[:8]}...")
|
self.logger.info(f" ✅ Step 1: precommit completed with continuation_id: {continuation_id[:8]}...")
|
||||||
|
|
||||||
# Step 2: codereview tool with same file (NO continuation - fresh conversation)
|
# Step 2: codereview tool with same file (NO continuation - fresh conversation)
|
||||||
self.logger.info(" Step 2: codereview tool with same file (fresh conversation)")
|
self.logger.info(" Step 2: codereview tool with same file (fresh conversation)")
|
||||||
codereview_params = {
|
codereview_params = {
|
||||||
"files": [dummy_file_path],
|
"files": [dummy_file_path],
|
||||||
"context": "Please use low thinking mode. General code review for quality and best practices"
|
"context": "Please give me a quick one line reply. General code review for quality and best practices",
|
||||||
|
"thinking_mode": "low"
|
||||||
}
|
}
|
||||||
|
|
||||||
response2, _ = self.call_mcp_tool("codereview", codereview_params)
|
response2, _ = self.call_mcp_tool("codereview", codereview_params)
|
||||||
if not response2:
|
if not response2:
|
||||||
self.logger.error(" ❌ Step 2: codereview tool failed")
|
self.logger.error(" ❌ Step 2: codereview tool failed")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info(" ✅ Step 2: codereview completed (fresh conversation)")
|
self.logger.info(" ✅ Step 2: codereview completed (fresh conversation)")
|
||||||
|
|
||||||
# Step 3: Create new file and continue with precommit
|
# Step 3: Create new file and continue with precommit
|
||||||
self.logger.info(" Step 3: precommit continuation with old + new file")
|
self.logger.info(" Step 3: precommit continuation with old + new file")
|
||||||
new_file_content = '''def new_feature():
|
new_file_content = '''def multiply(x, y):
|
||||||
"""A new feature function"""
|
return x * y
|
||||||
return {"status": "implemented", "version": "1.0"}
|
|
||||||
|
|
||||||
class NewUtility:
|
def subtract(a, b):
|
||||||
"""A new utility class"""
|
return a - b
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.initialized = True
|
|
||||||
|
|
||||||
def process_data(self, data):
|
|
||||||
return f"Processed: {data}"
|
|
||||||
'''
|
'''
|
||||||
new_file_path = self.create_additional_test_file("new_feature.py", new_file_content)
|
new_file_path = self.create_additional_test_file("new_feature.py", new_file_content)
|
||||||
|
|
||||||
# Continue precommit with both files
|
# Continue precommit with both files
|
||||||
continue_params = {
|
continue_params = {
|
||||||
"continuation_id": continuation_id,
|
"continuation_id": continuation_id,
|
||||||
"path": self.test_dir, # Required path parameter
|
"path": self.test_dir, # Required path parameter
|
||||||
"files": [dummy_file_path, new_file_path], # Old + new file
|
"files": [dummy_file_path, new_file_path], # Old + new file
|
||||||
"original_request": "Please use low thinking mode. Now also review the new feature file along with the previous one",
|
"original_request": "Please give me a quick one line reply. Now also review the new feature file along with the previous one",
|
||||||
"thinking_mode": "low"
|
"thinking_mode": "low",
|
||||||
}
|
}
|
||||||
|
|
||||||
response3, _ = self.call_mcp_tool("precommit", continue_params)
|
response3, _ = self.call_mcp_tool("precommit", continue_params)
|
||||||
if not response3:
|
if not response3:
|
||||||
self.logger.error(" ❌ Step 3: precommit continuation failed")
|
self.logger.error(" ❌ Step 3: precommit continuation failed")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info(" ✅ Step 3: precommit continuation completed")
|
self.logger.info(" ✅ Step 3: precommit continuation completed")
|
||||||
|
|
||||||
# Validate results in docker logs
|
# Validate results in docker logs
|
||||||
self.logger.info(" 📋 Validating conversation history and file deduplication...")
|
self.logger.info(" 📋 Validating conversation history and file deduplication...")
|
||||||
logs = self.get_docker_logs_since(start_time)
|
logs = self.get_docker_logs_since(start_time)
|
||||||
|
|
||||||
# Check for conversation history building
|
# Check for conversation history building
|
||||||
conversation_logs = [line for line in logs.split('\n') if 'conversation' in line.lower() or 'history' in line.lower()]
|
conversation_logs = [
|
||||||
|
line for line in logs.split("\n") if "conversation" in line.lower() or "history" in line.lower()
|
||||||
|
]
|
||||||
|
|
||||||
# Check for file embedding/deduplication
|
# Check for file embedding/deduplication
|
||||||
embedding_logs = [line for line in logs.split('\n') if '📁' in line or 'embedding' in line.lower() or 'file' in line.lower()]
|
embedding_logs = [
|
||||||
|
line
|
||||||
|
for line in logs.split("\n")
|
||||||
|
if "📁" in line or "embedding" in line.lower() or "file" in line.lower()
|
||||||
|
]
|
||||||
|
|
||||||
# Check for continuation evidence
|
# Check for continuation evidence
|
||||||
continuation_logs = [line for line in logs.split('\n') if 'continuation' in line.lower() or continuation_id[:8] in line]
|
continuation_logs = [
|
||||||
|
line for line in logs.split("\n") if "continuation" in line.lower() or continuation_id[:8] in line
|
||||||
|
]
|
||||||
|
|
||||||
# Check for both files mentioned
|
# Check for both files mentioned
|
||||||
dummy_file_mentioned = any("dummy_code.py" in line for line in logs.split('\n'))
|
dummy_file_mentioned = any("dummy_code.py" in line for line in logs.split("\n"))
|
||||||
new_file_mentioned = any("new_feature.py" in line for line in logs.split('\n'))
|
new_file_mentioned = any("new_feature.py" in line for line in logs.split("\n"))
|
||||||
|
|
||||||
# Print diagnostic information
|
# Print diagnostic information
|
||||||
self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}")
|
self.logger.info(f" 📊 Conversation logs found: {len(conversation_logs)}")
|
||||||
self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}")
|
self.logger.info(f" 📊 File embedding logs found: {len(embedding_logs)}")
|
||||||
self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}")
|
self.logger.info(f" 📊 Continuation logs found: {len(continuation_logs)}")
|
||||||
self.logger.info(f" 📊 Dummy file mentioned: {dummy_file_mentioned}")
|
self.logger.info(f" 📊 Dummy file mentioned: {dummy_file_mentioned}")
|
||||||
self.logger.info(f" 📊 New file mentioned: {new_file_mentioned}")
|
self.logger.info(f" 📊 New file mentioned: {new_file_mentioned}")
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.logger.debug(" 📋 Sample embedding logs:")
|
self.logger.debug(" 📋 Sample embedding logs:")
|
||||||
for log in embedding_logs[:5]: # Show first 5
|
for log in embedding_logs[:5]: # Show first 5
|
||||||
if log.strip():
|
if log.strip():
|
||||||
self.logger.debug(f" {log.strip()}")
|
self.logger.debug(f" {log.strip()}")
|
||||||
|
|
||||||
self.logger.debug(" 📋 Sample continuation logs:")
|
self.logger.debug(" 📋 Sample continuation logs:")
|
||||||
for log in continuation_logs[:3]: # Show first 3
|
for log in continuation_logs[:3]: # Show first 3
|
||||||
if log.strip():
|
if log.strip():
|
||||||
@@ -200,14 +209,14 @@ class NewUtility:
|
|||||||
len(embedding_logs) > 0, # File embedding occurred
|
len(embedding_logs) > 0, # File embedding occurred
|
||||||
len(continuation_logs) > 0, # Continuation worked
|
len(continuation_logs) > 0, # Continuation worked
|
||||||
dummy_file_mentioned, # Original file processed
|
dummy_file_mentioned, # Original file processed
|
||||||
new_file_mentioned # New file processed
|
new_file_mentioned, # New file processed
|
||||||
]
|
]
|
||||||
|
|
||||||
passed_criteria = sum(success_criteria)
|
passed_criteria = sum(success_criteria)
|
||||||
total_criteria = len(success_criteria)
|
total_criteria = len(success_criteria)
|
||||||
|
|
||||||
self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}")
|
self.logger.info(f" 📊 Success criteria met: {passed_criteria}/{total_criteria}")
|
||||||
|
|
||||||
if passed_criteria >= 3: # At least 3 out of 4 criteria
|
if passed_criteria >= 3: # At least 3 out of 4 criteria
|
||||||
self.logger.info(" ✅ File deduplication workflow test: PASSED")
|
self.logger.info(" ✅ File deduplication workflow test: PASSED")
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -210,7 +210,7 @@ class BaseTool(ABC):
|
|||||||
list[str]: List of files that need to be embedded (not already in history)
|
list[str]: List of files that need to be embedded (not already in history)
|
||||||
"""
|
"""
|
||||||
logger.debug(f"[FILES] {self.name}: Filtering {len(requested_files)} requested files")
|
logger.debug(f"[FILES] {self.name}: Filtering {len(requested_files)} requested files")
|
||||||
|
|
||||||
if not continuation_id:
|
if not continuation_id:
|
||||||
# New conversation, all files are new
|
# New conversation, all files are new
|
||||||
logger.debug(f"[FILES] {self.name}: New conversation, all {len(requested_files)} files are new")
|
logger.debug(f"[FILES] {self.name}: New conversation, all {len(requested_files)} files are new")
|
||||||
@@ -226,12 +226,16 @@ class BaseTool(ABC):
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}"
|
f"📁 {self.name} tool: No files found in conversation history for thread {continuation_id}"
|
||||||
)
|
)
|
||||||
logger.debug(f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files")
|
logger.debug(
|
||||||
|
f"[FILES] {self.name}: No embedded files found, returning all {len(requested_files)} requested files"
|
||||||
|
)
|
||||||
return requested_files
|
return requested_files
|
||||||
|
|
||||||
# Return only files that haven't been embedded yet
|
# Return only files that haven't been embedded yet
|
||||||
new_files = [f for f in requested_files if f not in embedded_files]
|
new_files = [f for f in requested_files if f not in embedded_files]
|
||||||
logger.debug(f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded")
|
logger.debug(
|
||||||
|
f"[FILES] {self.name}: After filtering: {len(new_files)} new files, {len(requested_files) - len(new_files)} already embedded"
|
||||||
|
)
|
||||||
logger.debug(f"[FILES] {self.name}: New files to embed: {new_files}")
|
logger.debug(f"[FILES] {self.name}: New files to embed: {new_files}")
|
||||||
|
|
||||||
# Log filtering results for debugging
|
# Log filtering results for debugging
|
||||||
@@ -249,7 +253,9 @@ class BaseTool(ABC):
|
|||||||
# and include all files rather than risk losing access to needed files
|
# and include all files rather than risk losing access to needed files
|
||||||
logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}")
|
logger.warning(f"📁 {self.name} tool: Error checking conversation history for {continuation_id}: {e}")
|
||||||
logger.warning(f"📁 {self.name} tool: Including all requested files as fallback")
|
logger.warning(f"📁 {self.name} tool: Including all requested files as fallback")
|
||||||
logger.debug(f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback")
|
logger.debug(
|
||||||
|
f"[FILES] {self.name}: Exception in filter_new_files, returning all {len(requested_files)} files as fallback"
|
||||||
|
)
|
||||||
return requested_files
|
return requested_files
|
||||||
|
|
||||||
def _prepare_file_content_for_prompt(
|
def _prepare_file_content_for_prompt(
|
||||||
@@ -312,7 +318,9 @@ class BaseTool(ABC):
|
|||||||
# Read content of new files only
|
# Read content of new files only
|
||||||
if files_to_embed:
|
if files_to_embed:
|
||||||
logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
|
logger.debug(f"📁 {self.name} tool embedding {len(files_to_embed)} new files: {', '.join(files_to_embed)}")
|
||||||
logger.debug(f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}")
|
logger.debug(
|
||||||
|
f"[FILES] {self.name}: Starting file embedding with token budget {effective_max_tokens + reserve_tokens:,}"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
file_content = read_files(
|
file_content = read_files(
|
||||||
files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens
|
files_to_embed, max_tokens=effective_max_tokens + reserve_tokens, reserve_tokens=reserve_tokens
|
||||||
@@ -662,7 +670,7 @@ If any of these would strengthen your analysis, specify what Claude should searc
|
|||||||
# Return error information in standardized format
|
# Return error information in standardized format
|
||||||
logger = logging.getLogger(f"tools.{self.name}")
|
logger = logging.getLogger(f"tools.{self.name}")
|
||||||
error_msg = str(e)
|
error_msg = str(e)
|
||||||
|
|
||||||
# Check if this is a 500 INTERNAL error that asks for retry
|
# Check if this is a 500 INTERNAL error that asks for retry
|
||||||
if "500 INTERNAL" in error_msg and "Please retry" in error_msg:
|
if "500 INTERNAL" in error_msg and "Please retry" in error_msg:
|
||||||
logger.warning(f"500 INTERNAL error in {self.name} - attempting retry")
|
logger.warning(f"500 INTERNAL error in {self.name} - attempting retry")
|
||||||
@@ -671,16 +679,16 @@ If any of these would strengthen your analysis, specify what Claude should searc
|
|||||||
model = self._get_model_wrapper(request)
|
model = self._get_model_wrapper(request)
|
||||||
raw_response = await model.generate_content(prompt)
|
raw_response = await model.generate_content(prompt)
|
||||||
response = raw_response.text
|
response = raw_response.text
|
||||||
|
|
||||||
# If successful, process normally
|
# If successful, process normally
|
||||||
return [TextContent(type="text", text=self._process_response(response, request).model_dump_json())]
|
return [TextContent(type="text", text=self._process_response(response, request).model_dump_json())]
|
||||||
|
|
||||||
except Exception as retry_e:
|
except Exception as retry_e:
|
||||||
logger.error(f"Retry failed for {self.name} tool: {str(retry_e)}")
|
logger.error(f"Retry failed for {self.name} tool: {str(retry_e)}")
|
||||||
error_msg = f"Tool failed after retry: {str(retry_e)}"
|
error_msg = f"Tool failed after retry: {str(retry_e)}"
|
||||||
|
|
||||||
logger.error(f"Error in {self.name} tool execution: {error_msg}", exc_info=True)
|
logger.error(f"Error in {self.name} tool execution: {error_msg}", exc_info=True)
|
||||||
|
|
||||||
error_output = ToolOutput(
|
error_output = ToolOutput(
|
||||||
status="error",
|
status="error",
|
||||||
content=f"Error in {self.name}: {error_msg}",
|
content=f"Error in {self.name}: {error_msg}",
|
||||||
@@ -911,11 +919,12 @@ If any of these would strengthen your analysis, specify what Claude should searc
|
|||||||
Dict with continuation data if opportunity should be offered, None otherwise
|
Dict with continuation data if opportunity should be offered, None otherwise
|
||||||
"""
|
"""
|
||||||
continuation_id = getattr(request, "continuation_id", None)
|
continuation_id = getattr(request, "continuation_id", None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if continuation_id:
|
if continuation_id:
|
||||||
# Check remaining turns in existing thread
|
# Check remaining turns in existing thread
|
||||||
from utils.conversation_memory import get_thread
|
from utils.conversation_memory import get_thread
|
||||||
|
|
||||||
context = get_thread(continuation_id)
|
context = get_thread(continuation_id)
|
||||||
if context:
|
if context:
|
||||||
current_turns = len(context.turns)
|
current_turns = len(context.turns)
|
||||||
|
|||||||
@@ -251,7 +251,7 @@ def add_turn(
|
|||||||
- File references are preserved for cross-tool access
|
- File references are preserved for cross-tool access
|
||||||
"""
|
"""
|
||||||
logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
|
logger.debug(f"[FLOW] Adding {role} turn to {thread_id} ({tool_name})")
|
||||||
|
|
||||||
context = get_thread(thread_id)
|
context = get_thread(thread_id)
|
||||||
if not context:
|
if not context:
|
||||||
logger.debug(f"[FLOW] Thread {thread_id} not found for turn addition")
|
logger.debug(f"[FLOW] Thread {thread_id} not found for turn addition")
|
||||||
@@ -301,13 +301,13 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
|
|||||||
list[str]: Deduplicated list of file paths referenced in the conversation
|
list[str]: Deduplicated list of file paths referenced in the conversation
|
||||||
"""
|
"""
|
||||||
if not context.turns:
|
if not context.turns:
|
||||||
logger.debug(f"[FILES] No turns found, returning empty file list")
|
logger.debug("[FILES] No turns found, returning empty file list")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Collect all unique files from all turns, preserving order of first appearance
|
# Collect all unique files from all turns, preserving order of first appearance
|
||||||
seen_files = set()
|
seen_files = set()
|
||||||
unique_files = []
|
unique_files = []
|
||||||
|
|
||||||
logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns")
|
logger.debug(f"[FILES] Collecting files from {len(context.turns)} turns")
|
||||||
|
|
||||||
for i, turn in enumerate(context.turns):
|
for i, turn in enumerate(context.turns):
|
||||||
@@ -322,7 +322,7 @@ def get_conversation_file_list(context: ThreadContext) -> list[str]:
|
|||||||
logger.debug(f"[FILES] Duplicate file skipped: {file_path}")
|
logger.debug(f"[FILES] Duplicate file skipped: {file_path}")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"[FILES] Turn {i+1} has no files")
|
logger.debug(f"[FILES] Turn {i+1} has no files")
|
||||||
|
|
||||||
logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}")
|
logger.debug(f"[FILES] Final unique file list ({len(unique_files)}): {unique_files}")
|
||||||
return unique_files
|
return unique_files
|
||||||
|
|
||||||
@@ -409,13 +409,17 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
f"📄 File embedded in conversation history: {file_path} ({content_tokens:,} tokens)"
|
f"📄 File embedded in conversation history: {file_path} ({content_tokens:,} tokens)"
|
||||||
)
|
)
|
||||||
logger.debug(f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})")
|
logger.debug(
|
||||||
|
f"[FILES] Successfully embedded {file_path} - {content_tokens:,} tokens (total: {total_tokens:,})"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
files_truncated += 1
|
files_truncated += 1
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)"
|
f"📄 File truncated due to token limit: {file_path} ({content_tokens:,} tokens, would exceed {MAX_CONTENT_TOKENS:,} limit)"
|
||||||
)
|
)
|
||||||
logger.debug(f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)")
|
logger.debug(
|
||||||
|
f"[FILES] File {file_path} would exceed token limit - skipping (would be {total_tokens + content_tokens:,} tokens)"
|
||||||
|
)
|
||||||
# Stop processing more files
|
# Stop processing more files
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
@@ -439,7 +443,9 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
f"📄 Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens"
|
f"📄 Conversation history file embedding complete: {files_included} files embedded, {files_truncated} truncated, {total_tokens:,} total tokens"
|
||||||
)
|
)
|
||||||
logger.debug(f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total")
|
logger.debug(
|
||||||
|
f"[FILES] File embedding summary - {files_included} embedded, {files_truncated} truncated, {total_tokens:,} tokens total"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
history_parts.append("(No accessible files found)")
|
history_parts.append("(No accessible files found)")
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -505,11 +511,13 @@ def build_conversation_history(context: ThreadContext, read_files_func=None) ->
|
|||||||
from utils.token_utils import estimate_tokens
|
from utils.token_utils import estimate_tokens
|
||||||
|
|
||||||
total_conversation_tokens = estimate_tokens(complete_history)
|
total_conversation_tokens = estimate_tokens(complete_history)
|
||||||
|
|
||||||
# Summary log of what was built
|
# Summary log of what was built
|
||||||
user_turns = len([t for t in context.turns if t.role == "user"])
|
user_turns = len([t for t in context.turns if t.role == "user"])
|
||||||
assistant_turns = len([t for t in context.turns if t.role == "assistant"])
|
assistant_turns = len([t for t in context.turns if t.role == "assistant"])
|
||||||
logger.debug(f"[FLOW] Built conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens")
|
logger.debug(
|
||||||
|
f"[FLOW] Built conversation history: {user_turns} user + {assistant_turns} assistant turns, {len(all_files)} files, {total_conversation_tokens:,} tokens"
|
||||||
|
)
|
||||||
|
|
||||||
return complete_history, total_conversation_tokens
|
return complete_history, total_conversation_tokens
|
||||||
|
|
||||||
|
|||||||
@@ -470,7 +470,7 @@ def read_file_content(file_path: str, max_size: int = 1_000_000) -> tuple[str, i
|
|||||||
logger.debug(f"[FILES] Reading file content for {file_path}")
|
logger.debug(f"[FILES] Reading file content for {file_path}")
|
||||||
with open(path, encoding="utf-8", errors="replace") as f:
|
with open(path, encoding="utf-8", errors="replace") as f:
|
||||||
file_content = f.read()
|
file_content = f.read()
|
||||||
|
|
||||||
logger.debug(f"[FILES] Successfully read {len(file_content)} characters from {file_path}")
|
logger.debug(f"[FILES] Successfully read {len(file_content)} characters from {file_path}")
|
||||||
|
|
||||||
# Format with clear delimiters that help the AI understand file boundaries
|
# Format with clear delimiters that help the AI understand file boundaries
|
||||||
@@ -518,7 +518,9 @@ def read_files(
|
|||||||
max_tokens = MAX_CONTEXT_TOKENS
|
max_tokens = MAX_CONTEXT_TOKENS
|
||||||
|
|
||||||
logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
|
logger.debug(f"[FILES] read_files called with {len(file_paths)} paths")
|
||||||
logger.debug(f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}")
|
logger.debug(
|
||||||
|
f"[FILES] Token budget: max={max_tokens:,}, reserve={reserve_tokens:,}, available={max_tokens - reserve_tokens:,}"
|
||||||
|
)
|
||||||
|
|
||||||
content_parts = []
|
content_parts = []
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
@@ -546,7 +548,7 @@ def read_files(
|
|||||||
|
|
||||||
if not all_files and file_paths:
|
if not all_files and file_paths:
|
||||||
# No files found but paths were provided
|
# No files found but paths were provided
|
||||||
logger.debug(f"[FILES] No files found from provided paths")
|
logger.debug("[FILES] No files found from provided paths")
|
||||||
content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n")
|
content_parts.append(f"\n--- NO FILES FOUND ---\nProvided paths: {', '.join(file_paths)}\n--- END ---\n")
|
||||||
else:
|
else:
|
||||||
# Read files sequentially until token limit is reached
|
# Read files sequentially until token limit is reached
|
||||||
@@ -567,7 +569,9 @@ def read_files(
|
|||||||
logger.debug(f"[FILES] Added file {file_path}, total tokens: {total_tokens:,}")
|
logger.debug(f"[FILES] Added file {file_path}, total tokens: {total_tokens:,}")
|
||||||
else:
|
else:
|
||||||
# File too large for remaining budget
|
# File too large for remaining budget
|
||||||
logger.debug(f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)")
|
logger.debug(
|
||||||
|
f"[FILES] File {file_path} too large for remaining budget ({file_tokens:,} tokens, {available_tokens - total_tokens:,} remaining)"
|
||||||
|
)
|
||||||
files_skipped.append(file_path)
|
files_skipped.append(file_path)
|
||||||
|
|
||||||
# Add informative note about skipped files to help users understand
|
# Add informative note about skipped files to help users understand
|
||||||
|
|||||||
Reference in New Issue
Block a user