my-pal-mcp-server/simulator_tests/test_cross_tool_continuation.py
Beehive Innovations 4151c3c3a5 Migration from Docker to Standalone Python Server (#73)
* Migration from Docker to standalone server

- Migration handling
- Fixed tests
- Use simpler in-memory storage
- Support for concurrent logging to disk
- Simplified direct connections to localhost

* Migration from Docker / Redis to standalone script

- Updated tests
- Updated run script
- Fixed requirements
- Use dotenv
- Ask once whether the user would like to install MCP in Claude Desktop
- Updated docs

* More cleanup; removed remaining references to Docker

* Cleanup

* Comments

* Fixed tests

* Fix GitHub Actions workflow for standalone Python architecture

- Install requirements-dev.txt for pytest and testing dependencies
- Remove Docker setup from simulation tests (now standalone)
- Simplify linting job to use requirements-dev.txt
- Update simulation tests to run directly without Docker

Fixes unit test failures in CI due to missing pytest dependency.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Remove simulation tests from GitHub Actions

- Removed the simulation-tests job that makes real API calls
- Kept only unit tests (mocked, no API costs) and linting
- Simulation tests should be run manually with real API keys
- Reduces CI costs and complexity

GitHub Actions now only runs:
- Unit tests (569 tests, all mocked)
- Code quality checks (ruff, black)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Fixed tests

* Fixed tests

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-06-18 23:41:22 +04:00


#!/usr/bin/env python3
"""
Cross-Tool Continuation Test
Tests comprehensive cross-tool continuation scenarios to ensure
conversation context is maintained when switching between different tools.
"""
from .conversation_base_test import ConversationBaseTest
class CrossToolContinuationTest(ConversationBaseTest):
"""Test comprehensive cross-tool continuation scenarios"""

    @property
    def test_name(self) -> str:
        return "cross_tool_continuation"

    @property
    def test_description(self) -> str:
        return "Cross-tool conversation continuation scenarios"

    def run_test(self) -> bool:
        """Test comprehensive cross-tool continuation scenarios"""
        try:
            self.logger.info("🔧 Test: Cross-tool continuation scenarios")

            # Setup test environment for conversation testing
            self.setUp()

            success_count = 0
            total_scenarios = 3

            # Scenario 1: chat -> thinkdeep -> codereview
            if self._test_chat_thinkdeep_codereview():
                success_count += 1

            # Scenario 2: analyze -> debug -> thinkdeep
            if self._test_analyze_debug_thinkdeep():
                success_count += 1

            # Scenario 3: Multi-file cross-tool continuation
            if self._test_multi_file_continuation():
                success_count += 1

            self.logger.info(
                f"  ✅ Cross-tool continuation scenarios completed: {success_count}/{total_scenarios} scenarios passed"
            )

            # Consider successful if at least one scenario worked
            return success_count > 0

        except Exception as e:
            self.logger.error(f"Cross-tool continuation test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()
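
    # Note: each scenario helper below returns True/False instead of raising,
    # so run_test() can tally partial successes across the three scenarios.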

    def _test_chat_thinkdeep_codereview(self) -> bool:
        """Test chat -> thinkdeep -> codereview scenario"""
        try:
            self.logger.info("  1: Testing chat -> thinkdeep -> codereview")

            # Start with chat
            chat_response, chat_id = self.call_mcp_tool_direct(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
                    "files": [self.test_files["python"]],
                    "model": "flash",
                },
            )

            if not chat_response or not chat_id:
                self.logger.error("Failed to start chat conversation")
                return False

            # Continue with thinkdeep
            thinkdeep_response, _ = self.call_mcp_tool_direct(
                "thinkdeep",
                {
                    "prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": chat_id,
                    "model": "flash",
                },
            )

            if not thinkdeep_response:
                self.logger.error("Failed chat -> thinkdeep continuation")
                return False

            # Continue with codereview
            codereview_response, _ = self.call_mcp_tool_direct(
                "codereview",
                {
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "prompt": "Building on our previous analysis, provide a comprehensive code review",
                    "continuation_id": chat_id,
                    "model": "flash",
                },
            )

            if not codereview_response:
                self.logger.error("Failed thinkdeep -> codereview continuation")
                return False

            self.logger.info("  ✅ chat -> thinkdeep -> codereview working")
            return True

        except Exception as e:
            self.logger.error(f"Chat -> thinkdeep -> codereview scenario failed: {e}")
            return False

    def _test_analyze_debug_thinkdeep(self) -> bool:
        """Test analyze -> debug -> thinkdeep scenario"""
        try:
            self.logger.info("  2: Testing analyze -> debug -> thinkdeep")

            # Start with analyze
            analyze_response, analyze_id = self.call_mcp_tool_direct(
                "analyze",
                {
                    "files": [self.test_files["python"]],
                    "prompt": "Analyze this code for quality and performance issues",
                    "model": "flash",
                },
            )

            if not analyze_response or not analyze_id:
                self.logger.warning("Failed to start analyze conversation, skipping scenario 2")
                return False

            # Continue with debug
            debug_response, _ = self.call_mcp_tool_direct(
                "debug",
                {
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "prompt": "Based on our analysis, help debug the performance issue in fibonacci",
                    "continuation_id": analyze_id,
                    "model": "flash",
                },
            )

            if not debug_response:
                self.logger.warning("  ⚠️ analyze -> debug continuation failed")
                return False

            # Continue with thinkdeep
            final_response, _ = self.call_mcp_tool_direct(
                "thinkdeep",
                {
                    "prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
                    "files": [self.test_files["python"]],  # Same file should be deduplicated
                    "continuation_id": analyze_id,
                    "model": "flash",
                },
            )

            if not final_response:
                self.logger.warning("  ⚠️ debug -> thinkdeep continuation failed")
                return False

            self.logger.info("  ✅ analyze -> debug -> thinkdeep working")
            return True

        except Exception as e:
            self.logger.error(f"Analyze -> debug -> thinkdeep scenario failed: {e}")
            return False
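
    # Scenario 3 repeats the same two files on its follow-up call; per the
    # inline comments, the server is expected to serve them from conversation
    # history rather than re-embedding them.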

    def _test_multi_file_continuation(self) -> bool:
        """Test multi-file cross-tool continuation"""
        try:
            self.logger.info("  3: Testing multi-file cross-tool continuation")

            # Start with both files
            multi_response, multi_id = self.call_mcp_tool_direct(
                "chat",
                {
                    "prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
                    "files": [self.test_files["python"], self.test_files["config"]],
                    "model": "flash",
                },
            )

            if not multi_response or not multi_id:
                self.logger.warning("Failed to start multi-file conversation, skipping scenario 3")
                return False

            # Switch to codereview with same files (should use conversation history)
            multi_review, _ = self.call_mcp_tool_direct(
                "codereview",
                {
                    "files": [self.test_files["python"], self.test_files["config"]],  # Same files
                    "prompt": "Review both files in the context of our previous discussion",
                    "continuation_id": multi_id,
                    "model": "flash",
                },
            )

            if not multi_review:
                self.logger.warning("  ⚠️ Multi-file cross-tool continuation failed")
                return False

            self.logger.info("  ✅ Multi-file cross-tool continuation working")
            return True

        except Exception as e:
            self.logger.error(f"Multi-file continuation scenario failed: {e}")
            return False
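
For reference, a minimal sketch of the continuation handshake these scenarios exercise. The call_mcp_tool_direct signature, tool names, and parameter keys are taken from the file above; the harness object (test) and the file path are hypothetical stand-ins:

# Sketch only: "test" is a hypothetical, already-constructed test instance;
# construction details are not shown in the file above.
response, thread_id = test.call_mcp_tool_direct(
    "chat",
    {"prompt": "Look at this code", "files": ["/tmp/example.py"], "model": "flash"},
)

# Any later tool call can join the same conversation via continuation_id.
review, _ = test.call_mcp_tool_direct(
    "codereview",
    {
        "prompt": "Review it in the context of our discussion",
        "files": ["/tmp/example.py"],  # Same file: expected to be deduplicated
        "continuation_id": thread_id,
        "model": "flash",
    },
)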