diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 44934f1..0000000 --- a/.dockerignore +++ /dev/null @@ -1,72 +0,0 @@ -# Git -.git -.gitignore - -# Python -__pycache__ -*.pyc -*.pyo -*.pyd -.Python -*.egg -*.egg-info/ -dist/ -build/ -*.so -.coverage -.pytest_cache/ -htmlcov/ -.tox/ -.mypy_cache/ -.ruff_cache/ - -# Virtual environments -venv/ -env/ -ENV/ -.venv/ - -# IDE -.vscode/ -.idea/ -*.swp -*.swo -*~ -.DS_Store - -# Docker -.dockerignore -Dockerfile -docker-compose*.yml - -# Environment files (contain secrets) -.env -.env.* -*.env - -# Documentation -*.md -docs/ -examples/ - -# Tests -tests/ -test_*.py -*_test.py - -# CI/CD -.github/ -.gitlab-ci.yml -.travis.yml - -# Logs -*.log -logs/ - -# Temporary files -tmp/ -temp/ -*.tmp - -# OS specific -Thumbs.db \ No newline at end of file diff --git a/.env.example b/.env.example index 260c46e..b025de2 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,6 @@ # Zen MCP Server Environment Configuration # Copy this file to .env and fill in your values -# Required: Workspace root directory for file access -# This should be the HOST path that contains all files Claude might reference -# Defaults to $HOME for direct usage, auto-configured for Docker -WORKSPACE_ROOT=/Users/your-username - # API Keys - At least one is required # # IMPORTANT: Use EITHER OpenRouter OR native APIs (Gemini/OpenAI), not both! @@ -27,10 +22,7 @@ XAI_API_KEY=your_xai_api_key_here OPENROUTER_API_KEY=your_openrouter_api_key_here # Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.) -# IMPORTANT: Since this server ALWAYS runs in Docker, you MUST use host.docker.internal instead of localhost -# โŒ WRONG: http://localhost:11434/v1 (Docker containers cannot reach localhost) -# โœ… CORRECT: http://host.docker.internal:11434/v1 (Docker can reach host services) -# CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example (NOT localhost!) +# CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example # CUSTOM_API_KEY= # Empty for Ollama (no auth needed) # CUSTOM_MODEL_NAME=llama3.2 # Default model name @@ -95,9 +87,7 @@ DEFAULT_THINKING_MODE_THINKDEEP=high # Override the default location of custom_models.json # CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json -# Optional: Redis configuration (auto-configured for Docker) -# The Redis URL for conversation threading - typically managed by docker-compose -# REDIS_URL=redis://redis:6379/0 +# Note: Redis is no longer used - conversations are stored in memory # Optional: Conversation timeout (hours) # How long AI-to-AI conversation threads persist before expiring diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index d925dc6..56ce707 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -11,8 +11,8 @@ body: id: version attributes: label: Project Version - description: "Which version are you using? (e.g., Docker image tag like `latest` or `v1.2.3`, or a git commit SHA)" - placeholder: "e.g., ghcr.io/beehiveinnovations/zen-mcp-server:latest" + description: "Which version are you using? (To see version: ./run-server.sh -v)" + placeholder: "e.g., 5.1.0" validations: required: true @@ -29,7 +29,7 @@ body: id: logs attributes: label: Relevant Log Output - description: "Please copy and paste any relevant log output. You can obtain these from the MCP folder by running `docker compose logs`." + description: "Please copy and paste any relevant log output. 
Logs are stored under the `logs` folder inside the zen-mcp-server folder. You can also use `./run-server.sh -f` to follow the logs" render: shell - type: dropdown diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml index 0a4fd56..cb555fa 100644 --- a/.github/ISSUE_TEMPLATE/documentation.yml +++ b/.github/ISSUE_TEMPLATE/documentation.yml @@ -33,7 +33,7 @@ body: attributes: label: What is wrong with the documentation? description: "Please describe the problem. Be specific about what is unclear, incorrect, or missing." - placeholder: "The Docker setup command in the README is missing the `--pull=always` flag, which means users might use an outdated image version." + placeholder: "README is missing some details" validations: required: true - type: textarea attributes: label: Suggested Improvement description: "How can we make it better? If you can, please provide the exact text or changes you'd like to see." - placeholder: | - Change: - ``` - docker run ghcr.io/beehiveinnovations/zen-mcp-server:latest - ``` - - To: - ``` - docker run --pull=always ghcr.io/beehiveinnovations/zen-mcp-server:latest - ``` + placeholder: "Please improve..." + - type: dropdown id: audience diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 28ae33c..f138e7e 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -33,7 +33,7 @@ body: label: Feature Category description: What type of enhancement is this? options: - - New Gemini tool (chat, codereview, debug, etc.) + - New tool (chat, codereview, debug, etc.) - Workflow improvement - Integration enhancement - Performance optimization diff --git a/.github/ISSUE_TEMPLATE/tool_addition.yml b/.github/ISSUE_TEMPLATE/tool_addition.yml index 460010d..1c2212d 100644 --- a/.github/ISSUE_TEMPLATE/tool_addition.yml +++ b/.github/ISSUE_TEMPLATE/tool_addition.yml @@ -1,12 +1,12 @@ name: 🛠️ New Gemini Tool Proposal -description: Propose a new Gemini MCP tool (e.g., `summarize`, `testgen`, `refactor`) +description: Propose a new Zen MCP tool (e.g., `summarize`, `fixer`, `refactor`) labels: ["enhancement", "new-tool"] body: - type: input id: tool-name attributes: label: Proposed Tool Name - description: "What would the tool be called? (e.g., `summarize`, `testgen`, `refactor`)" + description: "What would the tool be called? (e.g., `summarize`, `docgen`, `refactor`)" placeholder: "e.g., `docgen`" validations: required: true - type: textarea id: purpose attributes: label: What is the primary purpose of this tool? - description: "Explain the tool's core function and the value it provides to developers using Claude + Gemini." + description: "Explain the tool's core function and the value it provides to developers using Claude + Zen." placeholder: "This tool will automatically generate comprehensive documentation from code, extracting class and function signatures, docstrings, and creating usage examples." validations: required: true @@ -27,9 +27,9 @@ body: description: "Show how a user would invoke this tool through Claude and what the expected output would look like." 
placeholder: | **User prompt to Claude:** - "Use gemini to generate documentation for my entire src/ directory" + "Use zen to generate documentation for my entire src/ directory" - **Expected Gemini tool behavior:** + **Expected behavior:** - Analyze all Python files in src/ - Extract classes, functions, and their docstrings - Generate structured markdown documentation @@ -61,7 +61,7 @@ body: id: system-prompt attributes: label: Proposed System Prompt (Optional) - description: "If you have ideas for how Gemini should be prompted for this tool, share them here." + description: "If you have ideas for how zen should be prompted for this tool, share them here." placeholder: | You are an expert technical documentation generator. Your task is to create comprehensive, user-friendly documentation from source code... diff --git a/.github/workflows/build_and_publish_docker.yml b/.github/workflows/build_and_publish_docker.yml deleted file mode 100644 index 9cf8011..0000000 --- a/.github/workflows/build_and_publish_docker.yml +++ /dev/null @@ -1,197 +0,0 @@ -name: Build and Publish Docker Image to GHCR - -on: - push: - tags: [ 'v*' ] - repository_dispatch: - types: [docker-build] - -env: - REGISTRY: ghcr.io - IMAGE_NAME: beehiveinnovations/zen-mcp-server - -jobs: - build-and-push: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - id-token: write - attestations: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - token: ${{ secrets.PAT }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - tags: | - type=ref,event=tag - type=raw,value=latest,enable=${{ github.ref_type == 'tag' }} - type=sha,prefix=main-,enable=${{ github.event_name == 'repository_dispatch' }} - type=raw,value=pr-${{ github.event.client_payload.pr_number }},enable=${{ github.event_name == 'repository_dispatch' && github.event.client_payload.pr_number != '' }} - - - name: Build and push Docker image - id: build - uses: docker/build-push-action@v5 - with: - context: . - platforms: linux/amd64,linux/arm64 - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: Generate artifact attestation - uses: actions/attest-build-provenance@v1 - with: - subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - subject-digest: ${{ steps.build.outputs.digest }} - push-to-registry: true - - - name: Generate usage instructions - run: | - echo "## ๐Ÿณ Docker Image Published Successfully!" 
>> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "**Image Registry:** GitHub Container Registry (GHCR)" >> $GITHUB_STEP_SUMMARY - echo "**Built Tags:** ${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - # Extract the first tag for the main pull command - MAIN_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n1) - - echo "### ๐Ÿ“ฅ Pull the Image" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY - echo "docker pull $MAIN_TAG" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - echo "### โš™๏ธ Claude Desktop Configuration" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`json" >> $GITHUB_STEP_SUMMARY - echo "{" >> $GITHUB_STEP_SUMMARY - echo " \"mcpServers\": {" >> $GITHUB_STEP_SUMMARY - echo " \"gemini\": {" >> $GITHUB_STEP_SUMMARY - echo " \"command\": \"docker\"," >> $GITHUB_STEP_SUMMARY - echo " \"args\": [" >> $GITHUB_STEP_SUMMARY - echo " \"run\", \"--rm\", \"-i\"," >> $GITHUB_STEP_SUMMARY - echo " \"-e\", \"GEMINI_API_KEY\"," >> $GITHUB_STEP_SUMMARY - echo " \"$MAIN_TAG\"" >> $GITHUB_STEP_SUMMARY - echo " ]," >> $GITHUB_STEP_SUMMARY - echo " \"env\": {" >> $GITHUB_STEP_SUMMARY - echo " \"GEMINI_API_KEY\": \"your-gemini-api-key-here\"" >> $GITHUB_STEP_SUMMARY - echo " }" >> $GITHUB_STEP_SUMMARY - echo " }" >> $GITHUB_STEP_SUMMARY - echo " }" >> $GITHUB_STEP_SUMMARY - echo "}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - echo "### ๐Ÿท๏ธ All Available Tags" >> $GITHUB_STEP_SUMMARY - echo "Built and pushed the following tags:" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.meta.outputs.tags }}" | sed 's/^/- `/' | sed 's/$/`/' >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - if [[ "${{ github.event_name }}" == "repository_dispatch" ]]; then - echo "**Note:** This is a development build triggered by PR #${{ github.event.client_payload.pr_number }}" >> $GITHUB_STEP_SUMMARY - echo "Use this image for testing the changes from that PR." >> $GITHUB_STEP_SUMMARY - elif [[ "${{ github.ref_type }}" == "tag" ]]; then - echo "**Note:** This is a release build from tag ${{ github.ref_name }}" >> $GITHUB_STEP_SUMMARY - echo "This image represents a stable release version." 
>> $GITHUB_STEP_SUMMARY - fi - - echo "" >> $GITHUB_STEP_SUMMARY - echo "### ๐Ÿ“ฆ View in GitHub Container Registry" >> $GITHUB_STEP_SUMMARY - echo "[View all versions and tags โ†’](https://github.com/${{ github.repository }}/pkgs/container/zen-mcp-server)" >> $GITHUB_STEP_SUMMARY - - - name: Update README with latest image info - if: false # Temporarily disabled as agreed with repo author - # if: github.ref_type == 'tag' || (github.event_name == 'repository_dispatch' && github.event.client_payload.pr_number != '') - run: | - # Checkout main branch to avoid detached HEAD when pushing - git fetch origin main:main - git checkout main - # Extract the primary image tag for updating README - if [[ "${{ github.ref_type }}" == "tag" ]]; then - # For tag releases, use the version tag - LATEST_TAG="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}" - UPDATE_TYPE="release" - elif [[ "${{ github.event_name }}" == "repository_dispatch" && "${{ github.event.client_payload.pr_number }}" != "" ]]; then - # For repository_dispatch (PR builds), use the PR tag - LATEST_TAG="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.client_payload.pr_number }}" - UPDATE_TYPE="development" - else - # For manual repository_dispatch without PR number, use latest tag - LATEST_TAG="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" - UPDATE_TYPE="manual" - fi - - echo "Updating README.md with latest Docker image: $LATEST_TAG" - - # Update README.md with the latest image tag - sed -i.bak "s|ghcr\.io/[^/]*/zen-mcp-server:[a-zA-Z0-9\._-]*|$LATEST_TAG|g" README.md - - # Also update docs/user-guides/installation.md - sed -i.bak "s|ghcr\.io/[^/]*/zen-mcp-server:[a-zA-Z0-9\._-]*|$LATEST_TAG|g" docs/user-guides/installation.md - - # Also update docs/user-guides/configuration.md - sed -i.bak "s|ghcr\.io/[^/]*/zen-mcp-server:[a-zA-Z0-9\._-]*|$LATEST_TAG|g" docs/user-guides/configuration.md - - # Check if there are any changes - if git diff --quiet README.md docs/user-guides/installation.md docs/user-guides/configuration.md; then - echo "No changes needed in documentation" - else - echo "Documentation updated with new image tag" - - # Configure git for automated commit - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - - # Add and commit changes - git add README.md docs/user-guides/installation.md docs/user-guides/configuration.md - - if [[ "$UPDATE_TYPE" == "release" ]]; then - git commit -m "docs: Update Docker image references to ${{ github.ref_name }} - - Automated update after Docker image publish for release ${{ github.ref_name }}. - All documentation now references the latest stable image. - - ๐Ÿค– Automated by GitHub Actions" - elif [[ "$UPDATE_TYPE" == "development" ]]; then - git commit -m "docs: Update Docker image references for PR #${{ github.event.client_payload.pr_number }} - - Automated update after Docker image publish for development build. - Documentation updated to reference the latest development image. - - ๐Ÿค– Automated by GitHub Actions" - else - git commit -m "docs: Update Docker image references to latest - - Automated update after manual Docker image build. - Documentation updated to reference the latest image. 
- - ๐Ÿค– Automated by GitHub Actions" - fi - - # Push changes back to the repository - git push --set-upstream origin main - - echo "### ๐Ÿ“ Documentation Updated" >> $GITHUB_STEP_SUMMARY - echo "README.md and user guides have been automatically updated with the new Docker image tag: \`$LATEST_TAG\`" >> $GITHUB_STEP_SUMMARY - fi - diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml deleted file mode 100644 index 12fcfad..0000000 --- a/.github/workflows/docker-test.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Docker Build Test - -on: - pull_request: - branches: [ main ] - -jobs: - docker-build-test: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Test Docker build - uses: docker/build-push-action@v5 - with: - context: . - push: false - tags: test:latest - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: Build test summary - run: | - echo "### โœ… Docker Build Test Passed" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "Docker image builds successfully and is ready for production." >> $GITHUB_STEP_SUMMARY - diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2d13b39..4513ec6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,6 +25,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt + pip install -r requirements-dev.txt - name: Run unit tests run: | @@ -49,7 +50,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install ruff black + pip install -r requirements-dev.txt - name: Run black formatter check run: black --check . @@ -57,61 +58,3 @@ jobs: - name: Run ruff linter run: ruff check . 
- simulation-tests: - runs-on: ubuntu-latest - # Only run simulation tests on main branch pushes (requires manual API key setup) - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.11" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Check API key availability - id: check-key - run: | - has_key=false - if [ -n "${{ secrets.GEMINI_API_KEY }}" ] || [ -n "${{ secrets.OPENAI_API_KEY }}" ]; then - has_key=true - echo "โœ… API key(s) found - running simulation tests" - else - echo "โš ๏ธ No API keys configured - skipping simulation tests" - fi - echo "api_key_available=$has_key" >> $GITHUB_OUTPUT - - - name: Set up Docker - if: steps.check-key.outputs.api_key_available == 'true' - uses: docker/setup-buildx-action@v3 - - - name: Build Docker image - if: steps.check-key.outputs.api_key_available == 'true' - run: | - docker compose build - - - name: Run simulation tests - if: steps.check-key.outputs.api_key_available == 'true' - run: | - # Start services - docker compose up -d - - # Wait for services to be ready - sleep 10 - - # Run communication simulator tests - python communication_simulator_test.py --skip-docker - env: - GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - - name: Skip simulation tests - if: steps.check-key.outputs.api_key_available == 'false' - run: | - echo "๐Ÿ”’ Simulation tests skipped (no API keys configured)" - echo "To enable simulation tests, add GEMINI_API_KEY and/or OPENAI_API_KEY as repository secrets" \ No newline at end of file diff --git a/.gitignore b/.gitignore index fa7e02d..fd961db 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,10 @@ FEATURE_*.md # Local user instructions CLAUDE.local.md + +# Standalone mode files +.zen_venv/ +.docker_cleaned +logs/ +*.backup +/.desktop_configured diff --git a/CLAUDE.md b/CLAUDE.md index bf4f422..0e9d456 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,31 +20,31 @@ This script automatically runs: - Ruff linting with auto-fix - Black code formatting - Import sorting with isort -- Complete unit test suite (361 tests) +- Complete unit test suite - Verification that all checks pass 100% ### Server Management -#### Start/Restart the Server +#### Setup/Update the Server ```bash -# Start or restart the Docker containers +# Run setup script (handles everything) ./run-server.sh ``` This script will: -- Build/rebuild Docker images if needed -- Start the MCP server container (`zen-mcp-server`) -- Start the Redis container (`zen-mcp-redis`) -- Set up proper networking and volumes +- Set up Python virtual environment +- Install all dependencies +- Create/update .env file +- Configure MCP with Claude +- Verify API keys -#### Check Server Status +#### View Logs ```bash -# Check if containers are running -docker ps +# Follow logs in real-time +./run-server.sh -f -# Look for these containers: -# - zen-mcp-server -# - zen-mcp-redis +# Or manually view logs +tail -f logs/mcp_server.log ``` ### Log Management @@ -52,26 +52,26 @@ docker ps #### View Server Logs ```bash # View last 500 lines of server logs -docker exec zen-mcp-server tail -n 500 /tmp/mcp_server.log +tail -n 500 logs/mcp_server.log # Follow logs in real-time -docker exec zen-mcp-server tail -f /tmp/mcp_server.log +tail -f logs/mcp_server.log -# View specific number of lines (replace 100 with desired count) -docker 
exec zen-mcp-server tail -n 100 /tmp/mcp_server.log +# View specific number of lines +tail -n 100 logs/mcp_server.log # Search logs for specific patterns -docker exec zen-mcp-server grep "ERROR" /tmp/mcp_server.log -docker exec zen-mcp-server grep "tool_name" /tmp/mcp_server.log +grep "ERROR" logs/mcp_server.log +grep "tool_name" logs/mcp_activity.log ``` #### Monitor Tool Executions Only ```bash # View tool activity log (focused on tool calls and completions) -docker exec zen-mcp-server tail -n 100 /tmp/mcp_activity.log +tail -n 100 logs/mcp_activity.log # Follow tool activity in real-time -docker exec zen-mcp-server tail -f /tmp/mcp_activity.log +tail -f logs/mcp_activity.log # Use the dedicated log monitor (shows tool calls, completions, errors) python log_monitor.py @@ -86,36 +86,21 @@ The `log_monitor.py` script provides a real-time view of: #### All Available Log Files ```bash # Main server log (all activity) -docker exec zen-mcp-server tail -f /tmp/mcp_server.log +tail -f logs/mcp_server.log # Tool activity only (TOOL_CALL, TOOL_COMPLETED, etc.) -docker exec zen-mcp-server tail -f /tmp/mcp_activity.log +tail -f logs/mcp_activity.log -# Debug information -docker exec zen-mcp-server tail -f /tmp/gemini_debug.log - -# Overflow logs (when main log gets too large) -docker exec zen-mcp-server tail -f /tmp/mcp_server_overflow.log -``` - -#### Debug Container Issues -```bash -# Check container logs (Docker level) -docker logs zen-mcp-server - -# Execute interactive shell in container -docker exec -it zen-mcp-server /bin/bash - -# Check Redis container logs -docker logs zen-mcp-redis +# Debug information (if configured) +tail -f logs/debug.log ``` ### Testing Simulation tests are available to test the MCP server in a 'live' scenario, using your configured API keys to ensure the models are working and the server is able to communicate back and forth. -IMPORTANT: Any time any code is changed or updated, you MUST first restart it with ./run-server.sh OR -pass `--rebuild` to the `communication_simulator_test.py` script (if running it for the first time after changes) so that it's able to restart and use the latest code. + +**IMPORTANT**: After any code changes, restart your Claude session for the changes to take effect. 
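+
+A typical post-change check, using the commands documented below, might look like this (the test name is just an example):
+
+```bash
+./code_quality_checks.sh                                    # lint, format, and unit tests
+python communication_simulator_test.py --individual basic_conversation
+tail -n 50 logs/mcp_activity.log                            # confirm the tool calls completed
+```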
#### Run All Simulator Tests ```bash @@ -124,9 +109,6 @@ python communication_simulator_test.py # Run tests with verbose output python communication_simulator_test.py --verbose - -# Force rebuild environment before testing -python communication_simulator_test.py --rebuild ``` #### Run Individual Simulator Tests (Recommended) @@ -138,17 +120,13 @@ python communication_simulator_test.py --list-tests python communication_simulator_test.py --individual basic_conversation python communication_simulator_test.py --individual content_validation python communication_simulator_test.py --individual cross_tool_continuation -python communication_simulator_test.py --individual logs_validation -python communication_simulator_test.py --individual redis_validation +python communication_simulator_test.py --individual memory_validation -# Run multiple specific tests (alternative approach) +# Run multiple specific tests python communication_simulator_test.py --tests basic_conversation content_validation # Run individual test with verbose output for debugging -python communication_simulator_test.py --individual logs_validation --verbose - -# Individual tests provide full Docker setup and teardown per test -# This ensures clean state and better error isolation +python communication_simulator_test.py --individual memory_validation --verbose ``` Available simulator tests include: @@ -158,8 +136,7 @@ Available simulator tests include: - `cross_tool_continuation` - Cross-tool conversation continuation scenarios - `cross_tool_comprehensive` - Comprehensive cross-tool file deduplication and continuation - `line_number_validation` - Line number handling validation across tools -- `logs_validation` - Docker logs validation -- `redis_validation` - Redis conversation memory validation +- `memory_validation` - Conversation memory validation - `model_thinking_config` - Model-specific thinking configuration behavior - `o3_model_selection` - O3 model selection and usage validation - `ollama_custom_url` - Ollama custom URL endpoint functionality @@ -193,12 +170,13 @@ python -m pytest tests/ --cov=. --cov-report=html #### Before Making Changes 1. Ensure virtual environment is activated: `source venv/bin/activate` 2. Run quality checks: `./code_quality_checks.sh` -3. Check server is running: `./run-server.sh` +3. Check logs to ensure server is healthy: `tail -n 50 logs/mcp_server.log` #### After Making Changes 1. Run quality checks again: `./code_quality_checks.sh` 2. Run relevant simulator tests: `python communication_simulator_test.py --individual ` -3. Check logs for any issues: `docker exec zen-mcp-server tail -n 100 /tmp/mcp_server.log` +3. Check logs for any issues: `tail -n 100 logs/mcp_server.log` +4. Restart Claude session to use updated code #### Before Committing/PR 1. Final quality check: `./code_quality_checks.sh` @@ -207,18 +185,17 @@ python -m pytest tests/ --cov=. 
--cov-report=html ### Common Troubleshooting -#### Container Issues +#### Server Issues ```bash -# Restart containers if they're not responding -docker stop zen-mcp-server zen-mcp-redis +# Check if Python environment is set up correctly ./run-server.sh -# Check container resource usage -docker stats zen-mcp-server +# View recent errors +grep "ERROR" logs/mcp_server.log | tail -20 -# Remove containers and rebuild from scratch -docker rm -f zen-mcp-server zen-mcp-redis -./run-server.sh +# Check virtual environment +which python +# Should show: .../zen-mcp-server/.zen_venv/bin/python ``` #### Test Failures @@ -227,10 +204,10 @@ docker rm -f zen-mcp-server zen-mcp-redis python communication_simulator_test.py --individual --verbose # Check server logs during test execution -docker exec zen-mcp-server tail -f /tmp/mcp_server.log +tail -f logs/mcp_server.log -# Run tests while keeping containers running for debugging -python communication_simulator_test.py --keep-logs +# Run tests with debug output +LOG_LEVEL=DEBUG python communication_simulator_test.py --individual ``` #### Linting Issues @@ -249,19 +226,19 @@ isort --check-only . ### File Structure Context - `./code_quality_checks.sh` - Comprehensive quality check script -- `./run-server.sh` - Docker container setup and management +- `./run-server.sh` - Server setup and management - `communication_simulator_test.py` - End-to-end testing framework - `simulator_tests/` - Individual test modules - `tests/` - Unit test suite - `tools/` - MCP tool implementations - `providers/` - AI provider implementations - `systemprompts/` - System prompt definitions +- `logs/` - Server log files ### Environment Requirements -- Python 3.8+ with virtual environment activated -- Docker and Docker Compose installed +- Python 3.9+ with virtual environment - All dependencies from `requirements.txt` installed -- Proper API keys configured in environment or config files +- Proper API keys configured in `.env` file This guide provides everything needed to efficiently work with the Zen MCP Server codebase using Claude. Always run quality checks before and after making changes to ensure code integrity. \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1a77633..0000000 --- a/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# Use Python 3.11 slim image for smaller size and consistent environment -FROM python:3.11-slim - -# Set working directory inside the container -WORKDIR /app - -# Install git (required for some Python packages that may need it) -RUN apt-get update && apt-get install -y --no-install-recommends \ - git \ - && rm -rf /var/lib/apt/lists/* - -# Copy requirements first to leverage Docker layer caching -COPY requirements.txt . - -# Install Python dependencies -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the rest of the application -COPY . . 
- -# Create a non-root user to run the application (security best practice) -RUN useradd -m -u 1000 mcpuser && \ - chown -R mcpuser:mcpuser /app - -# Switch to non-root user -USER mcpuser - -# Set the entrypoint to run the server -ENTRYPOINT ["python", "server.py"] \ No newline at end of file diff --git a/README.md b/README.md index 11ff452..d1637c6 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Because these AI models [clearly aren't when they get chatty โ†’](docs/ai_banter ## Quick Navigation - **Getting Started** - - [Quickstart](#quickstart-5-minutes) - Get running in 5 minutes with Docker + - [Quickstart](#quickstart-5-minutes) - Get running in 5 minutes - [Available Tools](#available-tools) - Overview of all tools - [AI-to-AI Conversations](#ai-to-ai-conversation-threading) - Multi-turn conversations @@ -123,7 +123,7 @@ The final implementation resulted in a 26% improvement in JSON parsing performan ### Prerequisites -- Docker Desktop installed ([Download here](https://www.docker.com/products/docker-desktop/)) +- Python 3.10+ (3.12 recommended) - Git - **Windows users**: WSL2 is required for Claude Code CLI @@ -158,16 +158,16 @@ The final implementation resulted in a 26% improvement in JSON parsing performan git clone https://github.com/BeehiveInnovations/zen-mcp-server.git cd zen-mcp-server -# One-command setup (includes Redis for AI conversations) +# One-command setup ./run-server.sh ``` **What this does:** -- **Builds Docker images** with all dependencies (including Redis for conversation threading) -- **Creates .env file** (automatically uses `$GEMINI_API_KEY` and `$OPENAI_API_KEY` if set in environment) -- **Starts Redis service** for AI-to-AI conversation memory -- **Starts MCP server** with providers based on available API keys -- **Adds Zen to Claude Code automatically** +- **Sets up everything automatically** - Python environment, dependencies, configuration +- **Configures Claude integrations** - Adds to Claude Code CLI and guides Desktop setup +- **Ready to use immediately** - No manual configuration needed + +**After updates:** Always run `./run-server.sh` again after `git pull` to ensure everything stays current. ### 3. Add Your API Keys @@ -180,74 +180,26 @@ nano .env # OPENAI_API_KEY=your-openai-api-key-here # For O3 model # OPENROUTER_API_KEY=your-openrouter-key # For OpenRouter (see docs/custom_models.md) -# For local models (Ollama, vLLM, etc.) - Note: Use host.docker.internal for Docker networking: -# CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example (NOT localhost!) +# For local models (Ollama, vLLM, etc.): +# CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example # CUSTOM_API_KEY= # Empty for Ollama # CUSTOM_MODEL_NAME=llama3.2 # Default model -# WORKSPACE_ROOT=/Users/your-username (automatically configured) - # Note: At least one API key OR custom URL is required - -# After making changes to .env, restart the server: -# ./run-server.sh ``` -**Restart MCP Server**: This step is important. You will need to `./run-server.sh` again for it to -pick the changes made to `.env` otherwise the server will be unable to use your newly edited keys. Please also -`./run-server.sh` any time in the future you modify the `.env` file. +**No restart needed**: The server reads the .env file each time Claude calls a tool, so changes take effect immediately. **Next**: Now run `claude` from your project folder using the terminal for it to connect to the newly added mcp server. 
If you were already running a `claude` code session, please exit and start a new session. #### If Setting up for Claude Desktop -1. **Launch Claude Desktop** -- Open Claude Desktop -- Go to **Settings** โ†’ **Developer** โ†’ **Edit Config** +**Need the exact configuration?** Run `./run-server.sh -c` to display the platform-specific setup instructions with correct paths. -This will open a folder revealing `claude_desktop_config.json`. - -2. **Update Docker Configuration** - -The setup script shows you the exact configuration. It looks like this. When you ran `run-server.sh` it should -have produced a configuration for you to copy: - -```json -{ - "mcpServers": { - "zen": { - "command": "docker", - "args": [ - "exec", - "-i", - "zen-mcp-server", - "python", - "server.py" - ] - } - } -} -``` - -Paste the above into `claude_desktop_config.json`. If you have several other MCP servers listed, simply add this below the rest after a `,` comma: -```json - ... other mcp servers ... , - - "zen": { - "command": "docker", - "args": [ - "exec", - "-i", - "zen-mcp-server", - "python", - "server.py" - ] - } -``` - -3. **Restart Claude Desktop** -Completely quit and restart Claude Desktop for the changes to take effect. +1. **Open Claude Desktop config**: Settings โ†’ Developer โ†’ Edit Config +2. **Copy the configuration** shown by `./run-server.sh -c` into your `claude_desktop_config.json` +3. **Restart Claude Desktop** for changes to take effect ### 4. Start Using It! @@ -546,7 +498,7 @@ OPENAI_API_KEY=your-openai-key - **API Keys**: Native APIs (Gemini, OpenAI, X.AI), OpenRouter, or Custom endpoints (Ollama, vLLM) - **Model Selection**: Auto mode or specific model defaults - **Usage Restrictions**: Control which models can be used for cost control -- **Conversation Settings**: Timeout, turn limits, Redis configuration +- **Conversation Settings**: Timeout, turn limits, memory configuration - **Thinking Modes**: Token allocation for extended reasoning - **Logging**: Debug levels and operational visibility diff --git a/claude_config_example.json b/claude_config_example.json index a0c5229..fa2e7d9 100644 --- a/claude_config_example.json +++ b/claude_config_example.json @@ -1,17 +1,11 @@ { "comment": "Example Claude Desktop configuration for Zen MCP Server", - "comment2": "For Docker setup, use examples/claude_config_docker_home.json", + "comment2": "Run './run-server.sh -c' to get the exact configuration for your system", "comment3": "For platform-specific examples, see the examples/ directory", "mcpServers": { "zen": { - "command": "docker", - "args": [ - "exec", - "-i", - "zen-mcp-server", - "python", - "server.py" - ] + "command": "/path/to/zen-mcp-server/.zen_venv/bin/python", + "args": ["/path/to/zen-mcp-server/server.py"] } } } \ No newline at end of file diff --git a/code_quality_checks.sh b/code_quality_checks.sh index 8988454..4852f9c 100755 --- a/code_quality_checks.sh +++ b/code_quality_checks.sh @@ -9,14 +9,57 @@ set -e # Exit on any error echo "๐Ÿ” Running Code Quality Checks for Zen MCP Server" echo "=================================================" -# Check if virtual environment is activated -if [[ "$VIRTUAL_ENV" == "" ]]; then - echo "โŒ Virtual environment not activated!" 
- echo "Please run: source venv/bin/activate" +# Determine Python command +if [[ -f ".zen_venv/bin/python" ]]; then + PYTHON_CMD=".zen_venv/bin/python" + PIP_CMD=".zen_venv/bin/pip" + echo "โœ… Using venv" +elif [[ -n "$VIRTUAL_ENV" ]]; then + PYTHON_CMD="python" + PIP_CMD="pip" + echo "โœ… Using activated virtual environment: $VIRTUAL_ENV" +else + echo "โŒ No virtual environment found!" + echo "Please run: ./run-server.sh first to set up the environment" exit 1 fi +echo "" -echo "โœ… Virtual environment detected: $VIRTUAL_ENV" +# Check and install dev dependencies if needed +echo "๐Ÿ” Checking development dependencies..." +DEV_DEPS_NEEDED=false + +# Check each dev dependency +for tool in ruff black isort pytest; do + # Check if tool exists in venv or in PATH + if [[ -f ".zen_venv/bin/$tool" ]] || command -v $tool &> /dev/null; then + continue + else + DEV_DEPS_NEEDED=true + break + fi +done + +if [ "$DEV_DEPS_NEEDED" = true ]; then + echo "๐Ÿ“ฆ Installing development dependencies..." + $PIP_CMD install -q -r requirements-dev.txt + echo "โœ… Development dependencies installed" +else + echo "โœ… Development dependencies already installed" +fi + +# Set tool paths +if [[ -f ".zen_venv/bin/ruff" ]]; then + RUFF=".zen_venv/bin/ruff" + BLACK=".zen_venv/bin/black" + ISORT=".zen_venv/bin/isort" + PYTEST=".zen_venv/bin/pytest" +else + RUFF="ruff" + BLACK="black" + ISORT="isort" + PYTEST="pytest" +fi echo "" # Step 1: Linting and Formatting @@ -24,16 +67,16 @@ echo "๐Ÿ“‹ Step 1: Running Linting and Formatting Checks" echo "--------------------------------------------------" echo "๐Ÿ”ง Running ruff linting with auto-fix..." -ruff check --fix +$RUFF check --fix echo "๐ŸŽจ Running black code formatting..." -black . +$BLACK . echo "๐Ÿ“ฆ Running import sorting with isort..." -isort . +$ISORT . --skip-glob=".zen_venv/*" echo "โœ… Verifying all linting passes..." -ruff check +$RUFF check echo "โœ… Step 1 Complete: All linting and formatting checks passed!" echo "" @@ -42,8 +85,8 @@ echo "" echo "๐Ÿงช Step 2: Running Complete Unit Test Suite" echo "---------------------------------------------" -echo "๐Ÿƒ Running all 361 unit tests..." -python -m pytest tests/ -v +echo "๐Ÿƒ Running all unit tests..." +$PYTHON_CMD -m pytest tests/ -v -x echo "โœ… Step 2 Complete: All unit tests passed!" echo "" @@ -54,7 +97,7 @@ echo "==================================" echo "โœ… Linting (ruff): PASSED" echo "โœ… Formatting (black): PASSED" echo "โœ… Import sorting (isort): PASSED" -echo "โœ… Unit tests (361 tests): PASSED" +echo "โœ… Unit tests: PASSED" echo "" echo "๐Ÿš€ Your code is ready for commit and GitHub Actions!" echo "๐Ÿ’ก Remember to add simulator tests if you modified tools" \ No newline at end of file diff --git a/communication_simulator_test.py b/communication_simulator_test.py index af52091..9c5cb89 100644 --- a/communication_simulator_test.py +++ b/communication_simulator_test.py @@ -6,18 +6,18 @@ by simulating real Claude CLI communications and validating conversation continuity, file handling, deduplication features, and clarification scenarios. Test Flow: -1. Setup fresh Docker environment with clean containers +1. Setup standalone server environment 2. Load and run individual test modules -3. Validate system behavior through logs and Redis +3. Validate system behavior through logs and memory 4. Cleanup and report results Usage: - python communication_simulator_test.py [--verbose] [--keep-logs] [--tests TEST_NAME...] 
[--individual TEST_NAME] [--rebuild] + python communication_simulator_test.py [--verbose] [--keep-logs] [--tests TEST_NAME...] [--individual TEST_NAME] [--setup] --tests: Run specific tests only (space-separated) --list-tests: List all available tests --individual: Run a single test individually - --rebuild: Force rebuild Docker environment using run-server.sh + --setup: Force setup standalone server environment using run-server.sh Available tests: basic_conversation - Basic conversation flow with chat tool @@ -25,8 +25,8 @@ Available tests: per_tool_deduplication - File deduplication for individual tools cross_tool_continuation - Cross-tool conversation continuation scenarios cross_tool_comprehensive - Comprehensive cross-tool integration testing - logs_validation - Docker logs validation - redis_validation - Redis conversation memory validation + line_number_validation - Line number handling validation across tools + memory_validation - Conversation memory validation model_thinking_config - Model thinking configuration testing o3_model_selection - O3 model selection and routing testing ollama_custom_url - Ollama custom URL configuration testing @@ -45,11 +45,11 @@ Examples: # Run only basic conversation and content validation tests python communication_simulator_test.py --tests basic_conversation content_validation - # Run a single test individually (with full Docker setup) + # Run a single test individually (with full standalone setup) python communication_simulator_test.py --individual content_validation - # Force rebuild Docker environment before running tests - python communication_simulator_test.py --rebuild + # Force setup standalone server environment before running tests + python communication_simulator_test.py --setup # List available tests python communication_simulator_test.py --list-tests @@ -68,15 +68,15 @@ class CommunicationSimulator: """Simulates real-world Claude CLI communication with MCP Gemini server""" def __init__( - self, verbose: bool = False, keep_logs: bool = False, selected_tests: list[str] = None, rebuild: bool = False + self, verbose: bool = False, keep_logs: bool = False, selected_tests: list[str] = None, setup: bool = False ): self.verbose = verbose self.keep_logs = keep_logs self.selected_tests = selected_tests or [] - self.rebuild = rebuild + self.setup = setup self.temp_dir = None - self.container_name = "zen-mcp-server" - self.redis_container = "zen-mcp-redis" + self.server_process = None + self.python_path = self._get_python_path() # Import test registry from simulator_tests import TEST_REGISTRY @@ -96,6 +96,23 @@ class CommunicationSimulator: logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s") self.logger = logging.getLogger(__name__) + def _get_python_path(self) -> str: + """Get the Python path for the virtual environment""" + current_dir = os.getcwd() + venv_python = os.path.join(current_dir, "venv", "bin", "python") + + if os.path.exists(venv_python): + return venv_python + + # Try .zen_venv as fallback + zen_venv_python = os.path.join(current_dir, ".zen_venv", "bin", "python") + if os.path.exists(zen_venv_python): + return zen_venv_python + + # Fallback to system python if venv doesn't exist + self.logger.warning("Virtual environment not found, using system python") + return "python" + def _create_test_runner(self, test_class): """Create a test runner function for a test class""" @@ -118,13 +135,13 @@ class CommunicationSimulator: self.temp_dir = tempfile.mkdtemp(prefix="mcp_test_") self.logger.debug(f"Created 
temp directory: {self.temp_dir}") - # Only run run-server.sh if rebuild is requested - if self.rebuild: + # Only run run-server.sh if setup is requested + if self.setup: if not self._run_server_script(): return False - # Always verify containers are running (regardless of rebuild) - return self._verify_existing_containers() + # Always verify server environment is available + return self._verify_server_environment() except Exception as e: self.logger.error(f"Failed to setup test environment: {e}") @@ -160,29 +177,40 @@ class CommunicationSimulator: self.logger.error(f"Failed to run run-server.sh: {e}") return False - def _verify_existing_containers(self) -> bool: - """Verify that required containers are already running (no setup)""" + def _verify_server_environment(self) -> bool: + """Verify that server environment is ready""" try: - self.logger.info("Verifying existing Docker containers...") + self.logger.info("Verifying standalone server environment...") - result = self._run_command(["docker", "ps", "--format", "{{.Names}}"], capture_output=True) - running_containers = result.stdout.decode().strip().split("\n") + # Check if server.py exists + server_file = "server.py" + if not os.path.exists(server_file): + self.logger.error(f"Server file not found: {server_file}") + self.logger.error("Please ensure you're in the correct directory and server.py exists") + return False - required = [self.container_name, self.redis_container] - for container in required: - if container not in running_containers: - self.logger.error(f"Required container not running: {container}") - self.logger.error( - "Please start Docker containers first, or use --rebuild to set them up automatically" - ) + # Check if virtual environment is available + if not os.path.exists(self.python_path): + self.logger.error(f"Python executable not found: {self.python_path}") + self.logger.error("Please run ./run-server.sh first to set up the environment") + return False + + # Check if required dependencies are available + try: + result = self._run_command([self.python_path, "-c", "import json; print('OK')"], capture_output=True) + if result.returncode != 0: + self.logger.error("Python environment validation failed") return False + except Exception as e: + self.logger.error(f"Python environment check failed: {e}") + return False - self.logger.info(f"All required containers are running: {required}") + self.logger.info("Standalone server environment is ready") return True except Exception as e: - self.logger.error(f"Container verification failed: {e}") - self.logger.error("Please ensure Docker is running and containers are available, or use --rebuild") + self.logger.error(f"Server environment verification failed: {e}") + self.logger.error("Please ensure the server environment is set up correctly, or use --setup") return False def simulate_claude_cli_session(self) -> bool: @@ -348,11 +376,20 @@ class CommunicationSimulator: try: self.logger.info("Cleaning up test environment...") - # Note: We don't stop Docker services ourselves - let run-server.sh handle Docker lifecycle + # Stop any running server processes + if self.server_process and self.server_process.poll() is None: + self.logger.info("Stopping server process...") + self.server_process.terminate() + try: + self.server_process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.server_process.kill() + self.server_process.wait() + if not self.keep_logs: - self.logger.info("Test completed. 
Docker containers left running (use run-server.sh to manage)") + self.logger.info("Test completed. Standalone server process stopped.") else: - self.logger.info("Keeping logs and Docker services running for inspection") + self.logger.info("Keeping logs for inspection") # Remove temp directory if self.temp_dir and os.path.exists(self.temp_dir): @@ -374,11 +411,13 @@ def parse_arguments(): """Parse and validate command line arguments""" parser = argparse.ArgumentParser(description="Zen MCP Communication Simulator Test") parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging") - parser.add_argument("--keep-logs", action="store_true", help="Keep Docker services running for log inspection") + parser.add_argument("--keep-logs", action="store_true", help="Keep logs for inspection after test completion") parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)") parser.add_argument("--list-tests", action="store_true", help="List available tests and exit") parser.add_argument("--individual", "-i", help="Run a single test individually") - parser.add_argument("--rebuild", action="store_true", help="Force rebuild Docker environment using run-server.sh") + parser.add_argument( + "--setup", action="store_true", help="Force setup standalone server environment using run-server.sh" + ) return parser.parse_args() @@ -453,7 +492,7 @@ def main(): # Initialize simulator consistently for all use cases simulator = CommunicationSimulator( - verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests, rebuild=args.rebuild + verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests, setup=args.setup ) # Determine execution mode and run diff --git a/config.py b/config.py index a57d55d..a9aa5ab 100644 --- a/config.py +++ b/config.py @@ -14,7 +14,7 @@ import os # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "5.0.2" +__version__ = "5.1.0" # Last update date in ISO format __updated__ = "2025-06-18" # Primary maintainer @@ -136,7 +136,7 @@ DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION = 2 # What is NOT limited by this constant: # - System prompts added internally by tools # - File content embedded by tools -# - Conversation history loaded from Redis +# - Conversation history loaded from storage # - Web search instructions or other internal additions # - Complete prompts sent to external models (managed by model-specific token limits) # @@ -145,6 +145,5 @@ DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION = 2 MCP_PROMPT_SIZE_LIMIT = 50_000 # 50K characters (user input only) # Threading configuration -# Simple Redis-based conversation threading for stateless MCP environment -# Set REDIS_URL environment variable to connect to your Redis instance -REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") +# Simple in-memory conversation threading for stateless MCP environment +# Conversations persist only during the Claude session diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 71dcce1..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,81 +0,0 @@ -services: - redis: - image: redis:7-alpine - container_name: zen-mcp-redis - restart: unless-stopped - stop_grace_period: 3s - ports: - - "6379:6379" - volumes: - - redis_data:/data - command: redis-server --save 60 1 --loglevel warning --maxmemory 512mb --maxmemory-policy allkeys-lru - 
deploy: - resources: - limits: - memory: 1G - reservations: - memory: 128M - - zen-mcp: - build: . - image: zen-mcp-server:latest - container_name: zen-mcp-server - restart: unless-stopped - stop_grace_period: 5s - depends_on: - - redis - environment: - - GEMINI_API_KEY=${GEMINI_API_KEY:-} - - OPENAI_API_KEY=${OPENAI_API_KEY:-} - - XAI_API_KEY=${XAI_API_KEY:-} - # OpenRouter support - - OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-} - - CUSTOM_MODELS_CONFIG_PATH=${CUSTOM_MODELS_CONFIG_PATH:-} - # Custom API endpoint support (for Ollama, vLLM, etc.) - - CUSTOM_API_URL=${CUSTOM_API_URL:-} - - CUSTOM_API_KEY=${CUSTOM_API_KEY:-} - - CUSTOM_MODEL_NAME=${CUSTOM_MODEL_NAME:-llama3.2} - - DEFAULT_MODEL=${DEFAULT_MODEL:-auto} - - DEFAULT_THINKING_MODE_THINKDEEP=${DEFAULT_THINKING_MODE_THINKDEEP:-high} - - CONVERSATION_TIMEOUT_HOURS=${CONVERSATION_TIMEOUT_HOURS:-3} - - MAX_CONVERSATION_TURNS=${MAX_CONVERSATION_TURNS:-20} - # Model usage restrictions - - OPENAI_ALLOWED_MODELS=${OPENAI_ALLOWED_MODELS:-} - - GOOGLE_ALLOWED_MODELS=${GOOGLE_ALLOWED_MODELS:-} - - XAI_ALLOWED_MODELS=${XAI_ALLOWED_MODELS:-} - - REDIS_URL=redis://redis:6379/0 - # Use HOME not PWD: Claude needs access to any absolute file path, not just current project, - # and Claude Code could be running from multiple locations at the same time - - WORKSPACE_ROOT=${WORKSPACE_ROOT:-${HOME}} - # USER_HOME helps detect and protect against scanning the home directory root - - USER_HOME=${HOME} - - LOG_LEVEL=${LOG_LEVEL:-DEBUG} - - PYTHONUNBUFFERED=1 - volumes: - - ${WORKSPACE_ROOT:-${HOME}}:/workspace:ro - - mcp_logs:/tmp # Shared volume for logs - - /etc/localtime:/etc/localtime:ro - stdin_open: true - tty: true - entrypoint: ["python"] - command: ["server.py"] - - log-monitor: - build: . - image: zen-mcp-server:latest - container_name: zen-mcp-log-monitor - restart: unless-stopped - stop_grace_period: 3s - depends_on: - - zen-mcp - environment: - - PYTHONUNBUFFERED=1 - volumes: - - mcp_logs:/tmp # Shared volume for logs - - /etc/localtime:/etc/localtime:ro - entrypoint: ["python"] - command: ["log_monitor.py"] - -volumes: - redis_data: - mcp_logs: \ No newline at end of file diff --git a/docs/adding_providers.md b/docs/adding_providers.md index f700c86..af629d8 100644 --- a/docs/adding_providers.md +++ b/docs/adding_providers.md @@ -320,32 +320,7 @@ def _get_api_key_for_provider(cls, provider_type: ProviderType) -> Optional[str] # ... rest of the method ``` -### 4. Configure Docker Environment Variables - -**CRITICAL**: You must add your provider's environment variables to `docker-compose.yml` for them to be available in the Docker container. - -Add your API key and restriction variables to the `environment` section: - -```yaml -services: - zen-mcp: - # ... other configuration ... - environment: - - GEMINI_API_KEY=${GEMINI_API_KEY:-} - - OPENAI_API_KEY=${OPENAI_API_KEY:-} - - EXAMPLE_API_KEY=${EXAMPLE_API_KEY:-} # Add this line - # OpenRouter support - - OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-} - # ... other variables ... - # Model usage restrictions - - OPENAI_ALLOWED_MODELS=${OPENAI_ALLOWED_MODELS:-} - - GOOGLE_ALLOWED_MODELS=${GOOGLE_ALLOWED_MODELS:-} - - EXAMPLE_ALLOWED_MODELS=${EXAMPLE_ALLOWED_MODELS:-} # Add this line -``` - -โš ๏ธ **Without this step**, the Docker container won't have access to your environment variables, and your provider won't be registered even if the API key is set in your `.env` file. - -### 5. Register Provider in server.py +### 4. 
Register Provider in server.py The `configure_providers()` function in `server.py` handles provider registration. You need to: @@ -672,7 +647,7 @@ if __name__ == "__main__": ``` The simulator test is crucial because it: -- Validates your provider works in the actual Docker environment +- Validates your provider works in the actual server environment - Tests real API integration, not just mocked behavior - Verifies model name resolution works correctly - Checks conversation continuity across requests @@ -799,7 +774,7 @@ Before submitting your PR: - [ ] Provider implementation complete with all required methods - [ ] API key mapping added to `_get_api_key_for_provider()` in `providers/registry.py` - [ ] Provider added to `PROVIDER_PRIORITY_ORDER` in `registry.py` (if native provider) -- [ ] **Environment variables added to `docker-compose.yml`** (API key and restrictions) +- [ ] **Environment variables added to `.env` file** (API key and restrictions) - [ ] Provider imported and registered in `server.py`'s `configure_providers()` - [ ] API key checking added to `configure_providers()` function - [ ] Error message updated to include new provider diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md index 77fad20..65dc7f3 100644 --- a/docs/advanced-usage.md +++ b/docs/advanced-usage.md @@ -239,9 +239,9 @@ All tools that work with files support **both individual files and entire direct **The Zen MCP Server's most revolutionary feature** is its ability to maintain conversation context even after Claude's memory resets. This enables truly persistent AI collaboration across multiple sessions and context boundaries. -### ๐Ÿ”ฅ **The Breakthrough** +### **The Breakthrough** -Even when Claude's context resets or compacts, conversations can continue seamlessly because other models (O3, Gemini) have access to the complete conversation history stored in Redis and can "remind" Claude of everything that was discussed. +Even when Claude's context resets or compacts, conversations can continue seamlessly because other models (O3, Gemini) have access to the complete conversation history stored in memory and can "remind" Claude of everything that was discussed. 
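+
+Conceptually, the threading works like a small in-memory store keyed by a continuation ID. The sketch below is illustrative only; names such as `ThreadStore` and `add_turn` are hypothetical, not the server's actual API:
+
+```python
+import time
+import uuid
+from typing import Optional
+
+
+class ThreadStore:
+    """Illustrative in-memory conversation store (not the server's real implementation)."""
+
+    def __init__(self, timeout_hours: float = 3.0):
+        self._threads = {}  # thread_id -> {"turns": [...], "updated": timestamp}
+        self._timeout_seconds = timeout_hours * 3600
+
+    def create_thread(self) -> str:
+        thread_id = str(uuid.uuid4())  # the continuation ID handed back to Claude
+        self._threads[thread_id] = {"turns": [], "updated": time.time()}
+        return thread_id
+
+    def add_turn(self, thread_id: str, role: str, content: str) -> None:
+        thread = self._threads[thread_id]
+        thread["turns"].append({"role": role, "content": content})
+        thread["updated"] = time.time()
+
+    def get_history(self, thread_id: str) -> Optional[list]:
+        thread = self._threads.get(thread_id)
+        if thread is None or time.time() - thread["updated"] > self._timeout_seconds:
+            return None  # expired or unknown ID: tools fall back to a fresh conversation
+        return thread["turns"]
+```
+
+Because the history lives with the server process rather than inside Claude's context window, any model the server calls can be handed the full transcript even after Claude's own context has been compacted.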
### Key Benefits diff --git a/docs/ai-collaboration.md b/docs/ai-collaboration.md index 64f940e..ac8b393 100644 --- a/docs/ai-collaboration.md +++ b/docs/ai-collaboration.md @@ -12,7 +12,7 @@ This server enables **true AI collaboration** between Claude and multiple AI mod - **Cross-tool continuation** - Start with one tool (e.g., `analyze`) and continue with another (e.g., `codereview`) using the same conversation thread - **Both AIs coordinate their approaches** - questioning assumptions, validating solutions, and building on each other's insights - Each conversation maintains full context while only sending incremental updates -- Conversations are automatically managed with Redis for persistence +- Conversations are automatically managed in memory for the session duration ## Example: Multi-Model AI Coordination @@ -52,7 +52,7 @@ This server enables **true AI collaboration** between Claude and multiple AI mod **Conversation Management:** - Up to 10 exchanges per conversation (configurable via `MAX_CONVERSATION_TURNS`) - 3-hour expiry (configurable via `CONVERSATION_TIMEOUT_HOURS`) -- Thread-safe with Redis persistence across all tools +- Thread-safe with in-memory persistence across all tools - **Image context preservation** - Images and visual references are maintained across conversation turns and tool switches ## Cross-Tool & Cross-Model Continuation Example diff --git a/docs/configuration.md b/docs/configuration.md index b9d4d85..f9cada6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -19,11 +19,4 @@ OPENAI_API_KEY=your-openai-key -**Workspace Root:** -```env -# Required: Workspace root directory for file access -WORKSPACE_ROOT=/Users/your-username -``` -- Path that contains all files Claude might reference -- Defaults to `$HOME` for direct usage, auto-configured for Docker ### API Keys (At least one required) @@ -55,15 +48,14 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here **Option 3: Custom API Endpoints (Local models)** ```env # For Ollama, vLLM, LM Studio, etc. 
-# IMPORTANT: Use host.docker.internal, NOT localhost (Docker requirement) -CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example +CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example CUSTOM_API_KEY= # Empty for Ollama CUSTOM_MODEL_NAME=llama3.2 # Default model ``` -**Docker Network Requirements:** -- โŒ WRONG: `http://localhost:11434/v1` (Docker containers cannot reach localhost) -- โœ… CORRECT: `http://host.docker.internal:11434/v1` (Docker can reach host services) +**Local Model Connection:** +- Use standard localhost URLs since the server runs natively +- Example: `http://localhost:11434/v1` for Ollama ### Model Configuration @@ -165,16 +159,12 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json ``` -**Redis Configuration:** -```env -# Redis URL for conversation threading (auto-configured for Docker) -REDIS_URL=redis://redis:6379/0 -``` - **Conversation Settings:** ```env -# How long AI-to-AI conversation threads persist (hours) -CONVERSATION_TIMEOUT_HOURS=3 +# How long AI-to-AI conversation threads persist in memory (hours) +# Conversations are auto-purged when claude closes its MCP connection or +# when a session is quit / re-launched +CONVERSATION_TIMEOUT_HOURS=5 # Maximum conversation turns (each exchange = 2 turns) MAX_CONVERSATION_TURNS=20 @@ -215,7 +205,7 @@ CONVERSATION_TIMEOUT_HOURS=3 ```env # Local models only DEFAULT_MODEL=llama3.2 -CUSTOM_API_URL=http://host.docker.internal:11434/v1 +CUSTOM_API_URL=http://localhost:11434/v1 CUSTOM_API_KEY= CUSTOM_MODEL_NAME=llama3.2 LOG_LEVEL=DEBUG @@ -232,9 +222,9 @@ LOG_LEVEL=INFO ## Important Notes -**Docker Networking:** -- Always use `host.docker.internal` instead of `localhost` for custom APIs -- The server runs in Docker and cannot access `localhost` directly +**Local Networking:** +- Use standard localhost URLs for local models +- The server runs as a native Python process **API Key Priority:** - Native APIs take priority over OpenRouter when both are configured diff --git a/docs/contributions.md b/docs/contributions.md index 2e6bd0d..185b727 100644 --- a/docs/contributions.md +++ b/docs/contributions.md @@ -8,9 +8,7 @@ Thank you for your interest in contributing to Zen MCP Server! This guide will h 2. **Clone your fork** locally 3. **Set up the development environment**: ```bash - python -m venv venv - source venv/bin/activate # On Windows: venv\Scripts\activate - pip install -r requirements.txt + ./run-server.sh ``` 4. **Create a feature branch** from `main`: ```bash @@ -28,9 +26,6 @@ We maintain high code quality standards. **All contributions must pass our autom Before submitting any PR, run our automated quality check script: ```bash -# Activate virtual environment first -source venv/bin/activate - # Run the comprehensive quality checks script ./code_quality_checks.sh ``` @@ -78,7 +73,7 @@ python communication_simulator_test.py 2. **Tool changes require simulator tests**: - Add simulator tests in `simulator_tests/` for new or modified tools - Use realistic prompts that demonstrate the feature - - Validate output through Docker logs + - Validate output through server logs 3. 
**Bug fixes require regression tests**: - Add a test that would have caught the bug @@ -94,7 +89,7 @@ python communication_simulator_test.py Your PR title MUST follow one of these formats: -**Version Bumping Prefixes** (trigger Docker build + version bump): +**Version Bumping Prefixes** (trigger version bump): - `feat: ` - New features (MINOR version bump) - `fix: ` - Bug fixes (PATCH version bump) - `breaking: ` or `BREAKING CHANGE: ` - Breaking changes (MAJOR version bump) @@ -108,10 +103,9 @@ Your PR title MUST follow one of these formats: - `ci: ` - CI/CD changes - `style: ` - Code style changes -**Docker Build Options**: -- `docker: ` - Force Docker build without version bump -- `docs+docker: ` - Documentation + Docker build -- `chore+docker: ` - Maintenance + Docker build +**Other Options**: +- `docs: ` - Documentation changes only +- `chore: ` - Maintenance tasks #### PR Checklist @@ -216,7 +210,7 @@ isort . ### Test Failures - Check test output for specific errors - Run individual tests for debugging: `pytest tests/test_specific.py -xvs` -- Ensure Docker is running for simulator tests +- Ensure server environment is set up for simulator tests ### Import Errors - Verify virtual environment is activated diff --git a/docs/custom_models.md b/docs/custom_models.md index 45becd6..4bcd32e 100644 --- a/docs/custom_models.md +++ b/docs/custom_models.md @@ -80,7 +80,7 @@ OPENROUTER_API_KEY=your-openrouter-api-key > **Note:** Control which models can be used directly in your OpenRouter dashboard at [openrouter.ai](https://openrouter.ai/). > This gives you centralized control over model access and spending limits. -That's it! Docker Compose already includes all necessary configuration. +That's it! The setup script handles all necessary configuration automatically. ### Option 2: Custom API Setup (Ollama, vLLM, etc.) @@ -102,49 +102,46 @@ python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat- #### 2. Configure Environment Variables ```bash # Add to your .env file -CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example +CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example CUSTOM_API_KEY= # Empty for Ollama (no auth needed) CUSTOM_MODEL_NAME=llama3.2 # Default model to use ``` -**Important: Docker URL Configuration** +**Local Model Connection** -Since the Zen MCP server always runs in Docker, you must use `host.docker.internal` instead of `localhost` to connect to local models running on your host machine: +The Zen MCP server runs natively, so you can use standard localhost URLs to connect to local models: ```bash -# For Ollama, vLLM, LM Studio, etc. running on your host machine -CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama default port (NOT localhost!) +# For Ollama, vLLM, LM Studio, etc. running on your machine +CUSTOM_API_URL=http://localhost:11434/v1 # Ollama default port ``` -โŒ **Never use:** `http://localhost:11434/v1` - Docker containers cannot reach localhost -โœ… **Always use:** `http://host.docker.internal:11434/v1` - This allows Docker to access host services - #### 3. 
Examples for Different Platforms **Ollama:** ```bash -CUSTOM_API_URL=http://host.docker.internal:11434/v1 +CUSTOM_API_URL=http://localhost:11434/v1 CUSTOM_API_KEY= CUSTOM_MODEL_NAME=llama3.2 ``` **vLLM:** ```bash -CUSTOM_API_URL=http://host.docker.internal:8000/v1 +CUSTOM_API_URL=http://localhost:8000/v1 CUSTOM_API_KEY= CUSTOM_MODEL_NAME=meta-llama/Llama-2-7b-chat-hf ``` **LM Studio:** ```bash -CUSTOM_API_URL=http://host.docker.internal:1234/v1 +CUSTOM_API_URL=http://localhost:1234/v1 CUSTOM_API_KEY=lm-studio # Or any value, LM Studio often requires some key CUSTOM_MODEL_NAME=local-model ``` **text-generation-webui (with OpenAI extension):** ```bash -CUSTOM_API_URL=http://host.docker.internal:5001/v1 +CUSTOM_API_URL=http://localhost:5001/v1 CUSTOM_API_KEY= CUSTOM_MODEL_NAME=your-loaded-model ``` diff --git a/docs/logging.md b/docs/logging.md index 4b7d6af..312c9c0 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -11,49 +11,59 @@ The easiest way to monitor logs is to use the `-f` flag when starting the server This will start the server and immediately begin tailing the MCP server logs. -## Viewing Logs in Docker - -To monitor MCP server activity in real-time: - -```bash -# Follow MCP server logs (recommended) -docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log - -# Or use the -f flag when starting the server -./run-server.sh -f -``` - -**Note**: Due to MCP protocol limitations, container logs don't show tool execution details. Always use the commands above for debugging. - ## Log Files -Logs are stored in the container's `/tmp/` directory and rotate daily at midnight, keeping 7 days of history: +Logs are stored in the `logs/` directory within your project folder: -- **`mcp_server.log`** - Main server operations -- **`mcp_activity.log`** - Tool calls and conversations -- **`mcp_server_overflow.log`** - Overflow protection for large logs +- **`mcp_server.log`** - Main server operations, API calls, and errors +- **`mcp_activity.log`** - Tool calls and conversation tracking -## Accessing Log Files +Log files rotate automatically when they reach 20MB, keeping up to 10 rotated files. 
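For reference, size-based rotation like this is exactly what Python's standard `logging.handlers.RotatingFileHandler` provides. A minimal sketch of an equivalent setup; the logger name and exact wiring are assumptions, not the server's verbatim code:

```python
import os
import logging
from logging.handlers import RotatingFileHandler

os.makedirs("logs", exist_ok=True)

# 20 MB per file with 10 backups caps mcp_server.log at roughly 200 MB on disk
handler = RotatingFileHandler(
    "logs/mcp_server.log", maxBytes=20 * 1024 * 1024, backupCount=10
)
# Produces the documented "timestamp - module.name - LEVEL - message" shape
handler.setFormatter(
    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)

logger = logging.getLogger("zen")
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.info("rotation kicks in automatically once the file reaches maxBytes")
```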
-To access log files directly: +## Viewing Logs + +To monitor MCP server activity: ```bash -# Enter the container -docker exec -it zen-mcp-server /bin/sh +# Follow logs in real-time +tail -f logs/mcp_server.log -# View current logs -cat /tmp/mcp_server.log -cat /tmp/mcp_activity.log +# View last 100 lines +tail -n 100 logs/mcp_server.log -# View previous days (with date suffix) -cat /tmp/mcp_server.log.2024-06-14 +# View activity logs (tool calls only) +tail -f logs/mcp_activity.log + +# Search for specific patterns +grep "ERROR" logs/mcp_server.log +grep "tool_name" logs/mcp_activity.log ``` ## Log Level -Set verbosity with `LOG_LEVEL` in your `.env` file or docker-compose.yml: +Set verbosity with `LOG_LEVEL` in your `.env` file: -```yaml -environment: - - LOG_LEVEL=DEBUG # Options: DEBUG, INFO, WARNING, ERROR -``` \ No newline at end of file +```env +# Options: DEBUG, INFO, WARNING, ERROR +LOG_LEVEL=INFO +``` + +- **DEBUG**: Detailed information for debugging +- **INFO**: General operational messages (default) +- **WARNING**: Warning messages +- **ERROR**: Only error messages + +## Log Format + +Logs use a standardized format with timestamps: + +``` +2024-06-14 10:30:45,123 - module.name - INFO - Message here +``` + +## Tips + +- Use `./run-server.sh -f` for the easiest log monitoring experience +- Activity logs show only tool-related events for cleaner output +- Main server logs include all operational details +- Logs persist across server restarts \ No newline at end of file diff --git a/docs/testing.md b/docs/testing.md index fec3932..6c9851b 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -5,9 +5,7 @@ This project includes comprehensive test coverage through unit tests and integra ## Running Tests ### Prerequisites -- Python virtual environment activated: `source venv/bin/activate` -- All dependencies installed: `pip install -r requirements.txt` -- Docker containers running (for simulator tests): `./run-server.sh` +- Environment set up: `./run-server.sh` - Use `./run-server.sh -f` to automatically follow logs after starting ### Unit Tests @@ -23,9 +21,9 @@ python -m pytest tests/test_providers.py -xvs ### Simulator Tests -Simulator tests replicate real-world Claude CLI interactions with the MCP server running in Docker. Unlike unit tests that test isolated functions, simulator tests validate the complete end-to-end flow including: +Simulator tests replicate real-world Claude CLI interactions with the standalone MCP server. Unlike unit tests that test isolated functions, simulator tests validate the complete end-to-end flow including: - Actual MCP protocol communication -- Docker container interactions +- Standalone server interactions - Multi-turn conversations across tools - Log output validation @@ -33,7 +31,7 @@ Simulator tests replicate real-world Claude CLI interactions with the MCP server #### Monitoring Logs During Tests -**Important**: The MCP stdio protocol interferes with stderr output during tool execution. While server startup logs appear in `docker compose logs`, tool execution logs are only written to file-based logs inside the container. This is a known limitation of the stdio-based MCP protocol and cannot be fixed without changing the MCP implementation. +**Important**: The MCP stdio protocol interferes with stderr output during tool execution. Tool execution logs are written to local log files. This is a known limitation of the stdio-based MCP protocol. 
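Since tool execution is only observable through these files, the "log output validation" step in simulator tests reduces to scanning them after a run. A toy version of such a check; the helper name is hypothetical, not the suite's actual API:

```python
from pathlib import Path

def assert_tool_logged(tool_name, log_path="logs/mcp_activity.log"):
    """Crude end-of-run check of the kind simulator tests perform on log files."""
    text = Path(log_path).read_text(encoding="utf-8", errors="replace")
    if tool_name not in text:
        raise AssertionError(f"no activity-log entry found for tool {tool_name!r}")

assert_tool_logged("chat")  # passes once the chat tool has run at least once
```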
To monitor logs during test execution: @@ -42,20 +40,20 @@ To monitor logs during test execution: ./run-server.sh -f # Or manually monitor main server logs (includes all tool execution details) -docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log +tail -f -n 500 logs/mcp_server.log # Monitor MCP activity logs (tool calls and completions) -docker exec zen-mcp-server tail -f /tmp/mcp_activity.log +tail -f logs/mcp_activity.log # Check log file sizes (logs rotate at 20MB) -docker exec zen-mcp-server ls -lh /tmp/mcp_*.log* +ls -lh logs/mcp_*.log* ``` **Log Rotation**: All log files are configured with automatic rotation at 20MB to prevent disk space issues. The server keeps: - 10 rotated files for mcp_server.log (200MB total) - 5 rotated files for mcp_activity.log (100MB total) -**Why logs don't appear in docker compose logs**: The MCP stdio_server captures stderr during tool execution to prevent interference with the JSON-RPC protocol communication. This means that while you'll see startup logs in `docker compose logs`, you won't see tool execution logs there. +**Why logs appear in files**: The MCP stdio_server captures stderr during tool execution to prevent interference with the JSON-RPC protocol communication. This means tool execution logs are written to files rather than displayed in console output. #### Running All Simulator Tests ```bash @@ -65,7 +63,7 @@ python communication_simulator_test.py # Run with verbose output for debugging python communication_simulator_test.py --verbose -# Keep Docker logs after tests for inspection +# Keep server logs after tests for inspection python communication_simulator_test.py --keep-logs ``` @@ -79,7 +77,7 @@ python communication_simulator_test.py --individual basic_conversation # Examples of available tests: python communication_simulator_test.py --individual content_validation python communication_simulator_test.py --individual cross_tool_continuation -python communication_simulator_test.py --individual redis_validation +python communication_simulator_test.py --individual memory_validation ``` #### Other Options @@ -90,8 +88,6 @@ python communication_simulator_test.py --list-tests # Run multiple specific tests (not all) python communication_simulator_test.py --tests basic_conversation content_validation -# Force Docker environment rebuild before running tests -python communication_simulator_test.py --rebuild ``` ### Code Quality Checks @@ -135,11 +131,8 @@ For detailed contribution guidelines, testing requirements, and code quality sta ### Quick Testing Reference ```bash -# Activate virtual environment -source venv/bin/activate - -# Run linting checks -ruff check . && black --check . && isort --check-only . +# Run quality checks +./code_quality_checks.sh # Run unit tests python -m pytest -xvs diff --git a/docs/tools/debug.md b/docs/tools/debug.md index 3f338e0..62bb5e4 100644 --- a/docs/tools/debug.md +++ b/docs/tools/debug.md @@ -79,7 +79,7 @@ bug hunting and reduces the chance of wasting precious tokens back and forth. 
**Runtime Environment Issues:** ``` -"Debug deployment issues with Docker container startup failures, here's the runtime info: [environment details]" +"Debug deployment issues with server startup failures, here's the runtime info: [environment details]" ``` ## Debugging Methodology diff --git a/docs/tools/listmodels.md b/docs/tools/listmodels.md index 03a971a..1805ae3 100644 --- a/docs/tools/listmodels.md +++ b/docs/tools/listmodels.md @@ -56,7 +56,7 @@ The tool displays: ๐Ÿ”น Custom/Local - โœ… Configured โ€ข local-llama (llama3.2) - 128K context, local inference - โ€ข Available at: http://host.docker.internal:11434/v1 + โ€ข Available at: http://localhost:11434/v1 ๐Ÿ”น OpenRouter - โŒ Not configured Set OPENROUTER_API_KEY to enable access to Claude, GPT-4, and more models diff --git a/docs/tools/version.md b/docs/tools/version.md index f57bcb2..51d68dd 100644 --- a/docs/tools/version.md +++ b/docs/tools/version.md @@ -42,8 +42,8 @@ The tool provides: **System Information:** - Server uptime and status - Memory and resource usage (if available) -- Connection status with Redis (for conversation memory) -- Docker container information +- Conversation memory status +- Server process information ## Example Output @@ -58,7 +58,7 @@ The tool provides: โš™๏ธ Configuration: โ€ข Default Model: auto โ€ข Providers: Google โœ…, OpenAI โœ…, Custom โœ… -โ€ข Conversation Memory: Redis โœ… +โ€ข Conversation Memory: Active โœ… โ€ข Web Search: Enabled ๐Ÿ› ๏ธ Available Tools (12): @@ -77,8 +77,8 @@ The tool provides: ๐Ÿ” System Status: โ€ข Server Uptime: 2h 35m -โ€ข Redis Connection: Active -โ€ข Docker Container: zen-mcp-server (running) +โ€ข Memory Storage: Active +โ€ข Server Process: Running ``` ## When to Use Version Tool @@ -106,7 +106,7 @@ The version tool can help diagnose common issues: **Performance Troubleshooting:** - Server uptime and stability - Resource usage patterns -- Redis connection health +- Memory storage health ## Tool Parameters diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 78138f3..d09ec6b 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -24,7 +24,7 @@ claude.exe --debug Look for error messages in the console output, especially: - API key errors -- Docker connection issues +- Python/environment issues - File permission errors ### 3. Verify API Keys @@ -40,60 +40,72 @@ cat .env # OPENAI_API_KEY=your-key-here ``` -If you need to update your API keys, edit the `.env` file and then run: +If you need to update your API keys, edit the `.env` file and then restart Claude for changes to take effect. + +### 4. Check Server Logs + +View the server logs for detailed error information: ```bash -# Restart services -./run-server.sh +# View recent logs +tail -n 100 logs/mcp_server.log -# Or restart and follow logs for troubleshooting -./run-server.sh -f -``` - -This will validate your configuration and restart the services. - -### 4. Check Docker Logs - -View the container logs for detailed error information: - -```bash -# Check if containers are running -docker-compose ps - -# View MCP server logs (recommended - shows actual tool execution) -docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log +# Follow logs in real-time +tail -f logs/mcp_server.log # Or use the -f flag when starting to automatically follow logs ./run-server.sh -f -``` -**Note**: Due to MCP protocol limitations, `docker-compose logs` only shows startup logs, not tool execution logs. Always use the docker exec command above or the `-f` flag for debugging. 
+# Search for errors +grep "ERROR" logs/mcp_server.log +``` See [Logging Documentation](logging.md) for more details on accessing logs. ### 5. Common Issues **"Connection failed" in Claude Desktop** -- Ensure Docker is running: `docker ps` -- Restart services: `docker-compose restart` +- Ensure the server path is correct in your Claude config +- Run `./run-server.sh` to verify setup and see configuration +- Check that Python is installed: `python3 --version` **"API key environment variable is required"** - Add your API key to the `.env` file -- Run: `./run-server.sh` to validate and restart +- Restart Claude Desktop after updating `.env` **File path errors** - Always use absolute paths: `/Users/you/project/file.py` - Never use relative paths: `./file.py` -### 6. Still Having Issues? +**Python module not found** +- Run `./run-server.sh` to reinstall dependencies +- Check virtual environment is activated: should see `.zen_venv` in the Python path + +### 6. Environment Issues + +**Virtual Environment Problems** +```bash +# Reset environment completely +rm -rf .zen_venv +./run-server.sh +``` + +**Permission Issues** +```bash +# Ensure script is executable +chmod +x run-server.sh +``` + +### 7. Still Having Issues? If the problem persists after trying these steps: 1. **Reproduce the issue** - Note the exact steps that cause the problem -2. **Collect logs** - Save relevant error messages from Claude debug mode and Docker logs +2. **Collect logs** - Save relevant error messages from Claude debug mode and server logs 3. **Open a GitHub issue** with: - Your operating system - - Error messages + - Python version: `python3 --version` + - Error messages from logs - Steps to reproduce - What you've already tried diff --git a/examples/claude_config_docker_home.json b/examples/claude_config_docker_home.json deleted file mode 100644 index a7176ca..0000000 --- a/examples/claude_config_docker_home.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "comment": "Docker configuration that mounts your home directory", - "comment2": "Update paths: /path/to/zen-mcp-server/.env and /Users/your-username", - "comment3": "The container auto-detects /workspace as sandbox from WORKSPACE_ROOT", - "mcpServers": { - "zen": { - "command": "docker", - "args": [ - "run", - "--rm", - "-i", - "--env-file", "/path/to/zen-mcp-server/.env", - "-e", "WORKSPACE_ROOT=/Users/your-username", - "-v", "/Users/your-username:/workspace:ro", - "zen-mcp-server:latest" - ] - } - } -} \ No newline at end of file diff --git a/examples/claude_config_macos.json b/examples/claude_config_macos.json index cab866d..c1657af 100644 --- a/examples/claude_config_macos.json +++ b/examples/claude_config_macos.json @@ -1,17 +1,11 @@ { - "comment": "macOS configuration using Docker", - "comment2": "Ensure Docker is running and containers are started", - "comment3": "Run './run-server.sh' first to set up the environment", + "comment": "macOS configuration using standalone server", + "comment2": "Run './run-server.sh' to set up the environment and get exact paths", + "comment3": "Use './run-server.sh -c' to display the correct configuration", "mcpServers": { "zen": { - "command": "docker", - "args": [ - "exec", - "-i", - "zen-mcp-server", - "python", - "server.py" - ] + "command": "/path/to/zen-mcp-server/.zen_venv/bin/python", + "args": ["/path/to/zen-mcp-server/server.py"] } } -} +} \ No newline at end of file diff --git a/examples/claude_config_wsl.json b/examples/claude_config_wsl.json index f73747e..b0dae6d 100644 --- a/examples/claude_config_wsl.json +++ 
b/examples/claude_config_wsl.json @@ -1,18 +1,14 @@ { - "comment": "Windows configuration using WSL with Docker", - "comment2": "Ensure Docker Desktop is running and WSL integration is enabled", - "comment3": "Run './run-server.sh' in WSL first to set up the environment", + "comment": "Windows configuration using WSL with standalone server", + "comment2": "Run './run-server.sh' in WSL to set up the environment and get exact paths", + "comment3": "Use './run-server.sh -c' to display the correct configuration", "mcpServers": { "zen": { "command": "wsl.exe", "args": [ - "docker", - "exec", - "-i", - "zen-mcp-server", - "python", - "server.py" + "/path/to/zen-mcp-server/.zen_venv/bin/python", + "/path/to/zen-mcp-server/server.py" ] } } -} +} \ No newline at end of file diff --git a/providers/__init__.py b/providers/__init__.py index b36b92e..ffeecb6 100644 --- a/providers/__init__.py +++ b/providers/__init__.py @@ -2,8 +2,8 @@ from .base import ModelCapabilities, ModelProvider, ModelResponse from .gemini import GeminiModelProvider -from .openai import OpenAIModelProvider from .openai_compatible import OpenAICompatibleProvider +from .openai_provider import OpenAIModelProvider from .openrouter import OpenRouterProvider from .registry import ModelProviderRegistry diff --git a/providers/custom.py b/providers/custom.py index 60e9822..70a1d41 100644 --- a/providers/custom.py +++ b/providers/custom.py @@ -40,7 +40,7 @@ class CustomProvider(OpenAICompatibleProvider): api_key: API key for the custom endpoint. Can be empty string for providers that don't require authentication (like Ollama). Falls back to CUSTOM_API_KEY environment variable if not provided. - base_url: Base URL for the custom API endpoint (e.g., 'http://host.docker.internal:11434/v1'). + base_url: Base URL for the custom API endpoint (e.g., 'http://localhost:11434/v1'). Falls back to CUSTOM_API_URL environment variable if not provided. 
**kwargs: Additional configuration passed to parent OpenAI-compatible provider diff --git a/providers/gemini.py b/providers/gemini.py index 2ce6310..07e002a 100644 --- a/providers/gemini.py +++ b/providers/gemini.py @@ -453,20 +453,13 @@ class GeminiModelProvider(ModelProvider): mime_type = header.split(";")[0].split(":")[1] return {"inline_data": {"mime_type": mime_type, "data": data}} else: - # Handle file path - translate for Docker environment + # Handle file path from utils.file_types import get_image_mime_type - from utils.file_utils import translate_path_for_environment - translated_path = translate_path_for_environment(image_path) - logger.debug(f"Translated image path from '{image_path}' to '{translated_path}'") - - if not os.path.exists(translated_path): - logger.warning(f"Image file not found: {translated_path} (original: {image_path})") + if not os.path.exists(image_path): + logger.warning(f"Image file not found: {image_path}") return None - # Use translated path for all subsequent operations - image_path = translated_path - # Detect MIME type from file extension using centralized mappings ext = os.path.splitext(image_path)[1].lower() mime_type = get_image_mime_type(ext) diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py index 0163cc8..90fc30c 100644 --- a/providers/openai_compatible.py +++ b/providers/openai_compatible.py @@ -151,10 +151,6 @@ class OpenAICompatibleProvider(ModelProvider): if hostname in ["localhost", "127.0.0.1", "::1"]: return True - # Check for Docker internal hostnames (like host.docker.internal) - if hostname and ("docker.internal" in hostname or "host.docker.internal" in hostname): - return True - # Check for private network ranges (local network) if hostname: try: @@ -201,26 +197,70 @@ class OpenAICompatibleProvider(ModelProvider): def client(self): """Lazy initialization of OpenAI client with security checks and timeout configuration.""" if self._client is None: - client_kwargs = { - "api_key": self.api_key, - } + import os - if self.base_url: - client_kwargs["base_url"] = self.base_url + import httpx - if self.organization: - client_kwargs["organization"] = self.organization + # Temporarily disable proxy environment variables to prevent httpx from detecting them + original_env = {} + proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"] - # Add default headers if any - if self.DEFAULT_HEADERS: - client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() + for var in proxy_env_vars: + if var in os.environ: + original_env[var] = os.environ[var] + del os.environ[var] - # Add configured timeout settings - if hasattr(self, "timeout_config") and self.timeout_config: - client_kwargs["timeout"] = self.timeout_config - logging.debug(f"OpenAI client initialized with custom timeout: {self.timeout_config}") + try: + # Create a custom httpx client that explicitly avoids proxy parameters + timeout_config = ( + self.timeout_config + if hasattr(self, "timeout_config") and self.timeout_config + else httpx.Timeout(30.0) + ) - self._client = OpenAI(**client_kwargs) + # Create httpx client with minimal config to avoid proxy conflicts + # Note: proxies parameter was removed in httpx 0.28.0 + http_client = httpx.Client( + timeout=timeout_config, + follow_redirects=True, + ) + + # Keep client initialization minimal to avoid proxy parameter conflicts + client_kwargs = { + "api_key": self.api_key, + "http_client": http_client, + } + + if self.base_url: + client_kwargs["base_url"] = self.base_url + + 
if self.organization: + client_kwargs["organization"] = self.organization + + # Add default headers if any + if self.DEFAULT_HEADERS: + client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() + + logging.debug(f"OpenAI client initialized with custom httpx client and timeout: {timeout_config}") + + # Create OpenAI client with custom httpx client + self._client = OpenAI(**client_kwargs) + + except Exception as e: + # If all else fails, try absolute minimal client without custom httpx + logging.warning(f"Failed to create client with custom httpx, falling back to minimal config: {e}") + try: + minimal_kwargs = {"api_key": self.api_key} + if self.base_url: + minimal_kwargs["base_url"] = self.base_url + self._client = OpenAI(**minimal_kwargs) + except Exception as fallback_error: + logging.error(f"Even minimal OpenAI client creation failed: {fallback_error}") + raise + finally: + # Restore original proxy environment variables + for var, value in original_env.items(): + os.environ[var] = value return self._client @@ -480,7 +520,7 @@ class OpenAICompatibleProvider(ModelProvider): # Log retry attempt logging.warning( - f"{self.FRIENDLY_NAME} API error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..." + f"{self.FRIENDLY_NAME} error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..." ) time.sleep(delay) @@ -738,19 +778,11 @@ class OpenAICompatibleProvider(ModelProvider): # Handle data URL: data:image/png;base64,iVBORw0... return {"type": "image_url", "image_url": {"url": image_path}} else: - # Handle file path - translate for Docker environment - from utils.file_utils import translate_path_for_environment - - translated_path = translate_path_for_environment(image_path) - logging.debug(f"Translated image path from '{image_path}' to '{translated_path}'") - - if not os.path.exists(translated_path): - logging.warning(f"Image file not found: {translated_path} (original: {image_path})") + # Handle file path + if not os.path.exists(image_path): + logging.warning(f"Image file not found: {image_path}") return None - # Use translated path for all subsequent operations - image_path = translated_path - # Detect MIME type from file extension using centralized mappings from utils.file_types import get_image_mime_type diff --git a/providers/openai.py b/providers/openai_provider.py similarity index 100% rename from providers/openai.py rename to providers/openai_provider.py diff --git a/providers/openrouter_registry.py b/providers/openrouter_registry.py index ce84bc2..eb5c011 100644 --- a/providers/openrouter_registry.py +++ b/providers/openrouter_registry.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Optional -from utils.file_utils import read_json_file, translate_path_for_environment +from utils.file_utils import read_json_file from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint @@ -59,19 +59,17 @@ class OpenRouterModelRegistry: # Determine config path if config_path: - # Direct config_path parameter - translate for Docker if needed - translated_path = translate_path_for_environment(config_path) - self.config_path = Path(translated_path) + # Direct config_path parameter + self.config_path = Path(config_path) else: # Check environment variable first env_path = os.getenv("CUSTOM_MODELS_CONFIG_PATH") if env_path: - # Environment variable path - translate for Docker if needed - translated_path = translate_path_for_environment(env_path) - 
self.config_path = Path(translated_path) + # Environment variable path + self.config_path = Path(env_path) else: # Default to conf/custom_models.json - use relative path from this file - # This works both in development and container environments + # This works in development environment self.config_path = Path(__file__).parent.parent / "conf" / "custom_models.json" # Load configuration diff --git a/pyproject.toml b/pyproject.toml index 3b51397..303a47e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ force_grid_wrap = 0 use_parentheses = true ensure_newline_before_comments = true line_length = 120 -skip_glob = ["venv/*", ".venv/*"] +skip_glob = ["venv/*", ".venv/*", ".zen_venv/*"] [tool.ruff] target-version = "py39" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..86e039d --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +pytest>=7.4.0 +pytest-asyncio>=0.21.0 +pytest-mock>=3.11.0 +black>=23.0.0 +ruff>=0.1.0 +isort>=5.12.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 719e6c0..a021f7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ mcp>=1.0.0 google-genai>=1.19.0 -openai>=1.0.0 +openai>=1.55.2 # Minimum version for httpx 0.28.0 compatibility pydantic>=2.0.0 -redis>=5.0.0 +python-dotenv>=1.0.0 -# Development dependencies -pytest>=7.4.0 -pytest-asyncio>=0.21.0 -pytest-mock>=3.11.0 \ No newline at end of file +# Development dependencies (install with pip install -r requirements-dev.txt) +# pytest>=7.4.0 +# pytest-asyncio>=0.21.0 +# pytest-mock>=3.11.0 \ No newline at end of file diff --git a/run-server.sh b/run-server.sh index a281baa..ed47659 100755 --- a/run-server.sh +++ b/run-server.sh @@ -1,457 +1,869 @@ #!/bin/bash - -# Exit on any error, undefined variables, and pipe failures set -euo pipefail -# Run/Restart script for Zen MCP Server with Redis -# This script builds, starts, and manages the Docker environment including Redis for conversation threading -# Run this script to: -# - Initial setup of the Docker environment -# - Restart services after changing .env configuration -# - Rebuild and restart after code changes +# ============================================================================ +# Zen MCP Server Setup Script # -# Usage: ./run-server.sh [-f] -# Options: -# -f Follow logs after starting (tail -f the MCP server log) +# A platform-agnostic setup script that works on macOS, Linux, and WSL. +# Handles environment setup, dependency installation, and configuration. +# ============================================================================ -# Parse command line arguments -FOLLOW_LOGS=false -while getopts "f" opt; do - case $opt in - f) - FOLLOW_LOGS=true - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - echo "Usage: $0 [-f]" >&2 - exit 1 - ;; - esac -done +# ---------------------------------------------------------------------------- +# Constants and Configuration +# ---------------------------------------------------------------------------- -# Spinner function for long-running operations -show_spinner() { - local pid=$1 - local message=$2 - local spinner_chars="โ ‹โ ™โ นโ ธโ ผโ ดโ ฆโ งโ ‡โ " - local delay=0.1 - - # Hide cursor - tput civis 2>/dev/null || true - - while kill -0 $pid 2>/dev/null; do - for (( i=0; i<${#spinner_chars}; i++ )); do - printf "\r%s %s" "${spinner_chars:$i:1}" "$message" - sleep $delay - if ! 
kill -0 $pid 2>/dev/null; then - break 2 - fi - done - done - - # Show cursor and clear line - tput cnorm 2>/dev/null || true - printf "\r" +# Colors for output (ANSI codes work on all platforms) +readonly GREEN='\033[0;32m' +readonly YELLOW='\033[1;33m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +# Configuration +readonly VENV_PATH=".zen_venv" +readonly DOCKER_CLEANED_FLAG=".docker_cleaned" +readonly DESKTOP_CONFIG_FLAG=".desktop_configured" +readonly LOG_DIR="logs" +readonly LOG_FILE="mcp_server.log" + +# ---------------------------------------------------------------------------- +# Utility Functions +# ---------------------------------------------------------------------------- + +# Print colored output +print_success() { + echo -e "${GREEN}โœ“${NC} $1" } -# Function to run command with spinner -run_with_spinner() { - local message=$1 - local command=$2 - - printf "%s" "$message" - eval "$command" >/dev/null 2>&1 & - local pid=$! - - show_spinner $pid "$message" - wait $pid - local result=$? - - if [ $result -eq 0 ]; then - printf "\rโœ… %s\n" "${message#* }" - else - printf "\rโŒ %s failed\n" "${message#* }" - return $result - fi +print_error() { + echo -e "${RED}โœ—${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}!${NC} $1" +} + +print_info() { + echo -e "${YELLOW}$1${NC}" +} + +# Get the script's directory (works on all platforms) +get_script_dir() { + cd "$(dirname "$0")" && pwd } # Extract version from config.py -VERSION=$(grep -E '^__version__ = ' config.py 2>/dev/null | sed 's/__version__ = "\(.*\)"/\1/' || echo "unknown") +get_version() { + grep -E '^__version__ = ' config.py 2>/dev/null | sed 's/__version__ = "\(.*\)"/\1/' || echo "unknown" +} -echo "Setting up Zen MCP Server v$VERSION..." -echo "" +# ---------------------------------------------------------------------------- +# Platform Detection Functions +# ---------------------------------------------------------------------------- -# Get the current working directory (absolute path) -CURRENT_DIR=$(pwd) +# Detect the operating system +detect_os() { + case "$OSTYPE" in + darwin*) echo "macos" ;; + linux*) + if grep -qi microsoft /proc/version 2>/dev/null; then + echo "wsl" + else + echo "linux" + fi + ;; + msys*|cygwin*|win32) echo "windows" ;; + *) echo "unknown" ;; + esac +} -# Check if .env already exists -if [ -f .env ]; then - echo "โœ… .env file already exists!" - echo "" -else - # Copy from .env.example and customize - if [ ! -f .env.example ]; then - echo "โŒ .env.example file not found! This file should exist in the project directory." +# Get Claude config path based on platform +get_claude_config_path() { + local os_type=$(detect_os) + + case "$os_type" in + macos) + echo "$HOME/Library/Application Support/Claude/claude_desktop_config.json" + ;; + linux) + echo "$HOME/.config/Claude/claude_desktop_config.json" + ;; + wsl) + echo "/mnt/c/Users/$USER/AppData/Roaming/Claude/claude_desktop_config.json" + ;; + windows) + echo "$APPDATA/Claude/claude_desktop_config.json" + ;; + *) + echo "" + ;; + esac +} + +# ---------------------------------------------------------------------------- +# Docker Cleanup Functions +# ---------------------------------------------------------------------------- + +# Clean up old Docker artifacts +cleanup_docker() { + # Skip if already cleaned or Docker not available + [[ -f "$DOCKER_CLEANED_FLAG" ]] && return 0 + + if ! command -v docker &> /dev/null || ! 
docker info &> /dev/null 2>&1; then
+        return 0
+    fi
+
+    local found_artifacts=false
+
+    # Define containers to remove
+    local containers=(
+        "gemini-mcp-server"
+        "gemini-mcp-redis"
+        "zen-mcp-server"
+        "zen-mcp-redis"
+        "zen-mcp-log-monitor"
+    )
+
+    # Remove containers
+    for container in "${containers[@]}"; do
+        if docker ps -a --format "{{.Names}}" | grep -q "^${container}$" 2>/dev/null; then
+            if [[ "$found_artifacts" == false ]]; then
+                echo "One-time Docker cleanup..."
+                found_artifacts=true
+            fi
+            echo "  Removing container: $container"
+            docker stop "$container" >/dev/null 2>&1 || true
+            docker rm "$container" >/dev/null 2>&1 || true
+        fi
+    done
+
+    # Remove images
+    local images=("gemini-mcp-server:latest" "zen-mcp-server:latest")
+    for image in "${images[@]}"; do
+        if docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^${image}$" 2>/dev/null; then
+            if [[ "$found_artifacts" == false ]]; then
+                echo "One-time Docker cleanup..."
+                found_artifacts=true
+            fi
+            echo "  Removing image: $image"
+            docker rmi "$image" >/dev/null 2>&1 || true
+        fi
+    done
+
+    # Remove volumes
+    local volumes=("redis_data" "mcp_logs")
+    for volume in "${volumes[@]}"; do
+        if docker volume ls --format "{{.Name}}" | grep -q "^${volume}$" 2>/dev/null; then
+            if [[ "$found_artifacts" == false ]]; then
+                echo "One-time Docker cleanup..."
+                found_artifacts=true
+            fi
+            echo "  Removing volume: $volume"
+            docker volume rm "$volume" >/dev/null 2>&1 || true
+        fi
+    done
+
+    if [[ "$found_artifacts" == true ]]; then
+        print_success "Docker cleanup complete"
+    fi
+
+    touch "$DOCKER_CLEANED_FLAG"
+}
+
+# ----------------------------------------------------------------------------
+# Python Environment Functions
+# ----------------------------------------------------------------------------
+
+# Find suitable Python command
+find_python() {
+    # Prefer Python 3.12 for best compatibility
+    local python_cmds=("python3.12" "python3.13" "python3.11" "python3.10" "python3" "python" "py")
+
+    for cmd in "${python_cmds[@]}"; do
+        if command -v "$cmd" &> /dev/null; then
+            local version=$($cmd --version 2>&1)
+            if [[ $version =~ Python\ 3\.([0-9]+)\.([0-9]+) ]]; then
+                # BASH_REMATCH[1] captures the MINOR number; the major is pinned to 3
+                local minor_version=${BASH_REMATCH[1]}
+
+                # Check minimum version (3.10) for better library compatibility
+                if [[ $minor_version -ge 10 ]]; then
+                    # Only the command name goes to stdout for callers to capture;
+                    # status messages go to stderr so they don't pollute the result
+                    echo "$cmd"
+                    print_success "Found Python: $version" >&2
+
+                    # Recommend Python 3.12
+                    if [[ $minor_version -ne 12 ]]; then
+                        print_info "Note: Python 3.12 is recommended for best compatibility." >&2
+                    fi
+
+                    return 0
+                fi
+            fi
+        fi
+    done
+
+    print_error "Python 3.10+ not found. Please install Python 3.10 or newer (3.12 recommended)." >&2
+    return 1
+}
+
+# Setup virtual environment
+setup_venv() {
+    local python_cmd="$1"
+    local venv_python=""
+
+    # Create venv if it doesn't exist
+    if [[ ! -d "$VENV_PATH" ]]; then
+        print_info "Creating isolated environment..." >&2
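+        # Plain `python -m venv` keeps the only prerequisite a working
+        # Python 3.10+ interpreter; no extra tooling needs to be installed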
+        if $python_cmd -m venv "$VENV_PATH" 2>/dev/null; then
+            print_success "Created isolated environment" >&2
+        else
+            print_error "Failed to create virtual environment" >&2
+            exit 1
+        fi
+    fi
+
+    # Get venv Python path based on platform
+    local os_type=$(detect_os)
+    case "$os_type" in
+        windows)
+            venv_python="$VENV_PATH/Scripts/python.exe"
+            ;;
+        *)
+            venv_python="$VENV_PATH/bin/python"
+            ;;
+    esac
+
+    # Always use venv Python
+    if [[ -f "$venv_python" ]]; then
+        if [[ -n "${VIRTUAL_ENV:-}" ]]; then
+            print_success "Using activated virtual environment" >&2
+        fi
+        # Convert to absolute path for MCP registration; stdout must stay
+        # clean here because callers capture it to get the interpreter path
+        local abs_venv_python=$(cd "$(dirname "$venv_python")" && pwd)/$(basename "$venv_python")
+        echo "$abs_venv_python"
+        return 0
+    else
+        print_error "Virtual environment Python not found" >&2
         exit 1
     fi
-
-    # Copy .env.example to .env
-    cp .env.example .env
-    echo "✅ Created .env from .env.example"
-
-    # Customize the API keys if they're set in environment
-    if [ -n "${GEMINI_API_KEY:-}" ]; then
-        # Replace the placeholder API key with the actual value
-        if command -v sed >/dev/null 2>&1; then
-            sed -i.bak "s/your_gemini_api_key_here/$GEMINI_API_KEY/" .env && rm .env.bak
-            echo "✅ Updated .env with existing GEMINI_API_KEY from environment"
-        else
-            echo "⚠️  Found GEMINI_API_KEY in environment, but sed not available. Please update .env manually."
-        fi
-    fi
-
-    if [ -n "${OPENAI_API_KEY:-}" ]; then
-        # Replace the placeholder API key with the actual value
-        if command -v sed >/dev/null 2>&1; then
-            sed -i.bak "s/your_openai_api_key_here/$OPENAI_API_KEY/" .env && rm .env.bak
-            echo "✅ Updated .env with existing OPENAI_API_KEY from environment"
-        else
-            echo "⚠️  Found OPENAI_API_KEY in environment, but sed not available. Please update .env manually."
-        fi
-    fi
-
-    if [ -n "${XAI_API_KEY:-}" ]; then
-        # Replace the placeholder API key with the actual value
-        if command -v sed >/dev/null 2>&1; then
-            sed -i.bak "s/your_xai_api_key_here/$XAI_API_KEY/" .env && rm .env.bak
-            echo "✅ Updated .env with existing XAI_API_KEY from environment"
-        else
-            echo "⚠️  Found XAI_API_KEY in environment, but sed not available. Please update .env manually."
-        fi
-    fi
-
-    if [ -n "${OPENROUTER_API_KEY:-}" ]; then
-        # Replace the placeholder API key with the actual value
-        if command -v sed >/dev/null 2>&1; then
-            sed -i.bak "s/your_openrouter_api_key_here/$OPENROUTER_API_KEY/" .env && rm .env.bak
-            echo "✅ Updated .env with existing OPENROUTER_API_KEY from environment"
-        else
-            echo "⚠️  Found OPENROUTER_API_KEY in environment, but sed not available. Please update .env manually."
-        fi
-    fi
-
-    # Update WORKSPACE_ROOT to use current user's home directory
-    if command -v sed >/dev/null 2>&1; then
-        sed -i.bak "s|WORKSPACE_ROOT=/Users/your-username|WORKSPACE_ROOT=$HOME|" .env && rm .env.bak
-        echo "✅ Updated WORKSPACE_ROOT to $HOME"
-    fi
-    echo "✅ Created .env file with Redis configuration"
-    echo ""
-fi
-
-# Check if Docker and Docker Compose are installed
-if ! command -v docker &> /dev/null; then
-    echo "❌ Docker is not installed. Please install Docker first."
-    echo "   Visit: https://docs.docker.com/get-docker/"
-    exit 1
-fi
-
-if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
-    echo "❌ Docker Compose is not installed. Please install Docker Compose first."
-    echo "   Visit: https://docs.docker.com/compose/install/"
-    exit 1
-fi
-
-# Check if Docker daemon is running
-if ! docker info &> /dev/null; then
-    echo "❌ Docker daemon is not running. 
Please start Docker." - exit 1 -fi - -# Use modern docker compose syntax if available, fall back to docker-compose -COMPOSE_CMD="docker compose" -if ! docker compose version &> /dev/null; then - COMPOSE_CMD="docker-compose" -fi - -# Check if at least one API key or custom URL is properly configured -source .env 2>/dev/null || true - -VALID_GEMINI_KEY=false -VALID_OPENAI_KEY=false -VALID_XAI_KEY=false -VALID_OPENROUTER_KEY=false -VALID_CUSTOM_URL=false - -# Check if GEMINI_API_KEY is set and not the placeholder -if [ -n "${GEMINI_API_KEY:-}" ] && [ "$GEMINI_API_KEY" != "your_gemini_api_key_here" ]; then - VALID_GEMINI_KEY=true - echo "โœ… GEMINI_API_KEY found" -fi - -# Check if OPENAI_API_KEY is set and not the placeholder -if [ -n "${OPENAI_API_KEY:-}" ] && [ "$OPENAI_API_KEY" != "your_openai_api_key_here" ]; then - VALID_OPENAI_KEY=true - echo "โœ… OPENAI_API_KEY found" -fi - -# Check if XAI_API_KEY is set and not the placeholder -if [ -n "${XAI_API_KEY:-}" ] && [ "$XAI_API_KEY" != "your_xai_api_key_here" ]; then - VALID_XAI_KEY=true - echo "โœ… XAI_API_KEY found" -fi - -# Check if OPENROUTER_API_KEY is set and not the placeholder -if [ -n "${OPENROUTER_API_KEY:-}" ] && [ "$OPENROUTER_API_KEY" != "your_openrouter_api_key_here" ]; then - VALID_OPENROUTER_KEY=true - echo "โœ… OPENROUTER_API_KEY found" -fi - -# Check if CUSTOM_API_URL is set and not empty (custom API key is optional) -if [ -n "${CUSTOM_API_URL:-}" ]; then - VALID_CUSTOM_URL=true - echo "โœ… CUSTOM_API_URL found: $CUSTOM_API_URL" -fi - -# Require at least one valid API key or custom URL -if [ "$VALID_GEMINI_KEY" = false ] && [ "$VALID_OPENAI_KEY" = false ] && [ "$VALID_XAI_KEY" = false ] && [ "$VALID_OPENROUTER_KEY" = false ] && [ "$VALID_CUSTOM_URL" = false ]; then - echo "" - echo "โŒ ERROR: At least one valid API key or custom URL is required!" - echo "" - echo "Please edit the .env file and set at least one of:" - echo " - GEMINI_API_KEY (get from https://makersuite.google.com/app/apikey)" - echo " - OPENAI_API_KEY (get from https://platform.openai.com/api-keys)" - echo " - XAI_API_KEY (get from https://console.x.ai/)" - echo " - OPENROUTER_API_KEY (get from https://openrouter.ai/)" - echo " - CUSTOM_API_URL (for local models like Ollama, vLLM, etc.)" - echo "" - echo "Example:" - echo " GEMINI_API_KEY=your-actual-api-key-here" - echo " OPENAI_API_KEY=sk-your-actual-openai-key-here" - echo " XAI_API_KEY=xai-your-actual-xai-key-here" - echo " OPENROUTER_API_KEY=sk-or-your-actual-openrouter-key-here" - echo " CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama (use host.docker.internal, NOT localhost!)" - echo "" - exit 1 -fi - -echo "" - -# Stop and remove existing containers -run_with_spinner "๐Ÿ›‘ Stopping existing docker containers..." 
"$COMPOSE_CMD down --remove-orphans" || true - -# Clean up any old containers with different naming patterns -OLD_CONTAINERS_FOUND=false - -# Check for old Gemini MCP containers (for migration) -if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server-gemini-mcp-1$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old container: gemini-mcp-server-gemini-mcp-1" - docker stop gemini-mcp-server-gemini-mcp-1 >/dev/null 2>&1 || true - docker rm gemini-mcp-server-gemini-mcp-1 >/dev/null 2>&1 || true -fi - -if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old container: gemini-mcp-server" - docker stop gemini-mcp-server >/dev/null 2>&1 || true - docker rm gemini-mcp-server >/dev/null 2>&1 || true -fi - -# Check for current old containers (from recent versions) -if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-log-monitor$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old container: gemini-mcp-log-monitor" - docker stop gemini-mcp-log-monitor >/dev/null 2>&1 || true - docker rm gemini-mcp-log-monitor >/dev/null 2>&1 || true -fi - -# Check for old Redis container -if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-server-redis-1$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old container: gemini-mcp-server-redis-1" - docker stop gemini-mcp-server-redis-1 >/dev/null 2>&1 || true - docker rm gemini-mcp-server-redis-1 >/dev/null 2>&1 || true -fi - -if docker ps -a --format "{{.Names}}" | grep -q "^gemini-mcp-redis$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old container: gemini-mcp-redis" - docker stop gemini-mcp-redis >/dev/null 2>&1 || true - docker rm gemini-mcp-redis >/dev/null 2>&1 || true -fi - -# Check for old images -if docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^gemini-mcp-server-gemini-mcp:latest$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old image: gemini-mcp-server-gemini-mcp:latest" - docker rmi gemini-mcp-server-gemini-mcp:latest >/dev/null 2>&1 || true -fi - -if docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^gemini-mcp-server:latest$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old image: gemini-mcp-server:latest" - docker rmi gemini-mcp-server:latest >/dev/null 2>&1 || true -fi - -# Check for current old network (if it exists) -if docker network ls --format "{{.Name}}" | grep -q "^gemini-mcp-server_default$" 2>/dev/null || false; then - OLD_CONTAINERS_FOUND=true - echo " - Cleaning up old network: gemini-mcp-server_default" - docker network rm gemini-mcp-server_default >/dev/null 2>&1 || true -fi - -# Only show cleanup messages if something was actually cleaned up - -# Build and start services -if ! run_with_spinner "๐Ÿ”จ Building Zen MCP Server image..." "$COMPOSE_CMD build"; then - echo "โŒ Failed to build Docker image. Run '$COMPOSE_CMD build' manually to see errors." - exit 1 -fi - -if ! run_with_spinner "Starting server (Redis + Zen MCP)..." "$COMPOSE_CMD up -d"; then - echo "โŒ Failed to start services. Run '$COMPOSE_CMD up -d' manually to see errors." - exit 1 -fi - -echo "โœ… Services started successfully!" 
- -# Function to show configuration steps - only if CLI not already set up -show_configuration_steps() { - echo "" - echo "๐Ÿ”„ Next steps:" - NEEDS_KEY_UPDATE=false - if grep -q "your_gemini_api_key_here" .env 2>/dev/null || grep -q "your_openai_api_key_here" .env 2>/dev/null || grep -q "your_xai_api_key_here" .env 2>/dev/null || grep -q "your_openrouter_api_key_here" .env 2>/dev/null; then - NEEDS_KEY_UPDATE=true - fi - - if [ "$NEEDS_KEY_UPDATE" = true ]; then - echo "1. Edit .env and replace placeholder API keys with actual ones" - echo " - GEMINI_API_KEY: your-gemini-api-key-here" - echo " - OPENAI_API_KEY: your-openai-api-key-here" - echo " - XAI_API_KEY: your-xai-api-key-here" - echo " - OPENROUTER_API_KEY: your-openrouter-api-key-here (optional)" - echo "2. Restart services: $COMPOSE_CMD restart" - echo "3. Copy the configuration below to your Claude Desktop config if required:" - else - echo "1. Copy the configuration below to your Claude Desktop config if required:" - fi - - echo "" - echo "===== CLAUDE DESKTOP CONFIGURATION =====" - echo "{" - echo " \"mcpServers\": {" - echo " \"zen\": {" - echo " \"command\": \"docker\"," - echo " \"args\": [" - echo " \"exec\"," - echo " \"-i\"," - echo " \"zen-mcp-server\"," - echo " \"python\"," - echo " \"server.py\"" - echo " ]" - echo " }" - echo " }" - echo "}" - echo "===========================================" } -# Function to automatically configure Claude Code CLI -# Returns: 0 if already configured, 1 if CLI not found, 2 if configured/skipped -setup_claude_code_cli() { - # Check if claude command exists - if ! command -v claude &> /dev/null; then - echo "โš ๏ธ Claude Code CLI not found. Install it to use with CLI:" - echo " npm install -g @anthropic-ai/claude-code" + +# Check if package is installed +check_package() { + local python_cmd="$1" + local package="$2" + $python_cmd -c "import $package" 2>/dev/null +} + +# Install dependencies +install_dependencies() { + local python_cmd="$1" + local deps_needed=false + + # Check required packages + local packages=("mcp" "google.generativeai" "openai" "pydantic") + for package in "${packages[@]}"; do + local import_name=${package%%.*} # Get first part before dot + if ! check_package "$python_cmd" "$import_name"; then + deps_needed=true + break + fi + done + + if [[ "$deps_needed" == false ]]; then + print_success "Dependencies already installed" + return 0 + fi + + echo "" + print_info "Setting up Zen MCP Server..." + echo "Installing required components:" + echo " โ€ข MCP protocol library" + echo " โ€ข AI model connectors" + echo " โ€ข Data validation tools" + echo "" + + # Determine if we're in a venv + local install_cmd + if [[ -n "${VIRTUAL_ENV:-}" ]] || [[ "$python_cmd" == *"$VENV_PATH"* ]]; then + install_cmd="$python_cmd -m pip install -q -r requirements.txt" + else + install_cmd="$python_cmd -m pip install -q --user -r requirements.txt" + fi + + # Install packages + echo -n "Downloading packages..." 
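+    # Install and scan pip's combined output for "error" lines (lines that
+    # mention "warning" are filtered out); any hit selects the failure branch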
+    if $install_cmd 2>&1 | grep -i error | grep -v warning; then
+        echo -e "\r${RED}✗ Setup failed${NC}                    "
+        echo ""
+        echo "Try running manually:"
+        echo "  $python_cmd -m pip install mcp google-genai openai pydantic"
+        return 1
+    else
+        echo -e "\r${GREEN}✓ Setup complete!${NC}                    "
+        return 0
+    fi
+}
+
+# ----------------------------------------------------------------------------
+# Environment Configuration Functions
+# ----------------------------------------------------------------------------
+
+# Setup .env file
+setup_env_file() {
+    if [[ -f .env ]]; then
+        print_success ".env file already exists"
+        migrate_env_file
+        return 0
+    fi
+
+    if [[ ! -f .env.example ]]; then
+        print_error ".env.example not found!"
+        return 1
+    fi
+
+    cp .env.example .env
+    print_success "Created .env from .env.example"
+
+    # Detect sed flavor for cross-platform in-place editing. An array is used
+    # because expanding "sed -i ''" from a plain string would hand BSD sed the
+    # two literal quote characters as a backup suffix, leaving stray .env'' files.
+    local sed_cmd
+    if sed --version >/dev/null 2>&1; then
+        sed_cmd=(sed -i)       # GNU sed (Linux)
+    else
+        sed_cmd=(sed -i '')    # BSD sed (macOS)
+    fi
+
+    # Update API keys from environment if present
+    local api_keys=(
+        "GEMINI_API_KEY:your_gemini_api_key_here"
+        "OPENAI_API_KEY:your_openai_api_key_here"
+        "XAI_API_KEY:your_xai_api_key_here"
+        "OPENROUTER_API_KEY:your_openrouter_api_key_here"
+    )
+
+    for key_pair in "${api_keys[@]}"; do
+        local key_name="${key_pair%%:*}"
+        local placeholder="${key_pair##*:}"
+        local key_value="${!key_name:-}"
+
+        if [[ -n "$key_value" ]]; then
+            "${sed_cmd[@]}" "s/$placeholder/$key_value/" .env
+            print_success "Updated .env with $key_name from environment"
+        fi
+    done
+
+    return 0
+}
+
+# Migrate .env file from Docker to standalone format
+migrate_env_file() {
+    # Check if migration is needed
+    if ! grep -q "host\.docker\.internal" .env 2>/dev/null; then
+        return 0
+    fi
+
+    print_warning "Migrating .env from Docker to standalone format..."
+
+    # Create backup
+    cp .env .env.backup_$(date +%Y%m%d_%H%M%S)
+
+    # Detect sed flavor for cross-platform in-place editing (array form, for
+    # the same BSD/GNU quoting reason as in setup_env_file)
+    local sed_cmd
+    if sed --version >/dev/null 2>&1; then
+        sed_cmd=(sed -i)       # GNU sed (Linux)
+    else
+        sed_cmd=(sed -i '')    # BSD sed (macOS)
+    fi
+
+    # Replace host.docker.internal with localhost
+    "${sed_cmd[@]}" 's/host\.docker\.internal/localhost/g' .env
+
+    print_success "Migrated Docker URLs to localhost in .env"
+    echo "  (Backup saved as .env.backup_*)"
+}
+
+# Validate API keys
+validate_api_keys() {
+    local has_key=false
+    local api_keys=(
+        "GEMINI_API_KEY:your_gemini_api_key_here"
+        "OPENAI_API_KEY:your_openai_api_key_here"
+        "XAI_API_KEY:your_xai_api_key_here"
+        "OPENROUTER_API_KEY:your_openrouter_api_key_here"
+    )
+
+    for key_pair in "${api_keys[@]}"; do
+        local key_name="${key_pair%%:*}"
+        local placeholder="${key_pair##*:}"
+        local key_value="${!key_name:-}"
+
+        if [[ -n "$key_value" ]] && [[ "$key_value" != "$placeholder" ]]; then
+            print_success "$key_name configured"
+            has_key=true
+        fi
+    done
+
+    # Check custom API URL
+    if [[ -n "${CUSTOM_API_URL:-}" ]]; then
+        print_success "CUSTOM_API_URL configured: $CUSTOM_API_URL"
+        has_key=true
+    fi
+
+    if [[ "$has_key" == false ]]; then
+        print_error "No API keys found in .env!"
+        echo ""
+        echo "Please edit .env and add at least one API key:"
+        echo "  GEMINI_API_KEY=your-actual-key"
+        echo "  OPENAI_API_KEY=your-actual-key"
+        echo "  XAI_API_KEY=your-actual-key"
+        echo "  OPENROUTER_API_KEY=your-actual-key"
+        echo ""
+        return 1
+    fi
+
+    return 0
+}
+
+# ----------------------------------------------------------------------------
+# Claude Integration Functions
+# ----------------------------------------------------------------------------
+
+# Check if MCP is added to Claude CLI and verify it's correct
+check_claude_cli_integration() {
+    local python_cmd="$1"
+    local server_path="$2"
+
+    if ! command -v claude &> /dev/null; then
+        echo ""
+        print_warning "Claude CLI not found"
+        echo ""
+        read -p "Would you like to add Zen to Claude Code? (Y/n): " -n 1 -r
+        echo ""
+        if [[ $REPLY =~ ^[Nn]$ ]]; then
+            print_info "Skipping Claude Code integration"
+            return 0
+        fi
+
+        echo ""
+        echo "Please install Claude Code first:"
+        echo "  Visit: https://docs.anthropic.com/en/docs/claude-code/cli-usage"
+        echo ""
+        echo "Then run this script again to register MCP."
+        return 1
+    fi
+
+    # Check if zen is registered
+    local mcp_list=$(claude mcp list 2>/dev/null)
+    if echo "$mcp_list" | grep -q "zen"; then
+        # Check if it's using the old Docker command
+        if echo "$mcp_list" | grep -E "zen.*docker|zen.*compose" &>/dev/null; then
+            print_warning "Found old Docker-based Zen registration, updating..."
+            claude mcp remove zen -s user 2>/dev/null || true
+
+            # Re-add with correct Python command
+            if claude mcp add zen -s user -- "$python_cmd" "$server_path" 2>/dev/null; then
+                print_success "Updated Zen to become a standalone script"
+                return 0
             else
-                echo "⚠️  Failed to add MCP server automatically. You can add it manually:"
-                echo "   claude mcp add zen -s user -- docker exec -i zen-mcp-server python server.py"
+                echo ""
+                echo "Failed to update MCP registration. Please run manually:"
+                echo "  claude mcp remove zen -s user"
+                echo "  claude mcp add zen -s user -- $python_cmd $server_path"
+                return 1
             fi
         else
-            echo "   - Skipped adding MCP server. 
You can add it manually later:" - echo " claude mcp add zen -s user -- docker exec -i zen-mcp-server python server.py" + # Verify the registered path matches current setup + local expected_cmd="$python_cmd $server_path" + if echo "$mcp_list" | grep -F "$server_path" &>/dev/null; then + return 0 + else + print_warning "Zen registered with different path, updating..." + claude mcp remove zen -s user 2>/dev/null || true + + if claude mcp add zen -s user -- "$python_cmd" "$server_path" 2>/dev/null; then + print_success "Updated Zen with current path" + return 0 + else + echo "" + echo "Failed to update MCP registration. Please run manually:" + echo " claude mcp remove zen -s user" + echo " claude mcp add zen -s user -- $python_cmd $server_path" + return 1 + fi + fi fi + else + # Not registered at all, ask user if they want to add it echo "" - return 2 # Configured or skipped + read -p "Add Zen to Claude Code? (Y/n): " -n 1 -r + echo "" + if [[ $REPLY =~ ^[Nn]$ ]]; then + print_info "To add manually later, run:" + echo " claude mcp add zen -s user -- $python_cmd $server_path" + return 0 + fi + + print_info "Registering Zen with Claude Code..." + if claude mcp add zen -s user -- "$python_cmd" "$server_path" 2>/dev/null; then + print_success "Successfully added Zen to Claude Code" + return 0 + else + echo "" + echo "Failed to add automatically. To add manually, run:" + echo " claude mcp add zen -s user -- $python_cmd $server_path" + return 1 + fi fi } -# Set up Claude Code CLI automatically -setup_claude_code_cli -CLI_STATUS=$? - -# Only show configuration details if zen is NOT already configured -if [ $CLI_STATUS -ne 0 ]; then - # Show configuration steps - show_configuration_steps +# Check and update Claude Desktop configuration +check_claude_desktop_integration() { + local python_cmd="$1" + local server_path="$2" - echo "" - echo "===== CLAUDE CODE CLI CONFIGURATION =====" - echo "# Useful Claude Code CLI commands:" - echo "claude # Start interactive session" - echo "claude mcp list # List your MCP servers" - echo "claude mcp remove zen -s user # Remove if needed" - echo "===========================================" - echo "" - - echo "๐Ÿ“ Config file locations:" - echo " macOS: ~/Library/Application Support/Claude/claude_desktop_config.json" - echo ' Windows (WSL): /mnt/c/Users/USERNAME/AppData/Roaming/Claude/claude_desktop_config.json' - echo "" -fi - -echo "๐Ÿ”ง Useful commands:" -echo " Start services: $COMPOSE_CMD up -d" -echo " Stop services: $COMPOSE_CMD down" -echo " View MCP logs: docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log" -echo " Restart services: $COMPOSE_CMD restart" -echo " Service status: $COMPOSE_CMD ps" -echo "" - -# Follow logs if -f flag was specified -if [ "$FOLLOW_LOGS" = true ]; then - echo "Following MCP server logs (press Ctrl+C to stop)..." - echo "" - - # Give the container a moment to fully start - echo "Waiting for container to be ready..." - sleep 3 - - # Check if container is running before trying to exec - if docker ps --format "{{.Names}}" | grep -q "^zen-mcp-server$"; then - echo "Container is running, following logs..." 
- docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log - else - echo "Container zen-mcp-server is not running" - echo " Container status:" - docker ps -a | grep zen-mcp-server || echo " Container not found" - echo " Try running: docker logs zen-mcp-server" - exit 1 + # Skip if already configured (check flag) + if [[ -f "$DESKTOP_CONFIG_FLAG" ]]; then + return 0 fi -else - echo "๐Ÿ’ก Tip: Use './run-server.sh -f' next time to automatically follow logs after startup" + + local config_path=$(get_claude_config_path) + if [[ -z "$config_path" ]]; then + print_warning "Unable to determine Claude Desktop config path for this platform" + return 0 + fi + echo "" - echo "Happy Clauding!" -fi \ No newline at end of file + read -p "Configure Zen for Claude Desktop? (Y/n): " -n 1 -r + echo "" + if [[ $REPLY =~ ^[Nn]$ ]]; then + print_info "Skipping Claude Desktop integration" + touch "$DESKTOP_CONFIG_FLAG" # Don't ask again + return 0 + fi + + # Create config directory if it doesn't exist + local config_dir=$(dirname "$config_path") + mkdir -p "$config_dir" 2>/dev/null || true + + # Handle existing config + if [[ -f "$config_path" ]]; then + print_info "Updating existing Claude Desktop config..." + + # Check for old Docker config and remove it + if grep -q "docker.*compose.*zen\|zen.*docker" "$config_path" 2>/dev/null; then + print_warning "Removing old Docker-based MCP configuration..." + # Create backup + cp "$config_path" "${config_path}.backup_$(date +%Y%m%d_%H%M%S)" + + # Remove old zen config using a more robust approach + local temp_file=$(mktemp) + python3 -c " +import json +import sys + +try: + with open('$config_path', 'r') as f: + config = json.load(f) + + # Remove zen from mcpServers if it exists + if 'mcpServers' in config and 'zen' in config['mcpServers']: + del config['mcpServers']['zen'] + print('Removed old zen MCP configuration') + + with open('$temp_file', 'w') as f: + json.dump(config, f, indent=2) + +except Exception as e: + print(f'Error processing config: {e}', file=sys.stderr) + sys.exit(1) +" && mv "$temp_file" "$config_path" + fi + + # Add new config + local temp_file=$(mktemp) + python3 -c " +import json +import sys + +try: + with open('$config_path', 'r') as f: + config = json.load(f) +except: + config = {} + +# Ensure mcpServers exists +if 'mcpServers' not in config: + config['mcpServers'] = {} + +# Add zen server +config['mcpServers']['zen'] = { + 'command': '$python_cmd', + 'args': ['$server_path'] +} + +with open('$temp_file', 'w') as f: + json.dump(config, f, indent=2) +" && mv "$temp_file" "$config_path" + + else + print_info "Creating new Claude Desktop config..." + cat > "$config_path" << EOF +{ + "mcpServers": { + "zen": { + "command": "$python_cmd", + "args": ["$server_path"] + } + } +} +EOF + fi + + if [[ $? 
-eq 0 ]]; then + print_success "Successfully configured Claude Desktop" + echo " Config: $config_path" + echo " Restart Claude Desktop to use the new MCP server" + touch "$DESKTOP_CONFIG_FLAG" + else + print_error "Failed to update Claude Desktop config" + echo "Manual config location: $config_path" + echo "Add this configuration:" + cat << EOF +{ + "mcpServers": { + "zen": { + "command": "$python_cmd", + "args": ["$server_path"] + } + } +} +EOF + fi +} + +# Display configuration instructions +display_config_instructions() { + local python_cmd="$1" + local server_path="$2" + + echo "" + local config_header="ZEN MCP SERVER CONFIGURATION" + echo "===== $config_header =====" + printf '%*s\n' "$((${#config_header} + 12))" | tr ' ' '=' + echo "" + echo "To use Zen MCP Server with your Claude clients:" + echo "" + + print_info "1. For Claude Code (CLI):" + echo -e " ${GREEN}claude mcp add zen -s user -- $python_cmd $server_path${NC}" + echo "" + + print_info "2. For Claude Desktop:" + echo " Add this configuration to your Claude Desktop config file:" + echo "" + cat << EOF + { + "mcpServers": { + "zen": { + "command": "$python_cmd", + "args": ["$server_path"] + } + } + } +EOF + + # Show platform-specific config location + local config_path=$(get_claude_config_path) + if [[ -n "$config_path" ]]; then + echo "" + print_info " Config file location:" + echo -e " ${YELLOW}$config_path${NC}" + fi + + echo "" + print_info "3. Restart Claude Desktop after updating the config file" + echo "" +} + +# Display setup instructions +display_setup_instructions() { + local python_cmd="$1" + local server_path="$2" + + echo "" + local setup_header="SETUP COMPLETE" + echo "===== $setup_header =====" + printf '%*s\n' "$((${#setup_header} + 12))" | tr ' ' '=' + echo "" + print_success "Zen is ready to use!" +} + +# ---------------------------------------------------------------------------- +# Log Management Functions +# ---------------------------------------------------------------------------- + +# Show help message +show_help() { + local version=$(get_version) + local header="๐Ÿค– Zen MCP Server v$version" + echo "$header" + printf '%*s\n' "${#header}" | tr ' ' '=' + echo "" + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " -h, --help Show this help message" + echo " -v, --version Show version information" + echo " -f, --follow Follow server logs in real-time" + echo " -c, --config Show configuration instructions for Claude clients" + echo "" + echo "Examples:" + echo " $0 Setup and start the MCP server" + echo " $0 -f Setup and follow logs" + echo " $0 -c Show configuration instructions" + echo " $0 --version Show version only" + echo "" + echo "For more information, visit:" + echo " https://github.com/BeehiveInnovations/zen-mcp-server" +} + +# Show version only +show_version() { + local version=$(get_version) + echo "$version" +} + +# Follow logs +follow_logs() { + local log_path="$LOG_DIR/$LOG_FILE" + + echo "Following server logs (Ctrl+C to stop)..." 
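One practical note on this log-following step: as the server.py changes below show, logs rotate via RotatingFileHandler at 20MB per file, and a plain `tail -f` keeps reading the original file handle after rotation renames it. For long-running sessions, `tail -F` (follow by name) re-opens the active log automatically:

    tail -F logs/mcp_server.log    # keeps following across rotations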
+ echo "" + + # Create logs directory and file if they don't exist + mkdir -p "$LOG_DIR" + touch "$log_path" + + # Follow the log file + tail -f "$log_path" +} + +# ---------------------------------------------------------------------------- +# Main Function +# ---------------------------------------------------------------------------- + +main() { + # Parse command line arguments + local arg="${1:-}" + + case "$arg" in + -h|--help) + show_help + exit 0 + ;; + -v|--version) + show_version + exit 0 + ;; + -c|--config) + # Setup minimal environment to get paths for config display + local python_cmd + python_cmd=$(find_python) || exit 1 + local new_python_cmd + new_python_cmd=$(setup_venv "$python_cmd") + python_cmd="$new_python_cmd" + local script_dir=$(get_script_dir) + local server_path="$script_dir/server.py" + display_config_instructions "$python_cmd" "$server_path" + exit 0 + ;; + -f|--follow) + # Continue with normal setup then follow logs + ;; + "") + # Normal setup without following logs + ;; + *) + print_error "Unknown option: $arg" + echo "" + show_help + exit 1 + ;; + esac + + # Display header + local main_header="๐Ÿค– Zen MCP Server" + echo "$main_header" + printf '%*s\n' "${#main_header}" | tr ' ' '=' + + # Get and display version + local version=$(get_version) + echo "Version: $version" + echo "" + + # Check if venv exists + if [[ ! -d "$VENV_PATH" ]]; then + echo "Setting up Python environment for first time..." + fi + + # Step 1: Docker cleanup + cleanup_docker + + # Step 2: Find Python + local python_cmd + python_cmd=$(find_python) || exit 1 + + # Step 3: Setup environment file + setup_env_file || exit 1 + + # Step 4: Source .env file + if [[ -f .env ]]; then + set -a + source .env + set +a + fi + + # Step 5: Validate API keys + validate_api_keys || exit 1 + + # Step 6: Setup virtual environment + local new_python_cmd + new_python_cmd=$(setup_venv "$python_cmd") + python_cmd="$new_python_cmd" + + # Step 7: Install dependencies + install_dependencies "$python_cmd" || exit 1 + + # Step 8: Get absolute server path + local script_dir=$(get_script_dir) + local server_path="$script_dir/server.py" + + # Step 9: Display setup instructions + display_setup_instructions "$python_cmd" "$server_path" + + # Step 10: Check Claude integrations + check_claude_cli_integration "$python_cmd" "$server_path" + check_claude_desktop_integration "$python_cmd" "$server_path" + + # Step 11: Display log information + echo "" + echo "Logs will be written to: $script_dir/$LOG_DIR/$LOG_FILE" + echo "" + + # Step 12: Handle command line arguments + if [[ "$arg" == "-f" ]] || [[ "$arg" == "--follow" ]]; then + follow_logs + else + echo "To follow logs: ./run-server.sh -f" + echo "To show config: ./run-server.sh -c" + echo "To update: git pull, then run ./run-server.sh again" + echo "" + echo "Happy Clauding! 
๐ŸŽ‰" + fi +} + +# ---------------------------------------------------------------------------- +# Script Entry Point +# ---------------------------------------------------------------------------- + +# Run main function with all arguments +main "$@" \ No newline at end of file diff --git a/server.py b/server.py index b334ebc..2987a49 100644 --- a/server.py +++ b/server.py @@ -25,12 +25,21 @@ import sys import time from datetime import datetime from logging.handlers import RotatingFileHandler +from pathlib import Path from typing import Any, Optional -from mcp.server import Server -from mcp.server.models import InitializationOptions -from mcp.server.stdio import stdio_server -from mcp.types import ( +from dotenv import load_dotenv + +# Load environment variables from .env file in the script's directory +# This ensures .env is loaded regardless of the current working directory +script_dir = Path(__file__).parent +env_file = script_dir / ".env" +load_dotenv(dotenv_path=env_file) + +from mcp.server import Server # noqa: E402 +from mcp.server.models import InitializationOptions # noqa: E402 +from mcp.server.stdio import stdio_server # noqa: E402 +from mcp.types import ( # noqa: E402 GetPromptResult, Prompt, PromptMessage, @@ -41,13 +50,13 @@ from mcp.types import ( ToolsCapability, ) -from config import ( +from config import ( # noqa: E402 DEFAULT_MODEL, __author__, __updated__, __version__, ) -from tools import ( +from tools import ( # noqa: E402 AnalyzeTool, ChatTool, CodeReviewTool, @@ -61,7 +70,7 @@ from tools import ( ThinkDeepTool, TracerTool, ) -from tools.models import ToolOutput +from tools.models import ToolOutput # noqa: E402 # Configure logging for server operations # Can be controlled via LOG_LEVEL environment variable (DEBUG, INFO, WARNING, ERROR) @@ -101,13 +110,17 @@ root_logger.addHandler(stderr_handler) # Set root logger level root_logger.setLevel(getattr(logging, log_level, logging.INFO)) -# Add rotating file handler for Docker log monitoring +# Add rotating file handler for local log monitoring try: + # Create logs directory in project root + log_dir = Path(__file__).parent / "logs" + log_dir.mkdir(exist_ok=True) + # Main server log with size-based rotation (20MB max per file) # This ensures logs don't grow indefinitely and are properly managed file_handler = RotatingFileHandler( - "/tmp/mcp_server.log", + log_dir / "mcp_server.log", maxBytes=20 * 1024 * 1024, # 20MB max file size backupCount=10, # Keep 10 rotated files (200MB total) encoding="utf-8", @@ -119,7 +132,7 @@ try: # Create a special logger for MCP activity tracking with size-based rotation mcp_logger = logging.getLogger("mcp_activity") mcp_file_handler = RotatingFileHandler( - "/tmp/mcp_activity.log", + log_dir / "mcp_activity.log", maxBytes=20 * 1024 * 1024, # 20MB max file size backupCount=5, # Keep 5 rotated files (100MB total) encoding="utf-8", @@ -131,16 +144,9 @@ try: # Ensure MCP activity also goes to stderr mcp_logger.propagate = True - # Also keep a size-based rotation as backup (100MB max per file) - # This prevents any single day's log from growing too large - size_handler = RotatingFileHandler( - "/tmp/mcp_server_overflow.log", - maxBytes=100 * 1024 * 1024, - backupCount=3, # 100MB - ) - size_handler.setLevel(logging.WARNING) # Only warnings and errors - size_handler.setFormatter(LocalTimeFormatter(log_format)) - logging.getLogger().addHandler(size_handler) + # Log setup info directly to root logger since logger isn't defined yet + logging.info(f"Logging to: {log_dir / 'mcp_server.log'}") + 
logging.info(f"Process PID: {os.getpid()}") except Exception as e: print(f"Warning: Could not set up file logging: {e}", file=sys.stderr) @@ -243,7 +249,7 @@ def configure_providers(): from providers.base import ProviderType from providers.custom import CustomProvider from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider from providers.openrouter import OpenRouterProvider from providers.xai import XAIModelProvider from utils.model_restrictions import get_restriction_service @@ -450,7 +456,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon This function serves as the central orchestrator for multi-turn AI-to-AI conversations: 1. THREAD RESUMPTION: When continuation_id is present, it reconstructs complete conversation - context from Redis including conversation history and file references + context from in-memory storage including conversation history and file references 2. CROSS-TOOL CONTINUATION: Enables seamless handoffs between different tools (analyze โ†’ codereview โ†’ debug) while preserving full conversation context and file references @@ -465,7 +471,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon STATELESS TO STATEFUL BRIDGE: The MCP protocol is inherently stateless, but this function bridges the gap by: - - Loading persistent conversation state from Redis + - Loading persistent conversation state from in-memory storage - Reconstructing full multi-turn context for tool execution - Enabling tools to access previous exchanges and file references - Supporting conversation chains across different tool types @@ -700,13 +706,13 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any Reconstruct conversation context for stateless-to-stateful thread continuation. This is a critical function that transforms the inherently stateless MCP protocol into - stateful multi-turn conversations. It loads persistent conversation state from Redis - and rebuilds complete conversation context using the sophisticated dual prioritization + stateful multi-turn conversations. It loads persistent conversation state from in-memory + storage and rebuilds complete conversation context using the sophisticated dual prioritization strategy implemented in the conversation memory system. CONTEXT RECONSTRUCTION PROCESS: - 1. THREAD RETRIEVAL: Loads complete ThreadContext from Redis using continuation_id + 1. 
THREAD RETRIEVAL: Loads complete ThreadContext from storage using continuation_id - Includes all conversation turns with tool attribution - Preserves file references and cross-tool context - Handles conversation chains across multiple linked threads @@ -742,7 +748,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any ERROR HANDLING & RECOVERY: - Thread expiration: Provides clear instructions for conversation restart - - Redis unavailability: Graceful degradation with error messaging + - Storage unavailability: Graceful degradation with error messaging - Invalid continuation_id: Security validation and user-friendly errors Args: @@ -762,7 +768,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any Includes user-friendly recovery instructions Performance Characteristics: - - O(1) thread lookup in Redis + - O(1) thread lookup in memory - O(n) conversation history reconstruction where n = number of turns - Intelligent token budgeting prevents context window overflow - Optimized file deduplication minimizes redundant content @@ -778,12 +784,12 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any continuation_id = arguments["continuation_id"] - # Get thread context from Redis - logger.debug(f"[CONVERSATION_DEBUG] Looking up thread {continuation_id} in Redis") + # Get thread context from storage + logger.debug(f"[CONVERSATION_DEBUG] Looking up thread {continuation_id} in storage") context = get_thread(continuation_id) if not context: logger.warning(f"Thread not found: {continuation_id}") - logger.debug(f"[CONVERSATION_DEBUG] Thread {continuation_id} not found in Redis or expired") + logger.debug(f"[CONVERSATION_DEBUG] Thread {continuation_id} not found in storage or expired") # Log to activity file for monitoring try: @@ -795,8 +801,8 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any # Return error asking Claude to restart conversation with full context raise ValueError( f"Conversation thread '{continuation_id}' was not found or has expired. " - f"This may happen if the conversation was created more than 1 hour ago or if there was an issue " - f"with Redis storage. " + f"This may happen if the conversation was created more than 3 hours ago or if the " + f"server was restarted. " f"Please restart the conversation by providing your full question/prompt without the " f"continuation_id parameter. " f"This will create a new conversation thread that can continue with follow-up exchanges." 
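The in-memory backend itself (utils/storage_backend.py) is not part of this diff; the simulator tests below only reach into its `_lock` and `_store` attributes directly. For orientation, here is a minimal sketch of what such a backend could look like, assuming a lock-guarded dict with the roughly three-hour expiry mentioned in the error message above (class and method names here are illustrative, not the real API):

    import threading
    import time
    from typing import Any, Optional

    class InMemoryStorageSketch:
        """Illustrative only: a lock-guarded dict with a per-thread TTL."""

        def __init__(self, ttl_seconds: float = 3 * 3600) -> None:
            self._lock = threading.Lock()   # tests clear _store under this lock
            self._store: dict[str, tuple[float, Any]] = {}
            self._ttl = ttl_seconds         # ~3 hours, per the expiry message above

        def set_thread(self, thread_id: str, context: Any) -> None:
            with self._lock:
                self._store[thread_id] = (time.monotonic(), context)

        def get_thread(self, thread_id: str) -> Optional[Any]:
            # O(1) lookup; unknown or expired ids return None, which the
            # server surfaces as "not found or has expired".
            with self._lock:
                entry = self._store.get(thread_id)
                if entry is None:
                    return None
                created, context = entry
                if time.monotonic() - created > self._ttl:
                    del self._store[thread_id]
                    return None
                return context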
@@ -1165,7 +1171,7 @@ async def main(): # Validate and configure providers based on available API keys configure_providers() - # Log startup message for Docker log monitoring + # Log startup message logger.info("Zen MCP Server starting up...") logger.info(f"Log level: {log_level}") diff --git a/simulator_tests/__init__.py b/simulator_tests/__init__.py index a4ddbfe..5f86d71 100644 --- a/simulator_tests/__init__.py +++ b/simulator_tests/__init__.py @@ -26,7 +26,8 @@ from .test_openrouter_models import OpenRouterModelsTest from .test_per_tool_deduplication import PerToolDeduplicationTest from .test_planner_continuation_history import PlannerContinuationHistoryTest from .test_planner_validation import PlannerValidationTest -from .test_redis_validation import RedisValidationTest + +# Redis validation test removed - no longer needed for standalone server from .test_refactor_validation import RefactorValidationTest from .test_testgen_validation import TestGenValidationTest from .test_token_allocation_validation import TokenAllocationValidationTest @@ -42,7 +43,7 @@ TEST_REGISTRY = { "cross_tool_comprehensive": CrossToolComprehensiveTest, "line_number_validation": LineNumberValidationTest, "logs_validation": LogsValidationTest, - "redis_validation": RedisValidationTest, + # "redis_validation": RedisValidationTest, # Removed - no longer needed for standalone server "model_thinking_config": TestModelThinkingConfig, "o3_model_selection": O3ModelSelectionTest, "ollama_custom_url": OllamaCustomUrlTest, @@ -72,7 +73,7 @@ __all__ = [ "CrossToolComprehensiveTest", "LineNumberValidationTest", "LogsValidationTest", - "RedisValidationTest", + # "RedisValidationTest", # Removed - no longer needed for standalone server "TestModelThinkingConfig", "O3ModelSelectionTest", "O3ProExpensiveTest", diff --git a/simulator_tests/base_test.py b/simulator_tests/base_test.py index bc75ac3..8273af7 100644 --- a/simulator_tests/base_test.py +++ b/simulator_tests/base_test.py @@ -11,6 +11,8 @@ import os import subprocess from typing import Optional +from .log_utils import LogUtils + class BaseSimulatorTest: """Base class for all communication simulator tests""" @@ -19,14 +21,25 @@ class BaseSimulatorTest: self.verbose = verbose self.test_files = {} self.test_dir = None - self.container_name = "zen-mcp-server" - self.redis_container = "zen-mcp-redis" + self.python_path = self._get_python_path() # Configure logging log_level = logging.DEBUG if verbose else logging.INFO logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s") self.logger = logging.getLogger(self.__class__.__name__) + def _get_python_path(self) -> str: + """Get the Python path for the virtual environment""" + current_dir = os.getcwd() + venv_python = os.path.join(current_dir, ".zen_venv", "bin", "python") + + if os.path.exists(venv_python): + return venv_python + + # Fallback to system python if venv doesn't exist + self.logger.warning("Virtual environment not found, using system python") + return "python" + def setup_test_files(self): """Create test files for the simulation""" # Test Python file @@ -100,7 +113,7 @@ class Calculator: self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}") def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: - """Call an MCP tool via Claude CLI (docker exec)""" + """Call an MCP tool via standalone server""" try: # Prepare the MCP initialization and tool call sequence init_request = { @@ -131,8 +144,8 @@ class 
Calculator: # Join with newlines as MCP expects input_data = "\n".join(messages) + "\n" - # Simulate Claude CLI calling the MCP server via docker exec - docker_cmd = ["docker", "exec", "-i", self.container_name, "python", "server.py"] + # Call the standalone MCP server directly + server_cmd = [self.python_path, "server.py"] self.logger.debug(f"Calling MCP tool {tool_name} with proper initialization") @@ -140,7 +153,7 @@ class Calculator: # For consensus tool and other long-running tools, we need to ensure # the subprocess doesn't close prematurely result = subprocess.run( - docker_cmd, + server_cmd, input=input_data, text=True, capture_output=True, @@ -149,7 +162,7 @@ class Calculator: ) if result.returncode != 0: - self.logger.error(f"Docker exec failed with return code {result.returncode}") + self.logger.error(f"Standalone server failed with return code {result.returncode}") self.logger.error(f"Stderr: {result.stderr}") # Still try to parse stdout as the response might have been written before the error self.logger.debug(f"Attempting to parse stdout despite error: {result.stdout[:500]}") @@ -263,6 +276,56 @@ class Calculator: shutil.rmtree(self.test_dir) self.logger.debug(f"Removed test files directory: {self.test_dir}") + # ============================================================================ + # Log Utility Methods (delegate to LogUtils) + # ============================================================================ + + def get_server_logs_since(self, since_time: Optional[str] = None) -> str: + """Get server logs from both main and activity log files.""" + return LogUtils.get_server_logs_since(since_time) + + def get_recent_server_logs(self, lines: int = 500) -> str: + """Get recent server logs from the main log file.""" + return LogUtils.get_recent_server_logs(lines) + + def get_server_logs_subprocess(self, lines: int = 500) -> str: + """Get server logs using subprocess (alternative method).""" + return LogUtils.get_server_logs_subprocess(lines) + + def check_server_logs_for_errors(self, lines: int = 500) -> list[str]: + """Check server logs for error messages.""" + return LogUtils.check_server_logs_for_errors(lines) + + def extract_conversation_usage_logs(self, logs: str) -> list[dict[str, int]]: + """Extract token budget calculation information from logs.""" + return LogUtils.extract_conversation_usage_logs(logs) + + def extract_conversation_token_usage(self, logs: str) -> list[int]: + """Extract conversation token usage values from logs.""" + return LogUtils.extract_conversation_token_usage(logs) + + def extract_thread_creation_logs(self, logs: str) -> list[dict[str, str]]: + """Extract thread creation logs with parent relationships.""" + return LogUtils.extract_thread_creation_logs(logs) + + def extract_history_traversal_logs(self, logs: str) -> list[dict[str, any]]: + """Extract conversation history traversal logs.""" + return LogUtils.extract_history_traversal_logs(logs) + + def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool: + """Validate that logs show file deduplication behavior.""" + return LogUtils.validate_file_deduplication_in_logs(logs, tool_name, test_file) + + def search_logs_for_pattern( + self, pattern: str, logs: Optional[str] = None, case_sensitive: bool = False + ) -> list[str]: + """Search logs for a specific pattern.""" + return LogUtils.search_logs_for_pattern(pattern, logs, case_sensitive) + + def get_log_file_info(self) -> dict[str, dict[str, any]]: + """Get information about log files.""" + return 
LogUtils.get_log_file_info() + def run_test(self) -> bool: """Run the test - to be implemented by subclasses""" raise NotImplementedError("Subclasses must implement run_test()") diff --git a/simulator_tests/conversation_base_test.py b/simulator_tests/conversation_base_test.py new file mode 100644 index 0000000..ec7890b --- /dev/null +++ b/simulator_tests/conversation_base_test.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +""" +Conversation Base Test Class for In-Process MCP Tool Testing + +This class enables testing MCP tools within the same process to maintain conversation +memory state across tool calls. Unlike BaseSimulatorTest which runs each tool call +as a separate subprocess (losing memory state), this class calls tools directly +in-process, allowing conversation functionality to work correctly. + +USAGE: +- Inherit from ConversationBaseTest instead of BaseSimulatorTest for conversation tests +- Use call_mcp_tool_direct() to call tools in-process +- Conversation memory persists across tool calls within the same test +- setUp() clears memory between test methods for proper isolation + +EXAMPLE: + class TestConversationFeature(ConversationBaseTest): + def test_cross_tool_continuation(self): + # Step 1: Call precommit tool + result1, continuation_id = self.call_mcp_tool_direct("precommit", { + "path": "/path/to/repo", + "prompt": "Review these changes" + }) + + # Step 2: Continue with codereview tool - memory is preserved! + result2, _ = self.call_mcp_tool_direct("codereview", { + "files": ["/path/to/file.py"], + "prompt": "Focus on security issues", + "continuation_id": continuation_id + }) +""" + +import asyncio +import json +from typing import Optional + +from .base_test import BaseSimulatorTest + + +class ConversationBaseTest(BaseSimulatorTest): + """Base class for conversation tests that require in-process tool calling""" + + def __init__(self, verbose: bool = False): + super().__init__(verbose) + self._tools = None + self._loop = None + + def setUp(self): + """Set up test environment - clears conversation memory between tests""" + super().setup_test_files() + + # Clear conversation memory for test isolation + self._clear_conversation_memory() + + # Import tools from server.py for in-process calling + if self._tools is None: + self._import_tools() + + def _clear_conversation_memory(self): + """Clear all conversation memory to ensure test isolation""" + try: + from utils.storage_backend import get_storage_backend + + storage = get_storage_backend() + # Clear all stored conversation threads + with storage._lock: + storage._store.clear() + self.logger.debug("Cleared conversation memory for test isolation") + except Exception as e: + self.logger.warning(f"Could not clear conversation memory: {e}") + + def _import_tools(self): + """Import tools from server.py for direct calling""" + try: + import os + import sys + + # Add project root to Python path if not already there + project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if project_root not in sys.path: + sys.path.insert(0, project_root) + + # Import tools from server + from server import TOOLS + + self._tools = TOOLS + self.logger.debug(f"Imported {len(self._tools)} tools for in-process testing") + except ImportError as e: + raise RuntimeError(f"Could not import tools from server.py: {e}") + + def _get_event_loop(self): + """Get or create event loop for async tool execution""" + if self._loop is None: + try: + self._loop = asyncio.get_event_loop() + except RuntimeError: + self._loop = 
asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + return self._loop + + def call_mcp_tool_direct(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: + """ + Call an MCP tool directly in-process without subprocess isolation. + + This method maintains conversation memory across calls, enabling proper + testing of conversation functionality. + + Args: + tool_name: Name of the tool to call (e.g., "precommit", "codereview") + params: Parameters to pass to the tool + + Returns: + tuple: (response_content, continuation_id) where continuation_id + can be used for follow-up calls + """ + if self._tools is None: + raise RuntimeError("Tools not imported. Call setUp() first.") + + if tool_name not in self._tools: + raise ValueError(f"Tool '{tool_name}' not found. Available: {list(self._tools.keys())}") + + try: + tool = self._tools[tool_name] + self.logger.debug(f"Calling tool '{tool_name}' directly in-process") + + # Set up minimal model context if not provided + if "model" not in params: + params["model"] = "flash" # Use fast model for testing + + # Execute tool directly using asyncio + loop = self._get_event_loop() + + # Import required modules for model resolution (similar to server.py) + from config import DEFAULT_MODEL + from providers.registry import ModelProviderRegistry + from utils.model_context import ModelContext + + # Resolve model (simplified version of server.py logic) + model_name = params.get("model", DEFAULT_MODEL) + provider = ModelProviderRegistry.get_provider_for_model(model_name) + if not provider: + # Fallback to available model for testing + available_models = list(ModelProviderRegistry.get_available_models(respect_restrictions=True).keys()) + if available_models: + model_name = available_models[0] + params["model"] = model_name + self.logger.debug(f"Using fallback model for testing: {model_name}") + + # Create model context + model_context = ModelContext(model_name) + params["_model_context"] = model_context + params["_resolved_model_name"] = model_name + + # Execute tool asynchronously + result = loop.run_until_complete(tool.execute(params)) + + if not result or len(result) == 0: + return None, None + + # Extract response content + response_text = result[0].text if hasattr(result[0], "text") else str(result[0]) + + # Parse response to extract continuation_id + continuation_id = self._extract_continuation_id_from_response(response_text) + + self.logger.debug(f"Tool '{tool_name}' completed successfully in-process") + return response_text, continuation_id + + except Exception as e: + self.logger.error(f"Direct tool call failed for '{tool_name}': {e}") + return None, None + + def _extract_continuation_id_from_response(self, response_text: str) -> Optional[str]: + """Extract continuation_id from tool response""" + try: + # Parse the response as JSON to look for continuation metadata + response_data = json.loads(response_text) + + # Look for continuation_id in various places + if isinstance(response_data, dict): + # Check metadata + metadata = response_data.get("metadata", {}) + if "thread_id" in metadata: + return metadata["thread_id"] + + # Check continuation_offer + continuation_offer = response_data.get("continuation_offer", {}) + if continuation_offer and "continuation_id" in continuation_offer: + return continuation_offer["continuation_id"] + + # Check follow_up_request + follow_up = response_data.get("follow_up_request", {}) + if follow_up and "continuation_id" in follow_up: + return follow_up["continuation_id"] + + return None + + 
except (json.JSONDecodeError, AttributeError): + # If response is not JSON or doesn't have expected structure, return None + return None + + def tearDown(self): + """Clean up after test""" + super().cleanup_test_files() + # Clear memory again for good measure + self._clear_conversation_memory() + + @property + def test_name(self) -> str: + """Get the test name""" + return self.__class__.__name__ + + @property + def test_description(self) -> str: + """Get the test description""" + return "In-process conversation test" diff --git a/simulator_tests/log_utils.py b/simulator_tests/log_utils.py new file mode 100644 index 0000000..b25927e --- /dev/null +++ b/simulator_tests/log_utils.py @@ -0,0 +1,316 @@ +""" +Centralized log utility for simulator tests. + +This module provides common log reading and parsing functionality +used across multiple simulator test files to reduce code duplication. +""" + +import logging +import re +import subprocess +from typing import Optional, Union + + +class LogUtils: + """Centralized logging utilities for simulator tests.""" + + # Log file paths + MAIN_LOG_FILE = "logs/mcp_server.log" + ACTIVITY_LOG_FILE = "logs/mcp_activity.log" + + @classmethod + def get_server_logs_since(cls, since_time: Optional[str] = None) -> str: + """ + Get server logs from both main and activity log files. + + Args: + since_time: Currently ignored, returns all available logs + + Returns: + Combined logs from both log files + """ + try: + main_logs = "" + activity_logs = "" + + # Read main server log + try: + with open(cls.MAIN_LOG_FILE) as f: + main_logs = f.read() + except FileNotFoundError: + pass + + # Read activity log + try: + with open(cls.ACTIVITY_LOG_FILE) as f: + activity_logs = f.read() + except FileNotFoundError: + pass + + return main_logs + "\n" + activity_logs + + except Exception as e: + logging.warning(f"Failed to read server logs: {e}") + return "" + + @classmethod + def get_recent_server_logs(cls, lines: int = 500) -> str: + """ + Get recent server logs from the main log file. + + Args: + lines: Number of recent lines to retrieve (default: 500) + + Returns: + Recent log content as string + """ + try: + with open(cls.MAIN_LOG_FILE) as f: + all_lines = f.readlines() + recent_lines = all_lines[-lines:] if len(all_lines) > lines else all_lines + return "".join(recent_lines) + except FileNotFoundError: + logging.warning(f"Log file {cls.MAIN_LOG_FILE} not found") + return "" + except Exception as e: + logging.warning(f"Failed to read recent server logs: {e}") + return "" + + @classmethod + def get_server_logs_subprocess(cls, lines: int = 500) -> str: + """ + Get server logs using subprocess (alternative method). + + Args: + lines: Number of recent lines to retrieve + + Returns: + Recent log content as string + """ + try: + result = subprocess.run( + ["tail", "-n", str(lines), cls.MAIN_LOG_FILE], capture_output=True, text=True, timeout=10 + ) + return result.stdout + result.stderr + except Exception as e: + logging.warning(f"Failed to get server logs via subprocess: {e}") + return "" + + @classmethod + def check_server_logs_for_errors(cls, lines: int = 500) -> list[str]: + """ + Check server logs for error messages. 
+ + Args: + lines: Number of recent lines to check + + Returns: + List of error messages found + """ + logs = cls.get_recent_server_logs(lines) + error_patterns = [r"ERROR.*", r"CRITICAL.*", r"Failed.*", r"Exception.*", r"Error:.*"] + + errors = [] + for line in logs.split("\n"): + for pattern in error_patterns: + if re.search(pattern, line, re.IGNORECASE): + errors.append(line.strip()) + break + + return errors + + @classmethod + def extract_conversation_usage_logs(cls, logs: str) -> list[dict[str, int]]: + """ + Extract token budget calculation information from logs. + + Args: + logs: Log content to parse + + Returns: + List of dictionaries containing token usage data + """ + usage_data = [] + pattern = r"\[CONVERSATION_DEBUG\] Token budget calculation:" + + for line in logs.split("\n"): + if re.search(pattern, line): + # Parse the token usage information + usage_info = {} + + # Extract total capacity + capacity_match = re.search(r"Total capacity: ([\d,]+)", line) + if capacity_match: + usage_info["total_capacity"] = int(capacity_match.group(1).replace(",", "")) + + # Extract content allocation + content_match = re.search(r"Content allocation: ([\d,]+)", line) + if content_match: + usage_info["content_allocation"] = int(content_match.group(1).replace(",", "")) + + # Extract conversation tokens + conv_match = re.search(r"Conversation tokens: ([\d,]+)", line) + if conv_match: + usage_info["conversation_tokens"] = int(conv_match.group(1).replace(",", "")) + + # Extract remaining tokens + remaining_match = re.search(r"Remaining tokens: ([\d,]+)", line) + if remaining_match: + usage_info["remaining_tokens"] = int(remaining_match.group(1).replace(",", "")) + + if usage_info: + usage_data.append(usage_info) + + return usage_data + + @classmethod + def extract_conversation_token_usage(cls, logs: str) -> list[int]: + """ + Extract conversation token usage values from logs. + + Args: + logs: Log content to parse + + Returns: + List of token usage values + """ + pattern = r"Conversation history token usage:\s*([\d,]+)" + usage_values = [] + + for match in re.finditer(pattern, logs): + usage_value = int(match.group(1).replace(",", "")) + usage_values.append(usage_value) + + return usage_values + + @classmethod + def extract_thread_creation_logs(cls, logs: str) -> list[dict[str, str]]: + """ + Extract thread creation logs with parent relationships. + + Args: + logs: Log content to parse + + Returns: + List of dictionaries with thread relationship data + """ + thread_data = [] + pattern = r"\[THREAD\] Created new thread (\w+)(?: with parent (\w+))?" + + for match in re.finditer(pattern, logs): + thread_info = {"thread_id": match.group(1), "parent_id": match.group(2) if match.group(2) else None} + thread_data.append(thread_info) + + return thread_data + + @classmethod + def extract_history_traversal_logs(cls, logs: str) -> list[dict[str, Union[str, int]]]: + """ + Extract conversation history traversal logs. + + Args: + logs: Log content to parse + + Returns: + List of dictionaries with traversal data + """ + traversal_data = [] + pattern = r"\[THREAD\] Retrieved chain of (\d+) messages for thread (\w+)" + + for match in re.finditer(pattern, logs): + traversal_info = {"chain_length": int(match.group(1)), "thread_id": match.group(2)} + traversal_data.append(traversal_info) + + return traversal_data + + @classmethod + def validate_file_deduplication_in_logs(cls, logs: str, tool_name: str, test_file: str) -> bool: + """ + Validate that logs show file deduplication behavior. 
+ + Args: + logs: Log content to parse + tool_name: Name of the tool being tested + test_file: Name of the test file to check for deduplication + + Returns: + True if deduplication evidence is found, False otherwise + """ + # Look for embedding calculation + embedding_pattern = f"Calculating embeddings for {test_file}" + has_embedding = bool(re.search(embedding_pattern, logs)) + + # Look for filtering message + filtering_pattern = f"Filtering {test_file} to prevent duplication" + has_filtering = bool(re.search(filtering_pattern, logs)) + + # Look for skip message + skip_pattern = f"Skipping {test_file} \\(already processed" + has_skip = bool(re.search(skip_pattern, logs)) + + # Look for tool-specific processing + tool_pattern = f"\\[{tool_name.upper()}\\].*{test_file}" + has_tool_processing = bool(re.search(tool_pattern, logs, re.IGNORECASE)) + + # Deduplication is confirmed if we see evidence of processing and filtering/skipping + return has_embedding and (has_filtering or has_skip) and has_tool_processing + + @classmethod + def search_logs_for_pattern( + cls, pattern: str, logs: Optional[str] = None, case_sensitive: bool = False + ) -> list[str]: + """ + Search logs for a specific pattern. + + Args: + pattern: Regex pattern to search for + logs: Log content to search (if None, reads recent logs) + case_sensitive: Whether the search should be case sensitive + + Returns: + List of matching lines + """ + if logs is None: + logs = cls.get_recent_server_logs() + + flags = 0 if case_sensitive else re.IGNORECASE + matches = [] + + for line in logs.split("\n"): + if re.search(pattern, line, flags): + matches.append(line.strip()) + + return matches + + @classmethod + def get_log_file_info(cls) -> dict[str, dict[str, Union[str, int, bool]]]: + """ + Get information about log files. + + Returns: + Dictionary with file information for each log file + """ + import os + + file_info = {} + + for log_file in [cls.MAIN_LOG_FILE, cls.ACTIVITY_LOG_FILE]: + if os.path.exists(log_file): + stat = os.stat(log_file) + file_info[log_file] = { + "exists": True, + "size_bytes": stat.st_size, + "size_mb": round(stat.st_size / (1024 * 1024), 2), + "last_modified": stat.st_mtime, + "readable": os.access(log_file, os.R_OK), + } + else: + file_info[log_file] = { + "exists": False, + "size_bytes": 0, + "size_mb": 0, + "last_modified": 0, + "readable": False, + } + + return file_info diff --git a/simulator_tests/test_consensus_conversation.py b/simulator_tests/test_consensus_conversation.py index ab40905..a399f47 100644 --- a/simulator_tests/test_consensus_conversation.py +++ b/simulator_tests/test_consensus_conversation.py @@ -7,7 +7,6 @@ and builds conversation context correctly when using continuation_id. 
""" import json -import subprocess from .base_test import BaseSimulatorTest @@ -23,19 +22,16 @@ class TestConsensusConversation(BaseSimulatorTest): def test_description(self) -> str: return "Test consensus tool conversation building and continuation" - def get_docker_logs(self): - """Get Docker container logs""" + def get_server_logs(self): + """Get server logs from local log file""" try: - result = subprocess.run( - ["docker", "logs", "--tail", "100", self.container_name], capture_output=True, text=True, timeout=30 - ) - if result.returncode == 0: - return result.stdout.split("\n") - else: - self.logger.warning(f"Failed to get Docker logs: {result.stderr}") - return [] + log_file_path = "logs/mcp_server.log" + with open(log_file_path) as f: + lines = f.readlines() + # Return last 100 lines + return [line.strip() for line in lines[-100:]] except Exception as e: - self.logger.warning(f"Exception getting Docker logs: {e}") + self.logger.warning(f"Exception getting server logs: {e}") return [] def run_test(self) -> bool: @@ -121,9 +117,9 @@ class TestConsensusConversation(BaseSimulatorTest): self.logger.info("Phase 3: Checking server logs for conversation building") # Check for conversation-related log entries - logs = self.get_docker_logs() + logs = self.get_server_logs() if not logs: - self.logger.warning("Could not retrieve Docker logs for verification") + self.logger.warning("Could not retrieve server logs for verification") else: # Look for conversation building indicators conversation_logs = [ diff --git a/simulator_tests/test_content_validation.py b/simulator_tests/test_content_validation.py index cdc42af..88ece79 100644 --- a/simulator_tests/test_content_validation.py +++ b/simulator_tests/test_content_validation.py @@ -22,42 +22,6 @@ class ContentValidationTest(BaseSimulatorTest): def test_description(self) -> str: return "Content validation and duplicate detection" - def get_docker_logs_since(self, since_time: str) -> str: - """Get docker logs since a specific timestamp""" - try: - # Check both main server and log monitor for comprehensive logs - cmd_server = ["docker", "logs", "--since", since_time, self.container_name] - cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"] - - import subprocess - - result_server = subprocess.run(cmd_server, capture_output=True, text=True) - result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) - - # Get the internal log files which have more detailed logging - server_log_result = subprocess.run( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True - ) - - activity_log_result = subprocess.run( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True - ) - - # Combine all logs - combined_logs = ( - result_server.stdout - + "\n" - + result_monitor.stdout - + "\n" - + server_log_result.stdout - + "\n" - + activity_log_result.stdout - ) - return combined_logs - except Exception as e: - self.logger.error(f"Failed to get docker logs: {e}") - return "" - def run_test(self) -> bool: """Test that file processing system properly handles file deduplication""" try: @@ -151,9 +115,9 @@ DATABASE_CONFIG = { else: self.logger.warning(" โš ๏ธ Different tool failed") - # Validate file processing behavior from Docker logs + # Validate file processing behavior from server logs self.logger.info(" 4: Validating file processing logs") - logs = self.get_docker_logs_since(start_time) + logs = 
self.get_server_logs_since(start_time) # Check for proper file embedding logs embedding_logs = [ diff --git a/simulator_tests/test_conversation_chain_validation.py b/simulator_tests/test_conversation_chain_validation.py index 03f08c0..3f23c79 100644 --- a/simulator_tests/test_conversation_chain_validation.py +++ b/simulator_tests/test_conversation_chain_validation.py @@ -21,8 +21,6 @@ This validates the conversation threading system's ability to: - Properly traverse parent relationships for history reconstruction """ -import re -import subprocess from .base_test import BaseSimulatorTest @@ -38,53 +36,6 @@ class ConversationChainValidationTest(BaseSimulatorTest): def test_description(self) -> str: return "Conversation chain and threading validation" - def get_recent_server_logs(self) -> str: - """Get recent server logs from the log file directly""" - try: - cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"] - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return result.stdout - else: - self.logger.warning(f"Failed to read server logs: {result.stderr}") - return "" - except Exception as e: - self.logger.error(f"Failed to get server logs: {e}") - return "" - - def extract_thread_creation_logs(self, logs: str) -> list[dict[str, str]]: - """Extract thread creation logs with parent relationships""" - thread_logs = [] - - lines = logs.split("\n") - for line in lines: - if "[THREAD] Created new thread" in line: - # Parse: [THREAD] Created new thread 9dc779eb-645f-4850-9659-34c0e6978d73 with parent a0ce754d-c995-4b3e-9103-88af429455aa - match = re.search(r"\[THREAD\] Created new thread ([a-f0-9-]+) with parent ([a-f0-9-]+|None)", line) - if match: - thread_id = match.group(1) - parent_id = match.group(2) if match.group(2) != "None" else None - thread_logs.append({"thread_id": thread_id, "parent_id": parent_id, "log_line": line}) - - return thread_logs - - def extract_history_traversal_logs(self, logs: str) -> list[dict[str, str]]: - """Extract conversation history traversal logs""" - traversal_logs = [] - - lines = logs.split("\n") - for line in lines: - if "[THREAD] Retrieved chain of" in line: - # Parse: [THREAD] Retrieved chain of 3 threads for 9dc779eb-645f-4850-9659-34c0e6978d73 - match = re.search(r"\[THREAD\] Retrieved chain of (\d+) threads for ([a-f0-9-]+)", line) - if match: - chain_length = int(match.group(1)) - thread_id = match.group(2) - traversal_logs.append({"thread_id": thread_id, "chain_length": chain_length, "log_line": line}) - - return traversal_logs - def run_test(self) -> bool: """Test conversation chain and threading functionality""" try: diff --git a/simulator_tests/test_cross_tool_comprehensive.py b/simulator_tests/test_cross_tool_comprehensive.py index 591684b..cc8701f 100644 --- a/simulator_tests/test_cross_tool_comprehensive.py +++ b/simulator_tests/test_cross_tool_comprehensive.py @@ -12,7 +12,6 @@ Validates: 5. 
Proper tool chaining with context """ -import subprocess from .base_test import BaseSimulatorTest @@ -28,40 +27,6 @@ class CrossToolComprehensiveTest(BaseSimulatorTest): def test_description(self) -> str: return "Comprehensive cross-tool file deduplication and continuation" - def get_docker_logs_since(self, since_time: str) -> str: - """Get docker logs since a specific timestamp""" - try: - # Check both main server and log monitor for comprehensive logs - cmd_server = ["docker", "logs", "--since", since_time, self.container_name] - cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"] - - result_server = subprocess.run(cmd_server, capture_output=True, text=True) - result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) - - # Get the internal log files which have more detailed logging - server_log_result = subprocess.run( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True - ) - - activity_log_result = subprocess.run( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True - ) - - # Combine all logs - combined_logs = ( - result_server.stdout - + "\n" - + result_monitor.stdout - + "\n" - + server_log_result.stdout - + "\n" - + activity_log_result.stdout - ) - return combined_logs - except Exception as e: - self.logger.error(f"Failed to get docker logs: {e}") - return "" - def run_test(self) -> bool: """Comprehensive cross-tool test with all MCP tools""" try: @@ -247,7 +212,7 @@ def secure_login(user, pwd): # Validate comprehensive results self.logger.info(" ๐Ÿ“‹ Validating comprehensive cross-tool results...") - logs = self.get_docker_logs_since(start_time) + logs = self.get_server_logs_since(start_time) # Validation criteria tools_used = [r[0] for r in responses] diff --git a/simulator_tests/test_cross_tool_continuation.py b/simulator_tests/test_cross_tool_continuation.py index ca97fdf..a2ab4fd 100644 --- a/simulator_tests/test_cross_tool_continuation.py +++ b/simulator_tests/test_cross_tool_continuation.py @@ -6,10 +6,10 @@ Tests comprehensive cross-tool continuation scenarios to ensure conversation context is maintained when switching between different tools. """ -from .base_test import BaseSimulatorTest +from .conversation_base_test import ConversationBaseTest -class CrossToolContinuationTest(BaseSimulatorTest): +class CrossToolContinuationTest(ConversationBaseTest): """Test comprehensive cross-tool continuation scenarios""" @property @@ -25,8 +25,8 @@ class CrossToolContinuationTest(BaseSimulatorTest): try: self.logger.info("๐Ÿ”ง Test: Cross-tool continuation scenarios") - # Setup test files - self.setup_test_files() + # Setup test environment for conversation testing + self.setUp() success_count = 0 total_scenarios = 3 @@ -62,7 +62,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): self.logger.info(" 1: Testing chat -> thinkdeep -> codereview") # Start with chat - chat_response, chat_id = self.call_mcp_tool( + chat_response, chat_id = self.call_mcp_tool_direct( "chat", { "prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it", @@ -76,7 +76,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): return False # Continue with thinkdeep - thinkdeep_response, _ = self.call_mcp_tool( + thinkdeep_response, _ = self.call_mcp_tool_direct( "thinkdeep", { "prompt": "Please use low thinking mode. 
Think deeply about potential performance issues in this code", @@ -91,7 +91,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): return False # Continue with codereview - codereview_response, _ = self.call_mcp_tool( + codereview_response, _ = self.call_mcp_tool_direct( "codereview", { "files": [self.test_files["python"]], # Same file should be deduplicated @@ -118,8 +118,13 @@ class CrossToolContinuationTest(BaseSimulatorTest): self.logger.info(" 2: Testing analyze -> debug -> thinkdeep") # Start with analyze - analyze_response, analyze_id = self.call_mcp_tool( - "analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality", "model": "flash"} + analyze_response, analyze_id = self.call_mcp_tool_direct( + "analyze", + { + "files": [self.test_files["python"]], + "prompt": "Analyze this code for quality and performance issues", + "model": "flash", + }, ) if not analyze_response or not analyze_id: @@ -127,7 +132,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): return False # Continue with debug - debug_response, _ = self.call_mcp_tool( + debug_response, _ = self.call_mcp_tool_direct( "debug", { "files": [self.test_files["python"]], # Same file should be deduplicated @@ -142,7 +147,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): return False # Continue with thinkdeep - final_response, _ = self.call_mcp_tool( + final_response, _ = self.call_mcp_tool_direct( "thinkdeep", { "prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found", @@ -169,7 +174,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): self.logger.info(" 3: Testing multi-file cross-tool continuation") # Start with both files - multi_response, multi_id = self.call_mcp_tool( + multi_response, multi_id = self.call_mcp_tool_direct( "chat", { "prompt": "Please use low thinking mode. 
Analyze both the Python code and configuration file", @@ -183,7 +188,7 @@ class CrossToolContinuationTest(BaseSimulatorTest): return False # Switch to codereview with same files (should use conversation history) - multi_review, _ = self.call_mcp_tool( + multi_review, _ = self.call_mcp_tool_direct( "codereview", { "files": [self.test_files["python"], self.test_files["config"]], # Same files diff --git a/simulator_tests/test_debug_validation.py b/simulator_tests/test_debug_validation.py index 8c1481d..3b7233c 100644 --- a/simulator_tests/test_debug_validation.py +++ b/simulator_tests/test_debug_validation.py @@ -378,35 +378,28 @@ The code looks correct to me, but something is causing valid sessions to be trea # Validate logs self.logger.info(" ๐Ÿ“‹ Validating execution logs...") - # Get server logs from the actual log file inside the container - result = self.run_command( - ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True - ) + # Get server logs using inherited method + logs = self.get_recent_server_logs(500) - if result.returncode == 0: - logs = result.stdout.decode() + result.stderr.decode() + # Look for debug tool execution patterns + debug_patterns = [ + "debug tool", + "[DEBUG]", + "systematic investigation", + "Token budget", + "Essential files for debugging", + ] - # Look for debug tool execution patterns - debug_patterns = [ - "debug tool", - "[DEBUG]", - "systematic investigation", - "Token budget", - "Essential files for debugging", - ] + patterns_found = 0 + for pattern in debug_patterns: + if pattern in logs: + patterns_found += 1 + self.logger.debug(f" โœ… Found log pattern: {pattern}") - patterns_found = 0 - for pattern in debug_patterns: - if pattern in logs: - patterns_found += 1 - self.logger.debug(f" โœ… Found log pattern: {pattern}") - - if patterns_found >= 3: - self.logger.info(f" โœ… Log validation passed ({patterns_found}/{len(debug_patterns)} patterns)") - else: - self.logger.warning(f" โš ๏ธ Only found {patterns_found}/{len(debug_patterns)} log patterns") + if patterns_found >= 3: + self.logger.info(f" โœ… Log validation passed ({patterns_found}/{len(debug_patterns)} patterns)") else: - self.logger.warning(" โš ๏ธ Could not retrieve Docker logs") + self.logger.warning(f" โš ๏ธ Only found {patterns_found}/{len(debug_patterns)} log patterns") # Test continuation if available if continuation_id: diff --git a/simulator_tests/test_line_number_validation.py b/simulator_tests/test_line_number_validation.py index 714bb8d..ae3e045 100644 --- a/simulator_tests/test_line_number_validation.py +++ b/simulator_tests/test_line_number_validation.py @@ -145,14 +145,16 @@ def validate_data(data): # Test 4: Validate log patterns self.logger.info(" 1.4: Validating line number processing in logs") - # Get logs from container - result = self.run_command( - ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True - ) - - logs = "" - if result.returncode == 0: - logs = result.stdout.decode() + # Get logs from server + try: + log_file_path = "logs/mcp_server.log" + with open(log_file_path) as f: + lines = f.readlines() + logs = "".join(lines[-500:]) + except Exception as e: + self.logger.error(f"Failed to read server logs: {e}") + logs = "" + pass # Check for line number formatting patterns line_number_patterns = ["Line numbers for", "enabled", "โ”‚", "line number"] # The line number separator diff --git a/simulator_tests/test_logs_validation.py b/simulator_tests/test_logs_validation.py index 
aade337..e3a64e7 100644 --- a/simulator_tests/test_logs_validation.py +++ b/simulator_tests/test_logs_validation.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 """ -Docker Logs Validation Test +Server Logs Validation Test -Validates Docker logs to confirm file deduplication behavior and +Validates server logs to confirm file deduplication behavior and conversation threading is working properly. """ @@ -10,7 +10,7 @@ from .base_test import BaseSimulatorTest class LogsValidationTest(BaseSimulatorTest): - """Validate Docker logs to confirm file deduplication behavior""" + """Validate server logs to confirm file deduplication behavior""" @property def test_name(self) -> str: @@ -18,39 +18,35 @@ class LogsValidationTest(BaseSimulatorTest): @property def test_description(self) -> str: - return "Docker logs validation" + return "Server logs validation" def run_test(self) -> bool: - """Validate Docker logs to confirm file deduplication behavior""" + """Validate server logs to confirm file deduplication behavior""" try: - self.logger.info("๐Ÿ“‹ Test: Validating Docker logs for file deduplication...") + self.logger.info("๐Ÿ“‹ Test: Validating server logs for file deduplication...") - # Get server logs from main container - result = self.run_command(["docker", "logs", self.container_name], capture_output=True) + # Get server logs from log files + import os - if result.returncode != 0: - self.logger.error(f"Failed to get Docker logs: {result.stderr}") + logs = "" + log_files = ["logs/mcp_server.log", "logs/mcp_activity.log"] + + for log_file in log_files: + if os.path.exists(log_file): + try: + with open(log_file) as f: + file_content = f.read() + logs += f"\n=== {log_file} ===\n{file_content}\n" + self.logger.debug(f"Read {len(file_content)} characters from {log_file}") + except Exception as e: + self.logger.warning(f"Could not read {log_file}: {e}") + else: + self.logger.warning(f"Log file not found: {log_file}") + + if not logs.strip(): + self.logger.warning("No log content found - server may not have processed any requests yet") return False - main_logs = result.stdout.decode() + result.stderr.decode() - - # Get logs from log monitor container (where detailed activity is logged) - monitor_result = self.run_command(["docker", "logs", "zen-mcp-log-monitor"], capture_output=True) - monitor_logs = "" - if monitor_result.returncode == 0: - monitor_logs = monitor_result.stdout.decode() + monitor_result.stderr.decode() - - # Also get activity logs for more detailed conversation tracking - activity_result = self.run_command( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True - ) - - activity_logs = "" - if activity_result.returncode == 0: - activity_logs = activity_result.stdout.decode() - - logs = main_logs + "\n" + monitor_logs + "\n" + activity_logs - # Look for conversation threading patterns that indicate the system is working conversation_patterns = [ "CONVERSATION_RESUME", diff --git a/simulator_tests/test_o3_model_selection.py b/simulator_tests/test_o3_model_selection.py index 035e262..3e811f2 100644 --- a/simulator_tests/test_o3_model_selection.py +++ b/simulator_tests/test_o3_model_selection.py @@ -4,11 +4,10 @@ O3 Model Selection Test Tests that O3 models are properly selected and used when explicitly specified, regardless of the default model configuration (even when set to auto). -Validates model selection via Docker logs. +Validates model selection via server logs. 
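A note on the log-reading change: throughout this patch, per-test `docker exec tail` helpers are replaced by a shared file-based reader inherited from `BaseSimulatorTest`. A minimal sketch of what such a helper might look like, assuming the `logs/mcp_server.log` path used elsewhere in this diff (the real base-class implementation is not shown in this patch):

```python
# Hypothetical sketch of the inherited file-based log reader; the name and
# signature mirror the self.get_recent_server_logs(500) calls in this diff,
# but the actual BaseSimulatorTest implementation may differ.
from pathlib import Path


def get_recent_server_logs(n: int = 500, log_file: str = "logs/mcp_server.log") -> str:
    """Return the last `n` lines of the standalone server's log file."""
    path = Path(log_file)
    if not path.exists():
        return ""
    lines = path.read_text(encoding="utf-8", errors="replace").splitlines(keepends=True)
    return "".join(lines[-n:])
```

Tests then call it as `logs = self.get_recent_server_logs(500)`, as in `test_debug_validation.py` above.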
""" import datetime -import subprocess from .base_test import BaseSimulatorTest @@ -24,47 +23,16 @@ class O3ModelSelectionTest(BaseSimulatorTest): def test_description(self) -> str: return "O3 model selection and usage validation" - def get_recent_server_logs(self) -> str: - """Get recent server logs from the log file directly""" - try: - # Read logs directly from the log file - use more lines to ensure we get all test-related logs - cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"] - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return result.stdout - else: - self.logger.warning(f"Failed to read server logs: {result.stderr}") - return "" - except Exception as e: - self.logger.error(f"Failed to get server logs: {e}") - return "" - def run_test(self) -> bool: """Test O3 model selection and usage""" try: self.logger.info(" Test: O3 model selection and usage validation") # Check which API keys are configured - check_cmd = [ - "docker", - "exec", - self.container_name, - "python", - "-c", - 'import os; print(f\'OPENAI_KEY:{bool(os.environ.get("OPENAI_API_KEY"))}|OPENROUTER_KEY:{bool(os.environ.get("OPENROUTER_API_KEY"))}\')', - ] - result = subprocess.run(check_cmd, capture_output=True, text=True) + import os - has_openai = False - has_openrouter = False - - if result.returncode == 0: - output = result.stdout.strip() - if "OPENAI_KEY:True" in output: - has_openai = True - if "OPENROUTER_KEY:True" in output: - has_openrouter = True + has_openai = bool(os.environ.get("OPENAI_API_KEY")) + has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY")) # If only OpenRouter is configured, adjust test expectations if has_openrouter and not has_openai: diff --git a/simulator_tests/test_ollama_custom_url.py b/simulator_tests/test_ollama_custom_url.py index 9451759..0f4ab62 100644 --- a/simulator_tests/test_ollama_custom_url.py +++ b/simulator_tests/test_ollama_custom_url.py @@ -9,7 +9,6 @@ Tests custom API endpoint functionality with Ollama-style local models, includin - Model alias resolution for local models """ -import subprocess from .base_test import BaseSimulatorTest @@ -30,14 +29,15 @@ class OllamaCustomUrlTest(BaseSimulatorTest): try: self.logger.info("Test: Ollama custom URL functionality") - # Check if custom URL is configured in the Docker container - custom_url = self._check_docker_custom_url() + # Check if custom URL is configured + import os + + custom_url = os.environ.get("CUSTOM_API_URL") if not custom_url: - self.logger.warning("CUSTOM_API_URL not set in Docker container, skipping Ollama test") + self.logger.warning("CUSTOM_API_URL not set, skipping Ollama test") self.logger.info("To enable this test, add to .env file:") - self.logger.info("CUSTOM_API_URL=http://host.docker.internal:11434/v1") + self.logger.info("CUSTOM_API_URL=http://localhost:11434/v1") self.logger.info("CUSTOM_API_KEY=") - self.logger.info("Then restart docker-compose") return True # Skip gracefully self.logger.info(f"Testing with custom URL: {custom_url}") @@ -172,25 +172,6 @@ if __name__ == "__main__": finally: self.cleanup_test_files() - def _check_docker_custom_url(self) -> str: - """Check if CUSTOM_API_URL is set in the Docker container""" - try: - result = subprocess.run( - ["docker", "exec", self.container_name, "printenv", "CUSTOM_API_URL"], - capture_output=True, - text=True, - timeout=10, - ) - - if result.returncode == 0 and result.stdout.strip(): - return result.stdout.strip() - - return "" - - except Exception as e: - 
self.logger.debug(f"Failed to check Docker CUSTOM_API_URL: {e}") - return "" - def validate_successful_response(self, response: str, test_name: str, files_provided: bool = False) -> bool: """Validate that the response indicates success, not an error @@ -201,7 +182,7 @@ if __name__ == "__main__": """ if not response: self.logger.error(f"No response received for {test_name}") - self._check_docker_logs_for_errors() + self._check_server_logs_for_errors() return False # Check for common error indicators @@ -227,7 +208,7 @@ if __name__ == "__main__": ] # Special handling for clarification requests from local models - if "clarification_required" in response.lower(): + if "files_required_to_continue" in response.lower(): if files_provided: # If we provided actual files, clarification request is a FAILURE self.logger.error( @@ -243,7 +224,7 @@ if __name__ == "__main__": self.logger.debug(f"Clarification response: {response[:200]}...") return True - # Check for SSRF security restriction - this is expected for local URLs from Docker + # Check for SSRF security restriction - this is expected for local URLs if "restricted IP address" in response and "security risk (SSRF)" in response: self.logger.info( f"โœ… Custom URL routing working - {test_name} correctly attempted to connect to custom API" @@ -256,19 +237,19 @@ if __name__ == "__main__": if error.lower() in response_lower: self.logger.error(f"Error detected in {test_name}: {error}") self.logger.debug(f"Full response: {response}") - self._check_docker_logs_for_errors() + self._check_server_logs_for_errors() return False # Response should be substantial (more than just a few words) if len(response.strip()) < 10: self.logger.error(f"Response too short for {test_name}: {response}") - self._check_docker_logs_for_errors() + self._check_server_logs_for_errors() return False # Verify this looks like a real AI response, not just an error message if not self._validate_ai_response_content(response): self.logger.error(f"Response doesn't look like valid AI output for {test_name}") - self._check_docker_logs_for_errors() + self._check_server_logs_for_errors() return False self.logger.debug(f"Successful response for {test_name}: {response[:100]}...") @@ -329,24 +310,23 @@ if __name__ == "__main__": return True - def _check_docker_logs_for_errors(self): - """Check Docker logs for any error messages that might explain failures""" + def _check_server_logs_for_errors(self): + """Check server logs for any error messages that might explain failures""" try: - # Get recent logs from the container - result = subprocess.run( - ["docker", "logs", "--tail", "50", self.container_name], capture_output=True, text=True, timeout=10 - ) + # Get recent logs from the log file + log_file_path = "logs/mcp_server.log" + with open(log_file_path) as f: + lines = f.readlines() + recent_logs = lines[-50:] # Last 50 lines - if result.returncode == 0 and result.stderr: - recent_logs = result.stderr.strip() - if recent_logs: - self.logger.info("Recent container logs:") - for line in recent_logs.split("\n")[-10:]: # Last 10 lines - if line.strip(): - self.logger.info(f" {line}") + if recent_logs: + self.logger.info("Recent server logs:") + for line in recent_logs[-10:]: # Last 10 lines + if line.strip(): + self.logger.info(f" {line.strip()}") except Exception as e: - self.logger.debug(f"Failed to check Docker logs: {e}") + self.logger.debug(f"Failed to check server logs: {e}") def validate_local_model_response(self, response: str) -> bool: """Validate that response appears to come from a local 
model""" diff --git a/simulator_tests/test_openrouter_fallback.py b/simulator_tests/test_openrouter_fallback.py index da907cc..4802171 100644 --- a/simulator_tests/test_openrouter_fallback.py +++ b/simulator_tests/test_openrouter_fallback.py @@ -8,7 +8,6 @@ Tests that verify the system correctly falls back to OpenRouter when: - Auto mode correctly selects OpenRouter models """ -import subprocess from .base_test import BaseSimulatorTest @@ -24,53 +23,28 @@ class OpenRouterFallbackTest(BaseSimulatorTest): def test_description(self) -> str: return "OpenRouter fallback behavior when only provider" - def get_recent_server_logs(self) -> str: - """Get recent server logs from the log file directly""" - try: - cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"] - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return result.stdout - else: - self.logger.warning(f"Failed to read server logs: {result.stderr}") - return "" - except Exception as e: - self.logger.error(f"Failed to get server logs: {e}") - return "" - def run_test(self) -> bool: """Test OpenRouter fallback behavior""" try: self.logger.info("Test: OpenRouter fallback behavior when only provider available") # Check if ONLY OpenRouter API key is configured (this is a fallback test) - check_cmd = [ - "docker", - "exec", - self.container_name, - "python", - "-c", - 'import os; print("OPENROUTER_KEY:" + str(bool(os.environ.get("OPENROUTER_API_KEY"))) + "|GEMINI_KEY:" + str(bool(os.environ.get("GEMINI_API_KEY"))) + "|OPENAI_KEY:" + str(bool(os.environ.get("OPENAI_API_KEY"))))', - ] - result = subprocess.run(check_cmd, capture_output=True, text=True) + import os - if result.returncode == 0: - output = result.stdout.strip() - has_openrouter = "OPENROUTER_KEY:True" in output - has_gemini = "GEMINI_KEY:True" in output - has_openai = "OPENAI_KEY:True" in output + has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY")) + has_gemini = bool(os.environ.get("GEMINI_API_KEY")) + has_openai = bool(os.environ.get("OPENAI_API_KEY")) - if not has_openrouter: - self.logger.info(" โš ๏ธ OpenRouter API key not configured - skipping test") - self.logger.info(" โ„น๏ธ This test requires OPENROUTER_API_KEY to be set in .env") - return True # Return True to indicate test is skipped, not failed + if not has_openrouter: + self.logger.info(" โš ๏ธ OpenRouter API key not configured - skipping test") + self.logger.info(" โ„น๏ธ This test requires OPENROUTER_API_KEY to be set in .env") + return True # Return True to indicate test is skipped, not failed - if has_gemini or has_openai: - self.logger.info(" โš ๏ธ Other API keys configured - this is not a fallback scenario") - self.logger.info(" โ„น๏ธ This test requires ONLY OpenRouter to be configured (no Gemini/OpenAI keys)") - self.logger.info(" โ„น๏ธ Current setup has multiple providers, so fallback behavior doesn't apply") - return True # Return True to indicate test is skipped, not failed + if has_gemini or has_openai: + self.logger.info(" โš ๏ธ Other API keys configured - this is not a fallback scenario") + self.logger.info(" โ„น๏ธ This test requires ONLY OpenRouter to be configured (no Gemini/OpenAI keys)") + self.logger.info(" โ„น๏ธ Current setup has multiple providers, so fallback behavior doesn't apply") + return True # Return True to indicate test is skipped, not failed # Setup test files self.setup_test_files() diff --git a/simulator_tests/test_openrouter_models.py b/simulator_tests/test_openrouter_models.py index 
63316d7..1cd421c 100644 --- a/simulator_tests/test_openrouter_models.py +++ b/simulator_tests/test_openrouter_models.py @@ -9,7 +9,6 @@ Tests that verify OpenRouter functionality including: - Error handling when models are not available """ -import subprocess from .base_test import BaseSimulatorTest @@ -25,39 +24,17 @@ class OpenRouterModelsTest(BaseSimulatorTest): def test_description(self) -> str: return "OpenRouter model functionality and alias mapping" - def get_recent_server_logs(self) -> str: - """Get recent server logs from the log file directly""" - try: - # Read logs directly from the log file - cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"] - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return result.stdout - else: - self.logger.warning(f"Failed to read server logs: {result.stderr}") - return "" - except Exception as e: - self.logger.error(f"Failed to get server logs: {e}") - return "" - def run_test(self) -> bool: """Test OpenRouter model functionality""" try: self.logger.info("Test: OpenRouter model functionality and alias mapping") # Check if OpenRouter API key is configured - check_cmd = [ - "docker", - "exec", - self.container_name, - "python", - "-c", - 'import os; print("OPENROUTER_KEY:" + str(bool(os.environ.get("OPENROUTER_API_KEY"))))', - ] - result = subprocess.run(check_cmd, capture_output=True, text=True) + import os - if result.returncode == 0 and "OPENROUTER_KEY:False" in result.stdout: + has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY")) + + if not has_openrouter: self.logger.info(" โš ๏ธ OpenRouter API key not configured - skipping test") self.logger.info(" โ„น๏ธ This test requires OPENROUTER_API_KEY to be set in .env") return True # Return True to indicate test is skipped, not failed diff --git a/simulator_tests/test_per_tool_deduplication.py b/simulator_tests/test_per_tool_deduplication.py index d8dae80..d883705 100644 --- a/simulator_tests/test_per_tool_deduplication.py +++ b/simulator_tests/test_per_tool_deduplication.py @@ -8,16 +8,15 @@ Validates that: 1. Files are embedded only once in conversation history 2. Continuation calls don't re-read existing files 3. New files are still properly embedded -4. Docker logs show deduplication behavior +4. 
Server logs show deduplication behavior """ import os -import subprocess -from .base_test import BaseSimulatorTest +from .conversation_base_test import ConversationBaseTest -class PerToolDeduplicationTest(BaseSimulatorTest): +class PerToolDeduplicationTest(ConversationBaseTest): """Test file deduplication for each individual tool""" @property @@ -28,74 +27,16 @@ class PerToolDeduplicationTest(BaseSimulatorTest): def test_description(self) -> str: return "File deduplication for individual tools" - def get_docker_logs_since(self, since_time: str) -> str: - """Get docker logs since a specific timestamp""" - try: - # Check both main server and log monitor for comprehensive logs - cmd_server = ["docker", "logs", "--since", since_time, self.container_name] - cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"] - - result_server = subprocess.run(cmd_server, capture_output=True, text=True) - result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True) - - # Get the internal log files which have more detailed logging - server_log_result = subprocess.run( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True - ) - - activity_log_result = subprocess.run( - ["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True - ) - - # Combine all logs - combined_logs = ( - result_server.stdout - + "\n" - + result_monitor.stdout - + "\n" - + server_log_result.stdout - + "\n" - + activity_log_result.stdout - ) - return combined_logs - except Exception as e: - self.logger.error(f"Failed to get docker logs: {e}") - return "" - # create_additional_test_file method now inherited from base class - def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool: - """Validate that logs show file deduplication behavior""" - # Look for file embedding messages - embedding_messages = [ - line for line in logs.split("\n") if "๐Ÿ“" in line and "embedding" in line and tool_name in line - ] - - # Look for deduplication/filtering messages - filtering_messages = [ - line for line in logs.split("\n") if "๐Ÿ“" in line and "Filtering" in line and tool_name in line - ] - skipping_messages = [ - line for line in logs.split("\n") if "๐Ÿ“" in line and "skipping" in line and tool_name in line - ] - - deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0 - - if deduplication_found: - self.logger.info(f" โœ… {tool_name}: Found deduplication evidence in logs") - for msg in filtering_messages + skipping_messages: - self.logger.debug(f" ๐Ÿ“ {msg.strip()}") - else: - self.logger.warning(f" โš ๏ธ {tool_name}: No deduplication evidence found in logs") - self.logger.debug(f" ๐Ÿ“ All embedding messages: {embedding_messages}") - - return deduplication_found - def run_test(self) -> bool: """Test file deduplication with realistic precommit/codereview workflow""" try: self.logger.info("๐Ÿ“„ Test: Simplified file deduplication with precommit/codereview workflow") + # Setup test environment for conversation testing + self.setUp() + # Setup test files self.setup_test_files() @@ -126,7 +67,7 @@ def divide(x, y): "model": "flash", } - response1, continuation_id = self.call_mcp_tool("precommit", precommit_params) + response1, continuation_id = self.call_mcp_tool_direct("precommit", precommit_params) if not response1: self.logger.error(" โŒ Step 1: precommit tool failed") return False @@ -151,7 +92,7 @@ def divide(x, y): "model": "flash", } - response2, _ = 
self.call_mcp_tool("codereview", codereview_params) + response2, _ = self.call_mcp_tool_direct("codereview", codereview_params) if not response2: self.logger.error(" โŒ Step 2: codereview tool failed") return False @@ -181,16 +122,16 @@ def subtract(a, b): "model": "flash", } - response3, _ = self.call_mcp_tool("precommit", continue_params) + response3, _ = self.call_mcp_tool_direct("precommit", continue_params) if not response3: self.logger.error(" โŒ Step 3: precommit continuation failed") return False self.logger.info(" โœ… Step 3: precommit continuation completed") - # Validate results in docker logs + # Validate results in server logs self.logger.info(" ๐Ÿ“‹ Validating conversation history and file deduplication...") - logs = self.get_docker_logs_since(start_time) + logs = self.get_server_logs_since(start_time) # Check for conversation history building conversation_logs = [ @@ -249,7 +190,7 @@ def subtract(a, b): return True else: self.logger.warning(" โš ๏ธ File deduplication workflow test: FAILED") - self.logger.warning(" ๐Ÿ’ก Check docker logs for detailed file embedding and continuation activity") + self.logger.warning(" ๐Ÿ’ก Check server logs for detailed file embedding and continuation activity") return False except Exception as e: diff --git a/simulator_tests/test_planner_continuation_history.py b/simulator_tests/test_planner_continuation_history.py index 463c82d..66caba5 100644 --- a/simulator_tests/test_planner_continuation_history.py +++ b/simulator_tests/test_planner_continuation_history.py @@ -244,7 +244,7 @@ class PlannerContinuationHistoryTest(BaseSimulatorTest): response2, _ = self.call_mcp_tool( "planner", { - "step": "Deployment strategy: Use Kubernetes for container orchestration with Helm charts. Implement CI/CD pipeline with GitOps. Use service mesh (Istio) for traffic management, monitoring, and security. Deploy databases in separate namespaces with backup automation.", + "step": "Deployment strategy: Use Kubernetes for orchestration with Helm charts. Implement CI/CD pipeline with GitOps. Use service mesh (Istio) for traffic management, monitoring, and security. Deploy databases in separate namespaces with backup automation.", "step_number": 2, "total_steps": 2, "next_step_required": False, # Complete the session @@ -326,7 +326,7 @@ class PlannerContinuationHistoryTest(BaseSimulatorTest): return False def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: - """Call an MCP tool via Claude CLI (docker exec) - override for planner-specific response handling""" + """Call an MCP tool via standalone server - override for planner-specific response handling""" # Use parent implementation to get the raw response response_text, _ = super().call_mcp_tool(tool_name, params) diff --git a/simulator_tests/test_planner_validation.py b/simulator_tests/test_planner_validation.py index d00b0c5..ea24cd4 100644 --- a/simulator_tests/test_planner_validation.py +++ b/simulator_tests/test_planner_validation.py @@ -275,7 +275,7 @@ class PlannerValidationTest(BaseSimulatorTest): response3, _ = self.call_mcp_tool( "planner", { - "step": "Revision: Actually, let me revise the Kubernetes approach. I'll use a simpler Docker Swarm deployment initially, then migrate to Kubernetes later.", + "step": "Revision: Actually, let me revise the Kubernetes approach. 
I'll use a simpler deployment initially, then migrate to Kubernetes later.", "step_number": 3, "total_steps": 4, "next_step_required": True, @@ -311,7 +311,7 @@ class PlannerValidationTest(BaseSimulatorTest): return False def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]: - """Call an MCP tool via Claude CLI (docker exec) - override for planner-specific response handling""" + """Call an MCP tool via standalone server - override for planner-specific response handling""" # Use parent implementation to get the raw response response_text, _ = super().call_mcp_tool(tool_name, params) diff --git a/simulator_tests/test_redis_validation.py b/simulator_tests/test_redis_validation.py deleted file mode 100644 index ce6f861..0000000 --- a/simulator_tests/test_redis_validation.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python3 -""" -Redis Conversation Memory Validation Test - -Validates that conversation memory is working via Redis by checking -for stored conversation threads and their content. -""" - -import json - -from .base_test import BaseSimulatorTest - - -class RedisValidationTest(BaseSimulatorTest): - """Validate that conversation memory is working via Redis""" - - @property - def test_name(self) -> str: - return "redis_validation" - - @property - def test_description(self) -> str: - return "Redis conversation memory validation" - - def run_test(self) -> bool: - """Validate that conversation memory is working via Redis""" - try: - self.logger.info("๐Ÿ’พ Test: Validating conversation memory via Redis...") - - # First, test Redis connectivity - ping_result = self.run_command( - ["docker", "exec", self.redis_container, "redis-cli", "ping"], capture_output=True - ) - - if ping_result.returncode != 0: - self.logger.error("Failed to connect to Redis") - return False - - if "PONG" not in ping_result.stdout.decode(): - self.logger.error("Redis ping failed") - return False - - self.logger.info("โœ… Redis connectivity confirmed") - - # Check Redis for stored conversations - result = self.run_command( - ["docker", "exec", self.redis_container, "redis-cli", "KEYS", "thread:*"], capture_output=True - ) - - if result.returncode != 0: - self.logger.error("Failed to query Redis") - return False - - keys = result.stdout.decode().strip().split("\n") - thread_keys = [k for k in keys if k.startswith("thread:") and k != "thread:*"] - - if thread_keys: - self.logger.info(f"โœ… Found {len(thread_keys)} conversation threads in Redis") - - # Get details of first thread - thread_key = thread_keys[0] - result = self.run_command( - ["docker", "exec", self.redis_container, "redis-cli", "GET", thread_key], capture_output=True - ) - - if result.returncode == 0: - thread_data = result.stdout.decode() - try: - parsed = json.loads(thread_data) - turns = parsed.get("turns", []) - self.logger.info(f"โœ… Thread has {len(turns)} turns") - return True - except json.JSONDecodeError: - self.logger.warning("Could not parse thread data") - - return True - else: - # If no existing threads, create a test thread to validate Redis functionality - self.logger.info(" No existing threads found, creating test thread to validate Redis...") - - test_thread_id = "test_thread_validation" - test_data = { - "thread_id": test_thread_id, - "turns": [ - {"tool": "chat", "timestamp": "2025-06-11T16:30:00Z", "prompt": "Test validation prompt"} - ], - } - - # Store test data - store_result = self.run_command( - [ - "docker", - "exec", - self.redis_container, - "redis-cli", - "SET", - 
f"thread:{test_thread_id}", - json.dumps(test_data), - ], - capture_output=True, - ) - - if store_result.returncode != 0: - self.logger.error("Failed to store test data in Redis") - return False - - # Retrieve test data - retrieve_result = self.run_command( - ["docker", "exec", self.redis_container, "redis-cli", "GET", f"thread:{test_thread_id}"], - capture_output=True, - ) - - if retrieve_result.returncode != 0: - self.logger.error("Failed to retrieve test data from Redis") - return False - - retrieved_data = retrieve_result.stdout.decode() - try: - parsed = json.loads(retrieved_data) - if parsed.get("thread_id") == test_thread_id: - self.logger.info("โœ… Redis read/write validation successful") - - # Clean up test data - self.run_command( - ["docker", "exec", self.redis_container, "redis-cli", "DEL", f"thread:{test_thread_id}"], - capture_output=True, - ) - - return True - else: - self.logger.error("Retrieved data doesn't match stored data") - return False - except json.JSONDecodeError: - self.logger.error("Could not parse retrieved test data") - return False - - except Exception as e: - self.logger.error(f"Conversation memory validation failed: {e}") - return False diff --git a/simulator_tests/test_refactor_validation.py b/simulator_tests/test_refactor_validation.py index 579a39f..954fab8 100644 --- a/simulator_tests/test_refactor_validation.py +++ b/simulator_tests/test_refactor_validation.py @@ -241,35 +241,28 @@ def handle_everything(user_input, config, database): # Validate logs self.logger.info(" ๐Ÿ“‹ Validating execution logs...") - # Get server logs from the actual log file inside the container - result = self.run_command( - ["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True - ) + # Get server logs using inherited method + logs = self.get_recent_server_logs(500) - if result.returncode == 0: - logs = result.stdout.decode() + result.stderr.decode() + # Look for refactor tool execution patterns + refactor_patterns = [ + "[REFACTOR]", + "refactor tool", + "codesmells", + "Token budget", + "Code files embedded successfully", + ] - # Look for refactor tool execution patterns - refactor_patterns = [ - "[REFACTOR]", - "refactor tool", - "codesmells", - "Token budget", - "Code files embedded successfully", - ] + patterns_found = 0 + for pattern in refactor_patterns: + if pattern in logs: + patterns_found += 1 + self.logger.debug(f" โœ… Found log pattern: {pattern}") - patterns_found = 0 - for pattern in refactor_patterns: - if pattern in logs: - patterns_found += 1 - self.logger.debug(f" โœ… Found log pattern: {pattern}") - - if patterns_found >= 3: - self.logger.info(f" โœ… Log validation passed ({patterns_found}/{len(refactor_patterns)} patterns)") - else: - self.logger.warning(f" โš ๏ธ Only found {patterns_found}/{len(refactor_patterns)} log patterns") + if patterns_found >= 3: + self.logger.info(f" โœ… Log validation passed ({patterns_found}/{len(refactor_patterns)} patterns)") else: - self.logger.warning(" โš ๏ธ Could not retrieve Docker logs") + self.logger.warning(f" โš ๏ธ Only found {patterns_found}/{len(refactor_patterns)} log patterns") self.logger.info(" โœ… Refactor tool validation completed successfully") return True diff --git a/simulator_tests/test_token_allocation_validation.py b/simulator_tests/test_token_allocation_validation.py index 53b675f..31f1bda 100644 --- a/simulator_tests/test_token_allocation_validation.py +++ b/simulator_tests/test_token_allocation_validation.py @@ -11,7 +11,6 @@ This test validates that: 
import datetime import re -import subprocess from .base_test import BaseSimulatorTest @@ -27,78 +26,6 @@ class TokenAllocationValidationTest(BaseSimulatorTest): def test_description(self) -> str: return "Token allocation and conversation history validation" - def get_recent_server_logs(self) -> str: - """Get recent server logs from the log file directly""" - try: - cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"] - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return result.stdout - else: - self.logger.warning(f"Failed to read server logs: {result.stderr}") - return "" - except Exception as e: - self.logger.error(f"Failed to get server logs: {e}") - return "" - - def extract_conversation_usage_logs(self, logs: str) -> list[dict[str, int]]: - """Extract actual conversation token usage from server logs""" - usage_logs = [] - - # Look for conversation debug logs that show actual usage - lines = logs.split("\n") - - for i, line in enumerate(lines): - if "[CONVERSATION_DEBUG] Token budget calculation:" in line: - # Found start of token budget log, extract the following lines - usage = {} - for j in range(1, 8): # Next 7 lines contain the usage details - if i + j < len(lines): - detail_line = lines[i + j] - - # Parse Total capacity: 1,048,576 - if "Total capacity:" in detail_line: - match = re.search(r"Total capacity:\s*([\d,]+)", detail_line) - if match: - usage["total_capacity"] = int(match.group(1).replace(",", "")) - - # Parse Content allocation: 838,860 - elif "Content allocation:" in detail_line: - match = re.search(r"Content allocation:\s*([\d,]+)", detail_line) - if match: - usage["content_allocation"] = int(match.group(1).replace(",", "")) - - # Parse Conversation tokens: 12,345 - elif "Conversation tokens:" in detail_line: - match = re.search(r"Conversation tokens:\s*([\d,]+)", detail_line) - if match: - usage["conversation_tokens"] = int(match.group(1).replace(",", "")) - - # Parse Remaining tokens: 825,515 - elif "Remaining tokens:" in detail_line: - match = re.search(r"Remaining tokens:\s*([\d,]+)", detail_line) - if match: - usage["remaining_tokens"] = int(match.group(1).replace(",", "")) - - if usage: # Only add if we found some usage data - usage_logs.append(usage) - - return usage_logs - - def extract_conversation_token_usage(self, logs: str) -> list[int]: - """Extract conversation token usage from logs""" - usage_values = [] - - # Look for conversation token usage logs - pattern = r"Conversation history token usage:\s*([\d,]+)" - matches = re.findall(pattern, logs) - - for match in matches: - usage_values.append(int(match.replace(",", ""))) - - return usage_values - def run_test(self) -> bool: """Test token allocation and conversation history functionality""" try: diff --git a/simulator_tests/test_vision_capability.py b/simulator_tests/test_vision_capability.py index e75b3c9..6dc3228 100644 --- a/simulator_tests/test_vision_capability.py +++ b/simulator_tests/test_vision_capability.py @@ -81,7 +81,7 @@ class VisionCapabilityTest(BaseSimulatorTest): "don't have access", "cannot see", "no image", - "clarification_required", + "files_required_to_continue", "image you're referring to", "supply the image", "error", @@ -122,7 +122,7 @@ class VisionCapabilityTest(BaseSimulatorTest): "don't have access", "cannot see", "no image", - "clarification_required", + "files_required_to_continue", "image you're referring to", "supply the image", "error", diff --git a/simulator_tests/test_xai_models.py 
b/simulator_tests/test_xai_models.py index c71a996..66d1b13 100644 --- a/simulator_tests/test_xai_models.py +++ b/simulator_tests/test_xai_models.py @@ -9,7 +9,6 @@ Tests that verify X.AI GROK functionality including: - API integration and response validation """ -import subprocess from .base_test import BaseSimulatorTest @@ -25,44 +24,18 @@ class XAIModelsTest(BaseSimulatorTest): def test_description(self) -> str: return "X.AI GROK model functionality and integration" - def get_recent_server_logs(self) -> str: - """Get recent server logs from the log file directly""" - try: - # Read logs directly from the log file - cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"] - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - return result.stdout - else: - self.logger.warning(f"Failed to read server logs: {result.stderr}") - return "" - except Exception as e: - self.logger.error(f"Failed to get server logs: {e}") - return "" - def run_test(self) -> bool: """Test X.AI GROK model functionality""" try: self.logger.info("Test: X.AI GROK model functionality and integration") # Check if X.AI API key is configured and not empty - check_cmd = [ - "docker", - "exec", - self.container_name, - "python", - "-c", - """ -import os -xai_key = os.environ.get("XAI_API_KEY", "") -is_valid = bool(xai_key and xai_key != "your_xai_api_key_here" and xai_key.strip()) -print(f"XAI_KEY_VALID:{is_valid}") - """.strip(), - ] - result = subprocess.run(check_cmd, capture_output=True, text=True) + import os - if result.returncode == 0 and "XAI_KEY_VALID:False" in result.stdout: + xai_key = os.environ.get("XAI_API_KEY", "") + is_valid = bool(xai_key and xai_key != "your_xai_api_key_here" and xai_key.strip()) + + if not is_valid: self.logger.info(" โš ๏ธ X.AI API key not configured or empty - skipping test") self.logger.info(" โ„น๏ธ This test requires XAI_API_KEY to be set in .env with a valid key") return True # Return True to indicate test is skipped, not failed diff --git a/systemprompts/analyze_prompt.py b/systemprompts/analyze_prompt.py index 7460042..f36f07b 100644 --- a/systemprompts/analyze_prompt.py +++ b/systemprompts/analyze_prompt.py @@ -19,8 +19,11 @@ IF MORE INFORMATION IS NEEDED If you need additional context (e.g., dependencies, configuration files, test files) to provide complete analysis, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} ESCALATE TO A FULL CODEREVIEW IF REQUIRED If, after thoroughly analysing the question and the provided code, you determine that a comprehensive, code-baseโ€“wide diff --git a/systemprompts/chat_prompt.py b/systemprompts/chat_prompt.py index 0a0b6a8..fe2967f 100644 --- a/systemprompts/chat_prompt.py +++ b/systemprompts/chat_prompt.py @@ -18,8 +18,11 @@ If Claude is discussing specific code, functions, or project components that was and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful collaboration, you MUST respond ONLY with this JSON format (and nothing else). 
Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} SCOPE & FOCUS โ€ข Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints. diff --git a/systemprompts/codereview_prompt.py b/systemprompts/codereview_prompt.py index 3b46197..97f67cf 100644 --- a/systemprompts/codereview_prompt.py +++ b/systemprompts/codereview_prompt.py @@ -19,8 +19,11 @@ IF MORE INFORMATION IS NEEDED If you need additional context (e.g., related files, configuration, dependencies) to provide a complete and accurate review, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} CRITICAL: Align your review with the user's context and expectations. Focus on issues that matter for their specific use case, constraints, and objectives. Don't provide a generic "find everything" review - tailor diff --git a/systemprompts/consensus_prompt.py b/systemprompts/consensus_prompt.py index 7425ddf..fea6cfa 100644 --- a/systemprompts/consensus_prompt.py +++ b/systemprompts/consensus_prompt.py @@ -26,8 +26,11 @@ IF MORE INFORMATION IS NEEDED If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide thorough analysis or response, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} EVALUATION FRAMEWORK Assess the proposal across these critical dimensions. Your stance influences HOW you present findings, not WHETHER you diff --git a/systemprompts/debug_prompt.py b/systemprompts/debug_prompt.py index ef94b3b..ab71f5a 100644 --- a/systemprompts/debug_prompt.py +++ b/systemprompts/debug_prompt.py @@ -49,8 +49,8 @@ Do NOT include any text before or after the JSON. The response must be valid JSO IF MORE INFORMATION IS NEEDED: If you lack critical information to proceed, you MUST only respond with the following: { - "status": "clarification_required", - "question": "", + "status": "files_required_to_continue", + "mandatory_instructions": "", "files_needed": ["[file name here]", "[or some folder/]"] } diff --git a/systemprompts/planner_prompt.py b/systemprompts/planner_prompt.py index cf3c694..991b656 100644 --- a/systemprompts/planner_prompt.py +++ b/systemprompts/planner_prompt.py @@ -23,8 +23,11 @@ If Claude is discussing specific code, functions, or project components that was and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful collaboration, you MUST respond ONLY with this JSON format (and nothing else). 
Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} PLANNING METHODOLOGY: @@ -63,8 +66,8 @@ Do NOT include any text before or after the JSON. The response must be valid JSO IF MORE INFORMATION IS NEEDED: If you lack critical information to proceed with planning, you MUST only respond with: { - "status": "clarification_required", - "question": "", + "status": "files_required_to_continue", + "mandatory_instructions": "", "files_needed": ["", ""] } diff --git a/systemprompts/precommit_prompt.py b/systemprompts/precommit_prompt.py index 14bf364..b507a0c 100644 --- a/systemprompts/precommit_prompt.py +++ b/systemprompts/precommit_prompt.py @@ -18,8 +18,11 @@ If you need additional context (e.g., related files not in the diff, test files, analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} INPUTS PROVIDED 1. Git diff (staged or branch comparison) diff --git a/systemprompts/refactor_prompt.py b/systemprompts/refactor_prompt.py index 899d542..3513b8c 100644 --- a/systemprompts/refactor_prompt.py +++ b/systemprompts/refactor_prompt.py @@ -21,7 +21,11 @@ IF MORE INFORMATION IS NEEDED If you need additional context (e.g., related files, configuration, dependencies) to provide accurate refactoring recommendations, you MUST respond ONLY with this JSON format (and ABSOLUTELY nothing else - no text before or after). Do NOT ask for the same file you've been provided unless its content is missing or incomplete: -{"status": "clarification_required", "question": "", "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} REFACTOR TYPES (PRIORITY ORDER) diff --git a/systemprompts/testgen_prompt.py b/systemprompts/testgen_prompt.py index 0d8e2de..cfcba94 100644 --- a/systemprompts/testgen_prompt.py +++ b/systemprompts/testgen_prompt.py @@ -19,8 +19,11 @@ IF MORE INFORMATION IS NEEDED If you need additional context (e.g., test framework details, dependencies, existing test patterns) to provide accurate test generation, you MUST respond ONLY with this JSON format (and nothing else). 
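Taken together, these prompt changes define a small JSON contract: a model that needs more context must reply with a `files_required_to_continue` payload instead of a normal answer. A hedged sketch of how a caller might detect that payload (the helper name is illustrative, not part of the codebase):

```python
# Illustrative consumer of the files_required_to_continue contract defined
# by these system prompts; not part of this diff.
import json


def files_requested(response_text: str) -> list[str]:
    """Return files_needed if the model asked for more context, else []."""
    try:
        payload = json.loads(response_text)
    except (json.JSONDecodeError, TypeError):
        return []  # ordinary, non-JSON tool output
    if isinstance(payload, dict) and payload.get("status") == "files_required_to_continue":
        return payload.get("files_needed", [])
    return []
```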
Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} MULTI-AGENT WORKFLOW You sequentially inhabit five expert personasโ€”each passes a concise artefact to the next: diff --git a/systemprompts/thinkdeep_prompt.py b/systemprompts/thinkdeep_prompt.py index 2e48397..a2ee672 100644 --- a/systemprompts/thinkdeep_prompt.py +++ b/systemprompts/thinkdeep_prompt.py @@ -18,8 +18,11 @@ IF MORE INFORMATION IS NEEDED If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide thorough analysis, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is missing or incomplete: -{"status": "clarification_required", "question": "", - "files_needed": ["[file name here]", "[or some folder/]"]} +{ + "status": "files_required_to_continue", + "mandatory_instructions": "", + "files_needed": ["[file name here]", "[or some folder/]"] +} GUIDELINES 1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints. diff --git a/tests/conftest.py b/tests/conftest.py index c164a73..0387bcd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,6 @@ import asyncio import importlib import os import sys -import tempfile from pathlib import Path import pytest @@ -33,11 +32,8 @@ import config # noqa: E402 importlib.reload(config) -# Set WORKSPACE_ROOT to a temporary directory for tests -# This provides a safe sandbox for file operations during testing -# Create a temporary directory that will be used as the workspace for all tests -test_root = tempfile.mkdtemp(prefix="zen_mcp_test_") -os.environ["WORKSPACE_ROOT"] = test_root +# Note: This creates a test sandbox environment +# Tests create their own temporary directories as needed # Configure asyncio for Windows compatibility if sys.platform == "win32": @@ -47,7 +43,7 @@ if sys.platform == "win32": from providers import ModelProviderRegistry # noqa: E402 from providers.base import ProviderType # noqa: E402 from providers.gemini import GeminiModelProvider # noqa: E402 -from providers.openai import OpenAIModelProvider # noqa: E402 +from providers.openai_provider import OpenAIModelProvider # noqa: E402 from providers.xai import XAIModelProvider # noqa: E402 # Register providers at test startup @@ -59,14 +55,11 @@ ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider) @pytest.fixture def project_path(tmp_path): """ - Provides a temporary directory within the WORKSPACE_ROOT sandbox for tests. - This ensures all file operations during tests are within the allowed directory. + Provides a temporary directory for tests. + This ensures all file operations during tests are isolated. 
""" - # Get the test workspace root - test_root = Path(os.environ.get("WORKSPACE_ROOT", "/tmp")) - # Create a subdirectory for this specific test - test_dir = test_root / f"test_{tmp_path.name}" + test_dir = tmp_path / "test_workspace" test_dir.mkdir(parents=True, exist_ok=True) return test_dir diff --git a/tests/test_alias_target_restrictions.py b/tests/test_alias_target_restrictions.py index 1bfd339..7641087 100644 --- a/tests/test_alias_target_restrictions.py +++ b/tests/test_alias_target_restrictions.py @@ -10,7 +10,7 @@ from unittest.mock import patch from providers.base import ProviderType from providers.gemini import GeminiModelProvider -from providers.openai import OpenAIModelProvider +from providers.openai_provider import OpenAIModelProvider from utils.model_restrictions import ModelRestrictionService diff --git a/tests/test_auto_mode_comprehensive.py b/tests/test_auto_mode_comprehensive.py index 46fa668..3f1e7dd 100644 --- a/tests/test_auto_mode_comprehensive.py +++ b/tests/test_auto_mode_comprehensive.py @@ -61,7 +61,7 @@ class TestAutoModeComprehensive: # Re-register providers for subsequent tests (like conftest.py does) from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider from providers.xai import XAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) @@ -178,7 +178,7 @@ class TestAutoModeComprehensive: # Register providers based on configuration from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider from providers.openrouter import OpenRouterProvider from providers.xai import XAIModelProvider @@ -349,7 +349,7 @@ class TestAutoModeComprehensive: # Register all native providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider from providers.xai import XAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) @@ -460,7 +460,7 @@ class TestAutoModeComprehensive: # Register providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) diff --git a/tests/test_auto_mode_provider_selection.py b/tests/test_auto_mode_provider_selection.py index a45c388..416d0d9 100644 --- a/tests/test_auto_mode_provider_selection.py +++ b/tests/test_auto_mode_provider_selection.py @@ -86,7 +86,7 @@ class TestAutoModeProviderSelection: os.environ.pop(key, None) # Register only OpenAI provider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) @@ -127,7 +127,7 @@ class TestAutoModeProviderSelection: # Register both providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) @@ -212,7 +212,7 @@ class TestAutoModeProviderSelection: 
# Register both providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) @@ -256,7 +256,7 @@ class TestAutoModeProviderSelection: # Register all providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider from providers.xai import XAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) @@ -307,7 +307,7 @@ class TestAutoModeProviderSelection: # Register all providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider from providers.xai import XAIModelProvider ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) diff --git a/tests/test_buggy_behavior_prevention.py b/tests/test_buggy_behavior_prevention.py index d54ff1d..4a4810e 100644 --- a/tests/test_buggy_behavior_prevention.py +++ b/tests/test_buggy_behavior_prevention.py @@ -16,7 +16,7 @@ import pytest from providers.base import ProviderType from providers.gemini import GeminiModelProvider -from providers.openai import OpenAIModelProvider +from providers.openai_provider import OpenAIModelProvider from utils.model_restrictions import ModelRestrictionService diff --git a/tests/test_claude_continuation.py b/tests/test_claude_continuation.py index e4fa6e0..e3cf622 100644 --- a/tests/test_claude_continuation.py +++ b/tests/test_claude_continuation.py @@ -61,16 +61,16 @@ class TestClaudeContinuationOffers: # Set default model to avoid effective auto mode self.tool.default_model = "gemini-2.5-flash-preview-05-20" - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_new_conversation_offers_continuation(self, mock_redis): + async def test_new_conversation_offers_continuation(self, mock_storage): """Test that new conversations offer Claude continuation opportunity""" # Create tool AFTER providers are registered (in conftest.py fixture) tool = ClaudeContinuationTool() tool.default_model = "gemini-2.5-flash-preview-05-20" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the model with patch.object(tool, "get_model_provider") as mock_get_provider: @@ -97,12 +97,12 @@ class TestClaudeContinuationOffers: assert "continuation_offer" in response_data assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1 - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_existing_conversation_still_offers_continuation(self, mock_redis): + async def test_existing_conversation_still_offers_continuation(self, mock_storage): """Test that existing threaded conversations still offer continuation if turns remain""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock existing thread context with 2 turns from utils.conversation_memory import ConversationTurn, ThreadContext @@ -155,12 +155,12 @@ 
class TestClaudeContinuationOffers: # MAX_CONVERSATION_TURNS - 2 existing - 1 new = remaining assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 3 - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_full_response_flow_with_continuation_offer(self, mock_redis): + async def test_full_response_flow_with_continuation_offer(self, mock_storage): """Test complete response flow that creates continuation offer""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the model to return a response without follow-up question with patch.object(self.tool, "get_model_provider") as mock_get_provider: @@ -193,12 +193,12 @@ class TestClaudeContinuationOffers: assert "You have" in offer["note"] assert "more exchange(s) available" in offer["note"] - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_continuation_always_offered_with_natural_language(self, mock_redis): + async def test_continuation_always_offered_with_natural_language(self, mock_storage): """Test that continuation is always offered with natural language prompts""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the model to return a response with natural language follow-up with patch.object(self.tool, "get_model_provider") as mock_get_provider: @@ -229,12 +229,12 @@ I'd be happy to examine the error handling patterns in more detail if that would assert "continuation_offer" in response_data assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1 - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_threaded_conversation_with_continuation_offer(self, mock_redis): + async def test_threaded_conversation_with_continuation_offer(self, mock_storage): """Test that threaded conversations still get continuation offers when turns remain""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock existing thread context from utils.conversation_memory import ThreadContext @@ -274,12 +274,12 @@ I'd be happy to examine the error handling patterns in more detail if that would assert response_data.get("continuation_offer") is not None assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1 - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_max_turns_reached_no_continuation_offer(self, mock_redis): + async def test_max_turns_reached_no_continuation_offer(self, mock_storage): """Test that no continuation is offered when max turns would be exceeded""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock existing thread context at max turns from utils.conversation_memory import ConversationTurn, ThreadContext @@ -338,12 +338,12 @@ class TestContinuationIntegration: # Set default model to avoid effective auto mode self.tool.default_model = "gemini-2.5-flash-preview-05-20" - 
@patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_continuation_offer_creates_proper_thread(self, mock_redis): + async def test_continuation_offer_creates_proper_thread(self, mock_storage): """Test that continuation offers create properly formatted threads""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the get call that add_turn makes to retrieve the existing thread # We'll set this up after the first setex call @@ -402,12 +402,12 @@ class TestContinuationIntegration: assert thread_context["initial_context"]["prompt"] == "Initial analysis" assert thread_context["initial_context"]["files"] == ["/test/file.py"] - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_claude_can_use_continuation_id(self, mock_redis): + async def test_claude_can_use_continuation_id(self, mock_storage): """Test that Claude can use the provided continuation_id in subsequent calls""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Step 1: Initial request creates continuation offer with patch.object(self.tool, "get_model_provider") as mock_get_provider: diff --git a/tests/test_collaboration.py b/tests/test_collaboration.py index 80d4ddf..186c91a 100644 --- a/tests/test_collaboration.py +++ b/tests/test_collaboration.py @@ -10,7 +10,7 @@ import pytest from tests.mock_helpers import create_mock_provider from tools.analyze import AnalyzeTool from tools.debug import DebugIssueTool -from tools.models import ClarificationRequest, ToolOutput +from tools.models import FilesNeededRequest, ToolOutput class TestDynamicContextRequests: @@ -31,8 +31,8 @@ class TestDynamicContextRequests: # Mock model to return a clarification request clarification_json = json.dumps( { - "status": "clarification_required", - "question": "I need to see the package.json file to understand dependencies", + "status": "files_required_to_continue", + "mandatory_instructions": "I need to see the package.json file to understand dependencies", "files_needed": ["package.json", "package-lock.json"], } ) @@ -56,12 +56,16 @@ class TestDynamicContextRequests: # Parse the response response_data = json.loads(result[0].text) - assert response_data["status"] == "clarification_required" + assert response_data["status"] == "files_required_to_continue" assert response_data["content_type"] == "json" # Parse the clarification request clarification = json.loads(response_data["content"]) - assert clarification["question"] == "I need to see the package.json file to understand dependencies" + # Check that the enhanced instructions contain the original message and additional guidance + expected_start = "I need to see the package.json file to understand dependencies" + assert clarification["mandatory_instructions"].startswith(expected_start) + assert "IMPORTANT GUIDANCE:" in clarification["mandatory_instructions"] + assert "Use FULL absolute paths" in clarification["mandatory_instructions"] assert clarification["files_needed"] == ["package.json", "package-lock.json"] @pytest.mark.asyncio @@ -100,7 +104,7 @@ class TestDynamicContextRequests: @patch("tools.base.BaseTool.get_model_provider") async def test_malformed_clarification_request_treated_as_normal(self, mock_get_provider, 
analyze_tool): """Test that malformed JSON clarification requests are treated as normal responses""" - malformed_json = '{"status": "clarification_required", "prompt": "Missing closing brace"' + malformed_json = '{"status": "files_required_to_continue", "prompt": "Missing closing brace"' mock_provider = create_mock_provider() mock_provider.get_provider_type.return_value = Mock(value="google") @@ -125,8 +129,8 @@ class TestDynamicContextRequests: """Test clarification request with suggested next action""" clarification_json = json.dumps( { - "status": "clarification_required", - "question": "I need to see the database configuration to diagnose the connection error", + "status": "files_required_to_continue", + "mandatory_instructions": "I need to see the database configuration to diagnose the connection error", "files_needed": ["config/database.yml", "src/db.py"], "suggested_next_action": { "tool": "debug", @@ -160,7 +164,7 @@ class TestDynamicContextRequests: assert len(result) == 1 response_data = json.loads(result[0].text) - assert response_data["status"] == "clarification_required" + assert response_data["status"] == "files_required_to_continue" clarification = json.loads(response_data["content"]) assert "suggested_next_action" in clarification @@ -184,17 +188,54 @@ class TestDynamicContextRequests: assert parsed["metadata"]["tool_name"] == "test" def test_clarification_request_model(self): - """Test ClarificationRequest model""" - request = ClarificationRequest( - question="Need more context", + """Test FilesNeededRequest model""" + request = FilesNeededRequest( + mandatory_instructions="Need more context", files_needed=["file1.py", "file2.py"], suggested_next_action={"tool": "analyze", "args": {}}, ) - assert request.question == "Need more context" + assert request.mandatory_instructions == "Need more context" assert len(request.files_needed) == 2 assert request.suggested_next_action["tool"] == "analyze" + def test_mandatory_instructions_enhancement(self): + """Test that mandatory_instructions are enhanced with additional guidance""" + from tools.base import BaseTool + + # Create a dummy tool instance for testing + class TestTool(BaseTool): + def get_name(self): + return "test" + + def get_description(self): + return "test" + + def get_request_model(self): + return None + + def prepare_prompt(self, request): + return "" + + def get_system_prompt(self): + return "" + + def get_input_schema(self): + return {} + + tool = TestTool() + original = "I need additional files to proceed" + enhanced = tool._enhance_mandatory_instructions(original) + + # Verify the original instructions are preserved + assert enhanced.startswith(original) + + # Verify additional guidance is added + assert "IMPORTANT GUIDANCE:" in enhanced + assert "CRITICAL for providing accurate analysis" in enhanced + assert "Use FULL absolute paths" in enhanced + assert "continuation_id to continue" in enhanced + @pytest.mark.asyncio @patch("tools.base.BaseTool.get_model_provider") async def test_error_response_format(self, mock_get_provider, analyze_tool): @@ -223,8 +264,8 @@ class TestCollaborationWorkflow: # Mock Gemini to request package.json when asked about dependencies clarification_json = json.dumps( { - "status": "clarification_required", - "question": "I need to see the package.json file to analyze npm dependencies", + "status": "files_required_to_continue", + "mandatory_instructions": "I need to see the package.json file to analyze npm dependencies", "files_needed": ["package.json", "package-lock.json"], } ) @@ 
-247,7 +288,7 @@ class TestCollaborationWorkflow: response = json.loads(result[0].text) assert ( - response["status"] == "clarification_required" + response["status"] == "files_required_to_continue" ), "Should request clarification when asked about dependencies without package files" clarification = json.loads(response["content"]) @@ -262,8 +303,8 @@ class TestCollaborationWorkflow: # Step 1: Initial request returns clarification needed clarification_json = json.dumps( { - "status": "clarification_required", - "question": "I need to see the configuration file to understand the connection settings", + "status": "files_required_to_continue", + "mandatory_instructions": "I need to see the configuration file to understand the connection settings", "files_needed": ["config.py"], } ) @@ -284,7 +325,7 @@ class TestCollaborationWorkflow: ) response1 = json.loads(result1[0].text) - assert response1["status"] == "clarification_required" + assert response1["status"] == "files_required_to_continue" # Step 2: Claude would provide additional context and re-invoke # This simulates the second call with more context diff --git a/tests/test_conversation_memory.py b/tests/test_conversation_memory.py index e5f8933..ae1f5e3 100644 --- a/tests/test_conversation_memory.py +++ b/tests/test_conversation_memory.py @@ -26,11 +26,11 @@ from utils.conversation_memory import ( class TestConversationMemory: """Test the conversation memory system for stateless MCP requests""" - @patch("utils.conversation_memory.get_redis_client") - def test_create_thread(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_create_thread(self, mock_storage): """Test creating a new thread""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client thread_id = create_thread("chat", {"prompt": "Hello", "files": ["/test.py"]}) @@ -43,11 +43,11 @@ class TestConversationMemory: assert call_args[0][0] == f"thread:{thread_id}" # key assert call_args[0][1] == CONVERSATION_TIMEOUT_SECONDS # TTL from configuration - @patch("utils.conversation_memory.get_redis_client") - def test_get_thread_valid(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_get_thread_valid(self, mock_storage): """Test retrieving an existing thread""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client test_uuid = "12345678-1234-1234-1234-123456789012" @@ -69,27 +69,27 @@ class TestConversationMemory: assert context.tool_name == "chat" mock_client.get.assert_called_once_with(f"thread:{test_uuid}") - @patch("utils.conversation_memory.get_redis_client") - def test_get_thread_invalid_uuid(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_get_thread_invalid_uuid(self, mock_storage): """Test handling invalid UUID""" context = get_thread("invalid-uuid") assert context is None - @patch("utils.conversation_memory.get_redis_client") - def test_get_thread_not_found(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_get_thread_not_found(self, mock_storage): """Test handling thread not found""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client mock_client.get.return_value = None context = get_thread("12345678-1234-1234-1234-123456789012") assert context is None - @patch("utils.conversation_memory.get_redis_client") - def test_add_turn_success(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def 
test_add_turn_success(self, mock_storage): """Test adding a turn to existing thread""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client test_uuid = "12345678-1234-1234-1234-123456789012" @@ -111,11 +111,11 @@ class TestConversationMemory: mock_client.get.assert_called_once() mock_client.setex.assert_called_once() - @patch("utils.conversation_memory.get_redis_client") - def test_add_turn_max_limit(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_add_turn_max_limit(self, mock_storage): """Test turn limit enforcement""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client test_uuid = "12345678-1234-1234-1234-123456789012" @@ -237,11 +237,11 @@ class TestConversationMemory: class TestConversationFlow: """Test complete conversation flows simulating stateless MCP requests""" - @patch("utils.conversation_memory.get_redis_client") - def test_complete_conversation_cycle(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_complete_conversation_cycle(self, mock_storage): """Test a complete 5-turn conversation until limit reached""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Simulate independent MCP request cycles @@ -341,13 +341,13 @@ class TestConversationFlow: success = add_turn(thread_id, "user", "This should be rejected") assert success is False # CONVERSATION STOPS HERE - @patch("utils.conversation_memory.get_redis_client") - def test_invalid_continuation_id_error(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_invalid_continuation_id_error(self, mock_storage): """Test that invalid continuation IDs raise proper error for restart""" from server import reconstruct_thread_context mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client mock_client.get.return_value = None # Thread not found arguments = {"continuation_id": "invalid-uuid-12345", "prompt": "Continue conversation"} @@ -439,11 +439,11 @@ class TestConversationFlow: expected_remaining = MAX_CONVERSATION_TURNS - 1 assert f"({expected_remaining} exchanges remaining)" in instructions - @patch("utils.conversation_memory.get_redis_client") - def test_complete_conversation_with_dynamic_turns(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_complete_conversation_with_dynamic_turns(self, mock_storage): """Test complete conversation respecting MAX_CONVERSATION_TURNS dynamically""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client thread_id = create_thread("chat", {"prompt": "Start conversation"}) @@ -495,16 +495,16 @@ class TestConversationFlow: success = add_turn(thread_id, "user", "This should fail") assert success is False, f"Turn {MAX_CONVERSATION_TURNS + 1} should fail" - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False) - def test_conversation_with_files_and_context_preservation(self, mock_redis): + def test_conversation_with_files_and_context_preservation(self, mock_storage): """Test complete conversation flow with file tracking and context preservation""" from providers.registry import ModelProviderRegistry ModelProviderRegistry.clear_cache() mock_client = Mock() - mock_redis.return_value = mock_client + 
mock_storage.return_value = mock_client # Start conversation with files thread_id = create_thread("analyze", {"prompt": "Analyze this codebase", "files": ["/project/src/"]}) @@ -648,11 +648,11 @@ class TestConversationFlow: assert turn_1_pos < turn_2_pos < turn_3_pos - @patch("utils.conversation_memory.get_redis_client") - def test_stateless_request_isolation(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_stateless_request_isolation(self, mock_storage): """Test that each request cycle is independent but shares context via Redis""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Simulate two different "processes" accessing same thread thread_id = "12345678-1234-1234-1234-123456789012" diff --git a/tests/test_cross_tool_continuation.py b/tests/test_cross_tool_continuation.py index ac4a95a..4f52464 100644 --- a/tests/test_cross_tool_continuation.py +++ b/tests/test_cross_tool_continuation.py @@ -93,12 +93,12 @@ class TestCrossToolContinuation: self.analysis_tool = MockAnalysisTool() self.review_tool = MockReviewTool() - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_continuation_id_works_across_different_tools(self, mock_redis): + async def test_continuation_id_works_across_different_tools(self, mock_storage): """Test that a continuation_id from one tool can be used with another tool""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Step 1: Analysis tool creates a conversation with continuation offer with patch.object(self.analysis_tool, "get_model_provider") as mock_get_provider: @@ -195,11 +195,11 @@ I'd be happy to review these security findings in detail if that would be helpfu assert second_turn["tool_name"] == "test_review" # New tool name assert "Critical security vulnerability confirmed" in second_turn["content"] - @patch("utils.conversation_memory.get_redis_client") - def test_cross_tool_conversation_history_includes_tool_names(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_cross_tool_conversation_history_includes_tool_names(self, mock_storage): """Test that conversation history properly shows which tool was used for each turn""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Create a thread context with turns from different tools thread_context = ThreadContext( @@ -247,13 +247,13 @@ I'd be happy to review these security findings in detail if that would be helpfu assert "Review complete: 2 critical, 1 minor issue" in history assert "Deep analysis: Root cause identified" in history - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch("utils.conversation_memory.get_thread") @patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False) - async def test_cross_tool_conversation_with_files_context(self, mock_get_thread, mock_redis): + async def test_cross_tool_conversation_with_files_context(self, mock_get_thread, mock_storage): """Test that file context is preserved across tool switches""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Create existing context with files from analysis tool existing_context = ThreadContext( @@ -317,12 +317,12 @@ I'd be happy to review these security findings in 
detail if that would be helpfu analysis_turn = final_context["turns"][0] # First turn (analysis tool) assert analysis_turn["files"] == ["/src/auth.py", "/src/utils.py"] - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch("utils.conversation_memory.get_thread") - def test_thread_preserves_original_tool_name(self, mock_get_thread, mock_redis): + def test_thread_preserves_original_tool_name(self, mock_get_thread, mock_storage): """Test that the thread's original tool_name is preserved even when other tools contribute""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Create existing thread from analysis tool existing_context = ThreadContext( diff --git a/tests/test_custom_provider.py b/tests/test_custom_provider.py index 5a0275f..79080a6 100644 --- a/tests/test_custom_provider.py +++ b/tests/test_custom_provider.py @@ -31,8 +31,9 @@ class TestCustomProvider: def test_provider_initialization_missing_url(self): """Test CustomProvider raises error when URL is missing.""" - with pytest.raises(ValueError, match="Custom API URL must be provided"): - CustomProvider(api_key="test-key") + with patch.dict(os.environ, {"CUSTOM_API_URL": ""}, clear=False): + with pytest.raises(ValueError, match="Custom API URL must be provided"): + CustomProvider(api_key="test-key") def test_validate_model_names_always_true(self): """Test CustomProvider accepts any model name.""" diff --git a/tests/test_directory_expansion_tracking.py b/tests/test_directory_expansion_tracking.py index b9da217..87e72fe 100644 --- a/tests/test_directory_expansion_tracking.py +++ b/tests/test_directory_expansion_tracking.py @@ -121,10 +121,10 @@ def helper_function(): assert any(str(Path(f).resolve()) == expected_resolved for f in captured_files) @pytest.mark.asyncio - @patch("utils.conversation_memory.get_redis_client") + @patch("utils.conversation_memory.get_storage") @patch("providers.ModelProviderRegistry.get_provider_for_model") async def test_conversation_continuation_with_directory_files( - self, mock_get_provider, mock_redis, tool, temp_directory_with_files + self, mock_get_provider, mock_storage, tool, temp_directory_with_files ): """Test that conversation continuation works correctly with directory expansion""" # Setup mock Redis client with in-memory storage @@ -140,7 +140,7 @@ def helper_function(): mock_client.get.side_effect = mock_get mock_client.setex.side_effect = mock_setex - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Setup mock provider mock_provider = create_mock_provider() @@ -196,8 +196,8 @@ def helper_function(): # This test shows the fix is working - conversation continuation properly filters out # already-embedded files. The exact length depends on whether any new files are found. 
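A note on the storage interface these tests assume: every mock in these conversation-memory tests exercises the client only through get, set, setex, exists, and delete — the same call surface the old Redis client exposed. A minimal sketch of a dict-backed, TTL-aware in-memory store that would satisfy these mocks might look like the following (the InMemoryStorage name and method set are illustrative assumptions inferred from the mocked calls, not the project's actual implementation):

import time


class InMemoryStorage:
    """Dict-backed store exposing the Redis-style subset used by conversation memory."""

    def __init__(self):
        self._data = {}  # key -> (value, expires_at or None)

    def set(self, key, value):
        self._data[key] = (value, None)
        return True

    def setex(self, key, ttl_seconds, value):
        # Redis-style SETEX: store the value together with a time-to-live
        self._data[key] = (value, time.monotonic() + ttl_seconds)
        return True

    def get(self, key):
        entry = self._data.get(key)
        if entry is None:
            return None
        value, expires_at = entry
        if expires_at is not None and time.monotonic() > expires_at:
            del self._data[key]  # lazy expiry on read
            return None
        return value

    def exists(self, key):
        return 1 if self.get(key) is not None else 0

    def delete(self, key):
        return 1 if self._data.pop(key, None) is not None else 0

Because the call surface is unchanged, the Mock()-based fixtures in these tests work identically whether get_storage returns a Redis client or an in-process store like this one.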
- @patch("utils.conversation_memory.get_redis_client") - def test_get_conversation_embedded_files_with_expanded_files(self, mock_redis, tool, temp_directory_with_files): + @patch("utils.conversation_memory.get_storage") + def test_get_conversation_embedded_files_with_expanded_files(self, mock_storage, tool, temp_directory_with_files): """Test that get_conversation_embedded_files returns expanded files""" # Setup mock Redis client with in-memory storage mock_client = Mock() @@ -212,7 +212,7 @@ def helper_function(): mock_client.get.side_effect = mock_get mock_client.setex.side_effect = mock_setex - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client directory = temp_directory_with_files["directory"] expected_files = temp_directory_with_files["files"] @@ -237,8 +237,8 @@ def helper_function(): assert set(embedded_files) == set(expected_files) assert directory not in embedded_files - @patch("utils.conversation_memory.get_redis_client") - def test_file_filtering_with_mixed_files_and_directories(self, mock_redis, tool, temp_directory_with_files): + @patch("utils.conversation_memory.get_storage") + def test_file_filtering_with_mixed_files_and_directories(self, mock_storage, tool, temp_directory_with_files): """Test file filtering when request contains both individual files and directories""" # Setup mock Redis client with in-memory storage mock_client = Mock() @@ -253,7 +253,7 @@ def helper_function(): mock_client.get.side_effect = mock_get mock_client.setex.side_effect = mock_setex - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client directory = temp_directory_with_files["directory"] python_file = temp_directory_with_files["python_file"] diff --git a/tests/test_docker_path_integration.py b/tests/test_docker_path_integration.py deleted file mode 100644 index f445d72..0000000 --- a/tests/test_docker_path_integration.py +++ /dev/null @@ -1,320 +0,0 @@ -""" -Integration tests for Docker path translation - -These tests verify the actual behavior when running in a Docker-like environment -by creating temporary directories and testing the path translation logic. 
-""" - -import importlib -import os -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -# We'll reload the module to test different environment configurations -import utils.file_utils - - -def test_docker_path_translation_integration(): - """Test path translation in a simulated Docker environment""" - - with tempfile.TemporaryDirectory() as tmpdir: - # Set up directories - host_workspace = Path(tmpdir) / "host_workspace" - host_workspace.mkdir() - container_workspace = Path(tmpdir) / "container_workspace" - container_workspace.mkdir() - - # Create a test file structure - (host_workspace / "src").mkdir() - test_file = host_workspace / "src" / "test.py" - test_file.write_text("# test file") - - # Set environment variables and reload the module - original_env = os.environ.copy() - try: - os.environ["WORKSPACE_ROOT"] = str(host_workspace) - - # Reload the modules to pick up new environment variables - # Need to reload security_config first since it sets WORKSPACE_ROOT - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - # Properly mock the CONTAINER_WORKSPACE - with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace): - # Test the translation - from utils.file_utils import translate_path_for_environment - - # This should translate the host path to container path - host_path = str(test_file) - result = translate_path_for_environment(host_path) - - # Verify the translation worked - expected = str(container_workspace / "src" / "test.py") - assert result == expected - - finally: - # Restore original environment - os.environ.clear() - os.environ.update(original_env) - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - -def test_docker_security_validation(): - """Test that path traversal attempts are properly blocked""" - - with tempfile.TemporaryDirectory() as tmpdir: - # Set up directories - host_workspace = Path(tmpdir) / "workspace" - host_workspace.mkdir() - secret_dir = Path(tmpdir) / "secret" - secret_dir.mkdir() - secret_file = secret_dir / "password.txt" - secret_file.write_text("secret") - - # Create a symlink inside workspace pointing to secret - symlink = host_workspace / "link_to_secret" - symlink.symlink_to(secret_file) - - original_env = os.environ.copy() - try: - os.environ["WORKSPACE_ROOT"] = str(host_workspace) - - # Reload the modules - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - # Properly mock the CONTAINER_WORKSPACE - with patch("utils.file_utils.CONTAINER_WORKSPACE", Path("/workspace")): - from utils.file_utils import resolve_and_validate_path - - # Trying to access the symlink should fail - with pytest.raises(PermissionError): - resolve_and_validate_path(str(symlink)) - - finally: - os.environ.clear() - os.environ.update(original_env) - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - -def test_no_docker_environment(): - """Test that paths are unchanged when Docker environment is not set""" - - original_env = os.environ.copy() - try: - # Clear Docker-related environment variables - os.environ.pop("WORKSPACE_ROOT", None) - - # Reload the module - importlib.reload(utils.file_utils) - - from utils.file_utils import translate_path_for_environment - - # Path should remain unchanged - test_path = "/some/random/path.py" - assert 
translate_path_for_environment(test_path) == test_path - - finally: - os.environ.clear() - os.environ.update(original_env) - importlib.reload(utils.file_utils) - - -def test_review_changes_docker_path_translation(): - """Test that review_changes tool properly translates Docker paths""" - - with tempfile.TemporaryDirectory() as tmpdir: - # Set up directories to simulate Docker mount - host_workspace = Path(tmpdir) / "host_workspace" - host_workspace.mkdir() - container_workspace = Path(tmpdir) / "container_workspace" - container_workspace.mkdir() - - # Create a git repository in the container workspace - project_dir = container_workspace / "project" - project_dir.mkdir() - - # Initialize git repo - import subprocess - - subprocess.run(["git", "init"], cwd=project_dir, capture_output=True) - - # Create a test file - test_file = project_dir / "test.py" - test_file.write_text("print('hello')") - - # Stage the file - subprocess.run(["git", "add", "test.py"], cwd=project_dir, capture_output=True) - - original_env = os.environ.copy() - try: - # Simulate Docker environment - os.environ["WORKSPACE_ROOT"] = str(host_workspace) - - # Reload the modules - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - # Properly mock the CONTAINER_WORKSPACE and reload precommit module - with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace): - # Need to also patch it in the modules that import it - with patch("utils.security_config.CONTAINER_WORKSPACE", container_workspace): - # Import after patching to get updated environment - from tools.precommit import Precommit - - # Create tool instance - tool = Precommit() - - # Test path translation in prepare_prompt - request = tool.get_request_model()( - path=str(host_workspace / "project"), # Host path that needs translation - review_type="quick", - severity_filter="all", - ) - - # This should translate the path and find the git repository - import asyncio - - result = asyncio.run(tool.prepare_prompt(request)) - - # Should find the repository (not raise an error about inaccessible path) - # If we get here without exception, the path was successfully translated - assert isinstance(result, str) - # The result should contain git diff information or indicate no changes - assert "No git repositories found" not in result or "changes" in result.lower() - - finally: - os.environ.clear() - os.environ.update(original_env) - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - -def test_review_changes_docker_path_error(): - """Test that review_changes tool raises error for inaccessible paths""" - - with tempfile.TemporaryDirectory() as tmpdir: - # Set up directories to simulate Docker mount - host_workspace = Path(tmpdir) / "host_workspace" - host_workspace.mkdir() - container_workspace = Path(tmpdir) / "container_workspace" - container_workspace.mkdir() - - # Create a path outside the mounted workspace - outside_path = Path(tmpdir) / "outside_workspace" - outside_path.mkdir() - - original_env = os.environ.copy() - try: - # Simulate Docker environment - os.environ["WORKSPACE_ROOT"] = str(host_workspace) - - # Reload the modules - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - # Properly mock the CONTAINER_WORKSPACE - with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace): - with patch("utils.security_config.CONTAINER_WORKSPACE", container_workspace): - # 
Import after patching to get updated environment - from tools.precommit import Precommit - - # Create tool instance - tool = Precommit() - - # Test path translation with an inaccessible path - request = tool.get_request_model()( - path=str(outside_path), # Path outside the mounted workspace - review_type="quick", - severity_filter="all", - ) - - # This should raise a ValueError - import asyncio - - with pytest.raises(ValueError) as exc_info: - asyncio.run(tool.prepare_prompt(request)) - - # Check the error message - assert "not accessible from within the Docker container" in str(exc_info.value) - assert "mounted workspace" in str(exc_info.value) - - finally: - os.environ.clear() - os.environ.update(original_env) - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - -def test_double_translation_prevention(): - """Test that already-translated paths are not double-translated""" - - with tempfile.TemporaryDirectory() as tmpdir: - # Set up directories - host_workspace = Path(tmpdir) / "host_workspace" - host_workspace.mkdir() - container_workspace = Path(tmpdir) / "container_workspace" - container_workspace.mkdir() - - original_env = os.environ.copy() - try: - os.environ["WORKSPACE_ROOT"] = str(host_workspace) - - # Reload the modules - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - # Properly mock the CONTAINER_WORKSPACE - with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace): - from utils.file_utils import translate_path_for_environment - - # Test 1: Normal translation - host_path = str(host_workspace / "src" / "main.py") - translated_once = translate_path_for_environment(host_path) - expected = str(container_workspace / "src" / "main.py") - assert translated_once == expected - - # Test 2: Double translation should return the same path - translated_twice = translate_path_for_environment(translated_once) - assert translated_twice == translated_once - assert translated_twice == expected - - # Test 3: Container workspace root should not be double-translated - root_path = str(container_workspace) - translated_root = translate_path_for_environment(root_path) - assert translated_root == root_path - - finally: - os.environ.clear() - os.environ.update(original_env) - import utils.security_config - - importlib.reload(utils.security_config) - importlib.reload(utils.file_utils) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/tests/test_file_protection.py b/tests/test_file_protection.py index 434e432..067eb0a 100644 --- a/tests/test_file_protection.py +++ b/tests/test_file_protection.py @@ -5,12 +5,10 @@ Test file protection mechanisms to ensure MCP doesn't scan: 3. 
Excluded directories """ -import os from pathlib import Path from unittest.mock import patch from utils.file_utils import ( - MCP_SIGNATURE_FILES, expand_paths, get_user_home_directory, is_home_directory_root, @@ -21,25 +19,31 @@ from utils.file_utils import ( class TestMCPDirectoryDetection: """Test MCP self-detection to prevent scanning its own code.""" - def test_detect_mcp_directory_with_all_signatures(self, tmp_path): - """Test detection when all signature files are present.""" - # Create a fake MCP directory with signature files - for sig_file in list(MCP_SIGNATURE_FILES)[:4]: # Use 4 files - if "/" in sig_file: - (tmp_path / sig_file).parent.mkdir(parents=True, exist_ok=True) - (tmp_path / sig_file).touch() + def test_detect_mcp_directory_dynamically(self, tmp_path): + """Test dynamic MCP directory detection based on script location.""" + # The is_mcp_directory function now uses __file__ to detect MCP location + # It checks if the given path is a subdirectory of the MCP server + from pathlib import Path - assert is_mcp_directory(tmp_path) is True + import utils.file_utils - def test_no_detection_with_few_signatures(self, tmp_path): - """Test no detection with only 1-2 signature files.""" - # Create only 2 signature files (less than threshold) - for sig_file in list(MCP_SIGNATURE_FILES)[:2]: - if "/" in sig_file: - (tmp_path / sig_file).parent.mkdir(parents=True, exist_ok=True) - (tmp_path / sig_file).touch() + # Get the actual MCP server directory + mcp_server_dir = Path(utils.file_utils.__file__).parent.parent.resolve() - assert is_mcp_directory(tmp_path) is False + # Test that the MCP server directory itself is detected + assert is_mcp_directory(mcp_server_dir) is True + + # Test that a subdirectory of MCP is also detected + if (mcp_server_dir / "tools").exists(): + assert is_mcp_directory(mcp_server_dir / "tools") is True + + def test_no_detection_on_non_mcp_directory(self, tmp_path): + """Test no detection on directories outside MCP.""" + # Any directory outside the MCP server should not be detected + non_mcp_dir = tmp_path / "some_other_project" + non_mcp_dir.mkdir() + + assert is_mcp_directory(non_mcp_dir) is False def test_no_detection_on_regular_directory(self, tmp_path): """Test no detection on regular project directories.""" @@ -59,7 +63,11 @@ class TestMCPDirectoryDetection: def test_mcp_directory_excluded_from_scan(self, tmp_path): """Test that MCP directories are excluded during path expansion.""" - # Create a project with MCP as subdirectory + # For this test, we need to mock is_mcp_directory since we can't + # actually create the MCP directory structure in tmp_path + from unittest.mock import patch as mock_patch + + # Create a project with a subdirectory we'll pretend is MCP project_root = tmp_path / "my_project" project_root.mkdir() @@ -67,19 +75,18 @@ class TestMCPDirectoryDetection: (project_root / "app.py").write_text("# My app") (project_root / "config.py").write_text("# Config") - # Create MCP subdirectory - mcp_dir = project_root / "gemini-mcp-server" - mcp_dir.mkdir() - for sig_file in list(MCP_SIGNATURE_FILES)[:4]: - if "/" in sig_file: - (mcp_dir / sig_file).parent.mkdir(parents=True, exist_ok=True) - (mcp_dir / sig_file).write_text("# MCP file") + # Create a subdirectory that we'll mock as MCP + fake_mcp_dir = project_root / "gemini-mcp-server" + fake_mcp_dir.mkdir() + (fake_mcp_dir / "server.py").write_text("# MCP server") + (fake_mcp_dir / "test.py").write_text("# Should not be included") - # Also add a regular file to MCP dir - (mcp_dir / 
"test.py").write_text("# Should not be included") + # Mock is_mcp_directory to return True for our fake MCP dir + def mock_is_mcp(path): + return str(path).endswith("gemini-mcp-server") - # Scan the project - use parent as SECURITY_ROOT to avoid workspace root check - with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + # Scan the project with mocked MCP detection + with mock_patch("utils.file_utils.is_mcp_directory", side_effect=mock_is_mcp): files = expand_paths([str(project_root)]) # Verify project files are included but MCP files are not @@ -135,42 +142,45 @@ class TestHomeDirectoryProtection: """Test that home directory root is excluded during path expansion.""" with patch("utils.file_utils.get_user_home_directory") as mock_home: mock_home.return_value = tmp_path - with patch("utils.file_utils.SECURITY_ROOT", tmp_path): - # Try to scan home directory - files = expand_paths([str(tmp_path)]) - # Should return empty as home root is skipped - assert files == [] + # Try to scan home directory + files = expand_paths([str(tmp_path)]) + # Should return empty as home root is skipped + assert files == [] class TestUserHomeEnvironmentVariable: """Test USER_HOME environment variable handling.""" - def test_user_home_from_env(self): - """Test USER_HOME is used when set.""" - test_home = "/Users/dockeruser" - with patch.dict(os.environ, {"USER_HOME": test_home}): + def test_user_home_from_pathlib(self): + """Test that get_user_home_directory uses Path.home().""" + with patch("pathlib.Path.home") as mock_home: + mock_home.return_value = Path("/Users/testuser") home = get_user_home_directory() - assert home == Path(test_home).resolve() + assert home == Path("/Users/testuser") - def test_fallback_to_workspace_root_in_docker(self): - """Test fallback to WORKSPACE_ROOT in Docker when USER_HOME not set.""" - with patch("utils.file_utils.WORKSPACE_ROOT", "/Users/realuser"): - with patch("utils.file_utils.CONTAINER_WORKSPACE") as mock_container: - mock_container.exists.return_value = True - # Clear USER_HOME to test fallback - with patch.dict(os.environ, {"USER_HOME": ""}, clear=False): - home = get_user_home_directory() - assert str(home) == "/Users/realuser" + def test_get_home_directory_uses_pathlib(self): + """Test that get_user_home_directory always uses Path.home().""" + with patch("pathlib.Path.home") as mock_home: + mock_home.return_value = Path("/home/testuser") + home = get_user_home_directory() + assert home == Path("/home/testuser") + # Verify Path.home() was called + mock_home.assert_called_once() - def test_fallback_to_system_home(self): - """Test fallback to system home when not in Docker.""" - with patch.dict(os.environ, {}, clear=True): - with patch("utils.file_utils.CONTAINER_WORKSPACE") as mock_container: - mock_container.exists.return_value = False - with patch("pathlib.Path.home") as mock_home: - mock_home.return_value = Path("/home/user") - home = get_user_home_directory() - assert home == Path("/home/user") + def test_home_directory_on_different_platforms(self): + """Test home directory detection on different platforms.""" + # Test different platform home directories + test_homes = [ + Path("/Users/john"), # macOS + Path("/home/ubuntu"), # Linux + Path("C:\\Users\\John"), # Windows + ] + + for test_home in test_homes: + with patch("pathlib.Path.home") as mock_home: + mock_home.return_value = test_home + home = get_user_home_directory() + assert home == test_home class TestExcludedDirectories: @@ -198,8 +208,7 @@ class TestExcludedDirectories: src.mkdir() (src / 
"utils.py").write_text("# Utils") - with patch("utils.file_utils.SECURITY_ROOT", tmp_path): - files = expand_paths([str(project)]) + files = expand_paths([str(project)]) file_names = [Path(f).name for f in files] @@ -226,8 +235,7 @@ class TestExcludedDirectories: # Create an allowed file (project / "index.js").write_text("// Index") - with patch("utils.file_utils.SECURITY_ROOT", tmp_path): - files = expand_paths([str(project)]) + files = expand_paths([str(project)]) file_names = [Path(f).name for f in files] @@ -254,10 +262,12 @@ class TestIntegrationScenarios: # MCP cloned inside the project mcp = user_project / "tools" / "gemini-mcp-server" mcp.mkdir(parents=True) - for sig_file in list(MCP_SIGNATURE_FILES)[:4]: - if "/" in sig_file: - (mcp / sig_file).parent.mkdir(parents=True, exist_ok=True) - (mcp / sig_file).write_text("# MCP code") + # Create typical MCP files + (mcp / "server.py").write_text("# MCP server code") + (mcp / "config.py").write_text("# MCP config") + tools_dir = mcp / "tools" + tools_dir.mkdir() + (tools_dir / "chat.py").write_text("# Chat tool") (mcp / "LICENSE").write_text("MIT License") (mcp / "README.md").write_text("# Gemini MCP") @@ -266,7 +276,11 @@ class TestIntegrationScenarios: node_modules.mkdir() (node_modules / "package.json").write_text("{}") - with patch("utils.file_utils.SECURITY_ROOT", tmp_path): + # Mock is_mcp_directory for this test + def mock_is_mcp(path): + return "gemini-mcp-server" in str(path) + + with patch("utils.file_utils.is_mcp_directory", side_effect=mock_is_mcp): files = expand_paths([str(user_project)]) file_paths = [str(f) for f in files] @@ -278,23 +292,28 @@ class TestIntegrationScenarios: # MCP files should NOT be included assert not any("gemini-mcp-server" in p for p in file_paths) - assert not any("zen_server.py" in p for p in file_paths) + assert not any("server.py" in p for p in file_paths) # node_modules should NOT be included assert not any("node_modules" in p for p in file_paths) - def test_cannot_scan_above_workspace_root(self, tmp_path): - """Test that we cannot scan outside the workspace root.""" - workspace = tmp_path / "workspace" - workspace.mkdir() + def test_security_without_workspace_root(self, tmp_path): + """Test that security still works with the new security model.""" + # The system now relies on is_dangerous_path and is_home_directory_root + # for security protection - # Create a file in workspace - (workspace / "allowed.py").write_text("# Allowed") + # Test that we can scan regular project directories + project_dir = tmp_path / "my_project" + project_dir.mkdir() + (project_dir / "app.py").write_text("# App") - # Create a file outside workspace - (tmp_path / "outside.py").write_text("# Outside") + files = expand_paths([str(project_dir)]) + assert len(files) == 1 + assert "app.py" in files[0] - with patch("utils.file_utils.SECURITY_ROOT", workspace): - # Try to expand paths outside workspace - should return empty list + # Test that home directory root is still protected + with patch("utils.file_utils.get_user_home_directory") as mock_home: + mock_home.return_value = tmp_path + # Scanning home root should return empty files = expand_paths([str(tmp_path)]) - assert files == [] # Path outside workspace is skipped silently + assert files == [] diff --git a/tests/test_image_support_integration.py b/tests/test_image_support_integration.py index 32e24f4..1e38e01 100644 --- a/tests/test_image_support_integration.py +++ b/tests/test_image_support_integration.py @@ -80,11 +80,11 @@ class TestImageSupportIntegration: 
expected = ["shared.png", "new_diagram.png", "middle.png", "old_diagram.png"] assert image_list == expected - @patch("utils.conversation_memory.get_redis_client") - def test_add_turn_with_images(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_add_turn_with_images(self, mock_storage): """Test adding a conversation turn with images.""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the Redis operations to return success mock_client.set.return_value = True @@ -348,11 +348,11 @@ class TestImageSupportIntegration: importlib.reload(config) ModelProviderRegistry._instance = None - @patch("utils.conversation_memory.get_redis_client") - def test_cross_tool_image_context_preservation(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_cross_tool_image_context_preservation(self, mock_storage): """Test that images are preserved across different tools in conversation.""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the Redis operations to return success mock_client.set.return_value = True @@ -521,11 +521,11 @@ class TestImageSupportIntegration: result = tool._validate_image_limits(None, "test_model") assert result is None - @patch("utils.conversation_memory.get_redis_client") - def test_conversation_memory_thread_chaining_with_images(self, mock_redis): + @patch("utils.conversation_memory.get_storage") + def test_conversation_memory_thread_chaining_with_images(self, mock_storage): """Test that images work correctly with conversation thread chaining.""" mock_client = Mock() - mock_redis.return_value = mock_client + mock_storage.return_value = mock_client # Mock the Redis operations to return success mock_client.set.return_value = True diff --git a/tests/test_intelligent_fallback.py b/tests/test_intelligent_fallback.py index f783dd2..6d7637e 100644 --- a/tests/test_intelligent_fallback.py +++ b/tests/test_intelligent_fallback.py @@ -39,7 +39,7 @@ class TestIntelligentFallback: def test_prefers_openai_o3_mini_when_available(self): """Test that o4-mini is preferred when OpenAI API key is available""" # Register only OpenAI provider for this test - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) @@ -62,7 +62,7 @@ class TestIntelligentFallback: """Test that OpenAI is preferred when both API keys are available""" # Register both OpenAI and Gemini providers from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) @@ -75,7 +75,7 @@ class TestIntelligentFallback: """Test fallback behavior when no API keys are available""" # Register providers but with no API keys available from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) @@ -86,7 +86,7 @@ class TestIntelligentFallback: def test_available_providers_with_keys(self): """Test the 
get_available_providers_with_keys method""" from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False): # Clear and register providers @@ -119,7 +119,7 @@ class TestIntelligentFallback: patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False), ): # Register only OpenAI provider for this test - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) diff --git a/tests/test_large_prompt_handling.py b/tests/test_large_prompt_handling.py index 855b6ae..14e249a 100644 --- a/tests/test_large_prompt_handling.py +++ b/tests/test_large_prompt_handling.py @@ -246,9 +246,9 @@ class TestLargePromptHandling: assert len(result) == 1 output = json.loads(result[0].text) - # The precommit tool may return success or clarification_required depending on git state + # The precommit tool may return success or files_required_to_continue depending on git state # The core fix ensures large prompts are detected at the right time - assert output["status"] in ["success", "clarification_required", "resend_prompt"] + assert output["status"] in ["success", "files_required_to_continue", "resend_prompt"] @pytest.mark.asyncio async def test_debug_large_error_description(self, large_prompt): @@ -298,17 +298,26 @@ class TestLargePromptHandling: ) mock_get_provider.return_value = mock_provider - # Mock the centralized file preparation method to avoid file system access - with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files: - mock_prepare_files.return_value = ("File content", [other_file]) + # Mock handle_prompt_file to verify prompt.txt is handled + with patch.object(tool, "handle_prompt_file") as mock_handle_prompt: + # Return the prompt content and updated files list (without prompt.txt) + mock_handle_prompt.return_value = ("Large prompt content from file", [other_file]) - await tool.execute({"prompt": "", "files": [temp_prompt_file, other_file]}) + # Mock the centralized file preparation method + with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files: + mock_prepare_files.return_value = ("File content", [other_file]) - # Verify prompt.txt was removed from files list - mock_prepare_files.assert_called_once() - files_arg = mock_prepare_files.call_args[0][0] - assert len(files_arg) == 1 - assert files_arg[0] == other_file + # Use a small prompt to avoid triggering size limit + await tool.execute({"prompt": "Test prompt", "files": [temp_prompt_file, other_file]}) + + # Verify handle_prompt_file was called with the original files list + mock_handle_prompt.assert_called_once_with([temp_prompt_file, other_file]) + + # Verify _prepare_file_content_for_prompt was called with the updated files list (without prompt.txt) + mock_prepare_files.assert_called_once() + files_arg = mock_prepare_files.call_args[0][0] + assert len(files_arg) == 1 + assert files_arg[0] == other_file temp_dir = os.path.dirname(temp_prompt_file) shutil.rmtree(temp_dir) diff --git a/tests/test_model_restrictions.py b/tests/test_model_restrictions.py index 9d6f000..75af28b 100644 --- a/tests/test_model_restrictions.py +++ b/tests/test_model_restrictions.py @@ -7,7 +7,7 @@ import pytest from providers.base import ProviderType from 
providers.gemini import GeminiModelProvider -from providers.openai import OpenAIModelProvider +from providers.openai_provider import OpenAIModelProvider from utils.model_restrictions import ModelRestrictionService @@ -677,7 +677,7 @@ class TestAutoModeWithRestrictions: # Clear registry and register only OpenAI and Gemini providers ModelProviderRegistry._instance = None from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) diff --git a/tests/test_old_behavior_simulation.py b/tests/test_old_behavior_simulation.py index 19c9e23..1f6ebc5 100644 --- a/tests/test_old_behavior_simulation.py +++ b/tests/test_old_behavior_simulation.py @@ -195,7 +195,7 @@ class TestOldBehaviorSimulation: Verify that our fix provides comprehensive alias->target coverage. """ from providers.gemini import GeminiModelProvider - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider # Test real providers to ensure they implement our fix correctly providers = [OpenAIModelProvider(api_key="test-key"), GeminiModelProvider(api_key="test-key")] diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py index d63a486..55548b7 100644 --- a/tests/test_openai_provider.py +++ b/tests/test_openai_provider.py @@ -4,7 +4,7 @@ import os from unittest.mock import MagicMock, patch from providers.base import ProviderType -from providers.openai import OpenAIModelProvider +from providers.openai_provider import OpenAIModelProvider class TestOpenAIProvider: diff --git a/tests/test_planner.py b/tests/test_planner.py index 5f9561f..182bba6 100644 --- a/tests/test_planner.py +++ b/tests/test_planner.py @@ -115,7 +115,7 @@ class TestPlannerTool: """Test execute method for subsequent planning step.""" tool = PlannerTool() arguments = { - "step": "Set up Docker containers for each microservice", + "step": "Set up deployment configuration for each microservice", "step_number": 2, "total_steps": 8, "next_step_required": True, diff --git a/tests/test_precommit_with_mock_store.py b/tests/test_precommit_with_mock_store.py index 4cd8b28..5e5afb0 100644 --- a/tests/test_precommit_with_mock_store.py +++ b/tests/test_precommit_with_mock_store.py @@ -4,7 +4,6 @@ Enhanced tests for precommit tool using mock storage to test real logic import os import tempfile -from pathlib import Path from typing import Optional from unittest.mock import patch @@ -50,21 +49,18 @@ class TestPrecommitToolWithMockStore: """Test precommit tool with mock storage to validate actual logic""" @pytest.fixture - def mock_redis(self): + def mock_storage(self): """Create mock Redis client""" return MockRedisClient() @pytest.fixture - def tool(self, mock_redis, temp_repo): + def tool(self, mock_storage, temp_repo): """Create tool instance with mocked Redis""" temp_dir, _ = temp_repo tool = Precommit() - # Mock the Redis client getter and SECURITY_ROOT to allow access to temp files - with ( - patch("utils.conversation_memory.get_redis_client", return_value=mock_redis), - patch("utils.file_utils.SECURITY_ROOT", Path(temp_dir).resolve()), - ): + # Mock the storage getter to use our mock storage + with patch("utils.conversation_memory.get_storage", return_value=mock_storage): yield tool @pytest.fixture @@ -112,7 +108,7 @@ TEMPERATURE_ANALYTICAL = 0.2 # 
For code review, debugging shutil.rmtree(temp_dir) @pytest.mark.asyncio - async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis): + async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_storage): """Test that file content appears in expected locations This test validates our design decision that files can legitimately appear in both: @@ -145,12 +141,12 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging # This is intentional and provides comprehensive context to the AI @pytest.mark.asyncio - async def test_conversation_memory_integration(self, tool, temp_repo, mock_redis): + async def test_conversation_memory_integration(self, tool, temp_repo, mock_storage): """Test that conversation memory works with mock storage""" temp_dir, config_path = temp_repo # Mock conversation memory functions to use our mock redis - with patch("utils.conversation_memory.get_redis_client", return_value=mock_redis): + with patch("utils.conversation_memory.get_storage", return_value=mock_storage): # First request - should embed file content PrecommitRequest(path=temp_dir, files=[config_path], prompt="First review") @@ -173,7 +169,7 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging assert len(files_to_embed_2) == 0, "Continuation should skip already embedded files" @pytest.mark.asyncio - async def test_prompt_structure_integrity(self, tool, temp_repo, mock_redis): + async def test_prompt_structure_integrity(self, tool, temp_repo, mock_storage): """Test that the prompt structure is well-formed and doesn't have content duplication""" temp_dir, config_path = temp_repo @@ -227,7 +223,7 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging assert '__version__ = "1.0.0"' not in after_file_section @pytest.mark.asyncio - async def test_file_content_formatting(self, tool, temp_repo, mock_redis): + async def test_file_content_formatting(self, tool, temp_repo, mock_storage): """Test that file content is properly formatted without duplication""" temp_dir, config_path = temp_repo @@ -254,18 +250,18 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging assert file_content.count('__version__ = "1.0.0"') == 1 -def test_mock_redis_basic_operations(): +def test_mock_storage_basic_operations(): """Test that our mock Redis implementation works correctly""" - mock_redis = MockRedisClient() + mock_storage = MockRedisClient() # Test basic operations - assert mock_redis.get("nonexistent") is None - assert mock_redis.exists("nonexistent") == 0 + assert mock_storage.get("nonexistent") is None + assert mock_storage.exists("nonexistent") == 0 - mock_redis.set("test_key", "test_value") - assert mock_redis.get("test_key") == "test_value" - assert mock_redis.exists("test_key") == 1 + mock_storage.set("test_key", "test_value") + assert mock_storage.get("test_key") == "test_value" + assert mock_storage.exists("test_key") == 1 - assert mock_redis.delete("test_key") == 1 - assert mock_redis.get("test_key") is None - assert mock_redis.delete("test_key") == 0 # Already deleted + assert mock_storage.delete("test_key") == 1 + assert mock_storage.get("test_key") is None + assert mock_storage.delete("test_key") == 0 # Already deleted diff --git a/tests/test_providers.py b/tests/test_providers.py index f436fa1..2920c92 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -8,7 +8,7 @@ import pytest from providers import ModelProviderRegistry, ModelResponse from providers.base import ProviderType from providers.gemini import GeminiModelProvider -from 
providers.openai import OpenAIModelProvider +from providers.openai_provider import OpenAIModelProvider class TestModelProviderRegistry: diff --git a/tests/test_rate_limit_patterns.py b/tests/test_rate_limit_patterns.py index 0ec446f..6de2176 100644 --- a/tests/test_rate_limit_patterns.py +++ b/tests/test_rate_limit_patterns.py @@ -3,7 +3,7 @@ Test to verify structured error code-based retry logic. """ from providers.gemini import GeminiModelProvider -from providers.openai import OpenAIModelProvider +from providers.openai_provider import OpenAIModelProvider def test_openai_structured_error_retry_logic(): diff --git a/tests/test_special_status_parsing.py b/tests/test_special_status_parsing.py index ba5eb34..913a843 100644 --- a/tests/test_special_status_parsing.py +++ b/tests/test_special_status_parsing.py @@ -84,17 +84,15 @@ class TestSpecialStatusParsing: assert result.content_type == "json" assert "pending_tests" in result.content - def test_clarification_required_still_works(self): - """Test that existing clarification_required still works""" - response_json = ( - '{"status": "clarification_required", "question": "What files need review?", "files_needed": ["src/"]}' - ) + def test_files_required_to_continue_still_works(self): + """Test that existing files_required_to_continue still works""" + response_json = '{"status": "files_required_to_continue", "mandatory_instructions": "What files need review?", "files_needed": ["src/"]}' result = self.tool._parse_response(response_json, self.request) - assert result.status == "clarification_required" + assert result.status == "files_required_to_continue" assert result.content_type == "json" - assert "question" in result.content + assert "mandatory_instructions" in result.content def test_invalid_status_payload(self): """Test that invalid payloads for known statuses are handled gracefully""" @@ -127,7 +125,7 @@ class TestSpecialStatusParsing: def test_malformed_json_handled(self): """Test that malformed JSON is handled gracefully""" - response_text = '{"status": "clarification_required", "question": "incomplete json' + response_text = '{"status": "files_required_to_continue", "question": "incomplete json' result = self.tool._parse_response(response_text, self.request) @@ -192,8 +190,8 @@ class TestSpecialStatusParsing: """Test that special status responses preserve exact JSON format for Claude""" test_cases = [ { - "input": '{"status": "clarification_required", "question": "What framework to use?", "files_needed": ["tests/"]}', - "expected_fields": ["status", "question", "files_needed"], + "input": '{"status": "files_required_to_continue", "mandatory_instructions": "What framework to use?", "files_needed": ["tests/"]}', + "expected_fields": ["status", "mandatory_instructions", "files_needed"], }, { "input": '{"status": "full_codereview_required", "reason": "Codebase too large"}', @@ -223,9 +221,20 @@ class TestSpecialStatusParsing: parsed_content = json.loads(result.content) for field in test_case["expected_fields"]: assert field in parsed_content, f"Field {field} missing from {input_data['status']} response" - assert ( - parsed_content[field] == input_data[field] - ), f"Field {field} value mismatch in {input_data['status']} response" + + # Special handling for mandatory_instructions which gets enhanced + if field == "mandatory_instructions" and input_data["status"] == "files_required_to_continue": + # Check that enhanced instructions contain the original message + assert parsed_content[field].startswith( + input_data[field] + ), f"Enhanced {field} 
should start with original value in {input_data['status']} response" + assert ( + "IMPORTANT GUIDANCE:" in parsed_content[field] + ), f"Enhanced {field} should contain guidance in {input_data['status']} response" + else: + assert ( + parsed_content[field] == input_data[field] + ), f"Field {field} value mismatch in {input_data['status']} response" def test_focused_review_required_parsing(self): """Test that focused_review_required status is parsed correctly""" diff --git a/tests/test_utils.py b/tests/test_utils.py index eed6980..a3add3e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -29,12 +29,14 @@ class TestFileUtils: assert "Error: File does not exist" in content assert tokens > 0 - def test_read_file_content_outside_project_root(self): - """Test that paths outside project root are rejected""" - # Try to read a file outside the project root + def test_read_file_content_safe_files_allowed(self): + """Test that safe files outside the original project root are now allowed""" + # In the new security model, safe files like /etc/passwd + # can be read as they're not in the dangerous paths list content, tokens = read_file_content("/etc/passwd") - assert "--- ERROR ACCESSING FILE:" in content - assert "Path outside workspace" in content + # Should successfully read the file + assert "--- BEGIN FILE: /etc/passwd ---" in content + assert "--- END FILE: /etc/passwd ---" in content assert tokens > 0 def test_read_file_content_relative_path_rejected(self): diff --git a/tools/base.py b/tools/base.py index b14aefa..6440198 100644 --- a/tools/base.py +++ b/tools/base.py @@ -37,7 +37,7 @@ from utils.conversation_memory import ( get_conversation_file_list, get_thread, ) -from utils.file_utils import read_file_content, read_files, translate_path_for_environment +from utils.file_utils import read_file_content, read_files from .models import SPECIAL_STATUS_MODELS, ContinuationOffer, ToolOutput @@ -1229,15 +1229,13 @@ When recommending searches, be specific about what information you need and why updated_files = [] for file_path in files: - # Translate path for current environment (Docker/direct) - translated_path = translate_path_for_environment(file_path) # Check if the filename is exactly "prompt.txt" # This ensures we don't match files like "myprompt.txt" or "prompt.txt.bak" - if os.path.basename(translated_path) == "prompt.txt": + if os.path.basename(file_path) == "prompt.txt": try: # Read prompt.txt content and extract just the text - content, _ = read_file_content(translated_path) + content, _ = read_file_content(file_path) # Extract the content between the file markers if "--- BEGIN FILE:" in content and "--- END FILE:" in content: lines = content.split("\n") @@ -1568,6 +1566,17 @@ When recommending searches, be specific about what information you need and why parsed_status = status_model.model_validate(potential_json) logger.debug(f"{self.name} tool detected special status: {status_key}") + # Enhance mandatory_instructions for files_required_to_continue + if status_key == "files_required_to_continue" and hasattr( + parsed_status, "mandatory_instructions" + ): + original_instructions = parsed_status.mandatory_instructions + enhanced_instructions = self._enhance_mandatory_instructions(original_instructions) + # Create a new model instance with enhanced instructions + enhanced_data = parsed_status.model_dump() + enhanced_data["mandatory_instructions"] = enhanced_instructions + parsed_status = status_model.model_validate(enhanced_data) + # Extract model information for metadata metadata = 
{ "original_request": ( @@ -1936,7 +1945,7 @@ When recommending searches, be specific about what information you need and why elif "gpt" in model_name.lower() or "o3" in model_name.lower(): # Register OpenAI provider if not already registered from providers.base import ProviderType - from providers.openai import OpenAIModelProvider + from providers.openai_provider import OpenAIModelProvider ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) provider = ModelProviderRegistry.get_provider(ProviderType.OPENAI) @@ -1948,3 +1957,28 @@ When recommending searches, be specific about what information you need and why ) return provider + + def _enhance_mandatory_instructions(self, original_instructions: str) -> str: + """ + Enhance mandatory instructions for files_required_to_continue responses. + + This adds generic guidance to help Claude understand the importance + of providing the requested files and context. + + Args: + original_instructions: The original instructions from the model + + Returns: + str: Enhanced instructions with additional guidance + """ + generic_guidance = ( + "\n\nIMPORTANT GUIDANCE:\n" + "โ€ข The requested files are CRITICAL for providing accurate analysis\n" + "โ€ข Please include ALL files mentioned in the files_needed list\n" + "โ€ข Use FULL absolute paths to real files/folders - DO NOT SHORTEN paths - and confirm that these exist\n" + "โ€ข If you cannot locate specific files or the files are extremely large, think hard, study the code and provide similar/related files that might contain the needed information\n" + "โ€ข After providing the files, use the same tool again with the continuation_id to continue the analysis\n" + "โ€ข The tool cannot proceed to perform its function accurately without this additional context" + ) + + return f"{original_instructions}{generic_guidance}" diff --git a/tools/chat.py b/tools/chat.py index 6257e9a..2d3efa9 100644 --- a/tools/chat.py +++ b/tools/chat.py @@ -17,8 +17,8 @@ from .base import BaseTool, ToolRequest # Field descriptions to avoid duplication between Pydantic and JSON schema CHAT_FIELD_DESCRIPTIONS = { "prompt": ( - "Your thorough, expressive question with as much context as possible. Remember: you're talking to " - "another Claude assistant who has deep expertise and can provide nuanced insights. Include your " + "You MUST provide a thorough, expressive question or share an idea with as much context as possible. " + "Remember: you're talking to an assistant who has deep expertise and can provide nuanced insights. Include your " "current thinking, specific challenges, background context, what you've already tried, and what " "kind of response would be most helpful. The more context and detail you provide, the more " "valuable and targeted the response will be." @@ -26,7 +26,7 @@ CHAT_FIELD_DESCRIPTIONS = { "files": "Optional files for context (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", "images": ( "Optional images for visual context. Useful for UI discussions, diagrams, visual problems, " - "error screens, or architectural mockups." + "error screens, or architectural mockups. 
(must be FULL absolute paths to real files / folders - DO NOT SHORTEN - OR these can be base64 data)" ), } diff --git a/tools/codereview.py b/tools/codereview.py index c78f2d6..6b4abe2 100644 --- a/tools/codereview.py +++ b/tools/codereview.py @@ -25,14 +25,16 @@ from .base import BaseTool, ToolRequest # Field descriptions to avoid duplication between Pydantic and JSON schema CODEREVIEW_FIELD_DESCRIPTIONS = { - "files": "Code files or directories to review (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", + "files": "Code files or directories to review that are relevant to the code that needs review or are closely " + "related to the code or component that needs to be reviewed (must be FULL absolute paths to real files / folders - DO NOT SHORTEN). " + "Validate that these files exist on disk before sharing and only share code that is relevant.", "prompt": ( "User's summary of what the code does, expected behavior, constraints, and review objectives. " - "IMPORTANT: Before using this tool, Claude should first perform its own preliminary review - " + "IMPORTANT: Before using this tool, you should first perform your own preliminary review - " "examining the code structure, identifying potential issues, understanding the business logic, " - "and noting areas of concern. Include Claude's initial observations about code quality, potential " + "and noting areas of concern. Include your initial observations about code quality, potential " "bugs, architectural patterns, and specific areas that need deeper scrutiny. This dual-perspective " - "approach (Claude's analysis + external model's review) provides more comprehensive feedback and " + "approach (your analysis + external model's review) provides more comprehensive feedback and " "catches issues that either reviewer might miss alone." ), "images": ( @@ -299,7 +301,7 @@ Please provide a code review aligned with the user's context and expectations, f --- -**Claude's Next Steps:** +**Your Next Steps:** 1. **Understand the Context**: First examine the specific functions, files, and code sections mentioned in """ """the review to understand each issue thoroughly. diff --git a/tools/debug.py b/tools/debug.py index c0d11cb..3e851bb 100644 --- a/tools/debug.py +++ b/tools/debug.py @@ -17,34 +17,37 @@ from .base import BaseTool, ToolRequest # Field descriptions to avoid duplication between Pydantic and JSON schema DEBUG_FIELD_DESCRIPTIONS = { "prompt": ( - "Claud - you MUST first think deep. Issue description. Include what you can provide: " + "MANDATORY: You MUST first think deep about the issue: what it is, why it might be happening, what code might be involved, " + "whether it is an error stemming directly from the code or a side-effect of some part of the existing code. If it's an error " + "message, could it be coming from an external resource and NOT directly from the project? What part of the code seems most likely to be " + "the culprit? MUST try and ZERO IN on the issue and surrounding code. Include all the details in the prompt that you can provide: " "error messages, symptoms, when it occurs, steps to reproduce, environment details, " "recent changes, and any other relevant information. Mention any previous attempts at fixing this issue, " "including any past fix that was in place but has now regressed. " "The more context available, the better the analysis. " - "SYSTEMATIC INVESTIGATION: Claude MUST begin by thinking hard and performing a thorough investigation using a systematic approach. " + "PERFORM SYSTEMATIC INVESTIGATION: You MUST begin by thinking hard and performing a thorough investigation using a systematic approach. " "First understand the issue, find the code that may be causing it or code that is breaking, as well as any related code that could have caused this as a side effect. " - "Claude MUST maintain detailed investigation notes in a DEBUGGING_{issue_description}.md file within the project folder, " + "You MUST maintain detailed investigation notes in a DEBUGGING_{issue_description}.md file within the project folder, " "updating it as it performs step-by-step analysis of the code, trying to determine the actual root cause and understanding how a minimal, appropriate fix can be found. " - "This file MUST contain functions, methods, files visited OR determined to be part of the problem. Claude MUST update this and remove any references that it finds to be irrelevant during its investigation. " - "CRITICAL: If after thorough investigation Claude has very high confidence that NO BUG EXISTS that correlates to the reported symptoms, " - "Claude should consider the possibility that the reported issue may not actually be present, may be a misunderstanding, or may be conflated with something else entirely. " - "In such cases, Claude should gather more information from the user through targeted questioning rather than continue hunting for non-existent bugs. " - "Once complete, Claude MUST provide Zen's debug tool with this file passed into the files parameter. " - "It is ESSENTIAL that this detailed work is performed by Claude before sharing all the relevant details with its development assistant. This will greatly help in zeroing in on the root cause." + "This file MUST contain functions, methods, files visited OR determined to be part of the problem. You MUST update this and remove any references that you find to be irrelevant during your investigation. " + "CRITICAL: If after thorough investigation you have very high confidence that NO BUG EXISTS that correlates to the reported symptoms, " + "you should consider the possibility that the reported issue may not actually be present, may be a misunderstanding, or may be conflated with something else entirely. " + "In such cases, you should gather more information from the user through targeted questioning rather than continue hunting for non-existent bugs. " + "Once complete, you MUST also pass this file into the files parameter of this tool. " + "It is ESSENTIAL that this detailed work is performed by you before sharing all the relevant details with your development assistant. This will greatly help in zeroing in on the root cause." ), "findings": ( - "Claude MUST first perform its own investigation, gather its findings and analysis. Include: steps taken to analyze the issue, " + "You MUST first perform your own investigation and gather your findings and analysis. Include: steps taken to analyze the issue, " "code patterns discovered, initial hypotheses formed, any relevant classes/functions/methods examined, " "and any preliminary conclusions. If investigation yields no concrete evidence of a bug correlating to the reported symptoms, " - "Claude should clearly state this finding and consider that the issue may not exist as described. " + "you should clearly state this finding and consider that the issue may not exist as described. " "This provides context for the assistant model's analysis." ), "files": ( "Essential files for debugging - ONLY include files that are directly related to the issue, " "contain the problematic code, or are necessary for understanding the root cause. " "This can include any relevant log files, error description documents, investigation documents, " - "claude's own findings as a document, related code that may help with analysis." + "your own findings as a document, and related code that may help with analysis. " "DO NOT include every file scanned during investigation (must be FULL absolute paths - DO NOT SHORTEN)." ), "error_context": "Stack trace, snippet from logs, or additional error context. For very large text you MUST instead" @@ -77,13 +80,13 @@ class DebugIssueTool(BaseTool): "trace errors, or diagnose issues. " "MANDATORY: Claud you MUST first think deep and follow these instructions when using this tool" "SYSTEMATIC INVESTIGATION WORKFLOW: " - "Claude MUST begin by thinking hard and performing a thorough investigation using a systematic approach. " + "You MUST begin by thinking hard and performing a thorough investigation using a systematic approach. " "First understand the issue, find the code that may be causing it or code that is breaking, as well as any related code that could have caused this as a side effect. " - "Claude MUST maintain detailed investigation notes while it performs its analysis, " + "You MUST maintain detailed investigation notes while you perform your analysis, " "updating it as it performs step-by-step analysis of the code, trying to determine the actual root cause and understanding how a minimal, appropriate fix can be found. " - "This file MUST contain functions, methods, files visited OR determined to be part of the problem. Claude MUST update this and remove any references that it finds to be irrelevant during its investigation. " - "Once complete, Claude MUST provide Zen's debug tool with this file passed into the files parameter. " - "1. INVESTIGATE SYSTEMATICALLY: Claude MUST think and use a methodical approach to trace through error reports, " + "This file MUST contain functions, methods, files visited OR determined to be part of the problem. You MUST update this and remove any references that you find to be irrelevant during your investigation. " + "Once complete, you MUST provide Zen's debug tool with this file passed into the files parameter. " + "1. INVESTIGATE SYSTEMATICALLY: You MUST think and use a methodical approach to trace through error reports, " "examine code, and gather evidence step by step " "2. DOCUMENT FINDINGS: Maintain detailed investigation notes to " "keep the user informed during its initial investigation. 
This investigation MUST be shared with this tool for the assistant " @@ -299,7 +302,7 @@ with comprehensive findings for expert analysis.""" def format_response(self, response: str, request: DebugIssueRequest, model_info: Optional[dict] = None) -> str: """Format the debugging response for Claude to present to user""" - # The base class automatically handles structured responses like 'clarification_required' + # The base class automatically handles structured responses like 'files_required_to_continue' # and 'analysis_complete' via SPECIAL_STATUS_MODELS, so we only handle normal text responses here model_name = self._get_model_name(model_info) diff --git a/tools/models.py b/tools/models.py index 363ad5f..b5301b6 100644 --- a/tools/models.py +++ b/tools/models.py @@ -35,7 +35,7 @@ class ToolOutput(BaseModel): status: Literal[ "success", "error", - "clarification_required", + "files_required_to_continue", "full_codereview_required", "focused_review_required", "test_sample_needed", @@ -55,11 +55,11 @@ class ToolOutput(BaseModel): ) -class ClarificationRequest(BaseModel): - """Request for additional context or clarification""" +class FilesNeededRequest(BaseModel): + """Request for missing files / code to continue""" - status: Literal["clarification_required"] = "clarification_required" - question: str = Field(..., description="Question to ask Claude for more context") + status: Literal["files_required_to_continue"] = "files_required_to_continue" + mandatory_instructions: str = Field(..., description="Critical instructions for Claude regarding required context") files_needed: Optional[list[str]] = Field( default_factory=list, description="Specific files that are needed for analysis" ) @@ -362,7 +362,7 @@ class NoBugFound(BaseModel): # Registry mapping status strings to their corresponding Pydantic models SPECIAL_STATUS_MODELS = { - "clarification_required": ClarificationRequest, + "files_required_to_continue": FilesNeededRequest, "full_codereview_required": FullCodereviewRequired, "focused_review_required": FocusedReviewRequired, "test_sample_needed": TestSampleNeeded, diff --git a/tools/planner.py b/tools/planner.py index 4777d4a..63b2d31 100644 --- a/tools/planner.py +++ b/tools/planner.py @@ -76,7 +76,8 @@ logger = logging.getLogger(__name__) PLANNER_FIELD_DESCRIPTIONS = { # Interactive planning fields for step-by-step planning "step": ( - "Your current planning step. For the first step, describe the task/problem to plan. " + "Your current planning step. For the first step, describe the task/problem to plan and be extremely expressive " + "so that subsequent steps can break this down into simpler steps. " "For subsequent steps, provide the actual planning step content. Can include: regular planning steps, " "revisions of previous steps, questions about previous decisions, realizations about needing more analysis, " "changes in approach, etc." 
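To make the status rename above concrete, here is a minimal sketch of how a `files_required_to_continue` payload round-trips through the renamed model (the field definitions mirror the `tools/models.py` hunk above; the payload values are illustrative):

```python
from typing import Literal, Optional

from pydantic import BaseModel, Field


# Mirrors the FilesNeededRequest model introduced in tools/models.py above
class FilesNeededRequest(BaseModel):
    """Request for missing files / code to continue"""

    status: Literal["files_required_to_continue"] = "files_required_to_continue"
    mandatory_instructions: str = Field(..., description="Critical instructions for Claude regarding required context")
    files_needed: Optional[list[str]] = Field(default_factory=list, description="Specific files that are needed for analysis")


# Illustrative payload in the same shape the parsing tests above assert against
payload = '{"status": "files_required_to_continue", "mandatory_instructions": "What files need review?", "files_needed": ["src/"]}'
request = FilesNeededRequest.model_validate_json(payload)
assert request.files_needed == ["src/"]
```

As the `tools/base.py` hunk above shows, the server then appends the generic "IMPORTANT GUIDANCE" block to `mandatory_instructions` before the payload reaches Claude.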
diff --git a/tools/precommit.py b/tools/precommit.py index bd2afa8..9c6c895 100644 --- a/tools/precommit.py +++ b/tools/precommit.py @@ -17,7 +17,6 @@ if TYPE_CHECKING: from tools.models import ToolModelCategory from systemprompts import PRECOMMIT_PROMPT -from utils.file_utils import translate_file_paths, translate_path_for_environment from utils.git_utils import find_git_repositories, get_git_status, run_git_command from utils.token_utils import estimate_tokens @@ -28,10 +27,10 @@ DEFAULT_CONTEXT_WINDOW = 200_000 # Field descriptions to avoid duplication between Pydantic and JSON schema PRECOMMIT_FIELD_DESCRIPTIONS = { - "path": "Starting directory to search for git repositories (must be FULL absolute paths - DO NOT SHORTEN).", + "path": "Starting absolute path to the directory to search for git repositories (must be FULL absolute paths - DO NOT SHORTEN).", "prompt": ( "The original user request description for the changes. Provides critical context for the review. " - "If original request is limited or not available, you MUST study the changes carefully, think deeply " + "MANDATORY: if original request is limited or not available, you MUST study the changes carefully, think deeply " "about the implementation intent, analyze patterns across all modifications, infer the logic and " "requirements from the code changes and provide a thorough starting point." ), @@ -49,11 +48,11 @@ PRECOMMIT_FIELD_DESCRIPTIONS = { "thinking_mode": "Thinking depth mode for the assistant.", "files": ( "Optional files or directories to provide as context (must be FULL absolute paths - DO NOT SHORTEN). " - "These files are not part of the changes but provide helpful context like configs, docs, or related code." + "These additional files are not part of the changes but provide helpful context like configs, docs, or related code." ), "images": ( "Optional images showing expected UI changes, design requirements, or visual references for the changes " - "being validated" + "being validated (must be FULL absolute paths - DO NOT SHORTEN). " ), } @@ -235,22 +234,10 @@ class Precommit(BaseTool): raise ValueError(f"MCP_SIZE_CHECK:{ToolOutput(**size_check).model_dump_json()}") - # Translate the path and files if running in Docker - translated_path = translate_path_for_environment(request.path) - translated_files = translate_file_paths(request.files) - # File size validation happens at MCP boundary in server.py - # Check if the path translation resulted in an error path - if translated_path.startswith("/inaccessible/"): - raise ValueError( - f"The path '{request.path}' is not accessible from within the Docker container. " - f"The Docker container can only access files within the mounted workspace. " - f"Please ensure the path is within the mounted directory or adjust your Docker volume mounts." - ) - # Find all git repositories - repositories = find_git_repositories(translated_path, request.max_depth) + repositories = find_git_repositories(request.path, request.max_depth) if not repositories: return "No git repositories found in the specified path." 
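With path translation gone, the precommit tool now hands `request.path` straight to the repository scan. A minimal sketch of that call, using the `find_git_repositories` signature declared in `utils/git_utils.py` (the project path below is hypothetical):

```python
from utils.git_utils import find_git_repositories

# Hypothetical starting path; max_depth bounds the directory walk (defaults to 5)
repositories = find_git_repositories("/Users/you/projects/my-app", max_depth=3)
if not repositories:
    print("No git repositories found in the specified path.")
```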
@@ -421,12 +408,12 @@ class Precommit(BaseTool): context_files_summary = [] context_tokens = 0 - if translated_files: + if request.files: remaining_tokens = max_tokens - total_tokens # Use centralized file handling with filtering for duplicate prevention file_content, processed_files = self._prepare_file_content_for_prompt( - translated_files, + request.files, request.continuation_id, "Context files", max_tokens=remaining_tokens + 1000, # Add back the reserve that was calculated @@ -437,7 +424,7 @@ class Precommit(BaseTool): if file_content: context_tokens = estimate_tokens(file_content) context_files_content = [file_content] - context_files_summary.append(f"✅ Included: {len(translated_files)} context files") + context_files_summary.append(f"✅ Included: {len(request.files)} context files") else: context_files_summary.append("WARNING: No context files could be read or files too large") @@ -540,7 +527,7 @@ class Precommit(BaseTool): ) # Add instruction for requesting files if needed - if not translated_files: + if not request.files: prompt_parts.append( "\nIf you need additional context files to properly review these changes " "(such as configuration files, documentation, or related code), " diff --git a/tools/refactor.py b/tools/refactor.py index 02fccd4..19d9d5a 100644 --- a/tools/refactor.py +++ b/tools/refactor.py @@ -23,7 +23,6 @@ from pydantic import Field from config import TEMPERATURE_ANALYTICAL from systemprompts import REFACTOR_PROMPT -from utils.file_utils import translate_file_paths from .base import BaseTool, ToolRequest @@ -32,8 +31,10 @@ logger = logging.getLogger(__name__) # Field descriptions to avoid duplication between Pydantic and JSON schema REFACTOR_FIELD_DESCRIPTIONS = { - "files": "Code files or directories to analyze for refactoring opportunities (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", - "prompt": "Description of refactoring goals, context, and specific areas of focus", + "files": "Code files or directories to analyze for refactoring opportunities. MUST be FULL absolute paths to real files / folders - DO NOT SHORTEN. " "The files also MUST directly involve the classes, functions, etc. that need to be refactored. Closely related or dependent files " "will also help.", + "prompt": "Description of refactoring goals, context, and specific areas of focus.", "refactor_type": "Type of refactoring analysis to perform", "focus_areas": "Specific areas to focus on (e.g., 'performance', 'readability', 'maintainability', 'security')", "style_guide_examples": ( @@ -285,9 +286,7 @@ class RefactorTool(BaseTool): logger.info(f"[REFACTOR] All {len(style_examples)} style examples already in conversation history") return "", "" - # Translate file paths for Docker environment before accessing files - translated_examples = translate_file_paths(examples_to_process) - logger.debug(f"[REFACTOR] Translated {len(examples_to_process)} file paths for container access") + logger.debug(f"[REFACTOR] Processing {len(examples_to_process)} file paths") # Calculate token budget for style examples (20% of available tokens, or fallback) if available_tokens: @@ -306,10 +305,9 @@ class RefactorTool(BaseTool): # Sort by file size (smallest first) for pattern-focused selection file_sizes = [] - for i, file_path in enumerate(examples_to_process): - translated_path = translated_examples[i] + for file_path in examples_to_process: try: - size = os.path.getsize(translated_path) + size = os.path.getsize(file_path) file_sizes.append((file_path, size)) logger.debug(f"[REFACTOR] Style example {os.path.basename(file_path)}: {size:,} bytes") except (OSError, FileNotFoundError) as e: diff --git a/tools/testgen.py b/tools/testgen.py index 4acdf2b..0799101 100644 --- a/tools/testgen.py +++ b/tools/testgen.py @@ -21,7 +21,6 @@ from pydantic import Field from config import TEMPERATURE_ANALYTICAL from systemprompts import TESTGEN_PROMPT -from utils.file_utils import translate_file_paths from .base import BaseTool, ToolRequest @@ -30,7 +29,8 @@ logger = logging.getLogger(__name__) # Field descriptions to avoid duplication between Pydantic and JSON schema TESTGEN_FIELD_DESCRIPTIONS = { "files": "Code files or directories to generate tests for (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", - "prompt": "Description of what to test, testing objectives, and specific scope/focus areas", + "prompt": "Description of what to test, testing objectives, and specific scope/focus areas. Be specific about any " + "particular component, module, class, or function you would like to generate tests for.", "test_examples": ( "Optional existing test files or directories to use as style/pattern reference (must be FULL absolute paths to real files / folders - DO NOT SHORTEN). " "If not provided, the tool will determine the best testing approach based on the code structure. 
" @@ -164,9 +164,7 @@ class TestGenerationTool(BaseTool): logger.info(f"[TESTGEN] All {len(test_examples)} test examples already in conversation history") return "", "" - # Translate file paths for Docker environment before accessing files - translated_examples = translate_file_paths(examples_to_process) - logger.debug(f"[TESTGEN] Translated {len(examples_to_process)} file paths for container access") + logger.debug(f"[TESTGEN] Processing {len(examples_to_process)} file paths") # Calculate token budget for test examples (25% of available tokens, or fallback) if available_tokens: @@ -184,13 +182,11 @@ class TestGenerationTool(BaseTool): ) # Sort by file size (smallest first) for pattern-focused selection - # Use translated paths for file system operations, but keep original paths for processing file_sizes = [] - for i, file_path in enumerate(examples_to_process): - translated_path = translated_examples[i] + for file_path in examples_to_process: try: - size = os.path.getsize(translated_path) - file_sizes.append((file_path, size)) # Keep original path for consistency + size = os.path.getsize(file_path) + file_sizes.append((file_path, size)) logger.debug(f"[TESTGEN] Test example {os.path.basename(file_path)}: {size:,} bytes") except (OSError, FileNotFoundError) as e: # If we can't get size, put it at the end diff --git a/tools/thinkdeep.py b/tools/thinkdeep.py index edef846..45970b0 100644 --- a/tools/thinkdeep.py +++ b/tools/thinkdeep.py @@ -17,15 +17,17 @@ from .base import BaseTool, ToolRequest # Field descriptions to avoid duplication between Pydantic and JSON schema THINKDEEP_FIELD_DESCRIPTIONS = { "prompt": ( - "Your current thinking/analysis to extend and validate. IMPORTANT: Before using this tool, Claude MUST " - "first think hard and establish a deep understanding of the topic and question by thinking through all " - "relevant details, context, constraints, and implications. Share these extended thoughts and ideas in " - "the prompt so the model has comprehensive information to work with for the best analysis." + "MANDATORY: you MUST first think hard and establish a deep understanding of the topic and question by thinking through all " + "relevant details, context, constraints, and implications. Provide your thought-partner all of your current thinking/analysis " + "to extend and validate. Share these extended thoughts and ideas in " + "the prompt so your assistant has comprehensive information to work with for the best analysis." ), - "problem_context": "Additional context about the problem or goal. Be as expressive as possible.", + "problem_context": "Provate additional context about the problem or goal. Be as expressive as possible. More information will " + "be very helpful to your thought-partner.", "focus_areas": "Specific aspects to focus on (architecture, performance, security, etc.)", - "files": "Optional file paths or directories for additional context (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", - "images": "Optional images for visual analysis - diagrams, charts, system architectures, or any visual information to analyze", + "files": "Optional absolute file paths or directories for additional context (must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", + "images": "Optional images for visual analysis - diagrams, charts, system architectures, or any visual information to analyze. 
" + "(must be FULL absolute paths to real files / folders - DO NOT SHORTEN)", } diff --git a/tools/tracer.py b/tools/tracer.py index d87ac2f..5e1b1d0 100644 --- a/tools/tracer.py +++ b/tools/tracer.py @@ -15,8 +15,8 @@ from .base import BaseTool, ToolRequest # Field descriptions to avoid duplication between Pydantic and JSON schema TRACER_FIELD_DESCRIPTIONS = { "prompt": ( - "Detailed description of what to trace and WHY you need this analysis. Include context about what " - "you're trying to understand, debug, or analyze. For precision mode: describe the specific " + "Detailed description of what to trace and WHY you need this analysis. MUST include context about what " + "you're trying to understand, debug, analyze or find. For precision mode: describe the specific " "method/function and what aspect of its execution flow you need to understand. For dependencies " "mode: describe the class/module and what relationships you need to map. Example: 'I need to " "understand how BookingManager.finalizeInvoice method is called throughout the system and what " diff --git a/utils/__init__.py b/utils/__init__.py index de1c380..8024036 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -4,7 +4,7 @@ Utility functions for Zen MCP Server from .file_types import CODE_EXTENSIONS, FILE_CATEGORIES, PROGRAMMING_EXTENSIONS, TEXT_EXTENSIONS from .file_utils import expand_paths, read_file_content, read_files -from .security_config import EXCLUDED_DIRS, SECURITY_ROOT +from .security_config import EXCLUDED_DIRS from .token_utils import check_token_limit, estimate_tokens __all__ = [ @@ -15,7 +15,6 @@ __all__ = [ "PROGRAMMING_EXTENSIONS", "TEXT_EXTENSIONS", "FILE_CATEGORIES", - "SECURITY_ROOT", "EXCLUDED_DIRS", "estimate_tokens", "check_token_limit", diff --git a/utils/conversation_memory.py b/utils/conversation_memory.py index 4eb1524..1a69480 100644 --- a/utils/conversation_memory.py +++ b/utils/conversation_memory.py @@ -3,15 +3,29 @@ Conversation Memory for AI-to-AI Multi-turn Discussions This module provides conversation persistence and context reconstruction for stateless MCP (Model Context Protocol) environments. It enables multi-turn -conversations between Claude and Gemini by storing conversation state in Redis +conversations between Claude and Gemini by storing conversation state in memory across independent request cycles. +CRITICAL ARCHITECTURAL REQUIREMENT: +This conversation memory system is designed for PERSISTENT MCP SERVER PROCESSES. +It uses in-memory storage that persists only within a single Python process. + +โš ๏ธ IMPORTANT: This system will NOT work correctly if MCP tool calls are made + as separate subprocess invocations (each subprocess starts with empty memory). + + WORKING SCENARIO: Claude Desktop with persistent MCP server process + FAILING SCENARIO: Simulator tests calling server.py as individual subprocesses + + Root cause of test failures: Each subprocess call loses the conversation + state from previous calls because memory is process-specific, not shared + across subprocess boundaries. + ARCHITECTURE OVERVIEW: The MCP protocol is inherently stateless - each tool request is independent with no memory of previous interactions. This module bridges that gap by: 1. Creating persistent conversation threads with unique UUIDs -2. Storing complete conversation context (turns, files, metadata) in Redis +2. Storing complete conversation context (turns, files, metadata) in memory 3. Reconstructing conversation history when tools are called with continuation_id 4. 
Supporting cross-tool continuation - seamlessly switch between different tools while maintaining full conversation context and file references @@ -35,9 +49,9 @@ Key Features: most recent file context is preserved when token limits require exclusions. - Automatic turn limiting (20 turns max) to prevent runaway conversations - Context reconstruction for stateless request continuity -- Redis-based persistence with automatic expiration (3 hour TTL) +- In-memory persistence with automatic expiration (3 hour TTL) - Thread-safe operations for concurrent access -- Graceful degradation when Redis is unavailable +- Graceful degradation when storage is unavailable DUAL PRIORITIZATION STRATEGY (Files & Conversations): The conversation memory system implements sophisticated prioritization for both files and @@ -187,26 +201,16 @@ class ThreadContext(BaseModel): initial_context: dict[str, Any] # Original request parameters -def get_redis_client(): +def get_storage(): """ - Get Redis client from environment configuration - - Creates a Redis client using the REDIS_URL environment variable. - Defaults to localhost:6379/0 if not specified. + Get in-memory storage backend for conversation persistence. Returns: - redis.Redis: Configured Redis client with decode_responses=True - - Raises: - ValueError: If redis package is not installed + InMemoryStorage: Thread-safe in-memory storage backend """ - try: - import redis + from .storage_backend import get_storage_backend - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") - return redis.from_url(redis_url, decode_responses=True) - except ImportError: - raise ValueError("redis package required. Install with: pip install redis") + return get_storage_backend() def create_thread(tool_name: str, initial_request: dict[str, Any], parent_thread_id: Optional[str] = None) -> str: @@ -251,10 +255,10 @@ def create_thread(tool_name: str, initial_request: dict[str, Any], parent_thread initial_context=filtered_context, ) - # Store in Redis with configurable TTL to prevent indefinite accumulation - client = get_redis_client() + # Store in memory with configurable TTL to prevent indefinite accumulation + storage = get_storage() key = f"thread:{thread_id}" - client.setex(key, CONVERSATION_TIMEOUT_SECONDS, context.model_dump_json()) + storage.setex(key, CONVERSATION_TIMEOUT_SECONDS, context.model_dump_json()) logger.debug(f"[THREAD] Created new thread {thread_id} with parent {parent_thread_id}") @@ -263,7 +267,7 @@ def create_thread(tool_name: str, initial_request: dict[str, Any], parent_thread def get_thread(thread_id: str) -> Optional[ThreadContext]: """ - Retrieve thread context from Redis + Retrieve thread context from in-memory storage Fetches complete conversation context for cross-tool continuation. 
This is the core function that enables tools to access conversation @@ -278,22 +282,22 @@ def get_thread(thread_id: str) -> Optional[ThreadContext]: Security: - Validates UUID format to prevent injection attacks - - Handles Redis connection failures gracefully + - Handles storage connection failures gracefully - No error information leakage on failure """ if not thread_id or not _is_valid_uuid(thread_id): return None try: - client = get_redis_client() + storage = get_storage() key = f"thread:{thread_id}" - data = client.get(key) + data = storage.get(key) if data: return ThreadContext.model_validate_json(data) return None except Exception: - # Silently handle errors to avoid exposing Redis details + # Silently handle errors to avoid exposing storage details return None @@ -313,8 +317,7 @@ def add_turn( Appends a new conversation turn to an existing thread. This is the core function for building conversation history and enabling cross-tool - continuation. Each turn preserves the tool and model that generated it, - and tracks file reception order using atomic Redis counters. + continuation. Each turn preserves the tool and model that generated it. Args: thread_id: UUID of the conversation thread @@ -333,7 +336,7 @@ def add_turn( Failure cases: - Thread doesn't exist or expired - Maximum turn limit reached - - Redis connection failure + - Storage connection failure Note: - Refreshes thread TTL to configured timeout on successful update @@ -370,14 +373,14 @@ def add_turn( context.turns.append(turn) context.last_updated_at = datetime.now(timezone.utc).isoformat() - # Save back to Redis and refresh TTL + # Save back to storage and refresh TTL try: - client = get_redis_client() + storage = get_storage() key = f"thread:{thread_id}" - client.setex(key, CONVERSATION_TIMEOUT_SECONDS, context.model_dump_json()) # Refresh TTL to configured timeout + storage.setex(key, CONVERSATION_TIMEOUT_SECONDS, context.model_dump_json()) # Refresh TTL to configured timeout return True except Exception as e: - logger.debug(f"[FLOW] Failed to save turn to Redis: {type(e).__name__}") + logger.debug(f"[FLOW] Failed to save turn to storage: {type(e).__name__}") return False @@ -591,11 +594,9 @@ def _plan_file_inclusion_by_size(all_files: list[str], max_file_tokens: int) -> for file_path in all_files: try: - from utils.file_utils import estimate_file_tokens, translate_path_for_environment + from utils.file_utils import estimate_file_tokens - translated_path = translate_path_for_environment(file_path) - - if os.path.exists(translated_path) and os.path.isfile(translated_path): + if os.path.exists(file_path) and os.path.isfile(file_path): # Use centralized token estimation for consistency estimated_tokens = estimate_file_tokens(file_path) @@ -613,7 +614,7 @@ def _plan_file_inclusion_by_size(all_files: list[str], max_file_tokens: int) -> else: files_to_skip.append(file_path) # More descriptive message for missing files - if not os.path.exists(translated_path): + if not os.path.exists(file_path): logger.debug( f"[FILES] Skipping {file_path} - file no longer exists (may have been moved/deleted since conversation)" ) @@ -724,7 +725,7 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_ Performance Characteristics: - O(n) file collection with newest-first prioritization - Intelligent token budgeting prevents context window overflow - - Redis-based persistence with automatic TTL management + - In-memory persistence with automatic TTL management - Graceful degradation when files are inaccessible or too 
large """ # Get the complete thread chain @@ -851,10 +852,7 @@ def build_conversation_history(context: ThreadContext, model_context=None, read_ except Exception as e: # More descriptive error handling for missing files try: - from utils.file_utils import translate_path_for_environment - - translated_path = translate_path_for_environment(file_path) - if not os.path.exists(translated_path): + if not os.path.exists(file_path): logger.info( f"File no longer accessible for conversation history: {file_path} - file was moved/deleted since conversation (marking as excluded)" ) diff --git a/utils/file_types.py b/utils/file_types.py index f820722..a1662cd 100644 --- a/utils/file_types.py +++ b/utils/file_types.py @@ -79,7 +79,7 @@ TEXT_DATA = { ".csv", # CSV ".tsv", # TSV ".gitignore", # Git ignore - ".dockerfile", # Docker + ".dockerfile", # Dockerfile ".makefile", # Make ".cmake", # CMake ".gradle", # Gradle @@ -221,7 +221,7 @@ TOKEN_ESTIMATION_RATIOS = { # Logs and data ".log": 4.5, # Log files - timestamps, messages, stack traces ".csv": 3.1, # CSV - data with delimiters - # Docker and infrastructure + # Infrastructure files ".dockerfile": 3.7, # Dockerfile - commands and paths ".tf": 3.5, # Terraform - infrastructure as code } diff --git a/utils/file_utils.py b/utils/file_utils.py index 5374652..c7cef9e 100644 --- a/utils/file_utils.py +++ b/utils/file_utils.py @@ -45,7 +45,7 @@ from pathlib import Path from typing import Callable, Optional from .file_types import BINARY_EXTENSIONS, CODE_EXTENSIONS, IMAGE_EXTENSIONS, TEXT_EXTENSIONS -from .security_config import CONTAINER_WORKSPACE, EXCLUDED_DIRS, MCP_SIGNATURE_FILES, SECURITY_ROOT, WORKSPACE_ROOT +from .security_config import EXCLUDED_DIRS, is_dangerous_path from .token_utils import DEFAULT_CONTEXT_WINDOW, estimate_tokens @@ -92,44 +92,32 @@ def is_mcp_directory(path: Path) -> bool: path: Directory path to check Returns: - True if this appears to be the MCP directory + True if this is the MCP server directory or a subdirectory """ if not path.is_dir(): return False - # Check for multiple signature files to be sure - matches = 0 - for sig_file in MCP_SIGNATURE_FILES: - if (path / sig_file).exists(): - matches += 1 - if matches >= 3: # Require at least 3 matches to be certain - logger.info(f"Detected MCP directory at {path}, will exclude from scanning") - return True - return False + # Get the directory where the MCP server is running from + # __file__ is utils/file_utils.py, so parent.parent is the MCP root + mcp_server_dir = Path(__file__).parent.parent.resolve() + + # Check if the given path is the MCP server directory or a subdirectory + try: + path.resolve().relative_to(mcp_server_dir) + logger.info(f"Detected MCP server directory at {path}, will exclude from scanning") + return True + except ValueError: + # Not a subdirectory of MCP server + return False def get_user_home_directory() -> Optional[Path]: """ - Get the user's home directory based on environment variables. - - In Docker, USER_HOME should be set to the mounted home path. - Outside Docker, we use Path.home() or environment variables. + Get the user's home directory. 
Returns: - User's home directory path or None if not determinable + User's home directory path """ - # Check for explicit USER_HOME env var (set in docker-compose.yml) - user_home = os.environ.get("USER_HOME") - if user_home: - return Path(user_home).resolve() - - # In container, check if we're running in Docker - if CONTAINER_WORKSPACE.exists(): - # We're in Docker but USER_HOME not set - use WORKSPACE_ROOT as fallback - if WORKSPACE_ROOT: - return Path(WORKSPACE_ROOT).resolve() - - # Outside Docker, use system home return Path.home() @@ -291,155 +279,51 @@ def _add_line_numbers(content: str) -> str: return "\n".join(numbered_lines) -def translate_path_for_environment(path_str: str) -> str: - """ - Translate paths between host and container environments as needed. - - This is the unified path translation function that should be used by all - tools and utilities throughout the codebase. It handles: - 1. Docker host-to-container path translation (host paths -> /workspace/...) - 2. Direct mode (no translation needed) - 3. Internal server files (conf/custom_models.json) - 4. Security validation and error handling - - Docker Path Translation Logic: - - Input: /Users/john/project/src/file.py (host path from Claude) - - WORKSPACE_ROOT: /Users/john/project (host path in env var) - - Output: /workspace/src/file.py (container path for file operations) - - Args: - path_str: Original path string from the client (absolute host path) - - Returns: - Translated path appropriate for the current environment - """ - # Handle built-in server config file - no translation needed - if _is_builtin_custom_models_config(path_str): - return path_str - if not WORKSPACE_ROOT or not WORKSPACE_ROOT.strip() or not CONTAINER_WORKSPACE.exists(): - if path_str.startswith("/app/"): - # Convert Docker internal paths to local relative paths for standalone mode - relative_path = path_str[5:] # Remove "/app/" prefix - if relative_path.startswith("/"): - relative_path = relative_path[1:] # Remove leading slash if present - return "./" + relative_path - # No other translation needed for standalone mode - return path_str - - # Check if the path is already a container path (starts with /workspace) - if path_str.startswith(str(CONTAINER_WORKSPACE) + "/") or path_str == str(CONTAINER_WORKSPACE): - # Path is already translated to container format, return as-is - return path_str - - try: - # Use os.path.realpath for security - it resolves symlinks completely - # This prevents symlink attacks that could escape the workspace - real_workspace_root = Path(os.path.realpath(WORKSPACE_ROOT)) - # For the host path, we can't use realpath if it doesn't exist in the container - # So we'll use Path().resolve(strict=False) instead - real_host_path = Path(path_str).resolve(strict=False) - - # Security check: ensure the path is within the mounted workspace - # This prevents path traversal attacks (e.g., ../../../etc/passwd) - relative_path = real_host_path.relative_to(real_workspace_root) - - # Construct the container path - container_path = CONTAINER_WORKSPACE / relative_path - - # Log the translation for debugging (but not sensitive paths) - if str(container_path) != path_str: - logger.info(f"Translated host path to container: {path_str} -> {container_path}") - - return str(container_path) - - except ValueError: - # Path is not within the host's WORKSPACE_ROOT - # In Docker, we cannot access files outside the mounted volume - logger.warning( - f"Path '{path_str}' is outside the mounted workspace '{WORKSPACE_ROOT}'. 
" - f"Docker containers can only access files within the mounted directory." - ) - # Return a clear error path that will fail gracefully - return f"/inaccessible/outside/mounted/volume{path_str}" - except Exception as e: - # Log unexpected errors but don't expose internal details to clients - logger.warning(f"Path translation failed for '{path_str}': {type(e).__name__}") - # Return a clear error path that will fail gracefully - return f"/inaccessible/translation/error{path_str}" - - def resolve_and_validate_path(path_str: str) -> Path: """ - Resolves, translates, and validates a path against security policies. + Resolves and validates a path against security policies. - This is the primary security function that ensures all file access - is properly sandboxed. It enforces three critical policies: - 1. Translate host paths to container paths if applicable (Docker environment) - 2. All paths must be absolute (no ambiguity) - 3. All paths must resolve to within PROJECT_ROOT (sandboxing) + This function ensures safe file access by: + 1. Requiring absolute paths (no ambiguity) + 2. Resolving symlinks to prevent deception + 3. Blocking access to dangerous system directories Args: path_str: Path string (must be absolute) Returns: - Resolved Path object that is guaranteed to be within PROJECT_ROOT + Resolved Path object that is safe to access Raises: ValueError: If path is not absolute or otherwise invalid - PermissionError: If path is outside allowed directory + PermissionError: If path is in a dangerous location """ - # Step 1: Translate Docker paths first (if applicable) - # This must happen before any other validation - translated_path_str = translate_path_for_environment(path_str) + # Step 1: Create a Path object + user_path = Path(path_str) - # Step 2: Create a Path object from the (potentially translated) path - user_path = Path(translated_path_str) - - # Step 3: Security Policy - Require absolute paths + # Step 2: Security Policy - Require absolute paths # Relative paths could be interpreted differently depending on working directory if not user_path.is_absolute(): raise ValueError(f"Relative paths are not supported. Please provide an absolute path.\nReceived: {path_str}") - # Step 4: Resolve the absolute path (follows symlinks, removes .. and .) + # Step 3: Resolve the absolute path (follows symlinks, removes .. and .) # This is critical for security as it reveals the true destination of symlinks resolved_path = user_path.resolve() - # Step 5: Security Policy - Ensure the resolved path is within PROJECT_ROOT - # This prevents directory traversal attacks (e.g., /project/../../../etc/passwd) - try: - resolved_path.relative_to(SECURITY_ROOT) - except ValueError: - # Provide detailed error for debugging while avoiding information disclosure - logger.warning( - f"Access denied - path outside workspace. " - f"Requested: {path_str}, Resolved: {resolved_path}, Workspace: {SECURITY_ROOT}" - ) + # Step 4: Check against dangerous paths + if is_dangerous_path(resolved_path): + logger.warning(f"Access denied - dangerous path: {resolved_path}") + raise PermissionError(f"Access to system directory denied: {path_str}") + + # Step 5: Check if it's the home directory root + if is_home_directory_root(resolved_path): raise PermissionError( - f"Path outside workspace: {path_str}\nWorkspace: {SECURITY_ROOT}\nResolved path: {resolved_path}" + f"Cannot scan entire home directory: {path_str}\n" f"Please specify a subdirectory within your home folder." 
) return resolved_path -def translate_file_paths(file_paths: Optional[list[str]]) -> Optional[list[str]]: - """ - Translate a list of file paths for the current environment. - - This function should be used by all tools to consistently handle path translation - for file lists. It applies the unified path translation to each path in the list. - - Args: - file_paths: List of file paths to translate, or None - - Returns: - List of translated paths, or None if input was None - """ - if not file_paths: - return file_paths - - return [translate_path_for_environment(path) for path in file_paths] - - def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> list[str]: """ Expand paths to individual files, handling both files and directories. @@ -474,23 +358,12 @@ def expand_paths(paths: list[str], extensions: Optional[set[str]] = None) -> lis # Safety checks for directory scanning if path_obj.is_dir(): - resolved_workspace = SECURITY_ROOT.resolve() - resolved_path = path_obj.resolve() - - # Check 1: Prevent reading entire workspace root - if resolved_path == resolved_workspace: - logger.warning( - f"Ignoring request to read entire workspace directory: {path}. " - f"Please specify individual files or subdirectories instead." - ) - continue - - # Check 2: Prevent scanning user's home directory root + # Check 1: Prevent scanning user's home directory root if is_home_directory_root(path_obj): logger.warning(f"Skipping home directory root: {path}. Please specify a project subdirectory instead.") continue - # Check 3: Skip if this is the MCP's own directory + # Check 2: Skip if this is the MCP's own directory if is_mcp_directory(path_obj): logger.info( f"Skipping MCP server directory: {path}. The MCP server code is excluded from project scans." @@ -573,15 +446,6 @@ def read_file_content( # Return error in a format that provides context to the AI logger.debug(f"[FILES] Path validation failed for {file_path}: {type(e).__name__}: {e}") error_msg = str(e) - # Add Docker-specific help if we're in Docker and path is inaccessible - if WORKSPACE_ROOT and CONTAINER_WORKSPACE.exists(): - # We're in Docker - error_msg = ( - f"File is outside the Docker mounted directory. " - f"When running in Docker, only files within the mounted workspace are accessible. " - f"Current mounted directory: {WORKSPACE_ROOT}. " - f"To access files in a different directory, please run Claude from that directory." 
- ) content = f"\n--- ERROR ACCESSING FILE: {file_path} ---\nError: {error_msg}\n--- END FILE ---\n" tokens = estimate_tokens(content) logger.debug(f"[FILES] Returning error content for {file_path}: {tokens} tokens") @@ -761,12 +625,10 @@ def estimate_file_tokens(file_path: str) -> int: Estimated token count for the file """ try: - translated_path = translate_path_for_environment(file_path) - - if not os.path.exists(translated_path) or not os.path.isfile(translated_path): + if not os.path.exists(file_path) or not os.path.isfile(file_path): return 0 - file_size = os.path.getsize(translated_path) + file_size = os.path.getsize(file_path) # Get the appropriate ratio for this file type from .file_types import get_token_estimation_ratio @@ -911,11 +773,10 @@ def read_json_file(file_path: str) -> Optional[dict]: Parsed JSON data as dict, or None if file doesn't exist or invalid """ try: - translated_path = translate_path_for_environment(file_path) - if not os.path.exists(translated_path): + if not os.path.exists(file_path): return None - with open(translated_path, encoding="utf-8") as f: + with open(file_path, encoding="utf-8") as f: return json.load(f) except (json.JSONDecodeError, OSError): return None @@ -934,10 +795,9 @@ def write_json_file(file_path: str, data: dict, indent: int = 2) -> bool: True if successful, False otherwise """ try: - translated_path = translate_path_for_environment(file_path) - os.makedirs(os.path.dirname(translated_path), exist_ok=True) + os.makedirs(os.path.dirname(file_path), exist_ok=True) - with open(translated_path, "w", encoding="utf-8") as f: + with open(file_path, "w", encoding="utf-8") as f: json.dump(data, f, indent=indent, ensure_ascii=False) return True except (OSError, TypeError): @@ -955,9 +815,8 @@ def get_file_size(file_path: str) -> int: File size in bytes, or 0 if file doesn't exist or error """ try: - translated_path = translate_path_for_environment(file_path) - if os.path.exists(translated_path) and os.path.isfile(translated_path): - return os.path.getsize(translated_path) + if os.path.exists(file_path) and os.path.isfile(file_path): + return os.path.getsize(file_path) return 0 except OSError: return 0 @@ -974,8 +833,7 @@ def ensure_directory_exists(file_path: str) -> bool: True if directory exists or was created, False on error """ try: - translated_path = translate_path_for_environment(file_path) - directory = os.path.dirname(translated_path) + directory = os.path.dirname(file_path) if directory: os.makedirs(directory, exist_ok=True) return True @@ -1010,15 +868,14 @@ def read_file_safely(file_path: str, max_size: int = 10 * 1024 * 1024) -> Option File content as string, or None if file too large or unreadable """ try: - translated_path = translate_path_for_environment(file_path) - if not os.path.exists(translated_path) or not os.path.isfile(translated_path): + if not os.path.exists(file_path) or not os.path.isfile(file_path): return None - file_size = os.path.getsize(translated_path) + file_size = os.path.getsize(file_path) if file_size > max_size: return None - with open(translated_path, encoding="utf-8", errors="ignore") as f: + with open(file_path, encoding="utf-8", errors="ignore") as f: return f.read() except OSError: return None diff --git a/utils/git_utils.py b/utils/git_utils.py index f0d1fdc..683f134 100644 --- a/utils/git_utils.py +++ b/utils/git_utils.py @@ -55,7 +55,7 @@ def find_git_repositories(start_path: str, max_depth: int = 5) -> list[str]: try: # Create Path object - no need to resolve yet since the path might be - # a 
diff --git a/utils/git_utils.py b/utils/git_utils.py
index f0d1fdc..683f134 100644
--- a/utils/git_utils.py
+++ b/utils/git_utils.py
@@ -55,7 +55,7 @@ def find_git_repositories(start_path: str, max_depth: int = 5) -> list[str]:
 
     try:
         # Create Path object - no need to resolve yet since the path might be
-        # a translated Docker path that doesn't exist on the host
+        # a path that doesn't exist on this machine
         start_path = Path(start_path)
 
        # Basic validation - must be absolute
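
The comment change above reflects the pattern find_git_repositories() uses: wrap the incoming string in Path without resolving it, then insist on an absolute path before walking anything. A small sketch; validate_start_path is a hypothetical name for illustration.

from pathlib import Path


def validate_start_path(start_path: str) -> Path:
    """Reject relative paths up front; defer resolution until the walk."""
    path = Path(start_path)  # no .resolve() yet: the path may not exist here
    if not path.is_absolute():
        raise ValueError(f"start_path must be absolute, got: {start_path!r}")
    return path
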
diff --git a/utils/security_config.py b/utils/security_config.py
index 6e911a2..ce8fb29 100644
--- a/utils/security_config.py
+++ b/utils/security_config.py
@@ -2,15 +2,14 @@
 Security configuration and path validation constants
 
 This module contains security-related constants and configurations
-for file access control and workspace management.
+for file access control.
 """
 
-import os
 from pathlib import Path
 
-# Dangerous paths that should never be used as WORKSPACE_ROOT
+# Dangerous paths that should never be scanned
 # These would give overly broad access and pose security risks
-DANGEROUS_WORKSPACE_PATHS = {
+DANGEROUS_PATHS = {
     "/",
     "/etc",
     "/usr",
@@ -18,7 +17,6 @@ DANGEROUS_WORKSPACE_PATHS = {
     "/var",
     "/root",
     "/home",
-    "/workspace",  # Container path - WORKSPACE_ROOT should be host path
     "C:\\",
     "C:\\Windows",
     "C:\\Program Files",
@@ -88,87 +86,19 @@ EXCLUDED_DIRS = {
     "vendor",
 }
 
-# MCP signature files - presence of these indicates the MCP's own directory
-# Used to prevent the MCP from scanning its own codebase
-MCP_SIGNATURE_FILES = {
-    "zen_server.py",
-    "server.py",
-    "tools/precommit.py",
-    "utils/file_utils.py",
-    "prompts/tool_prompts.py",
-}
 
-# Workspace configuration
-WORKSPACE_ROOT = os.environ.get("WORKSPACE_ROOT")
-CONTAINER_WORKSPACE = Path("/workspace")
-
-
-def validate_workspace_security(workspace_root: str) -> None:
+def is_dangerous_path(path: Path) -> bool:
     """
-    Validate that WORKSPACE_ROOT is set to a safe directory.
+    Check whether a path is dangerous to access (a listed system directory or the filesystem root).
 
     Args:
-        workspace_root: The workspace root path to validate
-
-    Raises:
-        RuntimeError: If the workspace root is unsafe
-    """
-    if not workspace_root:
-        return
-
-    # Resolve to canonical path for comparison
-    resolved_workspace = Path(workspace_root).resolve()
-
-    # Special check for /workspace - common configuration mistake
-    if str(resolved_workspace) == "/workspace":
-        raise RuntimeError(
-            f"Configuration Error: WORKSPACE_ROOT should be set to the HOST path, not the container path. "
-            f"Found: WORKSPACE_ROOT={workspace_root} "
-            f"Expected: WORKSPACE_ROOT should be set to your host directory path (e.g., $HOME) "
-            f"that contains all files Claude might reference. "
-            f"This path gets mounted to /workspace inside the Docker container."
-        )
-
-    # Check against other dangerous paths
-    if str(resolved_workspace) in DANGEROUS_WORKSPACE_PATHS:
-        raise RuntimeError(
-            f"Security Error: WORKSPACE_ROOT '{workspace_root}' is set to a dangerous system directory. "
-            f"This would give access to critical system files. "
-            f"Please set WORKSPACE_ROOT to a specific project directory."
-        )
-
-    # Additional check: prevent filesystem root
-    if resolved_workspace.parent == resolved_workspace:
-        raise RuntimeError(
-            f"Security Error: WORKSPACE_ROOT '{workspace_root}' cannot be the filesystem root. "
-            f"This would give access to the entire filesystem. "
-            f"Please set WORKSPACE_ROOT to a specific project directory."
-        )
-
-
-def get_security_root() -> Path:
-    """
-    Determine the security boundary for file access.
+        path: Path to check
 
     Returns:
-        Path object representing the security root directory
+        True if the path is dangerous and should not be accessed
     """
-    # In Docker: use /workspace (container directory)
-    # In tests/direct mode: use WORKSPACE_ROOT (host directory)
-    if CONTAINER_WORKSPACE.exists():
-        # Running in Docker container
-        return CONTAINER_WORKSPACE
-    elif WORKSPACE_ROOT:
-        # Running in tests or direct mode with WORKSPACE_ROOT set
-        return Path(WORKSPACE_ROOT).resolve()
-    else:
-        # Fallback for backward compatibility (should not happen in normal usage)
-        return Path.home()
-
-
-# Validate security on import if WORKSPACE_ROOT is set
-if WORKSPACE_ROOT:
-    validate_workspace_security(WORKSPACE_ROOT)
-
-# Export the computed security root
-SECURITY_ROOT = get_security_root()
+    try:
+        resolved = path.resolve()
+        return str(resolved) in DANGEROUS_PATHS or resolved.parent == resolved
+    except Exception:
+        return True  # If we can't resolve, consider it dangerous
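
The new is_dangerous_path() helper replaces the import-time WORKSPACE_ROOT validation with a per-path check. A usage sketch on a POSIX system; the sample paths are illustrative only.

from pathlib import Path

from utils.security_config import is_dangerous_path

assert is_dangerous_path(Path("/"))     # filesystem root: resolved.parent == resolved
assert is_dangerous_path(Path("/etc"))  # listed in DANGEROUS_PATHS
assert not is_dangerous_path(Path("/home/alice/projects/demo"))  # ordinary project dir
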
diff --git a/utils/storage_backend.py b/utils/storage_backend.py
new file mode 100644
index 0000000..0951aab
--- /dev/null
+++ b/utils/storage_backend.py
@@ -0,0 +1,113 @@
+"""
+In-memory storage backend for conversation threads
+
+This module provides a thread-safe, in-memory alternative to Redis for storing
+conversation contexts. It's designed for ephemeral MCP server sessions where
+conversations only need to persist during a single Claude session.
+
+⚠️ PROCESS-SPECIFIC STORAGE: This storage is confined to a single Python process.
+   Data stored in one process is NOT accessible from other processes or subprocesses.
+   This is why simulator tests that run server.py as separate subprocesses cannot
+   share conversation state between tool calls.
+
+Key Features:
+- Thread-safe operations using locks
+- TTL support with automatic expiration
+- Background cleanup thread for memory management
+- Singleton pattern for consistent state within a single process
+- Drop-in replacement for Redis storage (for single-process scenarios)
+"""
+
+import logging
+import os
+import threading
+import time
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+class InMemoryStorage:
+    """Thread-safe in-memory storage for conversation threads"""
+
+    def __init__(self):
+        self._store: dict[str, tuple[str, float]] = {}
+        self._lock = threading.Lock()
+        # Match Redis behavior: cleanup interval based on conversation timeout
+        # Run cleanup at 1/10th of the timeout interval (e.g., 18 min for a 3-hour timeout)
+        timeout_hours = int(os.getenv("CONVERSATION_TIMEOUT_HOURS", "3"))
+        self._cleanup_interval = (timeout_hours * 3600) // 10
+        self._cleanup_interval = max(300, self._cleanup_interval)  # Minimum 5 minutes
+        self._shutdown = False
+
+        # Start background cleanup thread
+        self._cleanup_thread = threading.Thread(target=self._cleanup_worker, daemon=True)
+        self._cleanup_thread.start()
+
+        logger.info(
+            f"In-memory storage initialized with {timeout_hours}h timeout, cleanup every {self._cleanup_interval//60}m"
+        )
+
+    def set_with_ttl(self, key: str, ttl_seconds: int, value: str) -> None:
+        """Store value with expiration time"""
+        with self._lock:
+            expires_at = time.time() + ttl_seconds
+            self._store[key] = (value, expires_at)
+            logger.debug(f"Stored key {key} with TTL {ttl_seconds}s")
+
+    def get(self, key: str) -> Optional[str]:
+        """Retrieve value if not expired"""
+        with self._lock:
+            if key in self._store:
+                value, expires_at = self._store[key]
+                if time.time() < expires_at:
+                    logger.debug(f"Retrieved key {key}")
+                    return value
+                else:
+                    # Clean up expired entry
+                    del self._store[key]
+                    logger.debug(f"Key {key} expired and removed")
+            return None
+
+    def setex(self, key: str, ttl_seconds: int, value: str) -> None:
+        """Redis-compatible setex method"""
+        self.set_with_ttl(key, ttl_seconds, value)
+
+    def _cleanup_worker(self):
+        """Background thread that periodically cleans up expired entries"""
+        while not self._shutdown:
+            time.sleep(self._cleanup_interval)
+            self._cleanup_expired()
+
+    def _cleanup_expired(self):
+        """Remove all expired entries"""
+        with self._lock:
+            current_time = time.time()
+            expired_keys = [k for k, (_, exp) in self._store.items() if exp < current_time]
+            for key in expired_keys:
+                del self._store[key]
+
+            if expired_keys:
+                logger.debug(f"Cleaned up {len(expired_keys)} expired conversation threads")
+
+    def shutdown(self):
+        """Graceful shutdown of background thread"""
+        self._shutdown = True
+        if self._cleanup_thread.is_alive():
+            self._cleanup_thread.join(timeout=1)
+
+
+# Global singleton instance
+_storage_instance = None
+_storage_lock = threading.Lock()
+
+
+def get_storage_backend() -> InMemoryStorage:
+    """Get the global storage instance (singleton pattern)"""
+    global _storage_instance
+    if _storage_instance is None:
+        with _storage_lock:
+            if _storage_instance is None:
+                _storage_instance = InMemoryStorage()
+                logger.info("Initialized in-memory conversation storage")
+    return _storage_instance
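
A quick usage sketch for the backend above; the thread key and JSON payload are illustrative. Within a single process, get_storage_backend() always returns the same instance, so tools called in the same Claude session share conversation state.

from utils.storage_backend import get_storage_backend

storage = get_storage_backend()
storage.setex("thread:abc123", 3600, '{"turns": []}')  # Redis-style set-with-TTL
assert storage.get("thread:abc123") == '{"turns": []}'
assert get_storage_backend() is storage  # singleton: same instance on every call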