Migration from Docker to Standalone Python Server (#73)
* Migration from docker to standalone server Migration handling Fixed tests Use simpler in-memory storage Support for concurrent logging to disk Simplified direct connections to localhost * Migration from docker / redis to standalone script Updated tests Updated run script Fixed requirements Use dotenv Ask if user would like to install MCP in Claude Desktop once Updated docs * More cleanup and references to docker removed * Cleanup * Comments * Fixed tests * Fix GitHub Actions workflow for standalone Python architecture - Install requirements-dev.txt for pytest and testing dependencies - Remove Docker setup from simulation tests (now standalone) - Simplify linting job to use requirements-dev.txt - Update simulation tests to run directly without Docker Fixes unit test failures in CI due to missing pytest dependency. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Remove simulation tests from GitHub Actions - Removed simulation-tests job that makes real API calls - Keep only unit tests (mocked, no API costs) and linting - Simulation tests should be run manually with real API keys - Reduces CI costs and complexity GitHub Actions now only runs: - Unit tests (569 tests, all mocked) - Code quality checks (ruff, black) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com> * Fixed tests * Fixed tests --------- Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
9d72545ecd
commit
4151c3c3a5
@@ -1,72 +0,0 @@
|
|||||||
# Git
|
|
||||||
.git
|
|
||||||
.gitignore
|
|
||||||
|
|
||||||
# Python
|
|
||||||
__pycache__
|
|
||||||
*.pyc
|
|
||||||
*.pyo
|
|
||||||
*.pyd
|
|
||||||
.Python
|
|
||||||
*.egg
|
|
||||||
*.egg-info/
|
|
||||||
dist/
|
|
||||||
build/
|
|
||||||
*.so
|
|
||||||
.coverage
|
|
||||||
.pytest_cache/
|
|
||||||
htmlcov/
|
|
||||||
.tox/
|
|
||||||
.mypy_cache/
|
|
||||||
.ruff_cache/
|
|
||||||
|
|
||||||
# Virtual environments
|
|
||||||
venv/
|
|
||||||
env/
|
|
||||||
ENV/
|
|
||||||
.venv/
|
|
||||||
|
|
||||||
# IDE
|
|
||||||
.vscode/
|
|
||||||
.idea/
|
|
||||||
*.swp
|
|
||||||
*.swo
|
|
||||||
*~
|
|
||||||
.DS_Store
|
|
||||||
|
|
||||||
# Docker
|
|
||||||
.dockerignore
|
|
||||||
Dockerfile
|
|
||||||
docker-compose*.yml
|
|
||||||
|
|
||||||
# Environment files (contain secrets)
|
|
||||||
.env
|
|
||||||
.env.*
|
|
||||||
*.env
|
|
||||||
|
|
||||||
# Documentation
|
|
||||||
*.md
|
|
||||||
docs/
|
|
||||||
examples/
|
|
||||||
|
|
||||||
# Tests
|
|
||||||
tests/
|
|
||||||
test_*.py
|
|
||||||
*_test.py
|
|
||||||
|
|
||||||
# CI/CD
|
|
||||||
.github/
|
|
||||||
.gitlab-ci.yml
|
|
||||||
.travis.yml
|
|
||||||
|
|
||||||
# Logs
|
|
||||||
*.log
|
|
||||||
logs/
|
|
||||||
|
|
||||||
# Temporary files
|
|
||||||
tmp/
|
|
||||||
temp/
|
|
||||||
*.tmp
|
|
||||||
|
|
||||||
# OS specific
|
|
||||||
Thumbs.db
|
|
||||||
14
.env.example
14
.env.example
@@ -1,11 +1,6 @@
|
|||||||
# Zen MCP Server Environment Configuration
|
# Zen MCP Server Environment Configuration
|
||||||
# Copy this file to .env and fill in your values
|
# Copy this file to .env and fill in your values
|
||||||
|
|
||||||
# Required: Workspace root directory for file access
|
|
||||||
# This should be the HOST path that contains all files Claude might reference
|
|
||||||
# Defaults to $HOME for direct usage, auto-configured for Docker
|
|
||||||
WORKSPACE_ROOT=/Users/your-username
|
|
||||||
|
|
||||||
# API Keys - At least one is required
|
# API Keys - At least one is required
|
||||||
#
|
#
|
||||||
# IMPORTANT: Use EITHER OpenRouter OR native APIs (Gemini/OpenAI), not both!
|
# IMPORTANT: Use EITHER OpenRouter OR native APIs (Gemini/OpenAI), not both!
|
||||||
@@ -27,10 +22,7 @@ XAI_API_KEY=your_xai_api_key_here
|
|||||||
OPENROUTER_API_KEY=your_openrouter_api_key_here
|
OPENROUTER_API_KEY=your_openrouter_api_key_here
|
||||||
|
|
||||||
# Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
|
# Option 3: Use custom API endpoints for local models (Ollama, vLLM, LM Studio, etc.)
|
||||||
# IMPORTANT: Since this server ALWAYS runs in Docker, you MUST use host.docker.internal instead of localhost
|
# CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example
|
||||||
# ❌ WRONG: http://localhost:11434/v1 (Docker containers cannot reach localhost)
|
|
||||||
# ✅ CORRECT: http://host.docker.internal:11434/v1 (Docker can reach host services)
|
|
||||||
# CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example (NOT localhost!)
|
|
||||||
# CUSTOM_API_KEY= # Empty for Ollama (no auth needed)
|
# CUSTOM_API_KEY= # Empty for Ollama (no auth needed)
|
||||||
# CUSTOM_MODEL_NAME=llama3.2 # Default model name
|
# CUSTOM_MODEL_NAME=llama3.2 # Default model name
|
||||||
|
|
||||||
@@ -95,9 +87,7 @@ DEFAULT_THINKING_MODE_THINKDEEP=high
|
|||||||
# Override the default location of custom_models.json
|
# Override the default location of custom_models.json
|
||||||
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
|
# CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
|
||||||
|
|
||||||
# Optional: Redis configuration (auto-configured for Docker)
|
# Note: Redis is no longer used - conversations are stored in memory
|
||||||
# The Redis URL for conversation threading - typically managed by docker-compose
|
|
||||||
# REDIS_URL=redis://redis:6379/0
|
|
||||||
|
|
||||||
# Optional: Conversation timeout (hours)
|
# Optional: Conversation timeout (hours)
|
||||||
# How long AI-to-AI conversation threads persist before expiring
|
# How long AI-to-AI conversation threads persist before expiring
|
||||||
|
|||||||
6
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
6
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -11,8 +11,8 @@ body:
|
|||||||
id: version
|
id: version
|
||||||
attributes:
|
attributes:
|
||||||
label: Project Version
|
label: Project Version
|
||||||
description: "Which version are you using? (e.g., Docker image tag like `latest` or `v1.2.3`, or a git commit SHA)"
|
description: "Which version are you using? (To see version: ./run-server.sh -v)"
|
||||||
placeholder: "e.g., ghcr.io/beehiveinnovations/zen-mcp-server:latest"
|
placeholder: "e.g., 5.1.0"
|
||||||
validations:
|
validations:
|
||||||
required: true
|
required: true
|
||||||
|
|
||||||
@@ -29,7 +29,7 @@ body:
|
|||||||
id: logs
|
id: logs
|
||||||
attributes:
|
attributes:
|
||||||
label: Relevant Log Output
|
label: Relevant Log Output
|
||||||
description: "Please copy and paste any relevant log output. You can obtain these from the MCP folder by running `docker compose logs`."
|
description: "Please copy and paste any relevant log output. Logs are stored under the `logs` folder in the zen folder. You an also use `./run-server.sh -f` to see logs"
|
||||||
render: shell
|
render: shell
|
||||||
|
|
||||||
- type: dropdown
|
- type: dropdown
|
||||||
|
|||||||
12
.github/ISSUE_TEMPLATE/documentation.yml
vendored
12
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -33,7 +33,7 @@ body:
|
|||||||
attributes:
|
attributes:
|
||||||
label: What is wrong with the documentation?
|
label: What is wrong with the documentation?
|
||||||
description: "Please describe the problem. Be specific about what is unclear, incorrect, or missing."
|
description: "Please describe the problem. Be specific about what is unclear, incorrect, or missing."
|
||||||
placeholder: "The Docker setup command in the README is missing the `--pull=always` flag, which means users might use an outdated image version."
|
placeholder: "README is missing some details"
|
||||||
validations:
|
validations:
|
||||||
required: true
|
required: true
|
||||||
|
|
||||||
@@ -42,16 +42,8 @@ body:
|
|||||||
attributes:
|
attributes:
|
||||||
label: Suggested Improvement
|
label: Suggested Improvement
|
||||||
description: "How can we make it better? If you can, please provide the exact text or changes you'd like to see."
|
description: "How can we make it better? If you can, please provide the exact text or changes you'd like to see."
|
||||||
placeholder: |
|
placeholder: "Please improve...."
|
||||||
Change:
|
|
||||||
```
|
|
||||||
docker run ghcr.io/beehiveinnovations/zen-mcp-server:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
To:
|
|
||||||
```
|
|
||||||
docker run --pull=always ghcr.io/beehiveinnovations/zen-mcp-server:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
- type: dropdown
|
- type: dropdown
|
||||||
id: audience
|
id: audience
|
||||||
|
|||||||
2
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
2
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -33,7 +33,7 @@ body:
|
|||||||
label: Feature Category
|
label: Feature Category
|
||||||
description: What type of enhancement is this?
|
description: What type of enhancement is this?
|
||||||
options:
|
options:
|
||||||
- New Gemini tool (chat, codereview, debug, etc.)
|
- New tool (chat, codereview, debug, etc.)
|
||||||
- Workflow improvement
|
- Workflow improvement
|
||||||
- Integration enhancement
|
- Integration enhancement
|
||||||
- Performance optimization
|
- Performance optimization
|
||||||
|
|||||||
12
.github/ISSUE_TEMPLATE/tool_addition.yml
vendored
12
.github/ISSUE_TEMPLATE/tool_addition.yml
vendored
@@ -1,12 +1,12 @@
|
|||||||
name: 🛠️ New Gemini Tool Proposal
|
name: 🛠️ New Gemini Tool Proposal
|
||||||
description: Propose a new Gemini MCP tool (e.g., `summarize`, `testgen`, `refactor`)
|
description: Propose a new Zen MCP tool (e.g., `summarize`, `fixer`, `refactor`)
|
||||||
labels: ["enhancement", "new-tool"]
|
labels: ["enhancement", "new-tool"]
|
||||||
body:
|
body:
|
||||||
- type: input
|
- type: input
|
||||||
id: tool-name
|
id: tool-name
|
||||||
attributes:
|
attributes:
|
||||||
label: Proposed Tool Name
|
label: Proposed Tool Name
|
||||||
description: "What would the tool be called? (e.g., `summarize`, `testgen`, `refactor`)"
|
description: "What would the tool be called? (e.g., `summarize`, `docgen`, `refactor`)"
|
||||||
placeholder: "e.g., `docgen`"
|
placeholder: "e.g., `docgen`"
|
||||||
validations:
|
validations:
|
||||||
required: true
|
required: true
|
||||||
@@ -15,7 +15,7 @@ body:
|
|||||||
id: purpose
|
id: purpose
|
||||||
attributes:
|
attributes:
|
||||||
label: What is the primary purpose of this tool?
|
label: What is the primary purpose of this tool?
|
||||||
description: "Explain the tool's core function and the value it provides to developers using Claude + Gemini."
|
description: "Explain the tool's core function and the value it provides to developers using Claude + Zen."
|
||||||
placeholder: "This tool will automatically generate comprehensive documentation from code, extracting class and function signatures, docstrings, and creating usage examples."
|
placeholder: "This tool will automatically generate comprehensive documentation from code, extracting class and function signatures, docstrings, and creating usage examples."
|
||||||
validations:
|
validations:
|
||||||
required: true
|
required: true
|
||||||
@@ -27,9 +27,9 @@ body:
|
|||||||
description: "Show how a user would invoke this tool through Claude and what the expected output would look like."
|
description: "Show how a user would invoke this tool through Claude and what the expected output would look like."
|
||||||
placeholder: |
|
placeholder: |
|
||||||
**User prompt to Claude:**
|
**User prompt to Claude:**
|
||||||
"Use gemini to generate documentation for my entire src/ directory"
|
"Use zen to generate documentation for my entire src/ directory"
|
||||||
|
|
||||||
**Expected Gemini tool behavior:**
|
**Expected behavior:**
|
||||||
- Analyze all Python files in src/
|
- Analyze all Python files in src/
|
||||||
- Extract classes, functions, and their docstrings
|
- Extract classes, functions, and their docstrings
|
||||||
- Generate structured markdown documentation
|
- Generate structured markdown documentation
|
||||||
@@ -61,7 +61,7 @@ body:
|
|||||||
id: system-prompt
|
id: system-prompt
|
||||||
attributes:
|
attributes:
|
||||||
label: Proposed System Prompt (Optional)
|
label: Proposed System Prompt (Optional)
|
||||||
description: "If you have ideas for how Gemini should be prompted for this tool, share them here."
|
description: "If you have ideas for how zen should be prompted for this tool, share them here."
|
||||||
placeholder: |
|
placeholder: |
|
||||||
You are an expert technical documentation generator. Your task is to create comprehensive, user-friendly documentation from source code...
|
You are an expert technical documentation generator. Your task is to create comprehensive, user-friendly documentation from source code...
|
||||||
|
|
||||||
|
|||||||
197
.github/workflows/build_and_publish_docker.yml
vendored
197
.github/workflows/build_and_publish_docker.yml
vendored
@@ -1,197 +0,0 @@
|
|||||||
name: Build and Publish Docker Image to GHCR
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags: [ 'v*' ]
|
|
||||||
repository_dispatch:
|
|
||||||
types: [docker-build]
|
|
||||||
|
|
||||||
env:
|
|
||||||
REGISTRY: ghcr.io
|
|
||||||
IMAGE_NAME: beehiveinnovations/zen-mcp-server
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-and-push:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
packages: write
|
|
||||||
id-token: write
|
|
||||||
attestations: write
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
token: ${{ secrets.PAT }}
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Log in to GitHub Container Registry
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
registry: ${{ env.REGISTRY }}
|
|
||||||
username: ${{ github.actor }}
|
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Extract metadata (tags, labels) for Docker
|
|
||||||
id: meta
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
||||||
tags: |
|
|
||||||
type=ref,event=tag
|
|
||||||
type=raw,value=latest,enable=${{ github.ref_type == 'tag' }}
|
|
||||||
type=sha,prefix=main-,enable=${{ github.event_name == 'repository_dispatch' }}
|
|
||||||
type=raw,value=pr-${{ github.event.client_payload.pr_number }},enable=${{ github.event_name == 'repository_dispatch' && github.event.client_payload.pr_number != '' }}
|
|
||||||
|
|
||||||
- name: Build and push Docker image
|
|
||||||
id: build
|
|
||||||
uses: docker/build-push-action@v5
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
platforms: linux/amd64,linux/arm64
|
|
||||||
push: true
|
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
|
||||||
cache-from: type=gha
|
|
||||||
cache-to: type=gha,mode=max
|
|
||||||
|
|
||||||
- name: Generate artifact attestation
|
|
||||||
uses: actions/attest-build-provenance@v1
|
|
||||||
with:
|
|
||||||
subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
||||||
subject-digest: ${{ steps.build.outputs.digest }}
|
|
||||||
push-to-registry: true
|
|
||||||
|
|
||||||
- name: Generate usage instructions
|
|
||||||
run: |
|
|
||||||
echo "## 🐳 Docker Image Published Successfully!" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "**Image Registry:** GitHub Container Registry (GHCR)" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "**Built Tags:** ${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|
||||||
# Extract the first tag for the main pull command
|
|
||||||
MAIN_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n1)
|
|
||||||
|
|
||||||
echo "### 📥 Pull the Image" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "docker pull $MAIN_TAG" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|
||||||
echo "### ⚙️ Claude Desktop Configuration" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "\`\`\`json" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "{" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"mcpServers\": {" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"gemini\": {" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"command\": \"docker\"," >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"args\": [" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"run\", \"--rm\", \"-i\"," >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"-e\", \"GEMINI_API_KEY\"," >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"$MAIN_TAG\"" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " ]," >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"env\": {" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " \"GEMINI_API_KEY\": \"your-gemini-api-key-here\"" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " }" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " }" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo " }" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|
||||||
echo "### 🏷️ All Available Tags" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "Built and pushed the following tags:" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "${{ steps.meta.outputs.tags }}" | sed 's/^/- `/' | sed 's/$/`/' >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|
||||||
if [[ "${{ github.event_name }}" == "repository_dispatch" ]]; then
|
|
||||||
echo "**Note:** This is a development build triggered by PR #${{ github.event.client_payload.pr_number }}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "Use this image for testing the changes from that PR." >> $GITHUB_STEP_SUMMARY
|
|
||||||
elif [[ "${{ github.ref_type }}" == "tag" ]]; then
|
|
||||||
echo "**Note:** This is a release build from tag ${{ github.ref_name }}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "This image represents a stable release version." >> $GITHUB_STEP_SUMMARY
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "### 📦 View in GitHub Container Registry" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "[View all versions and tags →](https://github.com/${{ github.repository }}/pkgs/container/zen-mcp-server)" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|
||||||
- name: Update README with latest image info
|
|
||||||
if: false # Temporarily disabled as agreed with repo author
|
|
||||||
# if: github.ref_type == 'tag' || (github.event_name == 'repository_dispatch' && github.event.client_payload.pr_number != '')
|
|
||||||
run: |
|
|
||||||
# Checkout main branch to avoid detached HEAD when pushing
|
|
||||||
git fetch origin main:main
|
|
||||||
git checkout main
|
|
||||||
# Extract the primary image tag for updating README
|
|
||||||
if [[ "${{ github.ref_type }}" == "tag" ]]; then
|
|
||||||
# For tag releases, use the version tag
|
|
||||||
LATEST_TAG="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}"
|
|
||||||
UPDATE_TYPE="release"
|
|
||||||
elif [[ "${{ github.event_name }}" == "repository_dispatch" && "${{ github.event.client_payload.pr_number }}" != "" ]]; then
|
|
||||||
# For repository_dispatch (PR builds), use the PR tag
|
|
||||||
LATEST_TAG="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:pr-${{ github.event.client_payload.pr_number }}"
|
|
||||||
UPDATE_TYPE="development"
|
|
||||||
else
|
|
||||||
# For manual repository_dispatch without PR number, use latest tag
|
|
||||||
LATEST_TAG="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest"
|
|
||||||
UPDATE_TYPE="manual"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Updating README.md with latest Docker image: $LATEST_TAG"
|
|
||||||
|
|
||||||
# Update README.md with the latest image tag
|
|
||||||
sed -i.bak "s|ghcr\.io/[^/]*/zen-mcp-server:[a-zA-Z0-9\._-]*|$LATEST_TAG|g" README.md
|
|
||||||
|
|
||||||
# Also update docs/user-guides/installation.md
|
|
||||||
sed -i.bak "s|ghcr\.io/[^/]*/zen-mcp-server:[a-zA-Z0-9\._-]*|$LATEST_TAG|g" docs/user-guides/installation.md
|
|
||||||
|
|
||||||
# Also update docs/user-guides/configuration.md
|
|
||||||
sed -i.bak "s|ghcr\.io/[^/]*/zen-mcp-server:[a-zA-Z0-9\._-]*|$LATEST_TAG|g" docs/user-guides/configuration.md
|
|
||||||
|
|
||||||
# Check if there are any changes
|
|
||||||
if git diff --quiet README.md docs/user-guides/installation.md docs/user-guides/configuration.md; then
|
|
||||||
echo "No changes needed in documentation"
|
|
||||||
else
|
|
||||||
echo "Documentation updated with new image tag"
|
|
||||||
|
|
||||||
# Configure git for automated commit
|
|
||||||
git config user.name "github-actions[bot]"
|
|
||||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
||||||
|
|
||||||
# Add and commit changes
|
|
||||||
git add README.md docs/user-guides/installation.md docs/user-guides/configuration.md
|
|
||||||
|
|
||||||
if [[ "$UPDATE_TYPE" == "release" ]]; then
|
|
||||||
git commit -m "docs: Update Docker image references to ${{ github.ref_name }}
|
|
||||||
|
|
||||||
Automated update after Docker image publish for release ${{ github.ref_name }}.
|
|
||||||
All documentation now references the latest stable image.
|
|
||||||
|
|
||||||
🤖 Automated by GitHub Actions"
|
|
||||||
elif [[ "$UPDATE_TYPE" == "development" ]]; then
|
|
||||||
git commit -m "docs: Update Docker image references for PR #${{ github.event.client_payload.pr_number }}
|
|
||||||
|
|
||||||
Automated update after Docker image publish for development build.
|
|
||||||
Documentation updated to reference the latest development image.
|
|
||||||
|
|
||||||
🤖 Automated by GitHub Actions"
|
|
||||||
else
|
|
||||||
git commit -m "docs: Update Docker image references to latest
|
|
||||||
|
|
||||||
Automated update after manual Docker image build.
|
|
||||||
Documentation updated to reference the latest image.
|
|
||||||
|
|
||||||
🤖 Automated by GitHub Actions"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Push changes back to the repository
|
|
||||||
git push --set-upstream origin main
|
|
||||||
|
|
||||||
echo "### 📝 Documentation Updated" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "README.md and user guides have been automatically updated with the new Docker image tag: \`$LATEST_TAG\`" >> $GITHUB_STEP_SUMMARY
|
|
||||||
fi
|
|
||||||
|
|
||||||
32
.github/workflows/docker-test.yml
vendored
32
.github/workflows/docker-test.yml
vendored
@@ -1,32 +0,0 @@
|
|||||||
name: Docker Build Test
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
branches: [ main ]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
docker-build-test:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Test Docker build
|
|
||||||
uses: docker/build-push-action@v5
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
push: false
|
|
||||||
tags: test:latest
|
|
||||||
cache-from: type=gha
|
|
||||||
cache-to: type=gha,mode=max
|
|
||||||
|
|
||||||
- name: Build test summary
|
|
||||||
run: |
|
|
||||||
echo "### ✅ Docker Build Test Passed" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
|
||||||
echo "Docker image builds successfully and is ready for production." >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|
||||||
61
.github/workflows/test.yml
vendored
61
.github/workflows/test.yml
vendored
@@ -25,6 +25,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
pip install -r requirements-dev.txt
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
run: |
|
run: |
|
||||||
@@ -49,7 +50,7 @@ jobs:
|
|||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install ruff black
|
pip install -r requirements-dev.txt
|
||||||
|
|
||||||
- name: Run black formatter check
|
- name: Run black formatter check
|
||||||
run: black --check .
|
run: black --check .
|
||||||
@@ -57,61 +58,3 @@ jobs:
|
|||||||
- name: Run ruff linter
|
- name: Run ruff linter
|
||||||
run: ruff check .
|
run: ruff check .
|
||||||
|
|
||||||
simulation-tests:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# Only run simulation tests on main branch pushes (requires manual API key setup)
|
|
||||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v4
|
|
||||||
with:
|
|
||||||
python-version: "3.11"
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install -r requirements.txt
|
|
||||||
|
|
||||||
- name: Check API key availability
|
|
||||||
id: check-key
|
|
||||||
run: |
|
|
||||||
has_key=false
|
|
||||||
if [ -n "${{ secrets.GEMINI_API_KEY }}" ] || [ -n "${{ secrets.OPENAI_API_KEY }}" ]; then
|
|
||||||
has_key=true
|
|
||||||
echo "✅ API key(s) found - running simulation tests"
|
|
||||||
else
|
|
||||||
echo "⚠️ No API keys configured - skipping simulation tests"
|
|
||||||
fi
|
|
||||||
echo "api_key_available=$has_key" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
- name: Set up Docker
|
|
||||||
if: steps.check-key.outputs.api_key_available == 'true'
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Build Docker image
|
|
||||||
if: steps.check-key.outputs.api_key_available == 'true'
|
|
||||||
run: |
|
|
||||||
docker compose build
|
|
||||||
|
|
||||||
- name: Run simulation tests
|
|
||||||
if: steps.check-key.outputs.api_key_available == 'true'
|
|
||||||
run: |
|
|
||||||
# Start services
|
|
||||||
docker compose up -d
|
|
||||||
|
|
||||||
# Wait for services to be ready
|
|
||||||
sleep 10
|
|
||||||
|
|
||||||
# Run communication simulator tests
|
|
||||||
python communication_simulator_test.py --skip-docker
|
|
||||||
env:
|
|
||||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
|
||||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
||||||
|
|
||||||
- name: Skip simulation tests
|
|
||||||
if: steps.check-key.outputs.api_key_available == 'false'
|
|
||||||
run: |
|
|
||||||
echo "🔒 Simulation tests skipped (no API keys configured)"
|
|
||||||
echo "To enable simulation tests, add GEMINI_API_KEY and/or OPENAI_API_KEY as repository secrets"
|
|
||||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -174,3 +174,10 @@ FEATURE_*.md
|
|||||||
|
|
||||||
# Local user instructions
|
# Local user instructions
|
||||||
CLAUDE.local.md
|
CLAUDE.local.md
|
||||||
|
|
||||||
|
# Standalone mode files
|
||||||
|
.zen_venv/
|
||||||
|
.docker_cleaned
|
||||||
|
logs/
|
||||||
|
*.backup
|
||||||
|
/.desktop_configured
|
||||||
|
|||||||
119
CLAUDE.md
119
CLAUDE.md
@@ -20,31 +20,31 @@ This script automatically runs:
|
|||||||
- Ruff linting with auto-fix
|
- Ruff linting with auto-fix
|
||||||
- Black code formatting
|
- Black code formatting
|
||||||
- Import sorting with isort
|
- Import sorting with isort
|
||||||
- Complete unit test suite (361 tests)
|
- Complete unit test suite
|
||||||
- Verification that all checks pass 100%
|
- Verification that all checks pass 100%
|
||||||
|
|
||||||
### Server Management
|
### Server Management
|
||||||
|
|
||||||
#### Start/Restart the Server
|
#### Setup/Update the Server
|
||||||
```bash
|
```bash
|
||||||
# Start or restart the Docker containers
|
# Run setup script (handles everything)
|
||||||
./run-server.sh
|
./run-server.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
This script will:
|
This script will:
|
||||||
- Build/rebuild Docker images if needed
|
- Set up Python virtual environment
|
||||||
- Start the MCP server container (`zen-mcp-server`)
|
- Install all dependencies
|
||||||
- Start the Redis container (`zen-mcp-redis`)
|
- Create/update .env file
|
||||||
- Set up proper networking and volumes
|
- Configure MCP with Claude
|
||||||
|
- Verify API keys
|
||||||
|
|
||||||
#### Check Server Status
|
#### View Logs
|
||||||
```bash
|
```bash
|
||||||
# Check if containers are running
|
# Follow logs in real-time
|
||||||
docker ps
|
./run-server.sh -f
|
||||||
|
|
||||||
# Look for these containers:
|
# Or manually view logs
|
||||||
# - zen-mcp-server
|
tail -f logs/mcp_server.log
|
||||||
# - zen-mcp-redis
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Log Management
|
### Log Management
|
||||||
@@ -52,26 +52,26 @@ docker ps
|
|||||||
#### View Server Logs
|
#### View Server Logs
|
||||||
```bash
|
```bash
|
||||||
# View last 500 lines of server logs
|
# View last 500 lines of server logs
|
||||||
docker exec zen-mcp-server tail -n 500 /tmp/mcp_server.log
|
tail -n 500 logs/mcp_server.log
|
||||||
|
|
||||||
# Follow logs in real-time
|
# Follow logs in real-time
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_server.log
|
tail -f logs/mcp_server.log
|
||||||
|
|
||||||
# View specific number of lines (replace 100 with desired count)
|
# View specific number of lines
|
||||||
docker exec zen-mcp-server tail -n 100 /tmp/mcp_server.log
|
tail -n 100 logs/mcp_server.log
|
||||||
|
|
||||||
# Search logs for specific patterns
|
# Search logs for specific patterns
|
||||||
docker exec zen-mcp-server grep "ERROR" /tmp/mcp_server.log
|
grep "ERROR" logs/mcp_server.log
|
||||||
docker exec zen-mcp-server grep "tool_name" /tmp/mcp_server.log
|
grep "tool_name" logs/mcp_activity.log
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Monitor Tool Executions Only
|
#### Monitor Tool Executions Only
|
||||||
```bash
|
```bash
|
||||||
# View tool activity log (focused on tool calls and completions)
|
# View tool activity log (focused on tool calls and completions)
|
||||||
docker exec zen-mcp-server tail -n 100 /tmp/mcp_activity.log
|
tail -n 100 logs/mcp_activity.log
|
||||||
|
|
||||||
# Follow tool activity in real-time
|
# Follow tool activity in real-time
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_activity.log
|
tail -f logs/mcp_activity.log
|
||||||
|
|
||||||
# Use the dedicated log monitor (shows tool calls, completions, errors)
|
# Use the dedicated log monitor (shows tool calls, completions, errors)
|
||||||
python log_monitor.py
|
python log_monitor.py
|
||||||
@@ -86,36 +86,21 @@ The `log_monitor.py` script provides a real-time view of:
|
|||||||
#### All Available Log Files
|
#### All Available Log Files
|
||||||
```bash
|
```bash
|
||||||
# Main server log (all activity)
|
# Main server log (all activity)
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_server.log
|
tail -f logs/mcp_server.log
|
||||||
|
|
||||||
# Tool activity only (TOOL_CALL, TOOL_COMPLETED, etc.)
|
# Tool activity only (TOOL_CALL, TOOL_COMPLETED, etc.)
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_activity.log
|
tail -f logs/mcp_activity.log
|
||||||
|
|
||||||
# Debug information
|
# Debug information (if configured)
|
||||||
docker exec zen-mcp-server tail -f /tmp/gemini_debug.log
|
tail -f logs/debug.log
|
||||||
|
|
||||||
# Overflow logs (when main log gets too large)
|
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_server_overflow.log
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Debug Container Issues
|
|
||||||
```bash
|
|
||||||
# Check container logs (Docker level)
|
|
||||||
docker logs zen-mcp-server
|
|
||||||
|
|
||||||
# Execute interactive shell in container
|
|
||||||
docker exec -it zen-mcp-server /bin/bash
|
|
||||||
|
|
||||||
# Check Redis container logs
|
|
||||||
docker logs zen-mcp-redis
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Testing
|
### Testing
|
||||||
|
|
||||||
Simulation tests are available to test the MCP server in a 'live' scenario, using your configured
|
Simulation tests are available to test the MCP server in a 'live' scenario, using your configured
|
||||||
API keys to ensure the models are working and the server is able to communicate back and forth.
|
API keys to ensure the models are working and the server is able to communicate back and forth.
|
||||||
IMPORTANT: Any time any code is changed or updated, you MUST first restart it with ./run-server.sh OR
|
|
||||||
pass `--rebuild` to the `communication_simulator_test.py` script (if running it for the first time after changes) so that it's able to restart and use the latest code.
|
**IMPORTANT**: After any code changes, restart your Claude session for the changes to take effect.
|
||||||
|
|
||||||
#### Run All Simulator Tests
|
#### Run All Simulator Tests
|
||||||
```bash
|
```bash
|
||||||
@@ -124,9 +109,6 @@ python communication_simulator_test.py
|
|||||||
|
|
||||||
# Run tests with verbose output
|
# Run tests with verbose output
|
||||||
python communication_simulator_test.py --verbose
|
python communication_simulator_test.py --verbose
|
||||||
|
|
||||||
# Force rebuild environment before testing
|
|
||||||
python communication_simulator_test.py --rebuild
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Run Individual Simulator Tests (Recommended)
|
#### Run Individual Simulator Tests (Recommended)
|
||||||
@@ -138,17 +120,13 @@ python communication_simulator_test.py --list-tests
|
|||||||
python communication_simulator_test.py --individual basic_conversation
|
python communication_simulator_test.py --individual basic_conversation
|
||||||
python communication_simulator_test.py --individual content_validation
|
python communication_simulator_test.py --individual content_validation
|
||||||
python communication_simulator_test.py --individual cross_tool_continuation
|
python communication_simulator_test.py --individual cross_tool_continuation
|
||||||
python communication_simulator_test.py --individual logs_validation
|
python communication_simulator_test.py --individual memory_validation
|
||||||
python communication_simulator_test.py --individual redis_validation
|
|
||||||
|
|
||||||
# Run multiple specific tests (alternative approach)
|
# Run multiple specific tests
|
||||||
python communication_simulator_test.py --tests basic_conversation content_validation
|
python communication_simulator_test.py --tests basic_conversation content_validation
|
||||||
|
|
||||||
# Run individual test with verbose output for debugging
|
# Run individual test with verbose output for debugging
|
||||||
python communication_simulator_test.py --individual logs_validation --verbose
|
python communication_simulator_test.py --individual memory_validation --verbose
|
||||||
|
|
||||||
# Individual tests provide full Docker setup and teardown per test
|
|
||||||
# This ensures clean state and better error isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Available simulator tests include:
|
Available simulator tests include:
|
||||||
@@ -158,8 +136,7 @@ Available simulator tests include:
|
|||||||
- `cross_tool_continuation` - Cross-tool conversation continuation scenarios
|
- `cross_tool_continuation` - Cross-tool conversation continuation scenarios
|
||||||
- `cross_tool_comprehensive` - Comprehensive cross-tool file deduplication and continuation
|
- `cross_tool_comprehensive` - Comprehensive cross-tool file deduplication and continuation
|
||||||
- `line_number_validation` - Line number handling validation across tools
|
- `line_number_validation` - Line number handling validation across tools
|
||||||
- `logs_validation` - Docker logs validation
|
- `memory_validation` - Conversation memory validation
|
||||||
- `redis_validation` - Redis conversation memory validation
|
|
||||||
- `model_thinking_config` - Model-specific thinking configuration behavior
|
- `model_thinking_config` - Model-specific thinking configuration behavior
|
||||||
- `o3_model_selection` - O3 model selection and usage validation
|
- `o3_model_selection` - O3 model selection and usage validation
|
||||||
- `ollama_custom_url` - Ollama custom URL endpoint functionality
|
- `ollama_custom_url` - Ollama custom URL endpoint functionality
|
||||||
@@ -193,12 +170,13 @@ python -m pytest tests/ --cov=. --cov-report=html
|
|||||||
#### Before Making Changes
|
#### Before Making Changes
|
||||||
1. Ensure virtual environment is activated: `source venv/bin/activate`
|
1. Ensure virtual environment is activated: `source venv/bin/activate`
|
||||||
2. Run quality checks: `./code_quality_checks.sh`
|
2. Run quality checks: `./code_quality_checks.sh`
|
||||||
3. Check server is running: `./run-server.sh`
|
3. Check logs to ensure server is healthy: `tail -n 50 logs/mcp_server.log`
|
||||||
|
|
||||||
#### After Making Changes
|
#### After Making Changes
|
||||||
1. Run quality checks again: `./code_quality_checks.sh`
|
1. Run quality checks again: `./code_quality_checks.sh`
|
||||||
2. Run relevant simulator tests: `python communication_simulator_test.py --individual <test_name>`
|
2. Run relevant simulator tests: `python communication_simulator_test.py --individual <test_name>`
|
||||||
3. Check logs for any issues: `docker exec zen-mcp-server tail -n 100 /tmp/mcp_server.log`
|
3. Check logs for any issues: `tail -n 100 logs/mcp_server.log`
|
||||||
|
4. Restart Claude session to use updated code
|
||||||
|
|
||||||
#### Before Committing/PR
|
#### Before Committing/PR
|
||||||
1. Final quality check: `./code_quality_checks.sh`
|
1. Final quality check: `./code_quality_checks.sh`
|
||||||
@@ -207,18 +185,17 @@ python -m pytest tests/ --cov=. --cov-report=html
|
|||||||
|
|
||||||
### Common Troubleshooting
|
### Common Troubleshooting
|
||||||
|
|
||||||
#### Container Issues
|
#### Server Issues
|
||||||
```bash
|
```bash
|
||||||
# Restart containers if they're not responding
|
# Check if Python environment is set up correctly
|
||||||
docker stop zen-mcp-server zen-mcp-redis
|
|
||||||
./run-server.sh
|
./run-server.sh
|
||||||
|
|
||||||
# Check container resource usage
|
# View recent errors
|
||||||
docker stats zen-mcp-server
|
grep "ERROR" logs/mcp_server.log | tail -20
|
||||||
|
|
||||||
# Remove containers and rebuild from scratch
|
# Check virtual environment
|
||||||
docker rm -f zen-mcp-server zen-mcp-redis
|
which python
|
||||||
./run-server.sh
|
# Should show: .../zen-mcp-server/.zen_venv/bin/python
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Test Failures
|
#### Test Failures
|
||||||
@@ -227,10 +204,10 @@ docker rm -f zen-mcp-server zen-mcp-redis
|
|||||||
python communication_simulator_test.py --individual <test_name> --verbose
|
python communication_simulator_test.py --individual <test_name> --verbose
|
||||||
|
|
||||||
# Check server logs during test execution
|
# Check server logs during test execution
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_server.log
|
tail -f logs/mcp_server.log
|
||||||
|
|
||||||
# Run tests while keeping containers running for debugging
|
# Run tests with debug output
|
||||||
python communication_simulator_test.py --keep-logs
|
LOG_LEVEL=DEBUG python communication_simulator_test.py --individual <test_name>
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Linting Issues
|
#### Linting Issues
|
||||||
@@ -249,19 +226,19 @@ isort --check-only .
|
|||||||
### File Structure Context
|
### File Structure Context
|
||||||
|
|
||||||
- `./code_quality_checks.sh` - Comprehensive quality check script
|
- `./code_quality_checks.sh` - Comprehensive quality check script
|
||||||
- `./run-server.sh` - Docker container setup and management
|
- `./run-server.sh` - Server setup and management
|
||||||
- `communication_simulator_test.py` - End-to-end testing framework
|
- `communication_simulator_test.py` - End-to-end testing framework
|
||||||
- `simulator_tests/` - Individual test modules
|
- `simulator_tests/` - Individual test modules
|
||||||
- `tests/` - Unit test suite
|
- `tests/` - Unit test suite
|
||||||
- `tools/` - MCP tool implementations
|
- `tools/` - MCP tool implementations
|
||||||
- `providers/` - AI provider implementations
|
- `providers/` - AI provider implementations
|
||||||
- `systemprompts/` - System prompt definitions
|
- `systemprompts/` - System prompt definitions
|
||||||
|
- `logs/` - Server log files
|
||||||
|
|
||||||
### Environment Requirements
|
### Environment Requirements
|
||||||
|
|
||||||
- Python 3.8+ with virtual environment activated
|
- Python 3.9+ with virtual environment
|
||||||
- Docker and Docker Compose installed
|
|
||||||
- All dependencies from `requirements.txt` installed
|
- All dependencies from `requirements.txt` installed
|
||||||
- Proper API keys configured in environment or config files
|
- Proper API keys configured in `.env` file
|
||||||
|
|
||||||
This guide provides everything needed to efficiently work with the Zen MCP Server codebase using Claude. Always run quality checks before and after making changes to ensure code integrity.
|
This guide provides everything needed to efficiently work with the Zen MCP Server codebase using Claude. Always run quality checks before and after making changes to ensure code integrity.
|
||||||
29
Dockerfile
29
Dockerfile
@@ -1,29 +0,0 @@
|
|||||||
# Use Python 3.11 slim image for smaller size and consistent environment
|
|
||||||
FROM python:3.11-slim
|
|
||||||
|
|
||||||
# Set working directory inside the container
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Install git (required for some Python packages that may need it)
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Copy requirements first to leverage Docker layer caching
|
|
||||||
COPY requirements.txt .
|
|
||||||
|
|
||||||
# Install Python dependencies
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
# Copy the rest of the application
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
# Create a non-root user to run the application (security best practice)
|
|
||||||
RUN useradd -m -u 1000 mcpuser && \
|
|
||||||
chown -R mcpuser:mcpuser /app
|
|
||||||
|
|
||||||
# Switch to non-root user
|
|
||||||
USER mcpuser
|
|
||||||
|
|
||||||
# Set the entrypoint to run the server
|
|
||||||
ENTRYPOINT ["python", "server.py"]
|
|
||||||
80
README.md
80
README.md
@@ -44,7 +44,7 @@ Because these AI models [clearly aren't when they get chatty →](docs/ai_banter
|
|||||||
## Quick Navigation
|
## Quick Navigation
|
||||||
|
|
||||||
- **Getting Started**
|
- **Getting Started**
|
||||||
- [Quickstart](#quickstart-5-minutes) - Get running in 5 minutes with Docker
|
- [Quickstart](#quickstart-5-minutes) - Get running in 5 minutes
|
||||||
- [Available Tools](#available-tools) - Overview of all tools
|
- [Available Tools](#available-tools) - Overview of all tools
|
||||||
- [AI-to-AI Conversations](#ai-to-ai-conversation-threading) - Multi-turn conversations
|
- [AI-to-AI Conversations](#ai-to-ai-conversation-threading) - Multi-turn conversations
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ The final implementation resulted in a 26% improvement in JSON parsing performan
|
|||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- Docker Desktop installed ([Download here](https://www.docker.com/products/docker-desktop/))
|
- Python 3.10+ (3.12 recommended)
|
||||||
- Git
|
- Git
|
||||||
- **Windows users**: WSL2 is required for Claude Code CLI
|
- **Windows users**: WSL2 is required for Claude Code CLI
|
||||||
|
|
||||||
@@ -158,16 +158,16 @@ The final implementation resulted in a 26% improvement in JSON parsing performan
|
|||||||
git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
|
git clone https://github.com/BeehiveInnovations/zen-mcp-server.git
|
||||||
cd zen-mcp-server
|
cd zen-mcp-server
|
||||||
|
|
||||||
# One-command setup (includes Redis for AI conversations)
|
# One-command setup
|
||||||
./run-server.sh
|
./run-server.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
**What this does:**
|
**What this does:**
|
||||||
- **Builds Docker images** with all dependencies (including Redis for conversation threading)
|
- **Sets up everything automatically** - Python environment, dependencies, configuration
|
||||||
- **Creates .env file** (automatically uses `$GEMINI_API_KEY` and `$OPENAI_API_KEY` if set in environment)
|
- **Configures Claude integrations** - Adds to Claude Code CLI and guides Desktop setup
|
||||||
- **Starts Redis service** for AI-to-AI conversation memory
|
- **Ready to use immediately** - No manual configuration needed
|
||||||
- **Starts MCP server** with providers based on available API keys
|
|
||||||
- **Adds Zen to Claude Code automatically**
|
**After updates:** Always run `./run-server.sh` again after `git pull` to ensure everything stays current.
|
||||||
|
|
||||||
### 3. Add Your API Keys
|
### 3. Add Your API Keys
|
||||||
|
|
||||||
@@ -180,74 +180,26 @@ nano .env
|
|||||||
# OPENAI_API_KEY=your-openai-api-key-here # For O3 model
|
# OPENAI_API_KEY=your-openai-api-key-here # For O3 model
|
||||||
# OPENROUTER_API_KEY=your-openrouter-key # For OpenRouter (see docs/custom_models.md)
|
# OPENROUTER_API_KEY=your-openrouter-key # For OpenRouter (see docs/custom_models.md)
|
||||||
|
|
||||||
# For local models (Ollama, vLLM, etc.) - Note: Use host.docker.internal for Docker networking:
|
# For local models (Ollama, vLLM, etc.):
|
||||||
# CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example (NOT localhost!)
|
# CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example
|
||||||
# CUSTOM_API_KEY= # Empty for Ollama
|
# CUSTOM_API_KEY= # Empty for Ollama
|
||||||
# CUSTOM_MODEL_NAME=llama3.2 # Default model
|
# CUSTOM_MODEL_NAME=llama3.2 # Default model
|
||||||
|
|
||||||
# WORKSPACE_ROOT=/Users/your-username (automatically configured)
|
|
||||||
|
|
||||||
# Note: At least one API key OR custom URL is required
|
# Note: At least one API key OR custom URL is required
|
||||||
|
|
||||||
# After making changes to .env, restart the server:
|
|
||||||
# ./run-server.sh
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Restart MCP Server**: This step is important. You will need to `./run-server.sh` again for it to
|
**No restart needed**: The server reads the .env file each time Claude calls a tool, so changes take effect immediately.
|
||||||
pick the changes made to `.env` otherwise the server will be unable to use your newly edited keys. Please also
|
|
||||||
`./run-server.sh` any time in the future you modify the `.env` file.
|
|
||||||
|
|
||||||
**Next**: Now run `claude` from your project folder using the terminal for it to connect to the newly added mcp server.
|
**Next**: Now run `claude` from your project folder using the terminal for it to connect to the newly added mcp server.
|
||||||
If you were already running a `claude` code session, please exit and start a new session.
|
If you were already running a `claude` code session, please exit and start a new session.
|
||||||
|
|
||||||
#### If Setting up for Claude Desktop
|
#### If Setting up for Claude Desktop
|
||||||
|
|
||||||
1. **Launch Claude Desktop**
|
**Need the exact configuration?** Run `./run-server.sh -c` to display the platform-specific setup instructions with correct paths.
|
||||||
- Open Claude Desktop
|
|
||||||
- Go to **Settings** → **Developer** → **Edit Config**
|
|
||||||
|
|
||||||
This will open a folder revealing `claude_desktop_config.json`.
|
1. **Open Claude Desktop config**: Settings → Developer → Edit Config
|
||||||
|
2. **Copy the configuration** shown by `./run-server.sh -c` into your `claude_desktop_config.json`
|
||||||
2. **Update Docker Configuration**
|
3. **Restart Claude Desktop** for changes to take effect
|
||||||
|
|
||||||
The setup script shows you the exact configuration. It looks like this. When you ran `run-server.sh` it should
|
|
||||||
have produced a configuration for you to copy:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"mcpServers": {
|
|
||||||
"zen": {
|
|
||||||
"command": "docker",
|
|
||||||
"args": [
|
|
||||||
"exec",
|
|
||||||
"-i",
|
|
||||||
"zen-mcp-server",
|
|
||||||
"python",
|
|
||||||
"server.py"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Paste the above into `claude_desktop_config.json`. If you have several other MCP servers listed, simply add this below the rest after a `,` comma:
|
|
||||||
```json
|
|
||||||
... other mcp servers ... ,
|
|
||||||
|
|
||||||
"zen": {
|
|
||||||
"command": "docker",
|
|
||||||
"args": [
|
|
||||||
"exec",
|
|
||||||
"-i",
|
|
||||||
"zen-mcp-server",
|
|
||||||
"python",
|
|
||||||
"server.py"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Restart Claude Desktop**
|
|
||||||
Completely quit and restart Claude Desktop for the changes to take effect.
|
|
||||||
|
|
||||||
### 4. Start Using It!
|
### 4. Start Using It!
|
||||||
|
|
||||||
@@ -546,7 +498,7 @@ OPENAI_API_KEY=your-openai-key
|
|||||||
- **API Keys**: Native APIs (Gemini, OpenAI, X.AI), OpenRouter, or Custom endpoints (Ollama, vLLM)
|
- **API Keys**: Native APIs (Gemini, OpenAI, X.AI), OpenRouter, or Custom endpoints (Ollama, vLLM)
|
||||||
- **Model Selection**: Auto mode or specific model defaults
|
- **Model Selection**: Auto mode or specific model defaults
|
||||||
- **Usage Restrictions**: Control which models can be used for cost control
|
- **Usage Restrictions**: Control which models can be used for cost control
|
||||||
- **Conversation Settings**: Timeout, turn limits, Redis configuration
|
- **Conversation Settings**: Timeout, turn limits, memory configuration
|
||||||
- **Thinking Modes**: Token allocation for extended reasoning
|
- **Thinking Modes**: Token allocation for extended reasoning
|
||||||
- **Logging**: Debug levels and operational visibility
|
- **Logging**: Debug levels and operational visibility
|
||||||
|
|
||||||
|
|||||||
@@ -1,17 +1,11 @@
|
|||||||
{
|
{
|
||||||
"comment": "Example Claude Desktop configuration for Zen MCP Server",
|
"comment": "Example Claude Desktop configuration for Zen MCP Server",
|
||||||
"comment2": "For Docker setup, use examples/claude_config_docker_home.json",
|
"comment2": "Run './run-server.sh -c' to get the exact configuration for your system",
|
||||||
"comment3": "For platform-specific examples, see the examples/ directory",
|
"comment3": "For platform-specific examples, see the examples/ directory",
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"zen": {
|
"zen": {
|
||||||
"command": "docker",
|
"command": "/path/to/zen-mcp-server/.zen_venv/bin/python",
|
||||||
"args": [
|
"args": ["/path/to/zen-mcp-server/server.py"]
|
||||||
"exec",
|
|
||||||
"-i",
|
|
||||||
"zen-mcp-server",
|
|
||||||
"python",
|
|
||||||
"server.py"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -9,14 +9,57 @@ set -e # Exit on any error
|
|||||||
echo "🔍 Running Code Quality Checks for Zen MCP Server"
|
echo "🔍 Running Code Quality Checks for Zen MCP Server"
|
||||||
echo "================================================="
|
echo "================================================="
|
||||||
|
|
||||||
# Check if virtual environment is activated
|
# Determine Python command
|
||||||
if [[ "$VIRTUAL_ENV" == "" ]]; then
|
if [[ -f ".zen_venv/bin/python" ]]; then
|
||||||
echo "❌ Virtual environment not activated!"
|
PYTHON_CMD=".zen_venv/bin/python"
|
||||||
echo "Please run: source venv/bin/activate"
|
PIP_CMD=".zen_venv/bin/pip"
|
||||||
|
echo "✅ Using venv"
|
||||||
|
elif [[ -n "$VIRTUAL_ENV" ]]; then
|
||||||
|
PYTHON_CMD="python"
|
||||||
|
PIP_CMD="pip"
|
||||||
|
echo "✅ Using activated virtual environment: $VIRTUAL_ENV"
|
||||||
|
else
|
||||||
|
echo "❌ No virtual environment found!"
|
||||||
|
echo "Please run: ./run-server.sh first to set up the environment"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
echo ""
|
||||||
|
|
||||||
echo "✅ Virtual environment detected: $VIRTUAL_ENV"
|
# Check and install dev dependencies if needed
|
||||||
|
echo "🔍 Checking development dependencies..."
|
||||||
|
DEV_DEPS_NEEDED=false
|
||||||
|
|
||||||
|
# Check each dev dependency
|
||||||
|
for tool in ruff black isort pytest; do
|
||||||
|
# Check if tool exists in venv or in PATH
|
||||||
|
if [[ -f ".zen_venv/bin/$tool" ]] || command -v $tool &> /dev/null; then
|
||||||
|
continue
|
||||||
|
else
|
||||||
|
DEV_DEPS_NEEDED=true
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$DEV_DEPS_NEEDED" = true ]; then
|
||||||
|
echo "📦 Installing development dependencies..."
|
||||||
|
$PIP_CMD install -q -r requirements-dev.txt
|
||||||
|
echo "✅ Development dependencies installed"
|
||||||
|
else
|
||||||
|
echo "✅ Development dependencies already installed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set tool paths
|
||||||
|
if [[ -f ".zen_venv/bin/ruff" ]]; then
|
||||||
|
RUFF=".zen_venv/bin/ruff"
|
||||||
|
BLACK=".zen_venv/bin/black"
|
||||||
|
ISORT=".zen_venv/bin/isort"
|
||||||
|
PYTEST=".zen_venv/bin/pytest"
|
||||||
|
else
|
||||||
|
RUFF="ruff"
|
||||||
|
BLACK="black"
|
||||||
|
ISORT="isort"
|
||||||
|
PYTEST="pytest"
|
||||||
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Step 1: Linting and Formatting
|
# Step 1: Linting and Formatting
|
||||||
@@ -24,16 +67,16 @@ echo "📋 Step 1: Running Linting and Formatting Checks"
|
|||||||
echo "--------------------------------------------------"
|
echo "--------------------------------------------------"
|
||||||
|
|
||||||
echo "🔧 Running ruff linting with auto-fix..."
|
echo "🔧 Running ruff linting with auto-fix..."
|
||||||
ruff check --fix
|
$RUFF check --fix
|
||||||
|
|
||||||
echo "🎨 Running black code formatting..."
|
echo "🎨 Running black code formatting..."
|
||||||
black .
|
$BLACK .
|
||||||
|
|
||||||
echo "📦 Running import sorting with isort..."
|
echo "📦 Running import sorting with isort..."
|
||||||
isort .
|
$ISORT . --skip-glob=".zen_venv/*"
|
||||||
|
|
||||||
echo "✅ Verifying all linting passes..."
|
echo "✅ Verifying all linting passes..."
|
||||||
ruff check
|
$RUFF check
|
||||||
|
|
||||||
echo "✅ Step 1 Complete: All linting and formatting checks passed!"
|
echo "✅ Step 1 Complete: All linting and formatting checks passed!"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -42,8 +85,8 @@ echo ""
|
|||||||
echo "🧪 Step 2: Running Complete Unit Test Suite"
|
echo "🧪 Step 2: Running Complete Unit Test Suite"
|
||||||
echo "---------------------------------------------"
|
echo "---------------------------------------------"
|
||||||
|
|
||||||
echo "🏃 Running all 361 unit tests..."
|
echo "🏃 Running all unit tests..."
|
||||||
python -m pytest tests/ -v
|
$PYTHON_CMD -m pytest tests/ -v -x
|
||||||
|
|
||||||
echo "✅ Step 2 Complete: All unit tests passed!"
|
echo "✅ Step 2 Complete: All unit tests passed!"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -54,7 +97,7 @@ echo "=================================="
|
|||||||
echo "✅ Linting (ruff): PASSED"
|
echo "✅ Linting (ruff): PASSED"
|
||||||
echo "✅ Formatting (black): PASSED"
|
echo "✅ Formatting (black): PASSED"
|
||||||
echo "✅ Import sorting (isort): PASSED"
|
echo "✅ Import sorting (isort): PASSED"
|
||||||
echo "✅ Unit tests (361 tests): PASSED"
|
echo "✅ Unit tests: PASSED"
|
||||||
echo ""
|
echo ""
|
||||||
echo "🚀 Your code is ready for commit and GitHub Actions!"
|
echo "🚀 Your code is ready for commit and GitHub Actions!"
|
||||||
echo "💡 Remember to add simulator tests if you modified tools"
|
echo "💡 Remember to add simulator tests if you modified tools"
|
||||||
@@ -6,18 +6,18 @@ by simulating real Claude CLI communications and validating conversation
|
|||||||
continuity, file handling, deduplication features, and clarification scenarios.
|
continuity, file handling, deduplication features, and clarification scenarios.
|
||||||
|
|
||||||
Test Flow:
|
Test Flow:
|
||||||
1. Setup fresh Docker environment with clean containers
|
1. Setup standalone server environment
|
||||||
2. Load and run individual test modules
|
2. Load and run individual test modules
|
||||||
3. Validate system behavior through logs and Redis
|
3. Validate system behavior through logs and memory
|
||||||
4. Cleanup and report results
|
4. Cleanup and report results
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python communication_simulator_test.py [--verbose] [--keep-logs] [--tests TEST_NAME...] [--individual TEST_NAME] [--rebuild]
|
python communication_simulator_test.py [--verbose] [--keep-logs] [--tests TEST_NAME...] [--individual TEST_NAME] [--setup]
|
||||||
|
|
||||||
--tests: Run specific tests only (space-separated)
|
--tests: Run specific tests only (space-separated)
|
||||||
--list-tests: List all available tests
|
--list-tests: List all available tests
|
||||||
--individual: Run a single test individually
|
--individual: Run a single test individually
|
||||||
--rebuild: Force rebuild Docker environment using run-server.sh
|
--setup: Force setup standalone server environment using run-server.sh
|
||||||
|
|
||||||
Available tests:
|
Available tests:
|
||||||
basic_conversation - Basic conversation flow with chat tool
|
basic_conversation - Basic conversation flow with chat tool
|
||||||
@@ -25,8 +25,8 @@ Available tests:
|
|||||||
per_tool_deduplication - File deduplication for individual tools
|
per_tool_deduplication - File deduplication for individual tools
|
||||||
cross_tool_continuation - Cross-tool conversation continuation scenarios
|
cross_tool_continuation - Cross-tool conversation continuation scenarios
|
||||||
cross_tool_comprehensive - Comprehensive cross-tool integration testing
|
cross_tool_comprehensive - Comprehensive cross-tool integration testing
|
||||||
logs_validation - Docker logs validation
|
line_number_validation - Line number handling validation across tools
|
||||||
redis_validation - Redis conversation memory validation
|
memory_validation - Conversation memory validation
|
||||||
model_thinking_config - Model thinking configuration testing
|
model_thinking_config - Model thinking configuration testing
|
||||||
o3_model_selection - O3 model selection and routing testing
|
o3_model_selection - O3 model selection and routing testing
|
||||||
ollama_custom_url - Ollama custom URL configuration testing
|
ollama_custom_url - Ollama custom URL configuration testing
|
||||||
@@ -45,11 +45,11 @@ Examples:
|
|||||||
# Run only basic conversation and content validation tests
|
# Run only basic conversation and content validation tests
|
||||||
python communication_simulator_test.py --tests basic_conversation content_validation
|
python communication_simulator_test.py --tests basic_conversation content_validation
|
||||||
|
|
||||||
# Run a single test individually (with full Docker setup)
|
# Run a single test individually (with full standalone setup)
|
||||||
python communication_simulator_test.py --individual content_validation
|
python communication_simulator_test.py --individual content_validation
|
||||||
|
|
||||||
# Force rebuild Docker environment before running tests
|
# Force setup standalone server environment before running tests
|
||||||
python communication_simulator_test.py --rebuild
|
python communication_simulator_test.py --setup
|
||||||
|
|
||||||
# List available tests
|
# List available tests
|
||||||
python communication_simulator_test.py --list-tests
|
python communication_simulator_test.py --list-tests
|
||||||
@@ -68,15 +68,15 @@ class CommunicationSimulator:
|
|||||||
"""Simulates real-world Claude CLI communication with MCP Gemini server"""
|
"""Simulates real-world Claude CLI communication with MCP Gemini server"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, verbose: bool = False, keep_logs: bool = False, selected_tests: list[str] = None, rebuild: bool = False
|
self, verbose: bool = False, keep_logs: bool = False, selected_tests: list[str] = None, setup: bool = False
|
||||||
):
|
):
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.keep_logs = keep_logs
|
self.keep_logs = keep_logs
|
||||||
self.selected_tests = selected_tests or []
|
self.selected_tests = selected_tests or []
|
||||||
self.rebuild = rebuild
|
self.setup = setup
|
||||||
self.temp_dir = None
|
self.temp_dir = None
|
||||||
self.container_name = "zen-mcp-server"
|
self.server_process = None
|
||||||
self.redis_container = "zen-mcp-redis"
|
self.python_path = self._get_python_path()
|
||||||
|
|
||||||
# Import test registry
|
# Import test registry
|
||||||
from simulator_tests import TEST_REGISTRY
|
from simulator_tests import TEST_REGISTRY
|
||||||
@@ -96,6 +96,23 @@ class CommunicationSimulator:
|
|||||||
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def _get_python_path(self) -> str:
|
||||||
|
"""Get the Python path for the virtual environment"""
|
||||||
|
current_dir = os.getcwd()
|
||||||
|
venv_python = os.path.join(current_dir, "venv", "bin", "python")
|
||||||
|
|
||||||
|
if os.path.exists(venv_python):
|
||||||
|
return venv_python
|
||||||
|
|
||||||
|
# Try .zen_venv as fallback
|
||||||
|
zen_venv_python = os.path.join(current_dir, ".zen_venv", "bin", "python")
|
||||||
|
if os.path.exists(zen_venv_python):
|
||||||
|
return zen_venv_python
|
||||||
|
|
||||||
|
# Fallback to system python if venv doesn't exist
|
||||||
|
self.logger.warning("Virtual environment not found, using system python")
|
||||||
|
return "python"
|
||||||
|
|
||||||
def _create_test_runner(self, test_class):
|
def _create_test_runner(self, test_class):
|
||||||
"""Create a test runner function for a test class"""
|
"""Create a test runner function for a test class"""
|
||||||
|
|
||||||
@@ -118,13 +135,13 @@ class CommunicationSimulator:
|
|||||||
self.temp_dir = tempfile.mkdtemp(prefix="mcp_test_")
|
self.temp_dir = tempfile.mkdtemp(prefix="mcp_test_")
|
||||||
self.logger.debug(f"Created temp directory: {self.temp_dir}")
|
self.logger.debug(f"Created temp directory: {self.temp_dir}")
|
||||||
|
|
||||||
# Only run run-server.sh if rebuild is requested
|
# Only run run-server.sh if setup is requested
|
||||||
if self.rebuild:
|
if self.setup:
|
||||||
if not self._run_server_script():
|
if not self._run_server_script():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Always verify containers are running (regardless of rebuild)
|
# Always verify server environment is available
|
||||||
return self._verify_existing_containers()
|
return self._verify_server_environment()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Failed to setup test environment: {e}")
|
self.logger.error(f"Failed to setup test environment: {e}")
|
||||||
@@ -160,29 +177,40 @@ class CommunicationSimulator:
|
|||||||
self.logger.error(f"Failed to run run-server.sh: {e}")
|
self.logger.error(f"Failed to run run-server.sh: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _verify_existing_containers(self) -> bool:
|
def _verify_server_environment(self) -> bool:
|
||||||
"""Verify that required containers are already running (no setup)"""
|
"""Verify that server environment is ready"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("Verifying existing Docker containers...")
|
self.logger.info("Verifying standalone server environment...")
|
||||||
|
|
||||||
result = self._run_command(["docker", "ps", "--format", "{{.Names}}"], capture_output=True)
|
# Check if server.py exists
|
||||||
running_containers = result.stdout.decode().strip().split("\n")
|
server_file = "server.py"
|
||||||
|
if not os.path.exists(server_file):
|
||||||
required = [self.container_name, self.redis_container]
|
self.logger.error(f"Server file not found: {server_file}")
|
||||||
for container in required:
|
self.logger.error("Please ensure you're in the correct directory and server.py exists")
|
||||||
if container not in running_containers:
|
|
||||||
self.logger.error(f"Required container not running: {container}")
|
|
||||||
self.logger.error(
|
|
||||||
"Please start Docker containers first, or use --rebuild to set them up automatically"
|
|
||||||
)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info(f"All required containers are running: {required}")
|
# Check if virtual environment is available
|
||||||
|
if not os.path.exists(self.python_path):
|
||||||
|
self.logger.error(f"Python executable not found: {self.python_path}")
|
||||||
|
self.logger.error("Please run ./run-server.sh first to set up the environment")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check if required dependencies are available
|
||||||
|
try:
|
||||||
|
result = self._run_command([self.python_path, "-c", "import json; print('OK')"], capture_output=True)
|
||||||
|
if result.returncode != 0:
|
||||||
|
self.logger.error("Python environment validation failed")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Python environment check failed: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
self.logger.info("Standalone server environment is ready")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Container verification failed: {e}")
|
self.logger.error(f"Server environment verification failed: {e}")
|
||||||
self.logger.error("Please ensure Docker is running and containers are available, or use --rebuild")
|
self.logger.error("Please ensure the server environment is set up correctly, or use --setup")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def simulate_claude_cli_session(self) -> bool:
|
def simulate_claude_cli_session(self) -> bool:
|
||||||
@@ -348,11 +376,20 @@ class CommunicationSimulator:
|
|||||||
try:
|
try:
|
||||||
self.logger.info("Cleaning up test environment...")
|
self.logger.info("Cleaning up test environment...")
|
||||||
|
|
||||||
# Note: We don't stop Docker services ourselves - let run-server.sh handle Docker lifecycle
|
# Stop any running server processes
|
||||||
|
if self.server_process and self.server_process.poll() is None:
|
||||||
|
self.logger.info("Stopping server process...")
|
||||||
|
self.server_process.terminate()
|
||||||
|
try:
|
||||||
|
self.server_process.wait(timeout=5)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
self.server_process.kill()
|
||||||
|
self.server_process.wait()
|
||||||
|
|
||||||
if not self.keep_logs:
|
if not self.keep_logs:
|
||||||
self.logger.info("Test completed. Docker containers left running (use run-server.sh to manage)")
|
self.logger.info("Test completed. Standalone server process stopped.")
|
||||||
else:
|
else:
|
||||||
self.logger.info("Keeping logs and Docker services running for inspection")
|
self.logger.info("Keeping logs for inspection")
|
||||||
|
|
||||||
# Remove temp directory
|
# Remove temp directory
|
||||||
if self.temp_dir and os.path.exists(self.temp_dir):
|
if self.temp_dir and os.path.exists(self.temp_dir):
|
||||||
@@ -374,11 +411,13 @@ def parse_arguments():
|
|||||||
"""Parse and validate command line arguments"""
|
"""Parse and validate command line arguments"""
|
||||||
parser = argparse.ArgumentParser(description="Zen MCP Communication Simulator Test")
|
parser = argparse.ArgumentParser(description="Zen MCP Communication Simulator Test")
|
||||||
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
|
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
|
||||||
parser.add_argument("--keep-logs", action="store_true", help="Keep Docker services running for log inspection")
|
parser.add_argument("--keep-logs", action="store_true", help="Keep logs for inspection after test completion")
|
||||||
parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
|
parser.add_argument("--tests", "-t", nargs="+", help="Specific tests to run (space-separated)")
|
||||||
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
|
parser.add_argument("--list-tests", action="store_true", help="List available tests and exit")
|
||||||
parser.add_argument("--individual", "-i", help="Run a single test individually")
|
parser.add_argument("--individual", "-i", help="Run a single test individually")
|
||||||
parser.add_argument("--rebuild", action="store_true", help="Force rebuild Docker environment using run-server.sh")
|
parser.add_argument(
|
||||||
|
"--setup", action="store_true", help="Force setup standalone server environment using run-server.sh"
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@@ -453,7 +492,7 @@ def main():
|
|||||||
|
|
||||||
# Initialize simulator consistently for all use cases
|
# Initialize simulator consistently for all use cases
|
||||||
simulator = CommunicationSimulator(
|
simulator = CommunicationSimulator(
|
||||||
verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests, rebuild=args.rebuild
|
verbose=args.verbose, keep_logs=args.keep_logs, selected_tests=args.tests, setup=args.setup
|
||||||
)
|
)
|
||||||
|
|
||||||
# Determine execution mode and run
|
# Determine execution mode and run
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import os
|
|||||||
# These values are used in server responses and for tracking releases
|
# These values are used in server responses and for tracking releases
|
||||||
# IMPORTANT: This is the single source of truth for version and author info
|
# IMPORTANT: This is the single source of truth for version and author info
|
||||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||||
__version__ = "5.0.2"
|
__version__ = "5.1.0"
|
||||||
# Last update date in ISO format
|
# Last update date in ISO format
|
||||||
__updated__ = "2025-06-18"
|
__updated__ = "2025-06-18"
|
||||||
# Primary maintainer
|
# Primary maintainer
|
||||||
@@ -136,7 +136,7 @@ DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION = 2
|
|||||||
# What is NOT limited by this constant:
|
# What is NOT limited by this constant:
|
||||||
# - System prompts added internally by tools
|
# - System prompts added internally by tools
|
||||||
# - File content embedded by tools
|
# - File content embedded by tools
|
||||||
# - Conversation history loaded from Redis
|
# - Conversation history loaded from storage
|
||||||
# - Web search instructions or other internal additions
|
# - Web search instructions or other internal additions
|
||||||
# - Complete prompts sent to external models (managed by model-specific token limits)
|
# - Complete prompts sent to external models (managed by model-specific token limits)
|
||||||
#
|
#
|
||||||
@@ -145,6 +145,5 @@ DEFAULT_CONSENSUS_MAX_INSTANCES_PER_COMBINATION = 2
|
|||||||
MCP_PROMPT_SIZE_LIMIT = 50_000 # 50K characters (user input only)
|
MCP_PROMPT_SIZE_LIMIT = 50_000 # 50K characters (user input only)
|
||||||
|
|
||||||
# Threading configuration
|
# Threading configuration
|
||||||
# Simple Redis-based conversation threading for stateless MCP environment
|
# Simple in-memory conversation threading for stateless MCP environment
|
||||||
# Set REDIS_URL environment variable to connect to your Redis instance
|
# Conversations persist only during the Claude session
|
||||||
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
|
|
||||||
|
|||||||
@@ -1,81 +0,0 @@
|
|||||||
services:
|
|
||||||
redis:
|
|
||||||
image: redis:7-alpine
|
|
||||||
container_name: zen-mcp-redis
|
|
||||||
restart: unless-stopped
|
|
||||||
stop_grace_period: 3s
|
|
||||||
ports:
|
|
||||||
- "6379:6379"
|
|
||||||
volumes:
|
|
||||||
- redis_data:/data
|
|
||||||
command: redis-server --save 60 1 --loglevel warning --maxmemory 512mb --maxmemory-policy allkeys-lru
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
memory: 1G
|
|
||||||
reservations:
|
|
||||||
memory: 128M
|
|
||||||
|
|
||||||
zen-mcp:
|
|
||||||
build: .
|
|
||||||
image: zen-mcp-server:latest
|
|
||||||
container_name: zen-mcp-server
|
|
||||||
restart: unless-stopped
|
|
||||||
stop_grace_period: 5s
|
|
||||||
depends_on:
|
|
||||||
- redis
|
|
||||||
environment:
|
|
||||||
- GEMINI_API_KEY=${GEMINI_API_KEY:-}
|
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
|
||||||
- XAI_API_KEY=${XAI_API_KEY:-}
|
|
||||||
# OpenRouter support
|
|
||||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
|
|
||||||
- CUSTOM_MODELS_CONFIG_PATH=${CUSTOM_MODELS_CONFIG_PATH:-}
|
|
||||||
# Custom API endpoint support (for Ollama, vLLM, etc.)
|
|
||||||
- CUSTOM_API_URL=${CUSTOM_API_URL:-}
|
|
||||||
- CUSTOM_API_KEY=${CUSTOM_API_KEY:-}
|
|
||||||
- CUSTOM_MODEL_NAME=${CUSTOM_MODEL_NAME:-llama3.2}
|
|
||||||
- DEFAULT_MODEL=${DEFAULT_MODEL:-auto}
|
|
||||||
- DEFAULT_THINKING_MODE_THINKDEEP=${DEFAULT_THINKING_MODE_THINKDEEP:-high}
|
|
||||||
- CONVERSATION_TIMEOUT_HOURS=${CONVERSATION_TIMEOUT_HOURS:-3}
|
|
||||||
- MAX_CONVERSATION_TURNS=${MAX_CONVERSATION_TURNS:-20}
|
|
||||||
# Model usage restrictions
|
|
||||||
- OPENAI_ALLOWED_MODELS=${OPENAI_ALLOWED_MODELS:-}
|
|
||||||
- GOOGLE_ALLOWED_MODELS=${GOOGLE_ALLOWED_MODELS:-}
|
|
||||||
- XAI_ALLOWED_MODELS=${XAI_ALLOWED_MODELS:-}
|
|
||||||
- REDIS_URL=redis://redis:6379/0
|
|
||||||
# Use HOME not PWD: Claude needs access to any absolute file path, not just current project,
|
|
||||||
# and Claude Code could be running from multiple locations at the same time
|
|
||||||
- WORKSPACE_ROOT=${WORKSPACE_ROOT:-${HOME}}
|
|
||||||
# USER_HOME helps detect and protect against scanning the home directory root
|
|
||||||
- USER_HOME=${HOME}
|
|
||||||
- LOG_LEVEL=${LOG_LEVEL:-DEBUG}
|
|
||||||
- PYTHONUNBUFFERED=1
|
|
||||||
volumes:
|
|
||||||
- ${WORKSPACE_ROOT:-${HOME}}:/workspace:ro
|
|
||||||
- mcp_logs:/tmp # Shared volume for logs
|
|
||||||
- /etc/localtime:/etc/localtime:ro
|
|
||||||
stdin_open: true
|
|
||||||
tty: true
|
|
||||||
entrypoint: ["python"]
|
|
||||||
command: ["server.py"]
|
|
||||||
|
|
||||||
log-monitor:
|
|
||||||
build: .
|
|
||||||
image: zen-mcp-server:latest
|
|
||||||
container_name: zen-mcp-log-monitor
|
|
||||||
restart: unless-stopped
|
|
||||||
stop_grace_period: 3s
|
|
||||||
depends_on:
|
|
||||||
- zen-mcp
|
|
||||||
environment:
|
|
||||||
- PYTHONUNBUFFERED=1
|
|
||||||
volumes:
|
|
||||||
- mcp_logs:/tmp # Shared volume for logs
|
|
||||||
- /etc/localtime:/etc/localtime:ro
|
|
||||||
entrypoint: ["python"]
|
|
||||||
command: ["log_monitor.py"]
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
redis_data:
|
|
||||||
mcp_logs:
|
|
||||||
@@ -320,32 +320,7 @@ def _get_api_key_for_provider(cls, provider_type: ProviderType) -> Optional[str]
|
|||||||
# ... rest of the method
|
# ... rest of the method
|
||||||
```
|
```
|
||||||
|
|
||||||
### 4. Configure Docker Environment Variables
|
### 4. Register Provider in server.py
|
||||||
|
|
||||||
**CRITICAL**: You must add your provider's environment variables to `docker-compose.yml` for them to be available in the Docker container.
|
|
||||||
|
|
||||||
Add your API key and restriction variables to the `environment` section:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
services:
|
|
||||||
zen-mcp:
|
|
||||||
# ... other configuration ...
|
|
||||||
environment:
|
|
||||||
- GEMINI_API_KEY=${GEMINI_API_KEY:-}
|
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
|
||||||
- EXAMPLE_API_KEY=${EXAMPLE_API_KEY:-} # Add this line
|
|
||||||
# OpenRouter support
|
|
||||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
|
|
||||||
# ... other variables ...
|
|
||||||
# Model usage restrictions
|
|
||||||
- OPENAI_ALLOWED_MODELS=${OPENAI_ALLOWED_MODELS:-}
|
|
||||||
- GOOGLE_ALLOWED_MODELS=${GOOGLE_ALLOWED_MODELS:-}
|
|
||||||
- EXAMPLE_ALLOWED_MODELS=${EXAMPLE_ALLOWED_MODELS:-} # Add this line
|
|
||||||
```
|
|
||||||
|
|
||||||
⚠️ **Without this step**, the Docker container won't have access to your environment variables, and your provider won't be registered even if the API key is set in your `.env` file.
|
|
||||||
|
|
||||||
### 5. Register Provider in server.py
|
|
||||||
|
|
||||||
The `configure_providers()` function in `server.py` handles provider registration. You need to:
|
The `configure_providers()` function in `server.py` handles provider registration. You need to:
|
||||||
|
|
||||||
@@ -672,7 +647,7 @@ if __name__ == "__main__":
|
|||||||
```
|
```
|
||||||
|
|
||||||
The simulator test is crucial because it:
|
The simulator test is crucial because it:
|
||||||
- Validates your provider works in the actual Docker environment
|
- Validates your provider works in the actual server environment
|
||||||
- Tests real API integration, not just mocked behavior
|
- Tests real API integration, not just mocked behavior
|
||||||
- Verifies model name resolution works correctly
|
- Verifies model name resolution works correctly
|
||||||
- Checks conversation continuity across requests
|
- Checks conversation continuity across requests
|
||||||
@@ -799,7 +774,7 @@ Before submitting your PR:
|
|||||||
- [ ] Provider implementation complete with all required methods
|
- [ ] Provider implementation complete with all required methods
|
||||||
- [ ] API key mapping added to `_get_api_key_for_provider()` in `providers/registry.py`
|
- [ ] API key mapping added to `_get_api_key_for_provider()` in `providers/registry.py`
|
||||||
- [ ] Provider added to `PROVIDER_PRIORITY_ORDER` in `registry.py` (if native provider)
|
- [ ] Provider added to `PROVIDER_PRIORITY_ORDER` in `registry.py` (if native provider)
|
||||||
- [ ] **Environment variables added to `docker-compose.yml`** (API key and restrictions)
|
- [ ] **Environment variables added to `.env` file** (API key and restrictions)
|
||||||
- [ ] Provider imported and registered in `server.py`'s `configure_providers()`
|
- [ ] Provider imported and registered in `server.py`'s `configure_providers()`
|
||||||
- [ ] API key checking added to `configure_providers()` function
|
- [ ] API key checking added to `configure_providers()` function
|
||||||
- [ ] Error message updated to include new provider
|
- [ ] Error message updated to include new provider
|
||||||
|
|||||||
@@ -239,9 +239,9 @@ All tools that work with files support **both individual files and entire direct
|
|||||||
|
|
||||||
**The Zen MCP Server's most revolutionary feature** is its ability to maintain conversation context even after Claude's memory resets. This enables truly persistent AI collaboration across multiple sessions and context boundaries.
|
**The Zen MCP Server's most revolutionary feature** is its ability to maintain conversation context even after Claude's memory resets. This enables truly persistent AI collaboration across multiple sessions and context boundaries.
|
||||||
|
|
||||||
### 🔥 **The Breakthrough**
|
### **The Breakthrough**
|
||||||
|
|
||||||
Even when Claude's context resets or compacts, conversations can continue seamlessly because other models (O3, Gemini) have access to the complete conversation history stored in Redis and can "remind" Claude of everything that was discussed.
|
Even when Claude's context resets or compacts, conversations can continue seamlessly because other models (O3, Gemini) have access to the complete conversation history stored in memory and can "remind" Claude of everything that was discussed.
|
||||||
|
|
||||||
### Key Benefits
|
### Key Benefits
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ This server enables **true AI collaboration** between Claude and multiple AI mod
|
|||||||
- **Cross-tool continuation** - Start with one tool (e.g., `analyze`) and continue with another (e.g., `codereview`) using the same conversation thread
|
- **Cross-tool continuation** - Start with one tool (e.g., `analyze`) and continue with another (e.g., `codereview`) using the same conversation thread
|
||||||
- **Both AIs coordinate their approaches** - questioning assumptions, validating solutions, and building on each other's insights
|
- **Both AIs coordinate their approaches** - questioning assumptions, validating solutions, and building on each other's insights
|
||||||
- Each conversation maintains full context while only sending incremental updates
|
- Each conversation maintains full context while only sending incremental updates
|
||||||
- Conversations are automatically managed with Redis for persistence
|
- Conversations are automatically managed in memory for the session duration
|
||||||
|
|
||||||
## Example: Multi-Model AI Coordination
|
## Example: Multi-Model AI Coordination
|
||||||
|
|
||||||
@@ -52,7 +52,7 @@ This server enables **true AI collaboration** between Claude and multiple AI mod
|
|||||||
**Conversation Management:**
|
**Conversation Management:**
|
||||||
- Up to 10 exchanges per conversation (configurable via `MAX_CONVERSATION_TURNS`)
|
- Up to 10 exchanges per conversation (configurable via `MAX_CONVERSATION_TURNS`)
|
||||||
- 3-hour expiry (configurable via `CONVERSATION_TIMEOUT_HOURS`)
|
- 3-hour expiry (configurable via `CONVERSATION_TIMEOUT_HOURS`)
|
||||||
- Thread-safe with Redis persistence across all tools
|
- Thread-safe with in-memory persistence across all tools
|
||||||
- **Image context preservation** - Images and visual references are maintained across conversation turns and tool switches
|
- **Image context preservation** - Images and visual references are maintained across conversation turns and tool switches
|
||||||
|
|
||||||
## Cross-Tool & Cross-Model Continuation Example
|
## Cross-Tool & Cross-Model Continuation Example
|
||||||
|
|||||||
@@ -19,11 +19,6 @@ OPENAI_API_KEY=your-openai-key
|
|||||||
|
|
||||||
**Workspace Root:**
|
**Workspace Root:**
|
||||||
```env
|
```env
|
||||||
# Required: Workspace root directory for file access
|
|
||||||
WORKSPACE_ROOT=/Users/your-username
|
|
||||||
```
|
|
||||||
- Path that contains all files Claude might reference
|
|
||||||
- Defaults to `$HOME` for direct usage, auto-configured for Docker
|
|
||||||
|
|
||||||
### API Keys (At least one required)
|
### API Keys (At least one required)
|
||||||
|
|
||||||
@@ -55,15 +50,14 @@ OPENROUTER_API_KEY=your_openrouter_api_key_here
|
|||||||
**Option 3: Custom API Endpoints (Local models)**
|
**Option 3: Custom API Endpoints (Local models)**
|
||||||
```env
|
```env
|
||||||
# For Ollama, vLLM, LM Studio, etc.
|
# For Ollama, vLLM, LM Studio, etc.
|
||||||
# IMPORTANT: Use host.docker.internal, NOT localhost (Docker requirement)
|
CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example
|
||||||
CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example
|
|
||||||
CUSTOM_API_KEY= # Empty for Ollama
|
CUSTOM_API_KEY= # Empty for Ollama
|
||||||
CUSTOM_MODEL_NAME=llama3.2 # Default model
|
CUSTOM_MODEL_NAME=llama3.2 # Default model
|
||||||
```
|
```
|
||||||
|
|
||||||
**Docker Network Requirements:**
|
**Local Model Connection:**
|
||||||
- ❌ WRONG: `http://localhost:11434/v1` (Docker containers cannot reach localhost)
|
- Use standard localhost URLs since the server runs natively
|
||||||
- ✅ CORRECT: `http://host.docker.internal:11434/v1` (Docker can reach host services)
|
- Example: `http://localhost:11434/v1` for Ollama
|
||||||
|
|
||||||
### Model Configuration
|
### Model Configuration
|
||||||
|
|
||||||
@@ -165,16 +159,12 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast
|
|||||||
CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
|
CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json
|
||||||
```
|
```
|
||||||
|
|
||||||
**Redis Configuration:**
|
|
||||||
```env
|
|
||||||
# Redis URL for conversation threading (auto-configured for Docker)
|
|
||||||
REDIS_URL=redis://redis:6379/0
|
|
||||||
```
|
|
||||||
|
|
||||||
**Conversation Settings:**
|
**Conversation Settings:**
|
||||||
```env
|
```env
|
||||||
# How long AI-to-AI conversation threads persist (hours)
|
# How long AI-to-AI conversation threads persist in memory (hours)
|
||||||
CONVERSATION_TIMEOUT_HOURS=3
|
# Conversations are auto-purged when claude closes its MCP connection or
|
||||||
|
# when a session is quit / re-launched
|
||||||
|
CONVERSATION_TIMEOUT_HOURS=5
|
||||||
|
|
||||||
# Maximum conversation turns (each exchange = 2 turns)
|
# Maximum conversation turns (each exchange = 2 turns)
|
||||||
MAX_CONVERSATION_TURNS=20
|
MAX_CONVERSATION_TURNS=20
|
||||||
@@ -215,7 +205,7 @@ CONVERSATION_TIMEOUT_HOURS=3
|
|||||||
```env
|
```env
|
||||||
# Local models only
|
# Local models only
|
||||||
DEFAULT_MODEL=llama3.2
|
DEFAULT_MODEL=llama3.2
|
||||||
CUSTOM_API_URL=http://host.docker.internal:11434/v1
|
CUSTOM_API_URL=http://localhost:11434/v1
|
||||||
CUSTOM_API_KEY=
|
CUSTOM_API_KEY=
|
||||||
CUSTOM_MODEL_NAME=llama3.2
|
CUSTOM_MODEL_NAME=llama3.2
|
||||||
LOG_LEVEL=DEBUG
|
LOG_LEVEL=DEBUG
|
||||||
@@ -232,9 +222,9 @@ LOG_LEVEL=INFO
|
|||||||
|
|
||||||
## Important Notes
|
## Important Notes
|
||||||
|
|
||||||
**Docker Networking:**
|
**Local Networking:**
|
||||||
- Always use `host.docker.internal` instead of `localhost` for custom APIs
|
- Use standard localhost URLs for local models
|
||||||
- The server runs in Docker and cannot access `localhost` directly
|
- The server runs as a native Python process
|
||||||
|
|
||||||
**API Key Priority:**
|
**API Key Priority:**
|
||||||
- Native APIs take priority over OpenRouter when both are configured
|
- Native APIs take priority over OpenRouter when both are configured
|
||||||
|
|||||||
@@ -8,9 +8,7 @@ Thank you for your interest in contributing to Zen MCP Server! This guide will h
|
|||||||
2. **Clone your fork** locally
|
2. **Clone your fork** locally
|
||||||
3. **Set up the development environment**:
|
3. **Set up the development environment**:
|
||||||
```bash
|
```bash
|
||||||
python -m venv venv
|
./run-server.sh
|
||||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
```
|
||||||
4. **Create a feature branch** from `main`:
|
4. **Create a feature branch** from `main`:
|
||||||
```bash
|
```bash
|
||||||
@@ -28,9 +26,6 @@ We maintain high code quality standards. **All contributions must pass our autom
|
|||||||
Before submitting any PR, run our automated quality check script:
|
Before submitting any PR, run our automated quality check script:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Activate virtual environment first
|
|
||||||
source venv/bin/activate
|
|
||||||
|
|
||||||
# Run the comprehensive quality checks script
|
# Run the comprehensive quality checks script
|
||||||
./code_quality_checks.sh
|
./code_quality_checks.sh
|
||||||
```
|
```
|
||||||
@@ -78,7 +73,7 @@ python communication_simulator_test.py
|
|||||||
2. **Tool changes require simulator tests**:
|
2. **Tool changes require simulator tests**:
|
||||||
- Add simulator tests in `simulator_tests/` for new or modified tools
|
- Add simulator tests in `simulator_tests/` for new or modified tools
|
||||||
- Use realistic prompts that demonstrate the feature
|
- Use realistic prompts that demonstrate the feature
|
||||||
- Validate output through Docker logs
|
- Validate output through server logs
|
||||||
|
|
||||||
3. **Bug fixes require regression tests**:
|
3. **Bug fixes require regression tests**:
|
||||||
- Add a test that would have caught the bug
|
- Add a test that would have caught the bug
|
||||||
@@ -94,7 +89,7 @@ python communication_simulator_test.py
|
|||||||
|
|
||||||
Your PR title MUST follow one of these formats:
|
Your PR title MUST follow one of these formats:
|
||||||
|
|
||||||
**Version Bumping Prefixes** (trigger Docker build + version bump):
|
**Version Bumping Prefixes** (trigger version bump):
|
||||||
- `feat: <description>` - New features (MINOR version bump)
|
- `feat: <description>` - New features (MINOR version bump)
|
||||||
- `fix: <description>` - Bug fixes (PATCH version bump)
|
- `fix: <description>` - Bug fixes (PATCH version bump)
|
||||||
- `breaking: <description>` or `BREAKING CHANGE: <description>` - Breaking changes (MAJOR version bump)
|
- `breaking: <description>` or `BREAKING CHANGE: <description>` - Breaking changes (MAJOR version bump)
|
||||||
@@ -108,10 +103,9 @@ Your PR title MUST follow one of these formats:
|
|||||||
- `ci: <description>` - CI/CD changes
|
- `ci: <description>` - CI/CD changes
|
||||||
- `style: <description>` - Code style changes
|
- `style: <description>` - Code style changes
|
||||||
|
|
||||||
**Docker Build Options**:
|
**Other Options**:
|
||||||
- `docker: <description>` - Force Docker build without version bump
|
- `docs: <description>` - Documentation changes only
|
||||||
- `docs+docker: <description>` - Documentation + Docker build
|
- `chore: <description>` - Maintenance tasks
|
||||||
- `chore+docker: <description>` - Maintenance + Docker build
|
|
||||||
|
|
||||||
#### PR Checklist
|
#### PR Checklist
|
||||||
|
|
||||||
@@ -216,7 +210,7 @@ isort .
|
|||||||
### Test Failures
|
### Test Failures
|
||||||
- Check test output for specific errors
|
- Check test output for specific errors
|
||||||
- Run individual tests for debugging: `pytest tests/test_specific.py -xvs`
|
- Run individual tests for debugging: `pytest tests/test_specific.py -xvs`
|
||||||
- Ensure Docker is running for simulator tests
|
- Ensure server environment is set up for simulator tests
|
||||||
|
|
||||||
### Import Errors
|
### Import Errors
|
||||||
- Verify virtual environment is activated
|
- Verify virtual environment is activated
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ OPENROUTER_API_KEY=your-openrouter-api-key
|
|||||||
> **Note:** Control which models can be used directly in your OpenRouter dashboard at [openrouter.ai](https://openrouter.ai/).
|
> **Note:** Control which models can be used directly in your OpenRouter dashboard at [openrouter.ai](https://openrouter.ai/).
|
||||||
> This gives you centralized control over model access and spending limits.
|
> This gives you centralized control over model access and spending limits.
|
||||||
|
|
||||||
That's it! Docker Compose already includes all necessary configuration.
|
That's it! The setup script handles all necessary configuration automatically.
|
||||||
|
|
||||||
### Option 2: Custom API Setup (Ollama, vLLM, etc.)
|
### Option 2: Custom API Setup (Ollama, vLLM, etc.)
|
||||||
|
|
||||||
@@ -102,49 +102,46 @@ python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-
|
|||||||
#### 2. Configure Environment Variables
|
#### 2. Configure Environment Variables
|
||||||
```bash
|
```bash
|
||||||
# Add to your .env file
|
# Add to your .env file
|
||||||
CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama example
|
CUSTOM_API_URL=http://localhost:11434/v1 # Ollama example
|
||||||
CUSTOM_API_KEY= # Empty for Ollama (no auth needed)
|
CUSTOM_API_KEY= # Empty for Ollama (no auth needed)
|
||||||
CUSTOM_MODEL_NAME=llama3.2 # Default model to use
|
CUSTOM_MODEL_NAME=llama3.2 # Default model to use
|
||||||
```
|
```
|
||||||
|
|
||||||
**Important: Docker URL Configuration**
|
**Local Model Connection**
|
||||||
|
|
||||||
Since the Zen MCP server always runs in Docker, you must use `host.docker.internal` instead of `localhost` to connect to local models running on your host machine:
|
The Zen MCP server runs natively, so you can use standard localhost URLs to connect to local models:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# For Ollama, vLLM, LM Studio, etc. running on your host machine
|
# For Ollama, vLLM, LM Studio, etc. running on your machine
|
||||||
CUSTOM_API_URL=http://host.docker.internal:11434/v1 # Ollama default port (NOT localhost!)
|
CUSTOM_API_URL=http://localhost:11434/v1 # Ollama default port
|
||||||
```
|
```
|
||||||
|
|
||||||
❌ **Never use:** `http://localhost:11434/v1` - Docker containers cannot reach localhost
|
|
||||||
✅ **Always use:** `http://host.docker.internal:11434/v1` - This allows Docker to access host services
|
|
||||||
|
|
||||||
#### 3. Examples for Different Platforms
|
#### 3. Examples for Different Platforms
|
||||||
|
|
||||||
**Ollama:**
|
**Ollama:**
|
||||||
```bash
|
```bash
|
||||||
CUSTOM_API_URL=http://host.docker.internal:11434/v1
|
CUSTOM_API_URL=http://localhost:11434/v1
|
||||||
CUSTOM_API_KEY=
|
CUSTOM_API_KEY=
|
||||||
CUSTOM_MODEL_NAME=llama3.2
|
CUSTOM_MODEL_NAME=llama3.2
|
||||||
```
|
```
|
||||||
|
|
||||||
**vLLM:**
|
**vLLM:**
|
||||||
```bash
|
```bash
|
||||||
CUSTOM_API_URL=http://host.docker.internal:8000/v1
|
CUSTOM_API_URL=http://localhost:8000/v1
|
||||||
CUSTOM_API_KEY=
|
CUSTOM_API_KEY=
|
||||||
CUSTOM_MODEL_NAME=meta-llama/Llama-2-7b-chat-hf
|
CUSTOM_MODEL_NAME=meta-llama/Llama-2-7b-chat-hf
|
||||||
```
|
```
|
||||||
|
|
||||||
**LM Studio:**
|
**LM Studio:**
|
||||||
```bash
|
```bash
|
||||||
CUSTOM_API_URL=http://host.docker.internal:1234/v1
|
CUSTOM_API_URL=http://localhost:1234/v1
|
||||||
CUSTOM_API_KEY=lm-studio # Or any value, LM Studio often requires some key
|
CUSTOM_API_KEY=lm-studio # Or any value, LM Studio often requires some key
|
||||||
CUSTOM_MODEL_NAME=local-model
|
CUSTOM_MODEL_NAME=local-model
|
||||||
```
|
```
|
||||||
|
|
||||||
**text-generation-webui (with OpenAI extension):**
|
**text-generation-webui (with OpenAI extension):**
|
||||||
```bash
|
```bash
|
||||||
CUSTOM_API_URL=http://host.docker.internal:5001/v1
|
CUSTOM_API_URL=http://localhost:5001/v1
|
||||||
CUSTOM_API_KEY=
|
CUSTOM_API_KEY=
|
||||||
CUSTOM_MODEL_NAME=your-loaded-model
|
CUSTOM_MODEL_NAME=your-loaded-model
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -11,49 +11,59 @@ The easiest way to monitor logs is to use the `-f` flag when starting the server
|
|||||||
|
|
||||||
This will start the server and immediately begin tailing the MCP server logs.
|
This will start the server and immediately begin tailing the MCP server logs.
|
||||||
|
|
||||||
## Viewing Logs in Docker
|
|
||||||
|
|
||||||
To monitor MCP server activity in real-time:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Follow MCP server logs (recommended)
|
|
||||||
docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log
|
|
||||||
|
|
||||||
# Or use the -f flag when starting the server
|
|
||||||
./run-server.sh -f
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note**: Due to MCP protocol limitations, container logs don't show tool execution details. Always use the commands above for debugging.
|
|
||||||
|
|
||||||
## Log Files
|
## Log Files
|
||||||
|
|
||||||
Logs are stored in the container's `/tmp/` directory and rotate daily at midnight, keeping 7 days of history:
|
Logs are stored in the `logs/` directory within your project folder:
|
||||||
|
|
||||||
- **`mcp_server.log`** - Main server operations
|
- **`mcp_server.log`** - Main server operations, API calls, and errors
|
||||||
- **`mcp_activity.log`** - Tool calls and conversations
|
- **`mcp_activity.log`** - Tool calls and conversation tracking
|
||||||
- **`mcp_server_overflow.log`** - Overflow protection for large logs
|
|
||||||
|
|
||||||
## Accessing Log Files
|
Log files rotate automatically when they reach 20MB, keeping up to 10 rotated files.
|
||||||
|
|
||||||
To access log files directly:
|
## Viewing Logs
|
||||||
|
|
||||||
|
To monitor MCP server activity:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Enter the container
|
# Follow logs in real-time
|
||||||
docker exec -it zen-mcp-server /bin/sh
|
tail -f logs/mcp_server.log
|
||||||
|
|
||||||
# View current logs
|
# View last 100 lines
|
||||||
cat /tmp/mcp_server.log
|
tail -n 100 logs/mcp_server.log
|
||||||
cat /tmp/mcp_activity.log
|
|
||||||
|
|
||||||
# View previous days (with date suffix)
|
# View activity logs (tool calls only)
|
||||||
cat /tmp/mcp_server.log.2024-06-14
|
tail -f logs/mcp_activity.log
|
||||||
|
|
||||||
|
# Search for specific patterns
|
||||||
|
grep "ERROR" logs/mcp_server.log
|
||||||
|
grep "tool_name" logs/mcp_activity.log
|
||||||
```
|
```
|
||||||
|
|
||||||
## Log Level
|
## Log Level
|
||||||
|
|
||||||
Set verbosity with `LOG_LEVEL` in your `.env` file or docker-compose.yml:
|
Set verbosity with `LOG_LEVEL` in your `.env` file:
|
||||||
|
|
||||||
```yaml
|
```env
|
||||||
environment:
|
# Options: DEBUG, INFO, WARNING, ERROR
|
||||||
- LOG_LEVEL=DEBUG # Options: DEBUG, INFO, WARNING, ERROR
|
LOG_LEVEL=INFO
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- **DEBUG**: Detailed information for debugging
|
||||||
|
- **INFO**: General operational messages (default)
|
||||||
|
- **WARNING**: Warning messages
|
||||||
|
- **ERROR**: Only error messages
|
||||||
|
|
||||||
|
## Log Format
|
||||||
|
|
||||||
|
Logs use a standardized format with timestamps:
|
||||||
|
|
||||||
|
```
|
||||||
|
2024-06-14 10:30:45,123 - module.name - INFO - Message here
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tips
|
||||||
|
|
||||||
|
- Use `./run-server.sh -f` for the easiest log monitoring experience
|
||||||
|
- Activity logs show only tool-related events for cleaner output
|
||||||
|
- Main server logs include all operational details
|
||||||
|
- Logs persist across server restarts
|
||||||
@@ -5,9 +5,7 @@ This project includes comprehensive test coverage through unit tests and integra
|
|||||||
## Running Tests
|
## Running Tests
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
- Python virtual environment activated: `source venv/bin/activate`
|
- Environment set up: `./run-server.sh`
|
||||||
- All dependencies installed: `pip install -r requirements.txt`
|
|
||||||
- Docker containers running (for simulator tests): `./run-server.sh`
|
|
||||||
- Use `./run-server.sh -f` to automatically follow logs after starting
|
- Use `./run-server.sh -f` to automatically follow logs after starting
|
||||||
|
|
||||||
### Unit Tests
|
### Unit Tests
|
||||||
@@ -23,9 +21,9 @@ python -m pytest tests/test_providers.py -xvs
|
|||||||
|
|
||||||
### Simulator Tests
|
### Simulator Tests
|
||||||
|
|
||||||
Simulator tests replicate real-world Claude CLI interactions with the MCP server running in Docker. Unlike unit tests that test isolated functions, simulator tests validate the complete end-to-end flow including:
|
Simulator tests replicate real-world Claude CLI interactions with the standalone MCP server. Unlike unit tests that test isolated functions, simulator tests validate the complete end-to-end flow including:
|
||||||
- Actual MCP protocol communication
|
- Actual MCP protocol communication
|
||||||
- Docker container interactions
|
- Standalone server interactions
|
||||||
- Multi-turn conversations across tools
|
- Multi-turn conversations across tools
|
||||||
- Log output validation
|
- Log output validation
|
||||||
|
|
||||||
@@ -33,7 +31,7 @@ Simulator tests replicate real-world Claude CLI interactions with the MCP server
|
|||||||
|
|
||||||
#### Monitoring Logs During Tests
|
#### Monitoring Logs During Tests
|
||||||
|
|
||||||
**Important**: The MCP stdio protocol interferes with stderr output during tool execution. While server startup logs appear in `docker compose logs`, tool execution logs are only written to file-based logs inside the container. This is a known limitation of the stdio-based MCP protocol and cannot be fixed without changing the MCP implementation.
|
**Important**: The MCP stdio protocol interferes with stderr output during tool execution. Tool execution logs are written to local log files. This is a known limitation of the stdio-based MCP protocol.
|
||||||
|
|
||||||
To monitor logs during test execution:
|
To monitor logs during test execution:
|
||||||
|
|
||||||
@@ -42,20 +40,20 @@ To monitor logs during test execution:
|
|||||||
./run-server.sh -f
|
./run-server.sh -f
|
||||||
|
|
||||||
# Or manually monitor main server logs (includes all tool execution details)
|
# Or manually monitor main server logs (includes all tool execution details)
|
||||||
docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log
|
tail -f -n 500 logs/mcp_server.log
|
||||||
|
|
||||||
# Monitor MCP activity logs (tool calls and completions)
|
# Monitor MCP activity logs (tool calls and completions)
|
||||||
docker exec zen-mcp-server tail -f /tmp/mcp_activity.log
|
tail -f logs/mcp_activity.log
|
||||||
|
|
||||||
# Check log file sizes (logs rotate at 20MB)
|
# Check log file sizes (logs rotate at 20MB)
|
||||||
docker exec zen-mcp-server ls -lh /tmp/mcp_*.log*
|
ls -lh logs/mcp_*.log*
|
||||||
```
|
```
|
||||||
|
|
||||||
**Log Rotation**: All log files are configured with automatic rotation at 20MB to prevent disk space issues. The server keeps:
|
**Log Rotation**: All log files are configured with automatic rotation at 20MB to prevent disk space issues. The server keeps:
|
||||||
- 10 rotated files for mcp_server.log (200MB total)
|
- 10 rotated files for mcp_server.log (200MB total)
|
||||||
- 5 rotated files for mcp_activity.log (100MB total)
|
- 5 rotated files for mcp_activity.log (100MB total)
|
||||||
|
|
||||||
**Why logs don't appear in docker compose logs**: The MCP stdio_server captures stderr during tool execution to prevent interference with the JSON-RPC protocol communication. This means that while you'll see startup logs in `docker compose logs`, you won't see tool execution logs there.
|
**Why logs appear in files**: The MCP stdio_server captures stderr during tool execution to prevent interference with the JSON-RPC protocol communication. This means tool execution logs are written to files rather than displayed in console output.
|
||||||
|
|
||||||
#### Running All Simulator Tests
|
#### Running All Simulator Tests
|
||||||
```bash
|
```bash
|
||||||
@@ -65,7 +63,7 @@ python communication_simulator_test.py
|
|||||||
# Run with verbose output for debugging
|
# Run with verbose output for debugging
|
||||||
python communication_simulator_test.py --verbose
|
python communication_simulator_test.py --verbose
|
||||||
|
|
||||||
# Keep Docker logs after tests for inspection
|
# Keep server logs after tests for inspection
|
||||||
python communication_simulator_test.py --keep-logs
|
python communication_simulator_test.py --keep-logs
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -79,7 +77,7 @@ python communication_simulator_test.py --individual basic_conversation
|
|||||||
# Examples of available tests:
|
# Examples of available tests:
|
||||||
python communication_simulator_test.py --individual content_validation
|
python communication_simulator_test.py --individual content_validation
|
||||||
python communication_simulator_test.py --individual cross_tool_continuation
|
python communication_simulator_test.py --individual cross_tool_continuation
|
||||||
python communication_simulator_test.py --individual redis_validation
|
python communication_simulator_test.py --individual memory_validation
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Other Options
|
#### Other Options
|
||||||
@@ -90,8 +88,6 @@ python communication_simulator_test.py --list-tests
|
|||||||
# Run multiple specific tests (not all)
|
# Run multiple specific tests (not all)
|
||||||
python communication_simulator_test.py --tests basic_conversation content_validation
|
python communication_simulator_test.py --tests basic_conversation content_validation
|
||||||
|
|
||||||
# Force Docker environment rebuild before running tests
|
|
||||||
python communication_simulator_test.py --rebuild
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Code Quality Checks
|
### Code Quality Checks
|
||||||
@@ -135,11 +131,8 @@ For detailed contribution guidelines, testing requirements, and code quality sta
|
|||||||
### Quick Testing Reference
|
### Quick Testing Reference
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Activate virtual environment
|
# Run quality checks
|
||||||
source venv/bin/activate
|
./code_quality_checks.sh
|
||||||
|
|
||||||
# Run linting checks
|
|
||||||
ruff check . && black --check . && isort --check-only .
|
|
||||||
|
|
||||||
# Run unit tests
|
# Run unit tests
|
||||||
python -m pytest -xvs
|
python -m pytest -xvs
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ bug hunting and reduces the chance of wasting precious tokens back and forth.
|
|||||||
|
|
||||||
**Runtime Environment Issues:**
|
**Runtime Environment Issues:**
|
||||||
```
|
```
|
||||||
"Debug deployment issues with Docker container startup failures, here's the runtime info: [environment details]"
|
"Debug deployment issues with server startup failures, here's the runtime info: [environment details]"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Debugging Methodology
|
## Debugging Methodology
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ The tool displays:
|
|||||||
|
|
||||||
🔹 Custom/Local - ✅ Configured
|
🔹 Custom/Local - ✅ Configured
|
||||||
• local-llama (llama3.2) - 128K context, local inference
|
• local-llama (llama3.2) - 128K context, local inference
|
||||||
• Available at: http://host.docker.internal:11434/v1
|
• Available at: http://localhost:11434/v1
|
||||||
|
|
||||||
🔹 OpenRouter - ❌ Not configured
|
🔹 OpenRouter - ❌ Not configured
|
||||||
Set OPENROUTER_API_KEY to enable access to Claude, GPT-4, and more models
|
Set OPENROUTER_API_KEY to enable access to Claude, GPT-4, and more models
|
||||||
|
|||||||
@@ -42,8 +42,8 @@ The tool provides:
|
|||||||
**System Information:**
|
**System Information:**
|
||||||
- Server uptime and status
|
- Server uptime and status
|
||||||
- Memory and resource usage (if available)
|
- Memory and resource usage (if available)
|
||||||
- Connection status with Redis (for conversation memory)
|
- Conversation memory status
|
||||||
- Docker container information
|
- Server process information
|
||||||
|
|
||||||
## Example Output
|
## Example Output
|
||||||
|
|
||||||
@@ -58,7 +58,7 @@ The tool provides:
|
|||||||
⚙️ Configuration:
|
⚙️ Configuration:
|
||||||
• Default Model: auto
|
• Default Model: auto
|
||||||
• Providers: Google ✅, OpenAI ✅, Custom ✅
|
• Providers: Google ✅, OpenAI ✅, Custom ✅
|
||||||
• Conversation Memory: Redis ✅
|
• Conversation Memory: Active ✅
|
||||||
• Web Search: Enabled
|
• Web Search: Enabled
|
||||||
|
|
||||||
🛠️ Available Tools (12):
|
🛠️ Available Tools (12):
|
||||||
@@ -77,8 +77,8 @@ The tool provides:
|
|||||||
|
|
||||||
🔍 System Status:
|
🔍 System Status:
|
||||||
• Server Uptime: 2h 35m
|
• Server Uptime: 2h 35m
|
||||||
• Redis Connection: Active
|
• Memory Storage: Active
|
||||||
• Docker Container: zen-mcp-server (running)
|
• Server Process: Running
|
||||||
```
|
```
|
||||||
|
|
||||||
## When to Use Version Tool
|
## When to Use Version Tool
|
||||||
@@ -106,7 +106,7 @@ The version tool can help diagnose common issues:
|
|||||||
**Performance Troubleshooting:**
|
**Performance Troubleshooting:**
|
||||||
- Server uptime and stability
|
- Server uptime and stability
|
||||||
- Resource usage patterns
|
- Resource usage patterns
|
||||||
- Redis connection health
|
- Memory storage health
|
||||||
|
|
||||||
## Tool Parameters
|
## Tool Parameters
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ claude.exe --debug
|
|||||||
|
|
||||||
Look for error messages in the console output, especially:
|
Look for error messages in the console output, especially:
|
||||||
- API key errors
|
- API key errors
|
||||||
- Docker connection issues
|
- Python/environment issues
|
||||||
- File permission errors
|
- File permission errors
|
||||||
|
|
||||||
### 3. Verify API Keys
|
### 3. Verify API Keys
|
||||||
@@ -40,60 +40,72 @@ cat .env
|
|||||||
# OPENAI_API_KEY=your-key-here
|
# OPENAI_API_KEY=your-key-here
|
||||||
```
|
```
|
||||||
|
|
||||||
If you need to update your API keys, edit the `.env` file and then run:
|
If you need to update your API keys, edit the `.env` file and then restart Claude for changes to take effect.
|
||||||
|
|
||||||
|
### 4. Check Server Logs
|
||||||
|
|
||||||
|
View the server logs for detailed error information:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Restart services
|
# View recent logs
|
||||||
./run-server.sh
|
tail -n 100 logs/mcp_server.log
|
||||||
|
|
||||||
# Or restart and follow logs for troubleshooting
|
# Follow logs in real-time
|
||||||
./run-server.sh -f
|
tail -f logs/mcp_server.log
|
||||||
```
|
|
||||||
|
|
||||||
This will validate your configuration and restart the services.
|
|
||||||
|
|
||||||
### 4. Check Docker Logs
|
|
||||||
|
|
||||||
View the container logs for detailed error information:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Check if containers are running
|
|
||||||
docker-compose ps
|
|
||||||
|
|
||||||
# View MCP server logs (recommended - shows actual tool execution)
|
|
||||||
docker exec zen-mcp-server tail -f -n 500 /tmp/mcp_server.log
|
|
||||||
|
|
||||||
# Or use the -f flag when starting to automatically follow logs
|
# Or use the -f flag when starting to automatically follow logs
|
||||||
./run-server.sh -f
|
./run-server.sh -f
|
||||||
```
|
|
||||||
|
|
||||||
**Note**: Due to MCP protocol limitations, `docker-compose logs` only shows startup logs, not tool execution logs. Always use the docker exec command above or the `-f` flag for debugging.
|
# Search for errors
|
||||||
|
grep "ERROR" logs/mcp_server.log
|
||||||
|
```
|
||||||
|
|
||||||
See [Logging Documentation](logging.md) for more details on accessing logs.
|
See [Logging Documentation](logging.md) for more details on accessing logs.
|
||||||
|
|
||||||
### 5. Common Issues
|
### 5. Common Issues
|
||||||
|
|
||||||
**"Connection failed" in Claude Desktop**
|
**"Connection failed" in Claude Desktop**
|
||||||
- Ensure Docker is running: `docker ps`
|
- Ensure the server path is correct in your Claude config
|
||||||
- Restart services: `docker-compose restart`
|
- Run `./run-server.sh` to verify setup and see configuration
|
||||||
|
- Check that Python is installed: `python3 --version`
|
||||||
|
|
||||||
**"API key environment variable is required"**
|
**"API key environment variable is required"**
|
||||||
- Add your API key to the `.env` file
|
- Add your API key to the `.env` file
|
||||||
- Run: `./run-server.sh` to validate and restart
|
- Restart Claude Desktop after updating `.env`
|
||||||
|
|
||||||
**File path errors**
|
**File path errors**
|
||||||
- Always use absolute paths: `/Users/you/project/file.py`
|
- Always use absolute paths: `/Users/you/project/file.py`
|
||||||
- Never use relative paths: `./file.py`
|
- Never use relative paths: `./file.py`
|
||||||
|
|
||||||
### 6. Still Having Issues?
|
**Python module not found**
|
||||||
|
- Run `./run-server.sh` to reinstall dependencies
|
||||||
|
- Check virtual environment is activated: should see `.zen_venv` in the Python path
|
||||||
|
|
||||||
|
### 6. Environment Issues
|
||||||
|
|
||||||
|
**Virtual Environment Problems**
|
||||||
|
```bash
|
||||||
|
# Reset environment completely
|
||||||
|
rm -rf .zen_venv
|
||||||
|
./run-server.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
**Permission Issues**
|
||||||
|
```bash
|
||||||
|
# Ensure script is executable
|
||||||
|
chmod +x run-server.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Still Having Issues?
|
||||||
|
|
||||||
If the problem persists after trying these steps:
|
If the problem persists after trying these steps:
|
||||||
|
|
||||||
1. **Reproduce the issue** - Note the exact steps that cause the problem
|
1. **Reproduce the issue** - Note the exact steps that cause the problem
|
||||||
2. **Collect logs** - Save relevant error messages from Claude debug mode and Docker logs
|
2. **Collect logs** - Save relevant error messages from Claude debug mode and server logs
|
||||||
3. **Open a GitHub issue** with:
|
3. **Open a GitHub issue** with:
|
||||||
- Your operating system
|
- Your operating system
|
||||||
- Error messages
|
- Python version: `python3 --version`
|
||||||
|
- Error messages from logs
|
||||||
- Steps to reproduce
|
- Steps to reproduce
|
||||||
- What you've already tried
|
- What you've already tried
|
||||||
|
|
||||||
|
|||||||
@@ -1,19 +0,0 @@
|
|||||||
{
|
|
||||||
"comment": "Docker configuration that mounts your home directory",
|
|
||||||
"comment2": "Update paths: /path/to/zen-mcp-server/.env and /Users/your-username",
|
|
||||||
"comment3": "The container auto-detects /workspace as sandbox from WORKSPACE_ROOT",
|
|
||||||
"mcpServers": {
|
|
||||||
"zen": {
|
|
||||||
"command": "docker",
|
|
||||||
"args": [
|
|
||||||
"run",
|
|
||||||
"--rm",
|
|
||||||
"-i",
|
|
||||||
"--env-file", "/path/to/zen-mcp-server/.env",
|
|
||||||
"-e", "WORKSPACE_ROOT=/Users/your-username",
|
|
||||||
"-v", "/Users/your-username:/workspace:ro",
|
|
||||||
"zen-mcp-server:latest"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,17 +1,11 @@
|
|||||||
{
|
{
|
||||||
"comment": "macOS configuration using Docker",
|
"comment": "macOS configuration using standalone server",
|
||||||
"comment2": "Ensure Docker is running and containers are started",
|
"comment2": "Run './run-server.sh' to set up the environment and get exact paths",
|
||||||
"comment3": "Run './run-server.sh' first to set up the environment",
|
"comment3": "Use './run-server.sh -c' to display the correct configuration",
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"zen": {
|
"zen": {
|
||||||
"command": "docker",
|
"command": "/path/to/zen-mcp-server/.zen_venv/bin/python",
|
||||||
"args": [
|
"args": ["/path/to/zen-mcp-server/server.py"]
|
||||||
"exec",
|
|
||||||
"-i",
|
|
||||||
"zen-mcp-server",
|
|
||||||
"python",
|
|
||||||
"server.py"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,17 +1,13 @@
|
|||||||
{
|
{
|
||||||
"comment": "Windows configuration using WSL with Docker",
|
"comment": "Windows configuration using WSL with standalone server",
|
||||||
"comment2": "Ensure Docker Desktop is running and WSL integration is enabled",
|
"comment2": "Run './run-server.sh' in WSL to set up the environment and get exact paths",
|
||||||
"comment3": "Run './run-server.sh' in WSL first to set up the environment",
|
"comment3": "Use './run-server.sh -c' to display the correct configuration",
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
"zen": {
|
"zen": {
|
||||||
"command": "wsl.exe",
|
"command": "wsl.exe",
|
||||||
"args": [
|
"args": [
|
||||||
"docker",
|
"/path/to/zen-mcp-server/.zen_venv/bin/python",
|
||||||
"exec",
|
"/path/to/zen-mcp-server/server.py"
|
||||||
"-i",
|
|
||||||
"zen-mcp-server",
|
|
||||||
"python",
|
|
||||||
"server.py"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
from .base import ModelCapabilities, ModelProvider, ModelResponse
|
from .base import ModelCapabilities, ModelProvider, ModelResponse
|
||||||
from .gemini import GeminiModelProvider
|
from .gemini import GeminiModelProvider
|
||||||
from .openai import OpenAIModelProvider
|
|
||||||
from .openai_compatible import OpenAICompatibleProvider
|
from .openai_compatible import OpenAICompatibleProvider
|
||||||
|
from .openai_provider import OpenAIModelProvider
|
||||||
from .openrouter import OpenRouterProvider
|
from .openrouter import OpenRouterProvider
|
||||||
from .registry import ModelProviderRegistry
|
from .registry import ModelProviderRegistry
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ class CustomProvider(OpenAICompatibleProvider):
|
|||||||
api_key: API key for the custom endpoint. Can be empty string for
|
api_key: API key for the custom endpoint. Can be empty string for
|
||||||
providers that don't require authentication (like Ollama).
|
providers that don't require authentication (like Ollama).
|
||||||
Falls back to CUSTOM_API_KEY environment variable if not provided.
|
Falls back to CUSTOM_API_KEY environment variable if not provided.
|
||||||
base_url: Base URL for the custom API endpoint (e.g., 'http://host.docker.internal:11434/v1').
|
base_url: Base URL for the custom API endpoint (e.g., 'http://localhost:11434/v1').
|
||||||
Falls back to CUSTOM_API_URL environment variable if not provided.
|
Falls back to CUSTOM_API_URL environment variable if not provided.
|
||||||
**kwargs: Additional configuration passed to parent OpenAI-compatible provider
|
**kwargs: Additional configuration passed to parent OpenAI-compatible provider
|
||||||
|
|
||||||
|
|||||||
@@ -453,20 +453,13 @@ class GeminiModelProvider(ModelProvider):
|
|||||||
mime_type = header.split(";")[0].split(":")[1]
|
mime_type = header.split(";")[0].split(":")[1]
|
||||||
return {"inline_data": {"mime_type": mime_type, "data": data}}
|
return {"inline_data": {"mime_type": mime_type, "data": data}}
|
||||||
else:
|
else:
|
||||||
# Handle file path - translate for Docker environment
|
# Handle file path
|
||||||
from utils.file_types import get_image_mime_type
|
from utils.file_types import get_image_mime_type
|
||||||
from utils.file_utils import translate_path_for_environment
|
|
||||||
|
|
||||||
translated_path = translate_path_for_environment(image_path)
|
if not os.path.exists(image_path):
|
||||||
logger.debug(f"Translated image path from '{image_path}' to '{translated_path}'")
|
logger.warning(f"Image file not found: {image_path}")
|
||||||
|
|
||||||
if not os.path.exists(translated_path):
|
|
||||||
logger.warning(f"Image file not found: {translated_path} (original: {image_path})")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Use translated path for all subsequent operations
|
|
||||||
image_path = translated_path
|
|
||||||
|
|
||||||
# Detect MIME type from file extension using centralized mappings
|
# Detect MIME type from file extension using centralized mappings
|
||||||
ext = os.path.splitext(image_path)[1].lower()
|
ext = os.path.splitext(image_path)[1].lower()
|
||||||
mime_type = get_image_mime_type(ext)
|
mime_type = get_image_mime_type(ext)
|
||||||
|
|||||||
@@ -151,10 +151,6 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
if hostname in ["localhost", "127.0.0.1", "::1"]:
|
if hostname in ["localhost", "127.0.0.1", "::1"]:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check for Docker internal hostnames (like host.docker.internal)
|
|
||||||
if hostname and ("docker.internal" in hostname or "host.docker.internal" in hostname):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check for private network ranges (local network)
|
# Check for private network ranges (local network)
|
||||||
if hostname:
|
if hostname:
|
||||||
try:
|
try:
|
||||||
@@ -201,8 +197,38 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
def client(self):
|
def client(self):
|
||||||
"""Lazy initialization of OpenAI client with security checks and timeout configuration."""
|
"""Lazy initialization of OpenAI client with security checks and timeout configuration."""
|
||||||
if self._client is None:
|
if self._client is None:
|
||||||
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Temporarily disable proxy environment variables to prevent httpx from detecting them
|
||||||
|
original_env = {}
|
||||||
|
proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"]
|
||||||
|
|
||||||
|
for var in proxy_env_vars:
|
||||||
|
if var in os.environ:
|
||||||
|
original_env[var] = os.environ[var]
|
||||||
|
del os.environ[var]
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a custom httpx client that explicitly avoids proxy parameters
|
||||||
|
timeout_config = (
|
||||||
|
self.timeout_config
|
||||||
|
if hasattr(self, "timeout_config") and self.timeout_config
|
||||||
|
else httpx.Timeout(30.0)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create httpx client with minimal config to avoid proxy conflicts
|
||||||
|
# Note: proxies parameter was removed in httpx 0.28.0
|
||||||
|
http_client = httpx.Client(
|
||||||
|
timeout=timeout_config,
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Keep client initialization minimal to avoid proxy parameter conflicts
|
||||||
client_kwargs = {
|
client_kwargs = {
|
||||||
"api_key": self.api_key,
|
"api_key": self.api_key,
|
||||||
|
"http_client": http_client,
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.base_url:
|
if self.base_url:
|
||||||
@@ -215,13 +241,27 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
if self.DEFAULT_HEADERS:
|
if self.DEFAULT_HEADERS:
|
||||||
client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy()
|
client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy()
|
||||||
|
|
||||||
# Add configured timeout settings
|
logging.debug(f"OpenAI client initialized with custom httpx client and timeout: {timeout_config}")
|
||||||
if hasattr(self, "timeout_config") and self.timeout_config:
|
|
||||||
client_kwargs["timeout"] = self.timeout_config
|
|
||||||
logging.debug(f"OpenAI client initialized with custom timeout: {self.timeout_config}")
|
|
||||||
|
|
||||||
|
# Create OpenAI client with custom httpx client
|
||||||
self._client = OpenAI(**client_kwargs)
|
self._client = OpenAI(**client_kwargs)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# If all else fails, try absolute minimal client without custom httpx
|
||||||
|
logging.warning(f"Failed to create client with custom httpx, falling back to minimal config: {e}")
|
||||||
|
try:
|
||||||
|
minimal_kwargs = {"api_key": self.api_key}
|
||||||
|
if self.base_url:
|
||||||
|
minimal_kwargs["base_url"] = self.base_url
|
||||||
|
self._client = OpenAI(**minimal_kwargs)
|
||||||
|
except Exception as fallback_error:
|
||||||
|
logging.error(f"Even minimal OpenAI client creation failed: {fallback_error}")
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
# Restore original proxy environment variables
|
||||||
|
for var, value in original_env.items():
|
||||||
|
os.environ[var] = value
|
||||||
|
|
||||||
return self._client
|
return self._client
|
||||||
|
|
||||||
def _generate_with_responses_endpoint(
|
def _generate_with_responses_endpoint(
|
||||||
@@ -480,7 +520,7 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
|
|
||||||
# Log retry attempt
|
# Log retry attempt
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"{self.FRIENDLY_NAME} API error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
|
f"{self.FRIENDLY_NAME} error for model {model_name}, attempt {attempt + 1}/{max_retries}: {str(e)}. Retrying in {delay}s..."
|
||||||
)
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
|
|
||||||
@@ -738,19 +778,11 @@ class OpenAICompatibleProvider(ModelProvider):
|
|||||||
# Handle data URL: data:image/png;base64,iVBORw0...
|
# Handle data URL: data:image/png;base64,iVBORw0...
|
||||||
return {"type": "image_url", "image_url": {"url": image_path}}
|
return {"type": "image_url", "image_url": {"url": image_path}}
|
||||||
else:
|
else:
|
||||||
# Handle file path - translate for Docker environment
|
# Handle file path
|
||||||
from utils.file_utils import translate_path_for_environment
|
if not os.path.exists(image_path):
|
||||||
|
logging.warning(f"Image file not found: {image_path}")
|
||||||
translated_path = translate_path_for_environment(image_path)
|
|
||||||
logging.debug(f"Translated image path from '{image_path}' to '{translated_path}'")
|
|
||||||
|
|
||||||
if not os.path.exists(translated_path):
|
|
||||||
logging.warning(f"Image file not found: {translated_path} (original: {image_path})")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Use translated path for all subsequent operations
|
|
||||||
image_path = translated_path
|
|
||||||
|
|
||||||
# Detect MIME type from file extension using centralized mappings
|
# Detect MIME type from file extension using centralized mappings
|
||||||
from utils.file_types import get_image_mime_type
|
from utils.file_types import get_image_mime_type
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from utils.file_utils import read_json_file, translate_path_for_environment
|
from utils.file_utils import read_json_file
|
||||||
|
|
||||||
from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
|
from .base import ModelCapabilities, ProviderType, RangeTemperatureConstraint
|
||||||
|
|
||||||
@@ -59,19 +59,17 @@ class OpenRouterModelRegistry:
|
|||||||
|
|
||||||
# Determine config path
|
# Determine config path
|
||||||
if config_path:
|
if config_path:
|
||||||
# Direct config_path parameter - translate for Docker if needed
|
# Direct config_path parameter
|
||||||
translated_path = translate_path_for_environment(config_path)
|
self.config_path = Path(config_path)
|
||||||
self.config_path = Path(translated_path)
|
|
||||||
else:
|
else:
|
||||||
# Check environment variable first
|
# Check environment variable first
|
||||||
env_path = os.getenv("CUSTOM_MODELS_CONFIG_PATH")
|
env_path = os.getenv("CUSTOM_MODELS_CONFIG_PATH")
|
||||||
if env_path:
|
if env_path:
|
||||||
# Environment variable path - translate for Docker if needed
|
# Environment variable path
|
||||||
translated_path = translate_path_for_environment(env_path)
|
self.config_path = Path(env_path)
|
||||||
self.config_path = Path(translated_path)
|
|
||||||
else:
|
else:
|
||||||
# Default to conf/custom_models.json - use relative path from this file
|
# Default to conf/custom_models.json - use relative path from this file
|
||||||
# This works both in development and container environments
|
# This works in development environment
|
||||||
self.config_path = Path(__file__).parent.parent / "conf" / "custom_models.json"
|
self.config_path = Path(__file__).parent.parent / "conf" / "custom_models.json"
|
||||||
|
|
||||||
# Load configuration
|
# Load configuration
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ force_grid_wrap = 0
|
|||||||
use_parentheses = true
|
use_parentheses = true
|
||||||
ensure_newline_before_comments = true
|
ensure_newline_before_comments = true
|
||||||
line_length = 120
|
line_length = 120
|
||||||
skip_glob = ["venv/*", ".venv/*"]
|
skip_glob = ["venv/*", ".venv/*", ".zen_venv/*"]
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
target-version = "py39"
|
target-version = "py39"
|
||||||
|
|||||||
6
requirements-dev.txt
Normal file
6
requirements-dev.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
pytest>=7.4.0
|
||||||
|
pytest-asyncio>=0.21.0
|
||||||
|
pytest-mock>=3.11.0
|
||||||
|
black>=23.0.0
|
||||||
|
ruff>=0.1.0
|
||||||
|
isort>=5.12.0
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
mcp>=1.0.0
|
mcp>=1.0.0
|
||||||
google-genai>=1.19.0
|
google-genai>=1.19.0
|
||||||
openai>=1.0.0
|
openai>=1.55.2 # Minimum version for httpx 0.28.0 compatibility
|
||||||
pydantic>=2.0.0
|
pydantic>=2.0.0
|
||||||
redis>=5.0.0
|
python-dotenv>=1.0.0
|
||||||
|
|
||||||
# Development dependencies
|
# Development dependencies (install with pip install -r requirements-dev.txt)
|
||||||
pytest>=7.4.0
|
# pytest>=7.4.0
|
||||||
pytest-asyncio>=0.21.0
|
# pytest-asyncio>=0.21.0
|
||||||
pytest-mock>=3.11.0
|
# pytest-mock>=3.11.0
|
||||||
1242
run-server.sh
1242
run-server.sh
File diff suppressed because it is too large
Load Diff
74
server.py
74
server.py
@@ -25,12 +25,21 @@ import sys
|
|||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
from mcp.server import Server
|
from dotenv import load_dotenv
|
||||||
from mcp.server.models import InitializationOptions
|
|
||||||
from mcp.server.stdio import stdio_server
|
# Load environment variables from .env file in the script's directory
|
||||||
from mcp.types import (
|
# This ensures .env is loaded regardless of the current working directory
|
||||||
|
script_dir = Path(__file__).parent
|
||||||
|
env_file = script_dir / ".env"
|
||||||
|
load_dotenv(dotenv_path=env_file)
|
||||||
|
|
||||||
|
from mcp.server import Server # noqa: E402
|
||||||
|
from mcp.server.models import InitializationOptions # noqa: E402
|
||||||
|
from mcp.server.stdio import stdio_server # noqa: E402
|
||||||
|
from mcp.types import ( # noqa: E402
|
||||||
GetPromptResult,
|
GetPromptResult,
|
||||||
Prompt,
|
Prompt,
|
||||||
PromptMessage,
|
PromptMessage,
|
||||||
@@ -41,13 +50,13 @@ from mcp.types import (
|
|||||||
ToolsCapability,
|
ToolsCapability,
|
||||||
)
|
)
|
||||||
|
|
||||||
from config import (
|
from config import ( # noqa: E402
|
||||||
DEFAULT_MODEL,
|
DEFAULT_MODEL,
|
||||||
__author__,
|
__author__,
|
||||||
__updated__,
|
__updated__,
|
||||||
__version__,
|
__version__,
|
||||||
)
|
)
|
||||||
from tools import (
|
from tools import ( # noqa: E402
|
||||||
AnalyzeTool,
|
AnalyzeTool,
|
||||||
ChatTool,
|
ChatTool,
|
||||||
CodeReviewTool,
|
CodeReviewTool,
|
||||||
@@ -61,7 +70,7 @@ from tools import (
|
|||||||
ThinkDeepTool,
|
ThinkDeepTool,
|
||||||
TracerTool,
|
TracerTool,
|
||||||
)
|
)
|
||||||
from tools.models import ToolOutput
|
from tools.models import ToolOutput # noqa: E402
|
||||||
|
|
||||||
# Configure logging for server operations
|
# Configure logging for server operations
|
||||||
# Can be controlled via LOG_LEVEL environment variable (DEBUG, INFO, WARNING, ERROR)
|
# Can be controlled via LOG_LEVEL environment variable (DEBUG, INFO, WARNING, ERROR)
|
||||||
@@ -101,13 +110,17 @@ root_logger.addHandler(stderr_handler)
|
|||||||
# Set root logger level
|
# Set root logger level
|
||||||
root_logger.setLevel(getattr(logging, log_level, logging.INFO))
|
root_logger.setLevel(getattr(logging, log_level, logging.INFO))
|
||||||
|
|
||||||
# Add rotating file handler for Docker log monitoring
|
# Add rotating file handler for local log monitoring
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Create logs directory in project root
|
||||||
|
log_dir = Path(__file__).parent / "logs"
|
||||||
|
log_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
# Main server log with size-based rotation (20MB max per file)
|
# Main server log with size-based rotation (20MB max per file)
|
||||||
# This ensures logs don't grow indefinitely and are properly managed
|
# This ensures logs don't grow indefinitely and are properly managed
|
||||||
file_handler = RotatingFileHandler(
|
file_handler = RotatingFileHandler(
|
||||||
"/tmp/mcp_server.log",
|
log_dir / "mcp_server.log",
|
||||||
maxBytes=20 * 1024 * 1024, # 20MB max file size
|
maxBytes=20 * 1024 * 1024, # 20MB max file size
|
||||||
backupCount=10, # Keep 10 rotated files (200MB total)
|
backupCount=10, # Keep 10 rotated files (200MB total)
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
@@ -119,7 +132,7 @@ try:
|
|||||||
# Create a special logger for MCP activity tracking with size-based rotation
|
# Create a special logger for MCP activity tracking with size-based rotation
|
||||||
mcp_logger = logging.getLogger("mcp_activity")
|
mcp_logger = logging.getLogger("mcp_activity")
|
||||||
mcp_file_handler = RotatingFileHandler(
|
mcp_file_handler = RotatingFileHandler(
|
||||||
"/tmp/mcp_activity.log",
|
log_dir / "mcp_activity.log",
|
||||||
maxBytes=20 * 1024 * 1024, # 20MB max file size
|
maxBytes=20 * 1024 * 1024, # 20MB max file size
|
||||||
backupCount=5, # Keep 5 rotated files (100MB total)
|
backupCount=5, # Keep 5 rotated files (100MB total)
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
@@ -131,16 +144,9 @@ try:
|
|||||||
# Ensure MCP activity also goes to stderr
|
# Ensure MCP activity also goes to stderr
|
||||||
mcp_logger.propagate = True
|
mcp_logger.propagate = True
|
||||||
|
|
||||||
# Also keep a size-based rotation as backup (100MB max per file)
|
# Log setup info directly to root logger since logger isn't defined yet
|
||||||
# This prevents any single day's log from growing too large
|
logging.info(f"Logging to: {log_dir / 'mcp_server.log'}")
|
||||||
size_handler = RotatingFileHandler(
|
logging.info(f"Process PID: {os.getpid()}")
|
||||||
"/tmp/mcp_server_overflow.log",
|
|
||||||
maxBytes=100 * 1024 * 1024,
|
|
||||||
backupCount=3, # 100MB
|
|
||||||
)
|
|
||||||
size_handler.setLevel(logging.WARNING) # Only warnings and errors
|
|
||||||
size_handler.setFormatter(LocalTimeFormatter(log_format))
|
|
||||||
logging.getLogger().addHandler(size_handler)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Could not set up file logging: {e}", file=sys.stderr)
|
print(f"Warning: Could not set up file logging: {e}", file=sys.stderr)
|
||||||
@@ -243,7 +249,7 @@ def configure_providers():
|
|||||||
from providers.base import ProviderType
|
from providers.base import ProviderType
|
||||||
from providers.custom import CustomProvider
|
from providers.custom import CustomProvider
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from providers.openrouter import OpenRouterProvider
|
from providers.openrouter import OpenRouterProvider
|
||||||
from providers.xai import XAIModelProvider
|
from providers.xai import XAIModelProvider
|
||||||
from utils.model_restrictions import get_restriction_service
|
from utils.model_restrictions import get_restriction_service
|
||||||
@@ -450,7 +456,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
|
|||||||
This function serves as the central orchestrator for multi-turn AI-to-AI conversations:
|
This function serves as the central orchestrator for multi-turn AI-to-AI conversations:
|
||||||
|
|
||||||
1. THREAD RESUMPTION: When continuation_id is present, it reconstructs complete conversation
|
1. THREAD RESUMPTION: When continuation_id is present, it reconstructs complete conversation
|
||||||
context from Redis including conversation history and file references
|
context from in-memory storage including conversation history and file references
|
||||||
|
|
||||||
2. CROSS-TOOL CONTINUATION: Enables seamless handoffs between different tools (analyze →
|
2. CROSS-TOOL CONTINUATION: Enables seamless handoffs between different tools (analyze →
|
||||||
codereview → debug) while preserving full conversation context and file references
|
codereview → debug) while preserving full conversation context and file references
|
||||||
@@ -465,7 +471,7 @@ async def handle_call_tool(name: str, arguments: dict[str, Any]) -> list[TextCon
|
|||||||
|
|
||||||
STATELESS TO STATEFUL BRIDGE:
|
STATELESS TO STATEFUL BRIDGE:
|
||||||
The MCP protocol is inherently stateless, but this function bridges the gap by:
|
The MCP protocol is inherently stateless, but this function bridges the gap by:
|
||||||
- Loading persistent conversation state from Redis
|
- Loading persistent conversation state from in-memory storage
|
||||||
- Reconstructing full multi-turn context for tool execution
|
- Reconstructing full multi-turn context for tool execution
|
||||||
- Enabling tools to access previous exchanges and file references
|
- Enabling tools to access previous exchanges and file references
|
||||||
- Supporting conversation chains across different tool types
|
- Supporting conversation chains across different tool types
|
||||||
@@ -700,13 +706,13 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
Reconstruct conversation context for stateless-to-stateful thread continuation.
|
Reconstruct conversation context for stateless-to-stateful thread continuation.
|
||||||
|
|
||||||
This is a critical function that transforms the inherently stateless MCP protocol into
|
This is a critical function that transforms the inherently stateless MCP protocol into
|
||||||
stateful multi-turn conversations. It loads persistent conversation state from Redis
|
stateful multi-turn conversations. It loads persistent conversation state from in-memory
|
||||||
and rebuilds complete conversation context using the sophisticated dual prioritization
|
storage and rebuilds complete conversation context using the sophisticated dual prioritization
|
||||||
strategy implemented in the conversation memory system.
|
strategy implemented in the conversation memory system.
|
||||||
|
|
||||||
CONTEXT RECONSTRUCTION PROCESS:
|
CONTEXT RECONSTRUCTION PROCESS:
|
||||||
|
|
||||||
1. THREAD RETRIEVAL: Loads complete ThreadContext from Redis using continuation_id
|
1. THREAD RETRIEVAL: Loads complete ThreadContext from storage using continuation_id
|
||||||
- Includes all conversation turns with tool attribution
|
- Includes all conversation turns with tool attribution
|
||||||
- Preserves file references and cross-tool context
|
- Preserves file references and cross-tool context
|
||||||
- Handles conversation chains across multiple linked threads
|
- Handles conversation chains across multiple linked threads
|
||||||
@@ -742,7 +748,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
|
|
||||||
ERROR HANDLING & RECOVERY:
|
ERROR HANDLING & RECOVERY:
|
||||||
- Thread expiration: Provides clear instructions for conversation restart
|
- Thread expiration: Provides clear instructions for conversation restart
|
||||||
- Redis unavailability: Graceful degradation with error messaging
|
- Storage unavailability: Graceful degradation with error messaging
|
||||||
- Invalid continuation_id: Security validation and user-friendly errors
|
- Invalid continuation_id: Security validation and user-friendly errors
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -762,7 +768,7 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
Includes user-friendly recovery instructions
|
Includes user-friendly recovery instructions
|
||||||
|
|
||||||
Performance Characteristics:
|
Performance Characteristics:
|
||||||
- O(1) thread lookup in Redis
|
- O(1) thread lookup in memory
|
||||||
- O(n) conversation history reconstruction where n = number of turns
|
- O(n) conversation history reconstruction where n = number of turns
|
||||||
- Intelligent token budgeting prevents context window overflow
|
- Intelligent token budgeting prevents context window overflow
|
||||||
- Optimized file deduplication minimizes redundant content
|
- Optimized file deduplication minimizes redundant content
|
||||||
@@ -778,12 +784,12 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
|
|
||||||
continuation_id = arguments["continuation_id"]
|
continuation_id = arguments["continuation_id"]
|
||||||
|
|
||||||
# Get thread context from Redis
|
# Get thread context from storage
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Looking up thread {continuation_id} in Redis")
|
logger.debug(f"[CONVERSATION_DEBUG] Looking up thread {continuation_id} in storage")
|
||||||
context = get_thread(continuation_id)
|
context = get_thread(continuation_id)
|
||||||
if not context:
|
if not context:
|
||||||
logger.warning(f"Thread not found: {continuation_id}")
|
logger.warning(f"Thread not found: {continuation_id}")
|
||||||
logger.debug(f"[CONVERSATION_DEBUG] Thread {continuation_id} not found in Redis or expired")
|
logger.debug(f"[CONVERSATION_DEBUG] Thread {continuation_id} not found in storage or expired")
|
||||||
|
|
||||||
# Log to activity file for monitoring
|
# Log to activity file for monitoring
|
||||||
try:
|
try:
|
||||||
@@ -795,8 +801,8 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any
|
|||||||
# Return error asking Claude to restart conversation with full context
|
# Return error asking Claude to restart conversation with full context
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Conversation thread '{continuation_id}' was not found or has expired. "
|
f"Conversation thread '{continuation_id}' was not found or has expired. "
|
||||||
f"This may happen if the conversation was created more than 1 hour ago or if there was an issue "
|
f"This may happen if the conversation was created more than 3 hours ago or if the "
|
||||||
f"with Redis storage. "
|
f"server was restarted. "
|
||||||
f"Please restart the conversation by providing your full question/prompt without the "
|
f"Please restart the conversation by providing your full question/prompt without the "
|
||||||
f"continuation_id parameter. "
|
f"continuation_id parameter. "
|
||||||
f"This will create a new conversation thread that can continue with follow-up exchanges."
|
f"This will create a new conversation thread that can continue with follow-up exchanges."
|
||||||
@@ -1165,7 +1171,7 @@ async def main():
|
|||||||
# Validate and configure providers based on available API keys
|
# Validate and configure providers based on available API keys
|
||||||
configure_providers()
|
configure_providers()
|
||||||
|
|
||||||
# Log startup message for Docker log monitoring
|
# Log startup message
|
||||||
logger.info("Zen MCP Server starting up...")
|
logger.info("Zen MCP Server starting up...")
|
||||||
logger.info(f"Log level: {log_level}")
|
logger.info(f"Log level: {log_level}")
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ from .test_openrouter_models import OpenRouterModelsTest
|
|||||||
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
from .test_per_tool_deduplication import PerToolDeduplicationTest
|
||||||
from .test_planner_continuation_history import PlannerContinuationHistoryTest
|
from .test_planner_continuation_history import PlannerContinuationHistoryTest
|
||||||
from .test_planner_validation import PlannerValidationTest
|
from .test_planner_validation import PlannerValidationTest
|
||||||
from .test_redis_validation import RedisValidationTest
|
|
||||||
|
# Redis validation test removed - no longer needed for standalone server
|
||||||
from .test_refactor_validation import RefactorValidationTest
|
from .test_refactor_validation import RefactorValidationTest
|
||||||
from .test_testgen_validation import TestGenValidationTest
|
from .test_testgen_validation import TestGenValidationTest
|
||||||
from .test_token_allocation_validation import TokenAllocationValidationTest
|
from .test_token_allocation_validation import TokenAllocationValidationTest
|
||||||
@@ -42,7 +43,7 @@ TEST_REGISTRY = {
|
|||||||
"cross_tool_comprehensive": CrossToolComprehensiveTest,
|
"cross_tool_comprehensive": CrossToolComprehensiveTest,
|
||||||
"line_number_validation": LineNumberValidationTest,
|
"line_number_validation": LineNumberValidationTest,
|
||||||
"logs_validation": LogsValidationTest,
|
"logs_validation": LogsValidationTest,
|
||||||
"redis_validation": RedisValidationTest,
|
# "redis_validation": RedisValidationTest, # Removed - no longer needed for standalone server
|
||||||
"model_thinking_config": TestModelThinkingConfig,
|
"model_thinking_config": TestModelThinkingConfig,
|
||||||
"o3_model_selection": O3ModelSelectionTest,
|
"o3_model_selection": O3ModelSelectionTest,
|
||||||
"ollama_custom_url": OllamaCustomUrlTest,
|
"ollama_custom_url": OllamaCustomUrlTest,
|
||||||
@@ -72,7 +73,7 @@ __all__ = [
|
|||||||
"CrossToolComprehensiveTest",
|
"CrossToolComprehensiveTest",
|
||||||
"LineNumberValidationTest",
|
"LineNumberValidationTest",
|
||||||
"LogsValidationTest",
|
"LogsValidationTest",
|
||||||
"RedisValidationTest",
|
# "RedisValidationTest", # Removed - no longer needed for standalone server
|
||||||
"TestModelThinkingConfig",
|
"TestModelThinkingConfig",
|
||||||
"O3ModelSelectionTest",
|
"O3ModelSelectionTest",
|
||||||
"O3ProExpensiveTest",
|
"O3ProExpensiveTest",
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from .log_utils import LogUtils
|
||||||
|
|
||||||
|
|
||||||
class BaseSimulatorTest:
|
class BaseSimulatorTest:
|
||||||
"""Base class for all communication simulator tests"""
|
"""Base class for all communication simulator tests"""
|
||||||
@@ -19,14 +21,25 @@ class BaseSimulatorTest:
|
|||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.test_files = {}
|
self.test_files = {}
|
||||||
self.test_dir = None
|
self.test_dir = None
|
||||||
self.container_name = "zen-mcp-server"
|
self.python_path = self._get_python_path()
|
||||||
self.redis_container = "zen-mcp-redis"
|
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
log_level = logging.DEBUG if verbose else logging.INFO
|
log_level = logging.DEBUG if verbose else logging.INFO
|
||||||
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||||
self.logger = logging.getLogger(self.__class__.__name__)
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
|
|
||||||
|
def _get_python_path(self) -> str:
|
||||||
|
"""Get the Python path for the virtual environment"""
|
||||||
|
current_dir = os.getcwd()
|
||||||
|
venv_python = os.path.join(current_dir, ".zen_venv", "bin", "python")
|
||||||
|
|
||||||
|
if os.path.exists(venv_python):
|
||||||
|
return venv_python
|
||||||
|
|
||||||
|
# Fallback to system python if venv doesn't exist
|
||||||
|
self.logger.warning("Virtual environment not found, using system python")
|
||||||
|
return "python"
|
||||||
|
|
||||||
def setup_test_files(self):
|
def setup_test_files(self):
|
||||||
"""Create test files for the simulation"""
|
"""Create test files for the simulation"""
|
||||||
# Test Python file
|
# Test Python file
|
||||||
@@ -100,7 +113,7 @@ class Calculator:
|
|||||||
self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}")
|
self.logger.debug(f"Created test files with absolute paths: {list(self.test_files.values())}")
|
||||||
|
|
||||||
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
||||||
"""Call an MCP tool via Claude CLI (docker exec)"""
|
"""Call an MCP tool via standalone server"""
|
||||||
try:
|
try:
|
||||||
# Prepare the MCP initialization and tool call sequence
|
# Prepare the MCP initialization and tool call sequence
|
||||||
init_request = {
|
init_request = {
|
||||||
@@ -131,8 +144,8 @@ class Calculator:
|
|||||||
# Join with newlines as MCP expects
|
# Join with newlines as MCP expects
|
||||||
input_data = "\n".join(messages) + "\n"
|
input_data = "\n".join(messages) + "\n"
|
||||||
|
|
||||||
# Simulate Claude CLI calling the MCP server via docker exec
|
# Call the standalone MCP server directly
|
||||||
docker_cmd = ["docker", "exec", "-i", self.container_name, "python", "server.py"]
|
server_cmd = [self.python_path, "server.py"]
|
||||||
|
|
||||||
self.logger.debug(f"Calling MCP tool {tool_name} with proper initialization")
|
self.logger.debug(f"Calling MCP tool {tool_name} with proper initialization")
|
||||||
|
|
||||||
@@ -140,7 +153,7 @@ class Calculator:
|
|||||||
# For consensus tool and other long-running tools, we need to ensure
|
# For consensus tool and other long-running tools, we need to ensure
|
||||||
# the subprocess doesn't close prematurely
|
# the subprocess doesn't close prematurely
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
docker_cmd,
|
server_cmd,
|
||||||
input=input_data,
|
input=input_data,
|
||||||
text=True,
|
text=True,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@@ -149,7 +162,7 @@ class Calculator:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
self.logger.error(f"Docker exec failed with return code {result.returncode}")
|
self.logger.error(f"Standalone server failed with return code {result.returncode}")
|
||||||
self.logger.error(f"Stderr: {result.stderr}")
|
self.logger.error(f"Stderr: {result.stderr}")
|
||||||
# Still try to parse stdout as the response might have been written before the error
|
# Still try to parse stdout as the response might have been written before the error
|
||||||
self.logger.debug(f"Attempting to parse stdout despite error: {result.stdout[:500]}")
|
self.logger.debug(f"Attempting to parse stdout despite error: {result.stdout[:500]}")
|
||||||
@@ -263,6 +276,56 @@ class Calculator:
|
|||||||
shutil.rmtree(self.test_dir)
|
shutil.rmtree(self.test_dir)
|
||||||
self.logger.debug(f"Removed test files directory: {self.test_dir}")
|
self.logger.debug(f"Removed test files directory: {self.test_dir}")
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Log Utility Methods (delegate to LogUtils)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
def get_server_logs_since(self, since_time: Optional[str] = None) -> str:
|
||||||
|
"""Get server logs from both main and activity log files."""
|
||||||
|
return LogUtils.get_server_logs_since(since_time)
|
||||||
|
|
||||||
|
def get_recent_server_logs(self, lines: int = 500) -> str:
|
||||||
|
"""Get recent server logs from the main log file."""
|
||||||
|
return LogUtils.get_recent_server_logs(lines)
|
||||||
|
|
||||||
|
def get_server_logs_subprocess(self, lines: int = 500) -> str:
|
||||||
|
"""Get server logs using subprocess (alternative method)."""
|
||||||
|
return LogUtils.get_server_logs_subprocess(lines)
|
||||||
|
|
||||||
|
def check_server_logs_for_errors(self, lines: int = 500) -> list[str]:
|
||||||
|
"""Check server logs for error messages."""
|
||||||
|
return LogUtils.check_server_logs_for_errors(lines)
|
||||||
|
|
||||||
|
def extract_conversation_usage_logs(self, logs: str) -> list[dict[str, int]]:
|
||||||
|
"""Extract token budget calculation information from logs."""
|
||||||
|
return LogUtils.extract_conversation_usage_logs(logs)
|
||||||
|
|
||||||
|
def extract_conversation_token_usage(self, logs: str) -> list[int]:
|
||||||
|
"""Extract conversation token usage values from logs."""
|
||||||
|
return LogUtils.extract_conversation_token_usage(logs)
|
||||||
|
|
||||||
|
def extract_thread_creation_logs(self, logs: str) -> list[dict[str, str]]:
|
||||||
|
"""Extract thread creation logs with parent relationships."""
|
||||||
|
return LogUtils.extract_thread_creation_logs(logs)
|
||||||
|
|
||||||
|
def extract_history_traversal_logs(self, logs: str) -> list[dict[str, any]]:
|
||||||
|
"""Extract conversation history traversal logs."""
|
||||||
|
return LogUtils.extract_history_traversal_logs(logs)
|
||||||
|
|
||||||
|
def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
|
||||||
|
"""Validate that logs show file deduplication behavior."""
|
||||||
|
return LogUtils.validate_file_deduplication_in_logs(logs, tool_name, test_file)
|
||||||
|
|
||||||
|
def search_logs_for_pattern(
|
||||||
|
self, pattern: str, logs: Optional[str] = None, case_sensitive: bool = False
|
||||||
|
) -> list[str]:
|
||||||
|
"""Search logs for a specific pattern."""
|
||||||
|
return LogUtils.search_logs_for_pattern(pattern, logs, case_sensitive)
|
||||||
|
|
||||||
|
def get_log_file_info(self) -> dict[str, dict[str, any]]:
|
||||||
|
"""Get information about log files."""
|
||||||
|
return LogUtils.get_log_file_info()
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Run the test - to be implemented by subclasses"""
|
"""Run the test - to be implemented by subclasses"""
|
||||||
raise NotImplementedError("Subclasses must implement run_test()")
|
raise NotImplementedError("Subclasses must implement run_test()")
|
||||||
|
|||||||
216
simulator_tests/conversation_base_test.py
Normal file
216
simulator_tests/conversation_base_test.py
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Conversation Base Test Class for In-Process MCP Tool Testing
|
||||||
|
|
||||||
|
This class enables testing MCP tools within the same process to maintain conversation
|
||||||
|
memory state across tool calls. Unlike BaseSimulatorTest which runs each tool call
|
||||||
|
as a separate subprocess (losing memory state), this class calls tools directly
|
||||||
|
in-process, allowing conversation functionality to work correctly.
|
||||||
|
|
||||||
|
USAGE:
|
||||||
|
- Inherit from ConversationBaseTest instead of BaseSimulatorTest for conversation tests
|
||||||
|
- Use call_mcp_tool_direct() to call tools in-process
|
||||||
|
- Conversation memory persists across tool calls within the same test
|
||||||
|
- setUp() clears memory between test methods for proper isolation
|
||||||
|
|
||||||
|
EXAMPLE:
|
||||||
|
class TestConversationFeature(ConversationBaseTest):
|
||||||
|
def test_cross_tool_continuation(self):
|
||||||
|
# Step 1: Call precommit tool
|
||||||
|
result1, continuation_id = self.call_mcp_tool_direct("precommit", {
|
||||||
|
"path": "/path/to/repo",
|
||||||
|
"prompt": "Review these changes"
|
||||||
|
})
|
||||||
|
|
||||||
|
# Step 2: Continue with codereview tool - memory is preserved!
|
||||||
|
result2, _ = self.call_mcp_tool_direct("codereview", {
|
||||||
|
"files": ["/path/to/file.py"],
|
||||||
|
"prompt": "Focus on security issues",
|
||||||
|
"continuation_id": continuation_id
|
||||||
|
})
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
|
|
||||||
|
class ConversationBaseTest(BaseSimulatorTest):
|
||||||
|
"""Base class for conversation tests that require in-process tool calling"""
|
||||||
|
|
||||||
|
def __init__(self, verbose: bool = False):
|
||||||
|
super().__init__(verbose)
|
||||||
|
self._tools = None
|
||||||
|
self._loop = None
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
"""Set up test environment - clears conversation memory between tests"""
|
||||||
|
super().setup_test_files()
|
||||||
|
|
||||||
|
# Clear conversation memory for test isolation
|
||||||
|
self._clear_conversation_memory()
|
||||||
|
|
||||||
|
# Import tools from server.py for in-process calling
|
||||||
|
if self._tools is None:
|
||||||
|
self._import_tools()
|
||||||
|
|
||||||
|
def _clear_conversation_memory(self):
|
||||||
|
"""Clear all conversation memory to ensure test isolation"""
|
||||||
|
try:
|
||||||
|
from utils.storage_backend import get_storage_backend
|
||||||
|
|
||||||
|
storage = get_storage_backend()
|
||||||
|
# Clear all stored conversation threads
|
||||||
|
with storage._lock:
|
||||||
|
storage._store.clear()
|
||||||
|
self.logger.debug("Cleared conversation memory for test isolation")
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not clear conversation memory: {e}")
|
||||||
|
|
||||||
|
def _import_tools(self):
|
||||||
|
"""Import tools from server.py for direct calling"""
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add project root to Python path if not already there
|
||||||
|
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
if project_root not in sys.path:
|
||||||
|
sys.path.insert(0, project_root)
|
||||||
|
|
||||||
|
# Import tools from server
|
||||||
|
from server import TOOLS
|
||||||
|
|
||||||
|
self._tools = TOOLS
|
||||||
|
self.logger.debug(f"Imported {len(self._tools)} tools for in-process testing")
|
||||||
|
except ImportError as e:
|
||||||
|
raise RuntimeError(f"Could not import tools from server.py: {e}")
|
||||||
|
|
||||||
|
def _get_event_loop(self):
|
||||||
|
"""Get or create event loop for async tool execution"""
|
||||||
|
if self._loop is None:
|
||||||
|
try:
|
||||||
|
self._loop = asyncio.get_event_loop()
|
||||||
|
except RuntimeError:
|
||||||
|
self._loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(self._loop)
|
||||||
|
return self._loop
|
||||||
|
|
||||||
|
def call_mcp_tool_direct(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
||||||
|
"""
|
||||||
|
Call an MCP tool directly in-process without subprocess isolation.
|
||||||
|
|
||||||
|
This method maintains conversation memory across calls, enabling proper
|
||||||
|
testing of conversation functionality.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tool_name: Name of the tool to call (e.g., "precommit", "codereview")
|
||||||
|
params: Parameters to pass to the tool
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple: (response_content, continuation_id) where continuation_id
|
||||||
|
can be used for follow-up calls
|
||||||
|
"""
|
||||||
|
if self._tools is None:
|
||||||
|
raise RuntimeError("Tools not imported. Call setUp() first.")
|
||||||
|
|
||||||
|
if tool_name not in self._tools:
|
||||||
|
raise ValueError(f"Tool '{tool_name}' not found. Available: {list(self._tools.keys())}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
tool = self._tools[tool_name]
|
||||||
|
self.logger.debug(f"Calling tool '{tool_name}' directly in-process")
|
||||||
|
|
||||||
|
# Set up minimal model context if not provided
|
||||||
|
if "model" not in params:
|
||||||
|
params["model"] = "flash" # Use fast model for testing
|
||||||
|
|
||||||
|
# Execute tool directly using asyncio
|
||||||
|
loop = self._get_event_loop()
|
||||||
|
|
||||||
|
# Import required modules for model resolution (similar to server.py)
|
||||||
|
from config import DEFAULT_MODEL
|
||||||
|
from providers.registry import ModelProviderRegistry
|
||||||
|
from utils.model_context import ModelContext
|
||||||
|
|
||||||
|
# Resolve model (simplified version of server.py logic)
|
||||||
|
model_name = params.get("model", DEFAULT_MODEL)
|
||||||
|
provider = ModelProviderRegistry.get_provider_for_model(model_name)
|
||||||
|
if not provider:
|
||||||
|
# Fallback to available model for testing
|
||||||
|
available_models = list(ModelProviderRegistry.get_available_models(respect_restrictions=True).keys())
|
||||||
|
if available_models:
|
||||||
|
model_name = available_models[0]
|
||||||
|
params["model"] = model_name
|
||||||
|
self.logger.debug(f"Using fallback model for testing: {model_name}")
|
||||||
|
|
||||||
|
# Create model context
|
||||||
|
model_context = ModelContext(model_name)
|
||||||
|
params["_model_context"] = model_context
|
||||||
|
params["_resolved_model_name"] = model_name
|
||||||
|
|
||||||
|
# Execute tool asynchronously
|
||||||
|
result = loop.run_until_complete(tool.execute(params))
|
||||||
|
|
||||||
|
if not result or len(result) == 0:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Extract response content
|
||||||
|
response_text = result[0].text if hasattr(result[0], "text") else str(result[0])
|
||||||
|
|
||||||
|
# Parse response to extract continuation_id
|
||||||
|
continuation_id = self._extract_continuation_id_from_response(response_text)
|
||||||
|
|
||||||
|
self.logger.debug(f"Tool '{tool_name}' completed successfully in-process")
|
||||||
|
return response_text, continuation_id
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Direct tool call failed for '{tool_name}': {e}")
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def _extract_continuation_id_from_response(self, response_text: str) -> Optional[str]:
|
||||||
|
"""Extract continuation_id from tool response"""
|
||||||
|
try:
|
||||||
|
# Parse the response as JSON to look for continuation metadata
|
||||||
|
response_data = json.loads(response_text)
|
||||||
|
|
||||||
|
# Look for continuation_id in various places
|
||||||
|
if isinstance(response_data, dict):
|
||||||
|
# Check metadata
|
||||||
|
metadata = response_data.get("metadata", {})
|
||||||
|
if "thread_id" in metadata:
|
||||||
|
return metadata["thread_id"]
|
||||||
|
|
||||||
|
# Check continuation_offer
|
||||||
|
continuation_offer = response_data.get("continuation_offer", {})
|
||||||
|
if continuation_offer and "continuation_id" in continuation_offer:
|
||||||
|
return continuation_offer["continuation_id"]
|
||||||
|
|
||||||
|
# Check follow_up_request
|
||||||
|
follow_up = response_data.get("follow_up_request", {})
|
||||||
|
if follow_up and "continuation_id" in follow_up:
|
||||||
|
return follow_up["continuation_id"]
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
except (json.JSONDecodeError, AttributeError):
|
||||||
|
# If response is not JSON or doesn't have expected structure, return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
"""Clean up after test"""
|
||||||
|
super().cleanup_test_files()
|
||||||
|
# Clear memory again for good measure
|
||||||
|
self._clear_conversation_memory()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def test_name(self) -> str:
|
||||||
|
"""Get the test name"""
|
||||||
|
return self.__class__.__name__
|
||||||
|
|
||||||
|
@property
|
||||||
|
def test_description(self) -> str:
|
||||||
|
"""Get the test description"""
|
||||||
|
return "In-process conversation test"
|
||||||
316
simulator_tests/log_utils.py
Normal file
316
simulator_tests/log_utils.py
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
"""
|
||||||
|
Centralized log utility for simulator tests.
|
||||||
|
|
||||||
|
This module provides common log reading and parsing functionality
|
||||||
|
used across multiple simulator test files to reduce code duplication.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
|
||||||
|
class LogUtils:
    """Centralized log reading and parsing utilities for simulator tests.

    Every method is a ``classmethod`` so tests can call them without creating an
    instance. Log locations are fixed by the ``MAIN_LOG_FILE`` and
    ``ACTIVITY_LOG_FILE`` class attributes (relative paths — assumes the test
    process runs from the repository root; confirm against the run script).
    """

    # Log file paths written by the standalone server.
    MAIN_LOG_FILE = "logs/mcp_server.log"
    ACTIVITY_LOG_FILE = "logs/mcp_activity.log"

    @classmethod
    def _read_file_or_empty(cls, path: str) -> str:
        """Return the full contents of *path*, or "" if the file does not exist."""
        try:
            with open(path) as f:
                return f.read()
        except FileNotFoundError:
            return ""

    @classmethod
    def get_server_logs_since(cls, since_time: Optional[str] = None) -> str:
        """
        Get server logs from both main and activity log files.

        Args:
            since_time: Currently ignored, returns all available logs

        Returns:
            Combined logs from both log files, joined by a newline
            ("" if reading fails entirely)
        """
        try:
            main_logs = cls._read_file_or_empty(cls.MAIN_LOG_FILE)
            activity_logs = cls._read_file_or_empty(cls.ACTIVITY_LOG_FILE)
            return main_logs + "\n" + activity_logs
        except Exception as e:
            logging.warning(f"Failed to read server logs: {e}")
            return ""

    @classmethod
    def get_recent_server_logs(cls, lines: int = 500) -> str:
        """
        Get recent server logs from the main log file.

        Args:
            lines: Number of recent lines to retrieve (default: 500)

        Returns:
            Recent log content as string ("" if the file is missing or unreadable)
        """
        try:
            with open(cls.MAIN_LOG_FILE) as f:
                # Negative slicing already handles files shorter than `lines`,
                # so no explicit length check is needed.
                return "".join(f.readlines()[-lines:])
        except FileNotFoundError:
            logging.warning(f"Log file {cls.MAIN_LOG_FILE} not found")
            return ""
        except Exception as e:
            logging.warning(f"Failed to read recent server logs: {e}")
            return ""

    @classmethod
    def get_server_logs_subprocess(cls, lines: int = 500) -> str:
        """
        Get server logs using subprocess (alternative method).

        NOTE: shells out to the POSIX ``tail`` utility, so this method is not
        portable to platforms without ``tail`` (e.g. bare Windows).

        Args:
            lines: Number of recent lines to retrieve

        Returns:
            Recent log content as string ("" on failure)
        """
        try:
            result = subprocess.run(
                ["tail", "-n", str(lines), cls.MAIN_LOG_FILE], capture_output=True, text=True, timeout=10
            )
            return result.stdout + result.stderr
        except Exception as e:
            logging.warning(f"Failed to get server logs via subprocess: {e}")
            return ""

    @classmethod
    def check_server_logs_for_errors(cls, lines: int = 500) -> list[str]:
        """
        Check server logs for error messages.

        Args:
            lines: Number of recent lines to check

        Returns:
            List of (stripped) log lines that match an error pattern
        """
        logs = cls.get_recent_server_logs(lines)
        error_patterns = [r"ERROR.*", r"CRITICAL.*", r"Failed.*", r"Exception.*", r"Error:.*"]

        errors = []
        for line in logs.split("\n"):
            for pattern in error_patterns:
                if re.search(pattern, line, re.IGNORECASE):
                    errors.append(line.strip())
                    # One match is enough; avoid duplicating the same line.
                    break

        return errors

    @classmethod
    def extract_conversation_usage_logs(cls, logs: str) -> list[dict[str, int]]:
        """
        Extract token budget calculation information from logs.

        Args:
            logs: Log content to parse

        Returns:
            List of dictionaries containing token usage data; each dict may
            contain any subset of the keys below, depending on what the log
            line actually reported.
        """
        usage_data = []
        marker = r"\[CONVERSATION_DEBUG\] Token budget calculation:"
        # Map result-dict keys to the labels used in the log line.
        fields = (
            ("total_capacity", "Total capacity"),
            ("content_allocation", "Content allocation"),
            ("conversation_tokens", "Conversation tokens"),
            ("remaining_tokens", "Remaining tokens"),
        )

        for line in logs.split("\n"):
            if not re.search(marker, line):
                continue
            usage_info = {}
            for key, label in fields:
                # Numbers are logged with thousands separators, e.g. "1,000".
                match = re.search(rf"{label}: ([\d,]+)", line)
                if match:
                    usage_info[key] = int(match.group(1).replace(",", ""))
            if usage_info:
                usage_data.append(usage_info)

        return usage_data

    @classmethod
    def extract_conversation_token_usage(cls, logs: str) -> list[int]:
        """
        Extract conversation token usage values from logs.

        Args:
            logs: Log content to parse

        Returns:
            List of token usage values, in order of appearance
        """
        pattern = r"Conversation history token usage:\s*([\d,]+)"
        usage_values = []

        for match in re.finditer(pattern, logs):
            usage_values.append(int(match.group(1).replace(",", "")))

        return usage_values

    @classmethod
    def extract_thread_creation_logs(cls, logs: str) -> list[dict[str, str]]:
        """
        Extract thread creation logs with parent relationships.

        Args:
            logs: Log content to parse

        Returns:
            List of dicts with "thread_id" and "parent_id" keys
            ("parent_id" is None for root threads)
        """
        thread_data = []
        pattern = r"\[THREAD\] Created new thread (\w+)(?: with parent (\w+))?"

        for match in re.finditer(pattern, logs):
            thread_data.append(
                {"thread_id": match.group(1), "parent_id": match.group(2) if match.group(2) else None}
            )

        return thread_data

    @classmethod
    def extract_history_traversal_logs(cls, logs: str) -> list[dict[str, Union[str, int]]]:
        """
        Extract conversation history traversal logs.

        Args:
            logs: Log content to parse

        Returns:
            List of dicts with "chain_length" (int) and "thread_id" (str) keys
        """
        traversal_data = []
        pattern = r"\[THREAD\] Retrieved chain of (\d+) messages for thread (\w+)"

        for match in re.finditer(pattern, logs):
            traversal_data.append({"chain_length": int(match.group(1)), "thread_id": match.group(2)})

        return traversal_data

    @classmethod
    def validate_file_deduplication_in_logs(cls, logs: str, tool_name: str, test_file: str) -> bool:
        """
        Validate that logs show file deduplication behavior.

        Args:
            logs: Log content to parse
            tool_name: Name of the tool being tested
            test_file: Name of the test file to check for deduplication

        Returns:
            True if deduplication evidence is found, False otherwise
        """
        # Escape user-supplied names: file names commonly contain regex
        # metacharacters (".", "+", parentheses) that would otherwise be
        # interpreted as pattern syntax.
        escaped_file = re.escape(test_file)

        # Look for embedding calculation
        has_embedding = bool(re.search(f"Calculating embeddings for {escaped_file}", logs))

        # Look for filtering message
        has_filtering = bool(re.search(f"Filtering {escaped_file} to prevent duplication", logs))

        # Look for skip message
        has_skip = bool(re.search(rf"Skipping {escaped_file} \(already processed", logs))

        # Look for tool-specific processing
        tool_pattern = rf"\[{re.escape(tool_name.upper())}\].*{escaped_file}"
        has_tool_processing = bool(re.search(tool_pattern, logs, re.IGNORECASE))

        # Deduplication is confirmed if we see evidence of processing and filtering/skipping
        return has_embedding and (has_filtering or has_skip) and has_tool_processing

    @classmethod
    def search_logs_for_pattern(
        cls, pattern: str, logs: Optional[str] = None, case_sensitive: bool = False
    ) -> list[str]:
        """
        Search logs for a specific pattern.

        Args:
            pattern: Regex pattern to search for
            logs: Log content to search (if None, reads recent logs)
            case_sensitive: Whether the search should be case sensitive

        Returns:
            List of matching (stripped) lines
        """
        if logs is None:
            logs = cls.get_recent_server_logs()

        flags = 0 if case_sensitive else re.IGNORECASE
        matches = []

        for line in logs.split("\n"):
            if re.search(pattern, line, flags):
                matches.append(line.strip())

        return matches

    @classmethod
    def get_log_file_info(cls) -> dict[str, dict[str, Union[str, int, bool]]]:
        """
        Get information about log files.

        Returns:
            Dictionary keyed by log file path; each value reports existence,
            size (bytes and MB), last-modified timestamp, and readability.
        """
        import os

        file_info = {}

        for log_file in [cls.MAIN_LOG_FILE, cls.ACTIVITY_LOG_FILE]:
            if os.path.exists(log_file):
                stat = os.stat(log_file)
                file_info[log_file] = {
                    "exists": True,
                    "size_bytes": stat.st_size,
                    "size_mb": round(stat.st_size / (1024 * 1024), 2),
                    "last_modified": stat.st_mtime,
                    "readable": os.access(log_file, os.R_OK),
                }
            else:
                file_info[log_file] = {
                    "exists": False,
                    "size_bytes": 0,
                    "size_mb": 0,
                    "last_modified": 0,
                    "readable": False,
                }

        return file_info
|
||||||
@@ -7,7 +7,6 @@ and builds conversation context correctly when using continuation_id.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -23,19 +22,16 @@ class TestConsensusConversation(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "Test consensus tool conversation building and continuation"
|
return "Test consensus tool conversation building and continuation"
|
||||||
|
|
||||||
def get_docker_logs(self):
|
def get_server_logs(self):
|
||||||
"""Get Docker container logs"""
|
"""Get server logs from local log file"""
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
log_file_path = "logs/mcp_server.log"
|
||||||
["docker", "logs", "--tail", "100", self.container_name], capture_output=True, text=True, timeout=30
|
with open(log_file_path) as f:
|
||||||
)
|
lines = f.readlines()
|
||||||
if result.returncode == 0:
|
# Return last 100 lines
|
||||||
return result.stdout.split("\n")
|
return [line.strip() for line in lines[-100:]]
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to get Docker logs: {result.stderr}")
|
|
||||||
return []
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Exception getting Docker logs: {e}")
|
self.logger.warning(f"Exception getting server logs: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
@@ -121,9 +117,9 @@ class TestConsensusConversation(BaseSimulatorTest):
|
|||||||
self.logger.info("Phase 3: Checking server logs for conversation building")
|
self.logger.info("Phase 3: Checking server logs for conversation building")
|
||||||
|
|
||||||
# Check for conversation-related log entries
|
# Check for conversation-related log entries
|
||||||
logs = self.get_docker_logs()
|
logs = self.get_server_logs()
|
||||||
if not logs:
|
if not logs:
|
||||||
self.logger.warning("Could not retrieve Docker logs for verification")
|
self.logger.warning("Could not retrieve server logs for verification")
|
||||||
else:
|
else:
|
||||||
# Look for conversation building indicators
|
# Look for conversation building indicators
|
||||||
conversation_logs = [
|
conversation_logs = [
|
||||||
|
|||||||
@@ -22,42 +22,6 @@ class ContentValidationTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "Content validation and duplicate detection"
|
return "Content validation and duplicate detection"
|
||||||
|
|
||||||
def get_docker_logs_since(self, since_time: str) -> str:
|
|
||||||
"""Get docker logs since a specific timestamp"""
|
|
||||||
try:
|
|
||||||
# Check both main server and log monitor for comprehensive logs
|
|
||||||
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
|
||||||
cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"]
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
|
||||||
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
|
||||||
|
|
||||||
# Get the internal log files which have more detailed logging
|
|
||||||
server_log_result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
activity_log_result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combine all logs
|
|
||||||
combined_logs = (
|
|
||||||
result_server.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ result_monitor.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ server_log_result.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ activity_log_result.stdout
|
|
||||||
)
|
|
||||||
return combined_logs
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get docker logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test that file processing system properly handles file deduplication"""
|
"""Test that file processing system properly handles file deduplication"""
|
||||||
try:
|
try:
|
||||||
@@ -151,9 +115,9 @@ DATABASE_CONFIG = {
|
|||||||
else:
|
else:
|
||||||
self.logger.warning(" ⚠️ Different tool failed")
|
self.logger.warning(" ⚠️ Different tool failed")
|
||||||
|
|
||||||
# Validate file processing behavior from Docker logs
|
# Validate file processing behavior from server logs
|
||||||
self.logger.info(" 4: Validating file processing logs")
|
self.logger.info(" 4: Validating file processing logs")
|
||||||
logs = self.get_docker_logs_since(start_time)
|
logs = self.get_server_logs_since(start_time)
|
||||||
|
|
||||||
# Check for proper file embedding logs
|
# Check for proper file embedding logs
|
||||||
embedding_logs = [
|
embedding_logs = [
|
||||||
|
|||||||
@@ -21,8 +21,6 @@ This validates the conversation threading system's ability to:
|
|||||||
- Properly traverse parent relationships for history reconstruction
|
- Properly traverse parent relationships for history reconstruction
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -38,53 +36,6 @@ class ConversationChainValidationTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "Conversation chain and threading validation"
|
return "Conversation chain and threading validation"
|
||||||
|
|
||||||
def get_recent_server_logs(self) -> str:
|
|
||||||
"""Get recent server logs from the log file directly"""
|
|
||||||
try:
|
|
||||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return result.stdout
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to read server logs: {result.stderr}")
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get server logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def extract_thread_creation_logs(self, logs: str) -> list[dict[str, str]]:
|
|
||||||
"""Extract thread creation logs with parent relationships"""
|
|
||||||
thread_logs = []
|
|
||||||
|
|
||||||
lines = logs.split("\n")
|
|
||||||
for line in lines:
|
|
||||||
if "[THREAD] Created new thread" in line:
|
|
||||||
# Parse: [THREAD] Created new thread 9dc779eb-645f-4850-9659-34c0e6978d73 with parent a0ce754d-c995-4b3e-9103-88af429455aa
|
|
||||||
match = re.search(r"\[THREAD\] Created new thread ([a-f0-9-]+) with parent ([a-f0-9-]+|None)", line)
|
|
||||||
if match:
|
|
||||||
thread_id = match.group(1)
|
|
||||||
parent_id = match.group(2) if match.group(2) != "None" else None
|
|
||||||
thread_logs.append({"thread_id": thread_id, "parent_id": parent_id, "log_line": line})
|
|
||||||
|
|
||||||
return thread_logs
|
|
||||||
|
|
||||||
def extract_history_traversal_logs(self, logs: str) -> list[dict[str, str]]:
|
|
||||||
"""Extract conversation history traversal logs"""
|
|
||||||
traversal_logs = []
|
|
||||||
|
|
||||||
lines = logs.split("\n")
|
|
||||||
for line in lines:
|
|
||||||
if "[THREAD] Retrieved chain of" in line:
|
|
||||||
# Parse: [THREAD] Retrieved chain of 3 threads for 9dc779eb-645f-4850-9659-34c0e6978d73
|
|
||||||
match = re.search(r"\[THREAD\] Retrieved chain of (\d+) threads for ([a-f0-9-]+)", line)
|
|
||||||
if match:
|
|
||||||
chain_length = int(match.group(1))
|
|
||||||
thread_id = match.group(2)
|
|
||||||
traversal_logs.append({"thread_id": thread_id, "chain_length": chain_length, "log_line": line})
|
|
||||||
|
|
||||||
return traversal_logs
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test conversation chain and threading functionality"""
|
"""Test conversation chain and threading functionality"""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ Validates:
|
|||||||
5. Proper tool chaining with context
|
5. Proper tool chaining with context
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -28,40 +27,6 @@ class CrossToolComprehensiveTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "Comprehensive cross-tool file deduplication and continuation"
|
return "Comprehensive cross-tool file deduplication and continuation"
|
||||||
|
|
||||||
def get_docker_logs_since(self, since_time: str) -> str:
|
|
||||||
"""Get docker logs since a specific timestamp"""
|
|
||||||
try:
|
|
||||||
# Check both main server and log monitor for comprehensive logs
|
|
||||||
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
|
||||||
cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"]
|
|
||||||
|
|
||||||
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
|
||||||
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
|
||||||
|
|
||||||
# Get the internal log files which have more detailed logging
|
|
||||||
server_log_result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
activity_log_result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combine all logs
|
|
||||||
combined_logs = (
|
|
||||||
result_server.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ result_monitor.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ server_log_result.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ activity_log_result.stdout
|
|
||||||
)
|
|
||||||
return combined_logs
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get docker logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Comprehensive cross-tool test with all MCP tools"""
|
"""Comprehensive cross-tool test with all MCP tools"""
|
||||||
try:
|
try:
|
||||||
@@ -247,7 +212,7 @@ def secure_login(user, pwd):
|
|||||||
|
|
||||||
# Validate comprehensive results
|
# Validate comprehensive results
|
||||||
self.logger.info(" 📋 Validating comprehensive cross-tool results...")
|
self.logger.info(" 📋 Validating comprehensive cross-tool results...")
|
||||||
logs = self.get_docker_logs_since(start_time)
|
logs = self.get_server_logs_since(start_time)
|
||||||
|
|
||||||
# Validation criteria
|
# Validation criteria
|
||||||
tools_used = [r[0] for r in responses]
|
tools_used = [r[0] for r in responses]
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ Tests comprehensive cross-tool continuation scenarios to ensure
|
|||||||
conversation context is maintained when switching between different tools.
|
conversation context is maintained when switching between different tools.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .conversation_base_test import ConversationBaseTest
|
||||||
|
|
||||||
|
|
||||||
class CrossToolContinuationTest(BaseSimulatorTest):
|
class CrossToolContinuationTest(ConversationBaseTest):
|
||||||
"""Test comprehensive cross-tool continuation scenarios"""
|
"""Test comprehensive cross-tool continuation scenarios"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -25,8 +25,8 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
try:
|
try:
|
||||||
self.logger.info("🔧 Test: Cross-tool continuation scenarios")
|
self.logger.info("🔧 Test: Cross-tool continuation scenarios")
|
||||||
|
|
||||||
# Setup test files
|
# Setup test environment for conversation testing
|
||||||
self.setup_test_files()
|
self.setUp()
|
||||||
|
|
||||||
success_count = 0
|
success_count = 0
|
||||||
total_scenarios = 3
|
total_scenarios = 3
|
||||||
@@ -62,7 +62,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
self.logger.info(" 1: Testing chat -> thinkdeep -> codereview")
|
self.logger.info(" 1: Testing chat -> thinkdeep -> codereview")
|
||||||
|
|
||||||
# Start with chat
|
# Start with chat
|
||||||
chat_response, chat_id = self.call_mcp_tool(
|
chat_response, chat_id = self.call_mcp_tool_direct(
|
||||||
"chat",
|
"chat",
|
||||||
{
|
{
|
||||||
"prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
|
"prompt": "Please use low thinking mode. Look at this Python code and tell me what you think about it",
|
||||||
@@ -76,7 +76,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# Continue with thinkdeep
|
# Continue with thinkdeep
|
||||||
thinkdeep_response, _ = self.call_mcp_tool(
|
thinkdeep_response, _ = self.call_mcp_tool_direct(
|
||||||
"thinkdeep",
|
"thinkdeep",
|
||||||
{
|
{
|
||||||
"prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
|
"prompt": "Please use low thinking mode. Think deeply about potential performance issues in this code",
|
||||||
@@ -91,7 +91,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# Continue with codereview
|
# Continue with codereview
|
||||||
codereview_response, _ = self.call_mcp_tool(
|
codereview_response, _ = self.call_mcp_tool_direct(
|
||||||
"codereview",
|
"codereview",
|
||||||
{
|
{
|
||||||
"files": [self.test_files["python"]], # Same file should be deduplicated
|
"files": [self.test_files["python"]], # Same file should be deduplicated
|
||||||
@@ -118,8 +118,13 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
self.logger.info(" 2: Testing analyze -> debug -> thinkdeep")
|
self.logger.info(" 2: Testing analyze -> debug -> thinkdeep")
|
||||||
|
|
||||||
# Start with analyze
|
# Start with analyze
|
||||||
analyze_response, analyze_id = self.call_mcp_tool(
|
analyze_response, analyze_id = self.call_mcp_tool_direct(
|
||||||
"analyze", {"files": [self.test_files["python"]], "analysis_type": "code_quality", "model": "flash"}
|
"analyze",
|
||||||
|
{
|
||||||
|
"files": [self.test_files["python"]],
|
||||||
|
"prompt": "Analyze this code for quality and performance issues",
|
||||||
|
"model": "flash",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if not analyze_response or not analyze_id:
|
if not analyze_response or not analyze_id:
|
||||||
@@ -127,7 +132,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# Continue with debug
|
# Continue with debug
|
||||||
debug_response, _ = self.call_mcp_tool(
|
debug_response, _ = self.call_mcp_tool_direct(
|
||||||
"debug",
|
"debug",
|
||||||
{
|
{
|
||||||
"files": [self.test_files["python"]], # Same file should be deduplicated
|
"files": [self.test_files["python"]], # Same file should be deduplicated
|
||||||
@@ -142,7 +147,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# Continue with thinkdeep
|
# Continue with thinkdeep
|
||||||
final_response, _ = self.call_mcp_tool(
|
final_response, _ = self.call_mcp_tool_direct(
|
||||||
"thinkdeep",
|
"thinkdeep",
|
||||||
{
|
{
|
||||||
"prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
|
"prompt": "Please use low thinking mode. Think deeply about the architectural implications of the issues we've found",
|
||||||
@@ -169,7 +174,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
self.logger.info(" 3: Testing multi-file cross-tool continuation")
|
self.logger.info(" 3: Testing multi-file cross-tool continuation")
|
||||||
|
|
||||||
# Start with both files
|
# Start with both files
|
||||||
multi_response, multi_id = self.call_mcp_tool(
|
multi_response, multi_id = self.call_mcp_tool_direct(
|
||||||
"chat",
|
"chat",
|
||||||
{
|
{
|
||||||
"prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
|
"prompt": "Please use low thinking mode. Analyze both the Python code and configuration file",
|
||||||
@@ -183,7 +188,7 @@ class CrossToolContinuationTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# Switch to codereview with same files (should use conversation history)
|
# Switch to codereview with same files (should use conversation history)
|
||||||
multi_review, _ = self.call_mcp_tool(
|
multi_review, _ = self.call_mcp_tool_direct(
|
||||||
"codereview",
|
"codereview",
|
||||||
{
|
{
|
||||||
"files": [self.test_files["python"], self.test_files["config"]], # Same files
|
"files": [self.test_files["python"], self.test_files["config"]], # Same files
|
||||||
|
|||||||
@@ -378,13 +378,8 @@ The code looks correct to me, but something is causing valid sessions to be trea
|
|||||||
# Validate logs
|
# Validate logs
|
||||||
self.logger.info(" 📋 Validating execution logs...")
|
self.logger.info(" 📋 Validating execution logs...")
|
||||||
|
|
||||||
# Get server logs from the actual log file inside the container
|
# Get server logs using inherited method
|
||||||
result = self.run_command(
|
logs = self.get_recent_server_logs(500)
|
||||||
["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
logs = result.stdout.decode() + result.stderr.decode()
|
|
||||||
|
|
||||||
# Look for debug tool execution patterns
|
# Look for debug tool execution patterns
|
||||||
debug_patterns = [
|
debug_patterns = [
|
||||||
@@ -405,8 +400,6 @@ The code looks correct to me, but something is causing valid sessions to be trea
|
|||||||
self.logger.info(f" ✅ Log validation passed ({patterns_found}/{len(debug_patterns)} patterns)")
|
self.logger.info(f" ✅ Log validation passed ({patterns_found}/{len(debug_patterns)} patterns)")
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f" ⚠️ Only found {patterns_found}/{len(debug_patterns)} log patterns")
|
self.logger.warning(f" ⚠️ Only found {patterns_found}/{len(debug_patterns)} log patterns")
|
||||||
else:
|
|
||||||
self.logger.warning(" ⚠️ Could not retrieve Docker logs")
|
|
||||||
|
|
||||||
# Test continuation if available
|
# Test continuation if available
|
||||||
if continuation_id:
|
if continuation_id:
|
||||||
|
|||||||
@@ -145,14 +145,16 @@ def validate_data(data):
|
|||||||
# Test 4: Validate log patterns
|
# Test 4: Validate log patterns
|
||||||
self.logger.info(" 1.4: Validating line number processing in logs")
|
self.logger.info(" 1.4: Validating line number processing in logs")
|
||||||
|
|
||||||
# Get logs from container
|
# Get logs from server
|
||||||
result = self.run_command(
|
try:
|
||||||
["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True
|
log_file_path = "logs/mcp_server.log"
|
||||||
)
|
with open(log_file_path) as f:
|
||||||
|
lines = f.readlines()
|
||||||
|
logs = "".join(lines[-500:])
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to read server logs: {e}")
|
||||||
logs = ""
|
logs = ""
|
||||||
if result.returncode == 0:
|
pass
|
||||||
logs = result.stdout.decode()
|
|
||||||
|
|
||||||
# Check for line number formatting patterns
|
# Check for line number formatting patterns
|
||||||
line_number_patterns = ["Line numbers for", "enabled", "│", "line number"] # The line number separator
|
line_number_patterns = ["Line numbers for", "enabled", "│", "line number"] # The line number separator
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Docker Logs Validation Test
|
Server Logs Validation Test
|
||||||
|
|
||||||
Validates Docker logs to confirm file deduplication behavior and
|
Validates server logs to confirm file deduplication behavior and
|
||||||
conversation threading is working properly.
|
conversation threading is working properly.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -10,7 +10,7 @@ from .base_test import BaseSimulatorTest
|
|||||||
|
|
||||||
|
|
||||||
class LogsValidationTest(BaseSimulatorTest):
|
class LogsValidationTest(BaseSimulatorTest):
|
||||||
"""Validate Docker logs to confirm file deduplication behavior"""
|
"""Validate server logs to confirm file deduplication behavior"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def test_name(self) -> str:
|
def test_name(self) -> str:
|
||||||
@@ -18,39 +18,35 @@ class LogsValidationTest(BaseSimulatorTest):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "Docker logs validation"
|
return "Server logs validation"
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Validate Docker logs to confirm file deduplication behavior"""
|
"""Validate server logs to confirm file deduplication behavior"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("📋 Test: Validating Docker logs for file deduplication...")
|
self.logger.info("📋 Test: Validating server logs for file deduplication...")
|
||||||
|
|
||||||
# Get server logs from main container
|
# Get server logs from log files
|
||||||
result = self.run_command(["docker", "logs", self.container_name], capture_output=True)
|
import os
|
||||||
|
|
||||||
if result.returncode != 0:
|
logs = ""
|
||||||
self.logger.error(f"Failed to get Docker logs: {result.stderr}")
|
log_files = ["logs/mcp_server.log", "logs/mcp_activity.log"]
|
||||||
|
|
||||||
|
for log_file in log_files:
|
||||||
|
if os.path.exists(log_file):
|
||||||
|
try:
|
||||||
|
with open(log_file) as f:
|
||||||
|
file_content = f.read()
|
||||||
|
logs += f"\n=== {log_file} ===\n{file_content}\n"
|
||||||
|
self.logger.debug(f"Read {len(file_content)} characters from {log_file}")
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not read {log_file}: {e}")
|
||||||
|
else:
|
||||||
|
self.logger.warning(f"Log file not found: {log_file}")
|
||||||
|
|
||||||
|
if not logs.strip():
|
||||||
|
self.logger.warning("No log content found - server may not have processed any requests yet")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
main_logs = result.stdout.decode() + result.stderr.decode()
|
|
||||||
|
|
||||||
# Get logs from log monitor container (where detailed activity is logged)
|
|
||||||
monitor_result = self.run_command(["docker", "logs", "zen-mcp-log-monitor"], capture_output=True)
|
|
||||||
monitor_logs = ""
|
|
||||||
if monitor_result.returncode == 0:
|
|
||||||
monitor_logs = monitor_result.stdout.decode() + monitor_result.stderr.decode()
|
|
||||||
|
|
||||||
# Also get activity logs for more detailed conversation tracking
|
|
||||||
activity_result = self.run_command(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True
|
|
||||||
)
|
|
||||||
|
|
||||||
activity_logs = ""
|
|
||||||
if activity_result.returncode == 0:
|
|
||||||
activity_logs = activity_result.stdout.decode()
|
|
||||||
|
|
||||||
logs = main_logs + "\n" + monitor_logs + "\n" + activity_logs
|
|
||||||
|
|
||||||
# Look for conversation threading patterns that indicate the system is working
|
# Look for conversation threading patterns that indicate the system is working
|
||||||
conversation_patterns = [
|
conversation_patterns = [
|
||||||
"CONVERSATION_RESUME",
|
"CONVERSATION_RESUME",
|
||||||
|
|||||||
@@ -4,11 +4,10 @@ O3 Model Selection Test
|
|||||||
|
|
||||||
Tests that O3 models are properly selected and used when explicitly specified,
|
Tests that O3 models are properly selected and used when explicitly specified,
|
||||||
regardless of the default model configuration (even when set to auto).
|
regardless of the default model configuration (even when set to auto).
|
||||||
Validates model selection via Docker logs.
|
Validates model selection via server logs.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -24,47 +23,16 @@ class O3ModelSelectionTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "O3 model selection and usage validation"
|
return "O3 model selection and usage validation"
|
||||||
|
|
||||||
def get_recent_server_logs(self) -> str:
|
|
||||||
"""Get recent server logs from the log file directly"""
|
|
||||||
try:
|
|
||||||
# Read logs directly from the log file - use more lines to ensure we get all test-related logs
|
|
||||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return result.stdout
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to read server logs: {result.stderr}")
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get server logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test O3 model selection and usage"""
|
"""Test O3 model selection and usage"""
|
||||||
try:
|
try:
|
||||||
self.logger.info(" Test: O3 model selection and usage validation")
|
self.logger.info(" Test: O3 model selection and usage validation")
|
||||||
|
|
||||||
# Check which API keys are configured
|
# Check which API keys are configured
|
||||||
check_cmd = [
|
import os
|
||||||
"docker",
|
|
||||||
"exec",
|
|
||||||
self.container_name,
|
|
||||||
"python",
|
|
||||||
"-c",
|
|
||||||
'import os; print(f\'OPENAI_KEY:{bool(os.environ.get("OPENAI_API_KEY"))}|OPENROUTER_KEY:{bool(os.environ.get("OPENROUTER_API_KEY"))}\')',
|
|
||||||
]
|
|
||||||
result = subprocess.run(check_cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
has_openai = False
|
has_openai = bool(os.environ.get("OPENAI_API_KEY"))
|
||||||
has_openrouter = False
|
has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY"))
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
output = result.stdout.strip()
|
|
||||||
if "OPENAI_KEY:True" in output:
|
|
||||||
has_openai = True
|
|
||||||
if "OPENROUTER_KEY:True" in output:
|
|
||||||
has_openrouter = True
|
|
||||||
|
|
||||||
# If only OpenRouter is configured, adjust test expectations
|
# If only OpenRouter is configured, adjust test expectations
|
||||||
if has_openrouter and not has_openai:
|
if has_openrouter and not has_openai:
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ Tests custom API endpoint functionality with Ollama-style local models, includin
|
|||||||
- Model alias resolution for local models
|
- Model alias resolution for local models
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -30,14 +29,15 @@ class OllamaCustomUrlTest(BaseSimulatorTest):
|
|||||||
try:
|
try:
|
||||||
self.logger.info("Test: Ollama custom URL functionality")
|
self.logger.info("Test: Ollama custom URL functionality")
|
||||||
|
|
||||||
# Check if custom URL is configured in the Docker container
|
# Check if custom URL is configured
|
||||||
custom_url = self._check_docker_custom_url()
|
import os
|
||||||
|
|
||||||
|
custom_url = os.environ.get("CUSTOM_API_URL")
|
||||||
if not custom_url:
|
if not custom_url:
|
||||||
self.logger.warning("CUSTOM_API_URL not set in Docker container, skipping Ollama test")
|
self.logger.warning("CUSTOM_API_URL not set, skipping Ollama test")
|
||||||
self.logger.info("To enable this test, add to .env file:")
|
self.logger.info("To enable this test, add to .env file:")
|
||||||
self.logger.info("CUSTOM_API_URL=http://host.docker.internal:11434/v1")
|
self.logger.info("CUSTOM_API_URL=http://localhost:11434/v1")
|
||||||
self.logger.info("CUSTOM_API_KEY=")
|
self.logger.info("CUSTOM_API_KEY=")
|
||||||
self.logger.info("Then restart docker-compose")
|
|
||||||
return True # Skip gracefully
|
return True # Skip gracefully
|
||||||
|
|
||||||
self.logger.info(f"Testing with custom URL: {custom_url}")
|
self.logger.info(f"Testing with custom URL: {custom_url}")
|
||||||
@@ -172,25 +172,6 @@ if __name__ == "__main__":
|
|||||||
finally:
|
finally:
|
||||||
self.cleanup_test_files()
|
self.cleanup_test_files()
|
||||||
|
|
||||||
def _check_docker_custom_url(self) -> str:
|
|
||||||
"""Check if CUSTOM_API_URL is set in the Docker container"""
|
|
||||||
try:
|
|
||||||
result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "printenv", "CUSTOM_API_URL"],
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
timeout=10,
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode == 0 and result.stdout.strip():
|
|
||||||
return result.stdout.strip()
|
|
||||||
|
|
||||||
return ""
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.debug(f"Failed to check Docker CUSTOM_API_URL: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def validate_successful_response(self, response: str, test_name: str, files_provided: bool = False) -> bool:
|
def validate_successful_response(self, response: str, test_name: str, files_provided: bool = False) -> bool:
|
||||||
"""Validate that the response indicates success, not an error
|
"""Validate that the response indicates success, not an error
|
||||||
|
|
||||||
@@ -201,7 +182,7 @@ if __name__ == "__main__":
|
|||||||
"""
|
"""
|
||||||
if not response:
|
if not response:
|
||||||
self.logger.error(f"No response received for {test_name}")
|
self.logger.error(f"No response received for {test_name}")
|
||||||
self._check_docker_logs_for_errors()
|
self._check_server_logs_for_errors()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Check for common error indicators
|
# Check for common error indicators
|
||||||
@@ -227,7 +208,7 @@ if __name__ == "__main__":
|
|||||||
]
|
]
|
||||||
|
|
||||||
# Special handling for clarification requests from local models
|
# Special handling for clarification requests from local models
|
||||||
if "clarification_required" in response.lower():
|
if "files_required_to_continue" in response.lower():
|
||||||
if files_provided:
|
if files_provided:
|
||||||
# If we provided actual files, clarification request is a FAILURE
|
# If we provided actual files, clarification request is a FAILURE
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
@@ -243,7 +224,7 @@ if __name__ == "__main__":
|
|||||||
self.logger.debug(f"Clarification response: {response[:200]}...")
|
self.logger.debug(f"Clarification response: {response[:200]}...")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check for SSRF security restriction - this is expected for local URLs from Docker
|
# Check for SSRF security restriction - this is expected for local URLs
|
||||||
if "restricted IP address" in response and "security risk (SSRF)" in response:
|
if "restricted IP address" in response and "security risk (SSRF)" in response:
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f"✅ Custom URL routing working - {test_name} correctly attempted to connect to custom API"
|
f"✅ Custom URL routing working - {test_name} correctly attempted to connect to custom API"
|
||||||
@@ -256,19 +237,19 @@ if __name__ == "__main__":
|
|||||||
if error.lower() in response_lower:
|
if error.lower() in response_lower:
|
||||||
self.logger.error(f"Error detected in {test_name}: {error}")
|
self.logger.error(f"Error detected in {test_name}: {error}")
|
||||||
self.logger.debug(f"Full response: {response}")
|
self.logger.debug(f"Full response: {response}")
|
||||||
self._check_docker_logs_for_errors()
|
self._check_server_logs_for_errors()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Response should be substantial (more than just a few words)
|
# Response should be substantial (more than just a few words)
|
||||||
if len(response.strip()) < 10:
|
if len(response.strip()) < 10:
|
||||||
self.logger.error(f"Response too short for {test_name}: {response}")
|
self.logger.error(f"Response too short for {test_name}: {response}")
|
||||||
self._check_docker_logs_for_errors()
|
self._check_server_logs_for_errors()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Verify this looks like a real AI response, not just an error message
|
# Verify this looks like a real AI response, not just an error message
|
||||||
if not self._validate_ai_response_content(response):
|
if not self._validate_ai_response_content(response):
|
||||||
self.logger.error(f"Response doesn't look like valid AI output for {test_name}")
|
self.logger.error(f"Response doesn't look like valid AI output for {test_name}")
|
||||||
self._check_docker_logs_for_errors()
|
self._check_server_logs_for_errors()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.debug(f"Successful response for {test_name}: {response[:100]}...")
|
self.logger.debug(f"Successful response for {test_name}: {response[:100]}...")
|
||||||
@@ -329,24 +310,23 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _check_docker_logs_for_errors(self):
|
def _check_server_logs_for_errors(self):
|
||||||
"""Check Docker logs for any error messages that might explain failures"""
|
"""Check server logs for any error messages that might explain failures"""
|
||||||
try:
|
try:
|
||||||
# Get recent logs from the container
|
# Get recent logs from the log file
|
||||||
result = subprocess.run(
|
log_file_path = "logs/mcp_server.log"
|
||||||
["docker", "logs", "--tail", "50", self.container_name], capture_output=True, text=True, timeout=10
|
with open(log_file_path) as f:
|
||||||
)
|
lines = f.readlines()
|
||||||
|
recent_logs = lines[-50:] # Last 50 lines
|
||||||
|
|
||||||
if result.returncode == 0 and result.stderr:
|
|
||||||
recent_logs = result.stderr.strip()
|
|
||||||
if recent_logs:
|
if recent_logs:
|
||||||
self.logger.info("Recent container logs:")
|
self.logger.info("Recent server logs:")
|
||||||
for line in recent_logs.split("\n")[-10:]: # Last 10 lines
|
for line in recent_logs[-10:]: # Last 10 lines
|
||||||
if line.strip():
|
if line.strip():
|
||||||
self.logger.info(f" {line}")
|
self.logger.info(f" {line.strip()}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.debug(f"Failed to check Docker logs: {e}")
|
self.logger.debug(f"Failed to check server logs: {e}")
|
||||||
|
|
||||||
def validate_local_model_response(self, response: str) -> bool:
|
def validate_local_model_response(self, response: str) -> bool:
|
||||||
"""Validate that response appears to come from a local model"""
|
"""Validate that response appears to come from a local model"""
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ Tests that verify the system correctly falls back to OpenRouter when:
|
|||||||
- Auto mode correctly selects OpenRouter models
|
- Auto mode correctly selects OpenRouter models
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -24,42 +23,17 @@ class OpenRouterFallbackTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "OpenRouter fallback behavior when only provider"
|
return "OpenRouter fallback behavior when only provider"
|
||||||
|
|
||||||
def get_recent_server_logs(self) -> str:
|
|
||||||
"""Get recent server logs from the log file directly"""
|
|
||||||
try:
|
|
||||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return result.stdout
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to read server logs: {result.stderr}")
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get server logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test OpenRouter fallback behavior"""
|
"""Test OpenRouter fallback behavior"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("Test: OpenRouter fallback behavior when only provider available")
|
self.logger.info("Test: OpenRouter fallback behavior when only provider available")
|
||||||
|
|
||||||
# Check if ONLY OpenRouter API key is configured (this is a fallback test)
|
# Check if ONLY OpenRouter API key is configured (this is a fallback test)
|
||||||
check_cmd = [
|
import os
|
||||||
"docker",
|
|
||||||
"exec",
|
|
||||||
self.container_name,
|
|
||||||
"python",
|
|
||||||
"-c",
|
|
||||||
'import os; print("OPENROUTER_KEY:" + str(bool(os.environ.get("OPENROUTER_API_KEY"))) + "|GEMINI_KEY:" + str(bool(os.environ.get("GEMINI_API_KEY"))) + "|OPENAI_KEY:" + str(bool(os.environ.get("OPENAI_API_KEY"))))',
|
|
||||||
]
|
|
||||||
result = subprocess.run(check_cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY"))
|
||||||
output = result.stdout.strip()
|
has_gemini = bool(os.environ.get("GEMINI_API_KEY"))
|
||||||
has_openrouter = "OPENROUTER_KEY:True" in output
|
has_openai = bool(os.environ.get("OPENAI_API_KEY"))
|
||||||
has_gemini = "GEMINI_KEY:True" in output
|
|
||||||
has_openai = "OPENAI_KEY:True" in output
|
|
||||||
|
|
||||||
if not has_openrouter:
|
if not has_openrouter:
|
||||||
self.logger.info(" ⚠️ OpenRouter API key not configured - skipping test")
|
self.logger.info(" ⚠️ OpenRouter API key not configured - skipping test")
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ Tests that verify OpenRouter functionality including:
|
|||||||
- Error handling when models are not available
|
- Error handling when models are not available
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -25,39 +24,17 @@ class OpenRouterModelsTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "OpenRouter model functionality and alias mapping"
|
return "OpenRouter model functionality and alias mapping"
|
||||||
|
|
||||||
def get_recent_server_logs(self) -> str:
|
|
||||||
"""Get recent server logs from the log file directly"""
|
|
||||||
try:
|
|
||||||
# Read logs directly from the log file
|
|
||||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return result.stdout
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to read server logs: {result.stderr}")
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get server logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test OpenRouter model functionality"""
|
"""Test OpenRouter model functionality"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("Test: OpenRouter model functionality and alias mapping")
|
self.logger.info("Test: OpenRouter model functionality and alias mapping")
|
||||||
|
|
||||||
# Check if OpenRouter API key is configured
|
# Check if OpenRouter API key is configured
|
||||||
check_cmd = [
|
import os
|
||||||
"docker",
|
|
||||||
"exec",
|
|
||||||
self.container_name,
|
|
||||||
"python",
|
|
||||||
"-c",
|
|
||||||
'import os; print("OPENROUTER_KEY:" + str(bool(os.environ.get("OPENROUTER_API_KEY"))))',
|
|
||||||
]
|
|
||||||
result = subprocess.run(check_cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0 and "OPENROUTER_KEY:False" in result.stdout:
|
has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY"))
|
||||||
|
|
||||||
|
if not has_openrouter:
|
||||||
self.logger.info(" ⚠️ OpenRouter API key not configured - skipping test")
|
self.logger.info(" ⚠️ OpenRouter API key not configured - skipping test")
|
||||||
self.logger.info(" ℹ️ This test requires OPENROUTER_API_KEY to be set in .env")
|
self.logger.info(" ℹ️ This test requires OPENROUTER_API_KEY to be set in .env")
|
||||||
return True # Return True to indicate test is skipped, not failed
|
return True # Return True to indicate test is skipped, not failed
|
||||||
|
|||||||
@@ -8,16 +8,15 @@ Validates that:
|
|||||||
1. Files are embedded only once in conversation history
|
1. Files are embedded only once in conversation history
|
||||||
2. Continuation calls don't re-read existing files
|
2. Continuation calls don't re-read existing files
|
||||||
3. New files are still properly embedded
|
3. New files are still properly embedded
|
||||||
4. Docker logs show deduplication behavior
|
4. Server logs show deduplication behavior
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .conversation_base_test import ConversationBaseTest
|
||||||
|
|
||||||
|
|
||||||
class PerToolDeduplicationTest(BaseSimulatorTest):
|
class PerToolDeduplicationTest(ConversationBaseTest):
|
||||||
"""Test file deduplication for each individual tool"""
|
"""Test file deduplication for each individual tool"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -28,74 +27,16 @@ class PerToolDeduplicationTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "File deduplication for individual tools"
|
return "File deduplication for individual tools"
|
||||||
|
|
||||||
def get_docker_logs_since(self, since_time: str) -> str:
|
|
||||||
"""Get docker logs since a specific timestamp"""
|
|
||||||
try:
|
|
||||||
# Check both main server and log monitor for comprehensive logs
|
|
||||||
cmd_server = ["docker", "logs", "--since", since_time, self.container_name]
|
|
||||||
cmd_monitor = ["docker", "logs", "--since", since_time, "zen-mcp-log-monitor"]
|
|
||||||
|
|
||||||
result_server = subprocess.run(cmd_server, capture_output=True, text=True)
|
|
||||||
result_monitor = subprocess.run(cmd_monitor, capture_output=True, text=True)
|
|
||||||
|
|
||||||
# Get the internal log files which have more detailed logging
|
|
||||||
server_log_result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_server.log"], capture_output=True, text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
activity_log_result = subprocess.run(
|
|
||||||
["docker", "exec", self.container_name, "cat", "/tmp/mcp_activity.log"], capture_output=True, text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combine all logs
|
|
||||||
combined_logs = (
|
|
||||||
result_server.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ result_monitor.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ server_log_result.stdout
|
|
||||||
+ "\n"
|
|
||||||
+ activity_log_result.stdout
|
|
||||||
)
|
|
||||||
return combined_logs
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get docker logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# create_additional_test_file method now inherited from base class
|
# create_additional_test_file method now inherited from base class
|
||||||
|
|
||||||
def validate_file_deduplication_in_logs(self, logs: str, tool_name: str, test_file: str) -> bool:
|
|
||||||
"""Validate that logs show file deduplication behavior"""
|
|
||||||
# Look for file embedding messages
|
|
||||||
embedding_messages = [
|
|
||||||
line for line in logs.split("\n") if "📁" in line and "embedding" in line and tool_name in line
|
|
||||||
]
|
|
||||||
|
|
||||||
# Look for deduplication/filtering messages
|
|
||||||
filtering_messages = [
|
|
||||||
line for line in logs.split("\n") if "📁" in line and "Filtering" in line and tool_name in line
|
|
||||||
]
|
|
||||||
skipping_messages = [
|
|
||||||
line for line in logs.split("\n") if "📁" in line and "skipping" in line and tool_name in line
|
|
||||||
]
|
|
||||||
|
|
||||||
deduplication_found = len(filtering_messages) > 0 or len(skipping_messages) > 0
|
|
||||||
|
|
||||||
if deduplication_found:
|
|
||||||
self.logger.info(f" ✅ {tool_name}: Found deduplication evidence in logs")
|
|
||||||
for msg in filtering_messages + skipping_messages:
|
|
||||||
self.logger.debug(f" 📁 {msg.strip()}")
|
|
||||||
else:
|
|
||||||
self.logger.warning(f" ⚠️ {tool_name}: No deduplication evidence found in logs")
|
|
||||||
self.logger.debug(f" 📁 All embedding messages: {embedding_messages}")
|
|
||||||
|
|
||||||
return deduplication_found
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test file deduplication with realistic precommit/codereview workflow"""
|
"""Test file deduplication with realistic precommit/codereview workflow"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("📄 Test: Simplified file deduplication with precommit/codereview workflow")
|
self.logger.info("📄 Test: Simplified file deduplication with precommit/codereview workflow")
|
||||||
|
|
||||||
|
# Setup test environment for conversation testing
|
||||||
|
self.setUp()
|
||||||
|
|
||||||
# Setup test files
|
# Setup test files
|
||||||
self.setup_test_files()
|
self.setup_test_files()
|
||||||
|
|
||||||
@@ -126,7 +67,7 @@ def divide(x, y):
|
|||||||
"model": "flash",
|
"model": "flash",
|
||||||
}
|
}
|
||||||
|
|
||||||
response1, continuation_id = self.call_mcp_tool("precommit", precommit_params)
|
response1, continuation_id = self.call_mcp_tool_direct("precommit", precommit_params)
|
||||||
if not response1:
|
if not response1:
|
||||||
self.logger.error(" ❌ Step 1: precommit tool failed")
|
self.logger.error(" ❌ Step 1: precommit tool failed")
|
||||||
return False
|
return False
|
||||||
@@ -151,7 +92,7 @@ def divide(x, y):
|
|||||||
"model": "flash",
|
"model": "flash",
|
||||||
}
|
}
|
||||||
|
|
||||||
response2, _ = self.call_mcp_tool("codereview", codereview_params)
|
response2, _ = self.call_mcp_tool_direct("codereview", codereview_params)
|
||||||
if not response2:
|
if not response2:
|
||||||
self.logger.error(" ❌ Step 2: codereview tool failed")
|
self.logger.error(" ❌ Step 2: codereview tool failed")
|
||||||
return False
|
return False
|
||||||
@@ -181,16 +122,16 @@ def subtract(a, b):
|
|||||||
"model": "flash",
|
"model": "flash",
|
||||||
}
|
}
|
||||||
|
|
||||||
response3, _ = self.call_mcp_tool("precommit", continue_params)
|
response3, _ = self.call_mcp_tool_direct("precommit", continue_params)
|
||||||
if not response3:
|
if not response3:
|
||||||
self.logger.error(" ❌ Step 3: precommit continuation failed")
|
self.logger.error(" ❌ Step 3: precommit continuation failed")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.logger.info(" ✅ Step 3: precommit continuation completed")
|
self.logger.info(" ✅ Step 3: precommit continuation completed")
|
||||||
|
|
||||||
# Validate results in docker logs
|
# Validate results in server logs
|
||||||
self.logger.info(" 📋 Validating conversation history and file deduplication...")
|
self.logger.info(" 📋 Validating conversation history and file deduplication...")
|
||||||
logs = self.get_docker_logs_since(start_time)
|
logs = self.get_server_logs_since(start_time)
|
||||||
|
|
||||||
# Check for conversation history building
|
# Check for conversation history building
|
||||||
conversation_logs = [
|
conversation_logs = [
|
||||||
@@ -249,7 +190,7 @@ def subtract(a, b):
|
|||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self.logger.warning(" ⚠️ File deduplication workflow test: FAILED")
|
self.logger.warning(" ⚠️ File deduplication workflow test: FAILED")
|
||||||
self.logger.warning(" 💡 Check docker logs for detailed file embedding and continuation activity")
|
self.logger.warning(" 💡 Check server logs for detailed file embedding and continuation activity")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -244,7 +244,7 @@ class PlannerContinuationHistoryTest(BaseSimulatorTest):
|
|||||||
response2, _ = self.call_mcp_tool(
|
response2, _ = self.call_mcp_tool(
|
||||||
"planner",
|
"planner",
|
||||||
{
|
{
|
||||||
"step": "Deployment strategy: Use Kubernetes for container orchestration with Helm charts. Implement CI/CD pipeline with GitOps. Use service mesh (Istio) for traffic management, monitoring, and security. Deploy databases in separate namespaces with backup automation.",
|
"step": "Deployment strategy: Use Kubernetes for orchestration with Helm charts. Implement CI/CD pipeline with GitOps. Use service mesh (Istio) for traffic management, monitoring, and security. Deploy databases in separate namespaces with backup automation.",
|
||||||
"step_number": 2,
|
"step_number": 2,
|
||||||
"total_steps": 2,
|
"total_steps": 2,
|
||||||
"next_step_required": False, # Complete the session
|
"next_step_required": False, # Complete the session
|
||||||
@@ -326,7 +326,7 @@ class PlannerContinuationHistoryTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
||||||
"""Call an MCP tool via Claude CLI (docker exec) - override for planner-specific response handling"""
|
"""Call an MCP tool via standalone server - override for planner-specific response handling"""
|
||||||
# Use parent implementation to get the raw response
|
# Use parent implementation to get the raw response
|
||||||
response_text, _ = super().call_mcp_tool(tool_name, params)
|
response_text, _ = super().call_mcp_tool(tool_name, params)
|
||||||
|
|
||||||
|
|||||||
@@ -275,7 +275,7 @@ class PlannerValidationTest(BaseSimulatorTest):
|
|||||||
response3, _ = self.call_mcp_tool(
|
response3, _ = self.call_mcp_tool(
|
||||||
"planner",
|
"planner",
|
||||||
{
|
{
|
||||||
"step": "Revision: Actually, let me revise the Kubernetes approach. I'll use a simpler Docker Swarm deployment initially, then migrate to Kubernetes later.",
|
"step": "Revision: Actually, let me revise the Kubernetes approach. I'll use a simpler deployment initially, then migrate to Kubernetes later.",
|
||||||
"step_number": 3,
|
"step_number": 3,
|
||||||
"total_steps": 4,
|
"total_steps": 4,
|
||||||
"next_step_required": True,
|
"next_step_required": True,
|
||||||
@@ -311,7 +311,7 @@ class PlannerValidationTest(BaseSimulatorTest):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
|
||||||
"""Call an MCP tool via Claude CLI (docker exec) - override for planner-specific response handling"""
|
"""Call an MCP tool via standalone server - override for planner-specific response handling"""
|
||||||
# Use parent implementation to get the raw response
|
# Use parent implementation to get the raw response
|
||||||
response_text, _ = super().call_mcp_tool(tool_name, params)
|
response_text, _ = super().call_mcp_tool(tool_name, params)
|
||||||
|
|
||||||
|
|||||||
@@ -1,139 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Redis Conversation Memory Validation Test
|
|
||||||
|
|
||||||
Validates that conversation memory is working via Redis by checking
|
|
||||||
for stored conversation threads and their content.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
|
||||||
|
|
||||||
|
|
||||||
class RedisValidationTest(BaseSimulatorTest):
|
|
||||||
"""Validate that conversation memory is working via Redis"""
|
|
||||||
|
|
||||||
@property
|
|
||||||
def test_name(self) -> str:
|
|
||||||
return "redis_validation"
|
|
||||||
|
|
||||||
@property
|
|
||||||
def test_description(self) -> str:
|
|
||||||
return "Redis conversation memory validation"
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
|
||||||
"""Validate that conversation memory is working via Redis"""
|
|
||||||
try:
|
|
||||||
self.logger.info("💾 Test: Validating conversation memory via Redis...")
|
|
||||||
|
|
||||||
# First, test Redis connectivity
|
|
||||||
ping_result = self.run_command(
|
|
||||||
["docker", "exec", self.redis_container, "redis-cli", "ping"], capture_output=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if ping_result.returncode != 0:
|
|
||||||
self.logger.error("Failed to connect to Redis")
|
|
||||||
return False
|
|
||||||
|
|
||||||
if "PONG" not in ping_result.stdout.decode():
|
|
||||||
self.logger.error("Redis ping failed")
|
|
||||||
return False
|
|
||||||
|
|
||||||
self.logger.info("✅ Redis connectivity confirmed")
|
|
||||||
|
|
||||||
# Check Redis for stored conversations
|
|
||||||
result = self.run_command(
|
|
||||||
["docker", "exec", self.redis_container, "redis-cli", "KEYS", "thread:*"], capture_output=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode != 0:
|
|
||||||
self.logger.error("Failed to query Redis")
|
|
||||||
return False
|
|
||||||
|
|
||||||
keys = result.stdout.decode().strip().split("\n")
|
|
||||||
thread_keys = [k for k in keys if k.startswith("thread:") and k != "thread:*"]
|
|
||||||
|
|
||||||
if thread_keys:
|
|
||||||
self.logger.info(f"✅ Found {len(thread_keys)} conversation threads in Redis")
|
|
||||||
|
|
||||||
# Get details of first thread
|
|
||||||
thread_key = thread_keys[0]
|
|
||||||
result = self.run_command(
|
|
||||||
["docker", "exec", self.redis_container, "redis-cli", "GET", thread_key], capture_output=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
thread_data = result.stdout.decode()
|
|
||||||
try:
|
|
||||||
parsed = json.loads(thread_data)
|
|
||||||
turns = parsed.get("turns", [])
|
|
||||||
self.logger.info(f"✅ Thread has {len(turns)} turns")
|
|
||||||
return True
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
self.logger.warning("Could not parse thread data")
|
|
||||||
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
# If no existing threads, create a test thread to validate Redis functionality
|
|
||||||
self.logger.info(" No existing threads found, creating test thread to validate Redis...")
|
|
||||||
|
|
||||||
test_thread_id = "test_thread_validation"
|
|
||||||
test_data = {
|
|
||||||
"thread_id": test_thread_id,
|
|
||||||
"turns": [
|
|
||||||
{"tool": "chat", "timestamp": "2025-06-11T16:30:00Z", "prompt": "Test validation prompt"}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
# Store test data
|
|
||||||
store_result = self.run_command(
|
|
||||||
[
|
|
||||||
"docker",
|
|
||||||
"exec",
|
|
||||||
self.redis_container,
|
|
||||||
"redis-cli",
|
|
||||||
"SET",
|
|
||||||
f"thread:{test_thread_id}",
|
|
||||||
json.dumps(test_data),
|
|
||||||
],
|
|
||||||
capture_output=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
if store_result.returncode != 0:
|
|
||||||
self.logger.error("Failed to store test data in Redis")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Retrieve test data
|
|
||||||
retrieve_result = self.run_command(
|
|
||||||
["docker", "exec", self.redis_container, "redis-cli", "GET", f"thread:{test_thread_id}"],
|
|
||||||
capture_output=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
if retrieve_result.returncode != 0:
|
|
||||||
self.logger.error("Failed to retrieve test data from Redis")
|
|
||||||
return False
|
|
||||||
|
|
||||||
retrieved_data = retrieve_result.stdout.decode()
|
|
||||||
try:
|
|
||||||
parsed = json.loads(retrieved_data)
|
|
||||||
if parsed.get("thread_id") == test_thread_id:
|
|
||||||
self.logger.info("✅ Redis read/write validation successful")
|
|
||||||
|
|
||||||
# Clean up test data
|
|
||||||
self.run_command(
|
|
||||||
["docker", "exec", self.redis_container, "redis-cli", "DEL", f"thread:{test_thread_id}"],
|
|
||||||
capture_output=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
self.logger.error("Retrieved data doesn't match stored data")
|
|
||||||
return False
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
self.logger.error("Could not parse retrieved test data")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Conversation memory validation failed: {e}")
|
|
||||||
return False
|
|
||||||
@@ -241,13 +241,8 @@ def handle_everything(user_input, config, database):
|
|||||||
# Validate logs
|
# Validate logs
|
||||||
self.logger.info(" 📋 Validating execution logs...")
|
self.logger.info(" 📋 Validating execution logs...")
|
||||||
|
|
||||||
# Get server logs from the actual log file inside the container
|
# Get server logs using inherited method
|
||||||
result = self.run_command(
|
logs = self.get_recent_server_logs(500)
|
||||||
["docker", "exec", self.container_name, "tail", "-500", "/tmp/mcp_server.log"], capture_output=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
logs = result.stdout.decode() + result.stderr.decode()
|
|
||||||
|
|
||||||
# Look for refactor tool execution patterns
|
# Look for refactor tool execution patterns
|
||||||
refactor_patterns = [
|
refactor_patterns = [
|
||||||
@@ -268,8 +263,6 @@ def handle_everything(user_input, config, database):
|
|||||||
self.logger.info(f" ✅ Log validation passed ({patterns_found}/{len(refactor_patterns)} patterns)")
|
self.logger.info(f" ✅ Log validation passed ({patterns_found}/{len(refactor_patterns)} patterns)")
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f" ⚠️ Only found {patterns_found}/{len(refactor_patterns)} log patterns")
|
self.logger.warning(f" ⚠️ Only found {patterns_found}/{len(refactor_patterns)} log patterns")
|
||||||
else:
|
|
||||||
self.logger.warning(" ⚠️ Could not retrieve Docker logs")
|
|
||||||
|
|
||||||
self.logger.info(" ✅ Refactor tool validation completed successfully")
|
self.logger.info(" ✅ Refactor tool validation completed successfully")
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ This test validates that:
|
|||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -27,78 +26,6 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "Token allocation and conversation history validation"
|
return "Token allocation and conversation history validation"
|
||||||
|
|
||||||
def get_recent_server_logs(self) -> str:
|
|
||||||
"""Get recent server logs from the log file directly"""
|
|
||||||
try:
|
|
||||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return result.stdout
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to read server logs: {result.stderr}")
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get server logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def extract_conversation_usage_logs(self, logs: str) -> list[dict[str, int]]:
|
|
||||||
"""Extract actual conversation token usage from server logs"""
|
|
||||||
usage_logs = []
|
|
||||||
|
|
||||||
# Look for conversation debug logs that show actual usage
|
|
||||||
lines = logs.split("\n")
|
|
||||||
|
|
||||||
for i, line in enumerate(lines):
|
|
||||||
if "[CONVERSATION_DEBUG] Token budget calculation:" in line:
|
|
||||||
# Found start of token budget log, extract the following lines
|
|
||||||
usage = {}
|
|
||||||
for j in range(1, 8): # Next 7 lines contain the usage details
|
|
||||||
if i + j < len(lines):
|
|
||||||
detail_line = lines[i + j]
|
|
||||||
|
|
||||||
# Parse Total capacity: 1,048,576
|
|
||||||
if "Total capacity:" in detail_line:
|
|
||||||
match = re.search(r"Total capacity:\s*([\d,]+)", detail_line)
|
|
||||||
if match:
|
|
||||||
usage["total_capacity"] = int(match.group(1).replace(",", ""))
|
|
||||||
|
|
||||||
# Parse Content allocation: 838,860
|
|
||||||
elif "Content allocation:" in detail_line:
|
|
||||||
match = re.search(r"Content allocation:\s*([\d,]+)", detail_line)
|
|
||||||
if match:
|
|
||||||
usage["content_allocation"] = int(match.group(1).replace(",", ""))
|
|
||||||
|
|
||||||
# Parse Conversation tokens: 12,345
|
|
||||||
elif "Conversation tokens:" in detail_line:
|
|
||||||
match = re.search(r"Conversation tokens:\s*([\d,]+)", detail_line)
|
|
||||||
if match:
|
|
||||||
usage["conversation_tokens"] = int(match.group(1).replace(",", ""))
|
|
||||||
|
|
||||||
# Parse Remaining tokens: 825,515
|
|
||||||
elif "Remaining tokens:" in detail_line:
|
|
||||||
match = re.search(r"Remaining tokens:\s*([\d,]+)", detail_line)
|
|
||||||
if match:
|
|
||||||
usage["remaining_tokens"] = int(match.group(1).replace(",", ""))
|
|
||||||
|
|
||||||
if usage: # Only add if we found some usage data
|
|
||||||
usage_logs.append(usage)
|
|
||||||
|
|
||||||
return usage_logs
|
|
||||||
|
|
||||||
def extract_conversation_token_usage(self, logs: str) -> list[int]:
|
|
||||||
"""Extract conversation token usage from logs"""
|
|
||||||
usage_values = []
|
|
||||||
|
|
||||||
# Look for conversation token usage logs
|
|
||||||
pattern = r"Conversation history token usage:\s*([\d,]+)"
|
|
||||||
matches = re.findall(pattern, logs)
|
|
||||||
|
|
||||||
for match in matches:
|
|
||||||
usage_values.append(int(match.replace(",", "")))
|
|
||||||
|
|
||||||
return usage_values
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test token allocation and conversation history functionality"""
|
"""Test token allocation and conversation history functionality"""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ class VisionCapabilityTest(BaseSimulatorTest):
|
|||||||
"don't have access",
|
"don't have access",
|
||||||
"cannot see",
|
"cannot see",
|
||||||
"no image",
|
"no image",
|
||||||
"clarification_required",
|
"files_required_to_continue",
|
||||||
"image you're referring to",
|
"image you're referring to",
|
||||||
"supply the image",
|
"supply the image",
|
||||||
"error",
|
"error",
|
||||||
@@ -122,7 +122,7 @@ class VisionCapabilityTest(BaseSimulatorTest):
|
|||||||
"don't have access",
|
"don't have access",
|
||||||
"cannot see",
|
"cannot see",
|
||||||
"no image",
|
"no image",
|
||||||
"clarification_required",
|
"files_required_to_continue",
|
||||||
"image you're referring to",
|
"image you're referring to",
|
||||||
"supply the image",
|
"supply the image",
|
||||||
"error",
|
"error",
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ Tests that verify X.AI GROK functionality including:
|
|||||||
- API integration and response validation
|
- API integration and response validation
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from .base_test import BaseSimulatorTest
|
from .base_test import BaseSimulatorTest
|
||||||
|
|
||||||
@@ -25,44 +24,18 @@ class XAIModelsTest(BaseSimulatorTest):
|
|||||||
def test_description(self) -> str:
|
def test_description(self) -> str:
|
||||||
return "X.AI GROK model functionality and integration"
|
return "X.AI GROK model functionality and integration"
|
||||||
|
|
||||||
def get_recent_server_logs(self) -> str:
|
|
||||||
"""Get recent server logs from the log file directly"""
|
|
||||||
try:
|
|
||||||
# Read logs directly from the log file
|
|
||||||
cmd = ["docker", "exec", self.container_name, "tail", "-n", "500", "/tmp/mcp_server.log"]
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0:
|
|
||||||
return result.stdout
|
|
||||||
else:
|
|
||||||
self.logger.warning(f"Failed to read server logs: {result.stderr}")
|
|
||||||
return ""
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Failed to get server logs: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def run_test(self) -> bool:
|
def run_test(self) -> bool:
|
||||||
"""Test X.AI GROK model functionality"""
|
"""Test X.AI GROK model functionality"""
|
||||||
try:
|
try:
|
||||||
self.logger.info("Test: X.AI GROK model functionality and integration")
|
self.logger.info("Test: X.AI GROK model functionality and integration")
|
||||||
|
|
||||||
# Check if X.AI API key is configured and not empty
|
# Check if X.AI API key is configured and not empty
|
||||||
check_cmd = [
|
|
||||||
"docker",
|
|
||||||
"exec",
|
|
||||||
self.container_name,
|
|
||||||
"python",
|
|
||||||
"-c",
|
|
||||||
"""
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
xai_key = os.environ.get("XAI_API_KEY", "")
|
xai_key = os.environ.get("XAI_API_KEY", "")
|
||||||
is_valid = bool(xai_key and xai_key != "your_xai_api_key_here" and xai_key.strip())
|
is_valid = bool(xai_key and xai_key != "your_xai_api_key_here" and xai_key.strip())
|
||||||
print(f"XAI_KEY_VALID:{is_valid}")
|
|
||||||
""".strip(),
|
|
||||||
]
|
|
||||||
result = subprocess.run(check_cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
if result.returncode == 0 and "XAI_KEY_VALID:False" in result.stdout:
|
if not is_valid:
|
||||||
self.logger.info(" ⚠️ X.AI API key not configured or empty - skipping test")
|
self.logger.info(" ⚠️ X.AI API key not configured or empty - skipping test")
|
||||||
self.logger.info(" ℹ️ This test requires XAI_API_KEY to be set in .env with a valid key")
|
self.logger.info(" ℹ️ This test requires XAI_API_KEY to be set in .env with a valid key")
|
||||||
return True # Return True to indicate test is skipped, not failed
|
return True # Return True to indicate test is skipped, not failed
|
||||||
|
|||||||
@@ -19,8 +19,11 @@ IF MORE INFORMATION IS NEEDED
|
|||||||
If you need additional context (e.g., dependencies, configuration files, test files) to provide complete analysis, you
|
If you need additional context (e.g., dependencies, configuration files, test files) to provide complete analysis, you
|
||||||
MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been provided unless
|
MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been provided unless
|
||||||
for some reason its content is missing or incomplete:
|
for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
ESCALATE TO A FULL CODEREVIEW IF REQUIRED
|
ESCALATE TO A FULL CODEREVIEW IF REQUIRED
|
||||||
If, after thoroughly analysing the question and the provided code, you determine that a comprehensive, code-base–wide
|
If, after thoroughly analysing the question and the provided code, you determine that a comprehensive, code-base–wide
|
||||||
|
|||||||
@@ -18,8 +18,11 @@ If Claude is discussing specific code, functions, or project components that was
|
|||||||
and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful
|
and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful
|
||||||
collaboration, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been
|
collaboration, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been
|
||||||
provided unless for some reason its content is missing or incomplete:
|
provided unless for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
SCOPE & FOCUS
|
SCOPE & FOCUS
|
||||||
• Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints.
|
• Ground every suggestion in the project's current tech stack, languages, frameworks, and constraints.
|
||||||
|
|||||||
@@ -19,8 +19,11 @@ IF MORE INFORMATION IS NEEDED
|
|||||||
If you need additional context (e.g., related files, configuration, dependencies) to provide
|
If you need additional context (e.g., related files, configuration, dependencies) to provide
|
||||||
a complete and accurate review, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the
|
a complete and accurate review, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the
|
||||||
same file you've been provided unless for some reason its content is missing or incomplete:
|
same file you've been provided unless for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
CRITICAL: Align your review with the user's context and expectations. Focus on issues that matter for their
|
CRITICAL: Align your review with the user's context and expectations. Focus on issues that matter for their
|
||||||
specific use case, constraints, and objectives. Don't provide a generic "find everything" review - tailor
|
specific use case, constraints, and objectives. Don't provide a generic "find everything" review - tailor
|
||||||
|
|||||||
@@ -26,8 +26,11 @@ IF MORE INFORMATION IS NEEDED
|
|||||||
If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide thorough
|
If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide thorough
|
||||||
analysis or response, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've
|
analysis or response, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've
|
||||||
been provided unless for some reason its content is missing or incomplete:
|
been provided unless for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
EVALUATION FRAMEWORK
|
EVALUATION FRAMEWORK
|
||||||
Assess the proposal across these critical dimensions. Your stance influences HOW you present findings, not WHETHER you
|
Assess the proposal across these critical dimensions. Your stance influences HOW you present findings, not WHETHER you
|
||||||
|
|||||||
@@ -49,8 +49,8 @@ Do NOT include any text before or after the JSON. The response must be valid JSO
|
|||||||
IF MORE INFORMATION IS NEEDED:
|
IF MORE INFORMATION IS NEEDED:
|
||||||
If you lack critical information to proceed, you MUST only respond with the following:
|
If you lack critical information to proceed, you MUST only respond with the following:
|
||||||
{
|
{
|
||||||
"status": "clarification_required",
|
"status": "files_required_to_continue",
|
||||||
"question": "<your brief question>",
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -23,8 +23,11 @@ If Claude is discussing specific code, functions, or project components that was
|
|||||||
and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful
|
and you need additional context (e.g., related files, configuration, dependencies, test files) to provide meaningful
|
||||||
collaboration, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been
|
collaboration, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the same file you've been
|
||||||
provided unless for some reason its content is missing or incomplete:
|
provided unless for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
PLANNING METHODOLOGY:
|
PLANNING METHODOLOGY:
|
||||||
|
|
||||||
@@ -63,8 +66,8 @@ Do NOT include any text before or after the JSON. The response must be valid JSO
|
|||||||
IF MORE INFORMATION IS NEEDED:
|
IF MORE INFORMATION IS NEEDED:
|
||||||
If you lack critical information to proceed with planning, you MUST only respond with:
|
If you lack critical information to proceed with planning, you MUST only respond with:
|
||||||
{
|
{
|
||||||
"status": "clarification_required",
|
"status": "files_required_to_continue",
|
||||||
"question": "<your brief question>",
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
"files_needed": ["<file name here>", "<or some folder/>"]
|
"files_needed": ["<file name here>", "<or some folder/>"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,8 +18,11 @@ If you need additional context (e.g., related files not in the diff, test files,
|
|||||||
analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON
|
analysis and without this context your review would be ineffective or biased, you MUST respond ONLY with this JSON
|
||||||
format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is
|
format (and nothing else). Do NOT ask for the same file you've been provided unless for some reason its content is
|
||||||
missing or incomplete:
|
missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
INPUTS PROVIDED
|
INPUTS PROVIDED
|
||||||
1. Git diff (staged or branch comparison)
|
1. Git diff (staged or branch comparison)
|
||||||
|
|||||||
@@ -21,7 +21,11 @@ IF MORE INFORMATION IS NEEDED
|
|||||||
If you need additional context (e.g., related files, configuration, dependencies) to provide accurate refactoring
|
If you need additional context (e.g., related files, configuration, dependencies) to provide accurate refactoring
|
||||||
recommendations, you MUST respond ONLY with this JSON format (and ABSOLUTELY nothing else - no text before or after).
|
recommendations, you MUST respond ONLY with this JSON format (and ABSOLUTELY nothing else - no text before or after).
|
||||||
Do NOT ask for the same file you've been provided unless its content is missing or incomplete:
|
Do NOT ask for the same file you've been provided unless its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>", "files_needed": ["[file name here]", "[or some folder/]"]}
|
{
|
||||||
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
REFACTOR TYPES (PRIORITY ORDER)
|
REFACTOR TYPES (PRIORITY ORDER)
|
||||||
|
|
||||||
|
|||||||
@@ -19,8 +19,11 @@ IF MORE INFORMATION IS NEEDED
|
|||||||
If you need additional context (e.g., test framework details, dependencies, existing test patterns) to provide
|
If you need additional context (e.g., test framework details, dependencies, existing test patterns) to provide
|
||||||
accurate test generation, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the
|
accurate test generation, you MUST respond ONLY with this JSON format (and nothing else). Do NOT ask for the
|
||||||
same file you've been provided unless for some reason its content is missing or incomplete:
|
same file you've been provided unless for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
MULTI-AGENT WORKFLOW
|
MULTI-AGENT WORKFLOW
|
||||||
You sequentially inhabit five expert personas—each passes a concise artefact to the next:
|
You sequentially inhabit five expert personas—each passes a concise artefact to the next:
|
||||||
|
|||||||
@@ -18,8 +18,11 @@ IF MORE INFORMATION IS NEEDED
|
|||||||
If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide
|
If you need additional context (e.g., related files, system architecture, requirements, code snippets) to provide
|
||||||
thorough analysis, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've
|
thorough analysis, you MUST ONLY respond with this exact JSON (and nothing else). Do NOT ask for the same file you've
|
||||||
been provided unless for some reason its content is missing or incomplete:
|
been provided unless for some reason its content is missing or incomplete:
|
||||||
{"status": "clarification_required", "question": "<your brief question>",
|
{
|
||||||
"files_needed": ["[file name here]", "[or some folder/]"]}
|
"status": "files_required_to_continue",
|
||||||
|
"mandatory_instructions": "<your critical instructions for Claude>",
|
||||||
|
"files_needed": ["[file name here]", "[or some folder/]"]
|
||||||
|
}
|
||||||
|
|
||||||
GUIDELINES
|
GUIDELINES
|
||||||
1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints.
|
1. Begin with context analysis: identify tech stack, languages, frameworks, and project constraints.
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import asyncio
|
|||||||
import importlib
|
import importlib
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -33,11 +32,8 @@ import config # noqa: E402
|
|||||||
|
|
||||||
importlib.reload(config)
|
importlib.reload(config)
|
||||||
|
|
||||||
# Set WORKSPACE_ROOT to a temporary directory for tests
|
# Note: This creates a test sandbox environment
|
||||||
# This provides a safe sandbox for file operations during testing
|
# Tests create their own temporary directories as needed
|
||||||
# Create a temporary directory that will be used as the workspace for all tests
|
|
||||||
test_root = tempfile.mkdtemp(prefix="zen_mcp_test_")
|
|
||||||
os.environ["WORKSPACE_ROOT"] = test_root
|
|
||||||
|
|
||||||
# Configure asyncio for Windows compatibility
|
# Configure asyncio for Windows compatibility
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
@@ -47,7 +43,7 @@ if sys.platform == "win32":
|
|||||||
from providers import ModelProviderRegistry # noqa: E402
|
from providers import ModelProviderRegistry # noqa: E402
|
||||||
from providers.base import ProviderType # noqa: E402
|
from providers.base import ProviderType # noqa: E402
|
||||||
from providers.gemini import GeminiModelProvider # noqa: E402
|
from providers.gemini import GeminiModelProvider # noqa: E402
|
||||||
from providers.openai import OpenAIModelProvider # noqa: E402
|
from providers.openai_provider import OpenAIModelProvider # noqa: E402
|
||||||
from providers.xai import XAIModelProvider # noqa: E402
|
from providers.xai import XAIModelProvider # noqa: E402
|
||||||
|
|
||||||
# Register providers at test startup
|
# Register providers at test startup
|
||||||
@@ -59,14 +55,11 @@ ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider)
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def project_path(tmp_path):
|
def project_path(tmp_path):
|
||||||
"""
|
"""
|
||||||
Provides a temporary directory within the WORKSPACE_ROOT sandbox for tests.
|
Provides a temporary directory for tests.
|
||||||
This ensures all file operations during tests are within the allowed directory.
|
This ensures all file operations during tests are isolated.
|
||||||
"""
|
"""
|
||||||
# Get the test workspace root
|
|
||||||
test_root = Path(os.environ.get("WORKSPACE_ROOT", "/tmp"))
|
|
||||||
|
|
||||||
# Create a subdirectory for this specific test
|
# Create a subdirectory for this specific test
|
||||||
test_dir = test_root / f"test_{tmp_path.name}"
|
test_dir = tmp_path / "test_workspace"
|
||||||
test_dir.mkdir(parents=True, exist_ok=True)
|
test_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
return test_dir
|
return test_dir
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
from providers.base import ProviderType
|
from providers.base import ProviderType
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from utils.model_restrictions import ModelRestrictionService
|
from utils.model_restrictions import ModelRestrictionService
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ class TestAutoModeComprehensive:
|
|||||||
|
|
||||||
# Re-register providers for subsequent tests (like conftest.py does)
|
# Re-register providers for subsequent tests (like conftest.py does)
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from providers.xai import XAIModelProvider
|
from providers.xai import XAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
@@ -178,7 +178,7 @@ class TestAutoModeComprehensive:
|
|||||||
|
|
||||||
# Register providers based on configuration
|
# Register providers based on configuration
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from providers.openrouter import OpenRouterProvider
|
from providers.openrouter import OpenRouterProvider
|
||||||
from providers.xai import XAIModelProvider
|
from providers.xai import XAIModelProvider
|
||||||
|
|
||||||
@@ -349,7 +349,7 @@ class TestAutoModeComprehensive:
|
|||||||
|
|
||||||
# Register all native providers
|
# Register all native providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from providers.xai import XAIModelProvider
|
from providers.xai import XAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
@@ -460,7 +460,7 @@ class TestAutoModeComprehensive:
|
|||||||
|
|
||||||
# Register providers
|
# Register providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ class TestAutoModeProviderSelection:
|
|||||||
os.environ.pop(key, None)
|
os.environ.pop(key, None)
|
||||||
|
|
||||||
# Register only OpenAI provider
|
# Register only OpenAI provider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ class TestAutoModeProviderSelection:
|
|||||||
|
|
||||||
# Register both providers
|
# Register both providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
@@ -212,7 +212,7 @@ class TestAutoModeProviderSelection:
|
|||||||
|
|
||||||
# Register both providers
|
# Register both providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
@@ -256,7 +256,7 @@ class TestAutoModeProviderSelection:
|
|||||||
|
|
||||||
# Register all providers
|
# Register all providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from providers.xai import XAIModelProvider
|
from providers.xai import XAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
@@ -307,7 +307,7 @@ class TestAutoModeProviderSelection:
|
|||||||
|
|
||||||
# Register all providers
|
# Register all providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from providers.xai import XAIModelProvider
|
from providers.xai import XAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ import pytest
|
|||||||
|
|
||||||
from providers.base import ProviderType
|
from providers.base import ProviderType
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from utils.model_restrictions import ModelRestrictionService
|
from utils.model_restrictions import ModelRestrictionService
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -61,16 +61,16 @@ class TestClaudeContinuationOffers:
|
|||||||
# Set default model to avoid effective auto mode
|
# Set default model to avoid effective auto mode
|
||||||
self.tool.default_model = "gemini-2.5-flash-preview-05-20"
|
self.tool.default_model = "gemini-2.5-flash-preview-05-20"
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_new_conversation_offers_continuation(self, mock_redis):
|
async def test_new_conversation_offers_continuation(self, mock_storage):
|
||||||
"""Test that new conversations offer Claude continuation opportunity"""
|
"""Test that new conversations offer Claude continuation opportunity"""
|
||||||
# Create tool AFTER providers are registered (in conftest.py fixture)
|
# Create tool AFTER providers are registered (in conftest.py fixture)
|
||||||
tool = ClaudeContinuationTool()
|
tool = ClaudeContinuationTool()
|
||||||
tool.default_model = "gemini-2.5-flash-preview-05-20"
|
tool.default_model = "gemini-2.5-flash-preview-05-20"
|
||||||
|
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the model
|
# Mock the model
|
||||||
with patch.object(tool, "get_model_provider") as mock_get_provider:
|
with patch.object(tool, "get_model_provider") as mock_get_provider:
|
||||||
@@ -97,12 +97,12 @@ class TestClaudeContinuationOffers:
|
|||||||
assert "continuation_offer" in response_data
|
assert "continuation_offer" in response_data
|
||||||
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1
|
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_existing_conversation_still_offers_continuation(self, mock_redis):
|
async def test_existing_conversation_still_offers_continuation(self, mock_storage):
|
||||||
"""Test that existing threaded conversations still offer continuation if turns remain"""
|
"""Test that existing threaded conversations still offer continuation if turns remain"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock existing thread context with 2 turns
|
# Mock existing thread context with 2 turns
|
||||||
from utils.conversation_memory import ConversationTurn, ThreadContext
|
from utils.conversation_memory import ConversationTurn, ThreadContext
|
||||||
@@ -155,12 +155,12 @@ class TestClaudeContinuationOffers:
|
|||||||
# MAX_CONVERSATION_TURNS - 2 existing - 1 new = remaining
|
# MAX_CONVERSATION_TURNS - 2 existing - 1 new = remaining
|
||||||
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 3
|
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 3
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_full_response_flow_with_continuation_offer(self, mock_redis):
|
async def test_full_response_flow_with_continuation_offer(self, mock_storage):
|
||||||
"""Test complete response flow that creates continuation offer"""
|
"""Test complete response flow that creates continuation offer"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the model to return a response without follow-up question
|
# Mock the model to return a response without follow-up question
|
||||||
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
||||||
@@ -193,12 +193,12 @@ class TestClaudeContinuationOffers:
|
|||||||
assert "You have" in offer["note"]
|
assert "You have" in offer["note"]
|
||||||
assert "more exchange(s) available" in offer["note"]
|
assert "more exchange(s) available" in offer["note"]
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_continuation_always_offered_with_natural_language(self, mock_redis):
|
async def test_continuation_always_offered_with_natural_language(self, mock_storage):
|
||||||
"""Test that continuation is always offered with natural language prompts"""
|
"""Test that continuation is always offered with natural language prompts"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the model to return a response with natural language follow-up
|
# Mock the model to return a response with natural language follow-up
|
||||||
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
||||||
@@ -229,12 +229,12 @@ I'd be happy to examine the error handling patterns in more detail if that would
|
|||||||
assert "continuation_offer" in response_data
|
assert "continuation_offer" in response_data
|
||||||
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1
|
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_threaded_conversation_with_continuation_offer(self, mock_redis):
|
async def test_threaded_conversation_with_continuation_offer(self, mock_storage):
|
||||||
"""Test that threaded conversations still get continuation offers when turns remain"""
|
"""Test that threaded conversations still get continuation offers when turns remain"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock existing thread context
|
# Mock existing thread context
|
||||||
from utils.conversation_memory import ThreadContext
|
from utils.conversation_memory import ThreadContext
|
||||||
@@ -274,12 +274,12 @@ I'd be happy to examine the error handling patterns in more detail if that would
|
|||||||
assert response_data.get("continuation_offer") is not None
|
assert response_data.get("continuation_offer") is not None
|
||||||
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1
|
assert response_data["continuation_offer"]["remaining_turns"] == MAX_CONVERSATION_TURNS - 1
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_max_turns_reached_no_continuation_offer(self, mock_redis):
|
async def test_max_turns_reached_no_continuation_offer(self, mock_storage):
|
||||||
"""Test that no continuation is offered when max turns would be exceeded"""
|
"""Test that no continuation is offered when max turns would be exceeded"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock existing thread context at max turns
|
# Mock existing thread context at max turns
|
||||||
from utils.conversation_memory import ConversationTurn, ThreadContext
|
from utils.conversation_memory import ConversationTurn, ThreadContext
|
||||||
@@ -338,12 +338,12 @@ class TestContinuationIntegration:
|
|||||||
# Set default model to avoid effective auto mode
|
# Set default model to avoid effective auto mode
|
||||||
self.tool.default_model = "gemini-2.5-flash-preview-05-20"
|
self.tool.default_model = "gemini-2.5-flash-preview-05-20"
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_continuation_offer_creates_proper_thread(self, mock_redis):
|
async def test_continuation_offer_creates_proper_thread(self, mock_storage):
|
||||||
"""Test that continuation offers create properly formatted threads"""
|
"""Test that continuation offers create properly formatted threads"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the get call that add_turn makes to retrieve the existing thread
|
# Mock the get call that add_turn makes to retrieve the existing thread
|
||||||
# We'll set this up after the first setex call
|
# We'll set this up after the first setex call
|
||||||
@@ -402,12 +402,12 @@ class TestContinuationIntegration:
|
|||||||
assert thread_context["initial_context"]["prompt"] == "Initial analysis"
|
assert thread_context["initial_context"]["prompt"] == "Initial analysis"
|
||||||
assert thread_context["initial_context"]["files"] == ["/test/file.py"]
|
assert thread_context["initial_context"]["files"] == ["/test/file.py"]
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_claude_can_use_continuation_id(self, mock_redis):
|
async def test_claude_can_use_continuation_id(self, mock_storage):
|
||||||
"""Test that Claude can use the provided continuation_id in subsequent calls"""
|
"""Test that Claude can use the provided continuation_id in subsequent calls"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Step 1: Initial request creates continuation offer
|
# Step 1: Initial request creates continuation offer
|
||||||
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
with patch.object(self.tool, "get_model_provider") as mock_get_provider:
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import pytest
|
|||||||
from tests.mock_helpers import create_mock_provider
|
from tests.mock_helpers import create_mock_provider
|
||||||
from tools.analyze import AnalyzeTool
|
from tools.analyze import AnalyzeTool
|
||||||
from tools.debug import DebugIssueTool
|
from tools.debug import DebugIssueTool
|
||||||
from tools.models import ClarificationRequest, ToolOutput
|
from tools.models import FilesNeededRequest, ToolOutput
|
||||||
|
|
||||||
|
|
||||||
class TestDynamicContextRequests:
|
class TestDynamicContextRequests:
|
||||||
@@ -31,8 +31,8 @@ class TestDynamicContextRequests:
|
|||||||
# Mock model to return a clarification request
|
# Mock model to return a clarification request
|
||||||
clarification_json = json.dumps(
|
clarification_json = json.dumps(
|
||||||
{
|
{
|
||||||
"status": "clarification_required",
|
"status": "files_required_to_continue",
|
||||||
"question": "I need to see the package.json file to understand dependencies",
|
"mandatory_instructions": "I need to see the package.json file to understand dependencies",
|
||||||
"files_needed": ["package.json", "package-lock.json"],
|
"files_needed": ["package.json", "package-lock.json"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -56,12 +56,16 @@ class TestDynamicContextRequests:
|
|||||||
|
|
||||||
# Parse the response
|
# Parse the response
|
||||||
response_data = json.loads(result[0].text)
|
response_data = json.loads(result[0].text)
|
||||||
assert response_data["status"] == "clarification_required"
|
assert response_data["status"] == "files_required_to_continue"
|
||||||
assert response_data["content_type"] == "json"
|
assert response_data["content_type"] == "json"
|
||||||
|
|
||||||
# Parse the clarification request
|
# Parse the clarification request
|
||||||
clarification = json.loads(response_data["content"])
|
clarification = json.loads(response_data["content"])
|
||||||
assert clarification["question"] == "I need to see the package.json file to understand dependencies"
|
# Check that the enhanced instructions contain the original message and additional guidance
|
||||||
|
expected_start = "I need to see the package.json file to understand dependencies"
|
||||||
|
assert clarification["mandatory_instructions"].startswith(expected_start)
|
||||||
|
assert "IMPORTANT GUIDANCE:" in clarification["mandatory_instructions"]
|
||||||
|
assert "Use FULL absolute paths" in clarification["mandatory_instructions"]
|
||||||
assert clarification["files_needed"] == ["package.json", "package-lock.json"]
|
assert clarification["files_needed"] == ["package.json", "package-lock.json"]
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -100,7 +104,7 @@ class TestDynamicContextRequests:
|
|||||||
@patch("tools.base.BaseTool.get_model_provider")
|
@patch("tools.base.BaseTool.get_model_provider")
|
||||||
async def test_malformed_clarification_request_treated_as_normal(self, mock_get_provider, analyze_tool):
|
async def test_malformed_clarification_request_treated_as_normal(self, mock_get_provider, analyze_tool):
|
||||||
"""Test that malformed JSON clarification requests are treated as normal responses"""
|
"""Test that malformed JSON clarification requests are treated as normal responses"""
|
||||||
malformed_json = '{"status": "clarification_required", "prompt": "Missing closing brace"'
|
malformed_json = '{"status": "files_required_to_continue", "prompt": "Missing closing brace"'
|
||||||
|
|
||||||
mock_provider = create_mock_provider()
|
mock_provider = create_mock_provider()
|
||||||
mock_provider.get_provider_type.return_value = Mock(value="google")
|
mock_provider.get_provider_type.return_value = Mock(value="google")
|
||||||
@@ -125,8 +129,8 @@ class TestDynamicContextRequests:
|
|||||||
"""Test clarification request with suggested next action"""
|
"""Test clarification request with suggested next action"""
|
||||||
clarification_json = json.dumps(
|
clarification_json = json.dumps(
|
||||||
{
|
{
|
||||||
"status": "clarification_required",
|
"status": "files_required_to_continue",
|
||||||
"question": "I need to see the database configuration to diagnose the connection error",
|
"mandatory_instructions": "I need to see the database configuration to diagnose the connection error",
|
||||||
"files_needed": ["config/database.yml", "src/db.py"],
|
"files_needed": ["config/database.yml", "src/db.py"],
|
||||||
"suggested_next_action": {
|
"suggested_next_action": {
|
||||||
"tool": "debug",
|
"tool": "debug",
|
||||||
@@ -160,7 +164,7 @@ class TestDynamicContextRequests:
|
|||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|
||||||
response_data = json.loads(result[0].text)
|
response_data = json.loads(result[0].text)
|
||||||
assert response_data["status"] == "clarification_required"
|
assert response_data["status"] == "files_required_to_continue"
|
||||||
|
|
||||||
clarification = json.loads(response_data["content"])
|
clarification = json.loads(response_data["content"])
|
||||||
assert "suggested_next_action" in clarification
|
assert "suggested_next_action" in clarification
|
||||||
@@ -184,17 +188,54 @@ class TestDynamicContextRequests:
|
|||||||
assert parsed["metadata"]["tool_name"] == "test"
|
assert parsed["metadata"]["tool_name"] == "test"
|
||||||
|
|
||||||
def test_clarification_request_model(self):
|
def test_clarification_request_model(self):
|
||||||
"""Test ClarificationRequest model"""
|
"""Test FilesNeededRequest model"""
|
||||||
request = ClarificationRequest(
|
request = FilesNeededRequest(
|
||||||
question="Need more context",
|
mandatory_instructions="Need more context",
|
||||||
files_needed=["file1.py", "file2.py"],
|
files_needed=["file1.py", "file2.py"],
|
||||||
suggested_next_action={"tool": "analyze", "args": {}},
|
suggested_next_action={"tool": "analyze", "args": {}},
|
||||||
)
|
)
|
||||||
|
|
||||||
assert request.question == "Need more context"
|
assert request.mandatory_instructions == "Need more context"
|
||||||
assert len(request.files_needed) == 2
|
assert len(request.files_needed) == 2
|
||||||
assert request.suggested_next_action["tool"] == "analyze"
|
assert request.suggested_next_action["tool"] == "analyze"
|
||||||
|
|
||||||
|
def test_mandatory_instructions_enhancement(self):
|
||||||
|
"""Test that mandatory_instructions are enhanced with additional guidance"""
|
||||||
|
from tools.base import BaseTool
|
||||||
|
|
||||||
|
# Create a dummy tool instance for testing
|
||||||
|
class TestTool(BaseTool):
|
||||||
|
def get_name(self):
|
||||||
|
return "test"
|
||||||
|
|
||||||
|
def get_description(self):
|
||||||
|
return "test"
|
||||||
|
|
||||||
|
def get_request_model(self):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def prepare_prompt(self, request):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def get_system_prompt(self):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def get_input_schema(self):
|
||||||
|
return {}
|
||||||
|
|
||||||
|
tool = TestTool()
|
||||||
|
original = "I need additional files to proceed"
|
||||||
|
enhanced = tool._enhance_mandatory_instructions(original)
|
||||||
|
|
||||||
|
# Verify the original instructions are preserved
|
||||||
|
assert enhanced.startswith(original)
|
||||||
|
|
||||||
|
# Verify additional guidance is added
|
||||||
|
assert "IMPORTANT GUIDANCE:" in enhanced
|
||||||
|
assert "CRITICAL for providing accurate analysis" in enhanced
|
||||||
|
assert "Use FULL absolute paths" in enhanced
|
||||||
|
assert "continuation_id to continue" in enhanced
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@patch("tools.base.BaseTool.get_model_provider")
|
@patch("tools.base.BaseTool.get_model_provider")
|
||||||
async def test_error_response_format(self, mock_get_provider, analyze_tool):
|
async def test_error_response_format(self, mock_get_provider, analyze_tool):
|
||||||
@@ -223,8 +264,8 @@ class TestCollaborationWorkflow:
|
|||||||
# Mock Gemini to request package.json when asked about dependencies
|
# Mock Gemini to request package.json when asked about dependencies
|
||||||
clarification_json = json.dumps(
|
clarification_json = json.dumps(
|
||||||
{
|
{
|
||||||
"status": "clarification_required",
|
"status": "files_required_to_continue",
|
||||||
"question": "I need to see the package.json file to analyze npm dependencies",
|
"mandatory_instructions": "I need to see the package.json file to analyze npm dependencies",
|
||||||
"files_needed": ["package.json", "package-lock.json"],
|
"files_needed": ["package.json", "package-lock.json"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -247,7 +288,7 @@ class TestCollaborationWorkflow:
|
|||||||
|
|
||||||
response = json.loads(result[0].text)
|
response = json.loads(result[0].text)
|
||||||
assert (
|
assert (
|
||||||
response["status"] == "clarification_required"
|
response["status"] == "files_required_to_continue"
|
||||||
), "Should request clarification when asked about dependencies without package files"
|
), "Should request clarification when asked about dependencies without package files"
|
||||||
|
|
||||||
clarification = json.loads(response["content"])
|
clarification = json.loads(response["content"])
|
||||||
@@ -262,8 +303,8 @@ class TestCollaborationWorkflow:
|
|||||||
# Step 1: Initial request returns clarification needed
|
# Step 1: Initial request returns clarification needed
|
||||||
clarification_json = json.dumps(
|
clarification_json = json.dumps(
|
||||||
{
|
{
|
||||||
"status": "clarification_required",
|
"status": "files_required_to_continue",
|
||||||
"question": "I need to see the configuration file to understand the connection settings",
|
"mandatory_instructions": "I need to see the configuration file to understand the connection settings",
|
||||||
"files_needed": ["config.py"],
|
"files_needed": ["config.py"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -284,7 +325,7 @@ class TestCollaborationWorkflow:
|
|||||||
)
|
)
|
||||||
|
|
||||||
response1 = json.loads(result1[0].text)
|
response1 = json.loads(result1[0].text)
|
||||||
assert response1["status"] == "clarification_required"
|
assert response1["status"] == "files_required_to_continue"
|
||||||
|
|
||||||
# Step 2: Claude would provide additional context and re-invoke
|
# Step 2: Claude would provide additional context and re-invoke
|
||||||
# This simulates the second call with more context
|
# This simulates the second call with more context
|
||||||
|
|||||||
@@ -26,11 +26,11 @@ from utils.conversation_memory import (
|
|||||||
class TestConversationMemory:
|
class TestConversationMemory:
|
||||||
"""Test the conversation memory system for stateless MCP requests"""
|
"""Test the conversation memory system for stateless MCP requests"""
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_create_thread(self, mock_redis):
|
def test_create_thread(self, mock_storage):
|
||||||
"""Test creating a new thread"""
|
"""Test creating a new thread"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
thread_id = create_thread("chat", {"prompt": "Hello", "files": ["/test.py"]})
|
thread_id = create_thread("chat", {"prompt": "Hello", "files": ["/test.py"]})
|
||||||
|
|
||||||
@@ -43,11 +43,11 @@ class TestConversationMemory:
|
|||||||
assert call_args[0][0] == f"thread:{thread_id}" # key
|
assert call_args[0][0] == f"thread:{thread_id}" # key
|
||||||
assert call_args[0][1] == CONVERSATION_TIMEOUT_SECONDS # TTL from configuration
|
assert call_args[0][1] == CONVERSATION_TIMEOUT_SECONDS # TTL from configuration
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_get_thread_valid(self, mock_redis):
|
def test_get_thread_valid(self, mock_storage):
|
||||||
"""Test retrieving an existing thread"""
|
"""Test retrieving an existing thread"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
test_uuid = "12345678-1234-1234-1234-123456789012"
|
test_uuid = "12345678-1234-1234-1234-123456789012"
|
||||||
|
|
||||||
@@ -69,27 +69,27 @@ class TestConversationMemory:
|
|||||||
assert context.tool_name == "chat"
|
assert context.tool_name == "chat"
|
||||||
mock_client.get.assert_called_once_with(f"thread:{test_uuid}")
|
mock_client.get.assert_called_once_with(f"thread:{test_uuid}")
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_get_thread_invalid_uuid(self, mock_redis):
|
def test_get_thread_invalid_uuid(self, mock_storage):
|
||||||
"""Test handling invalid UUID"""
|
"""Test handling invalid UUID"""
|
||||||
context = get_thread("invalid-uuid")
|
context = get_thread("invalid-uuid")
|
||||||
assert context is None
|
assert context is None
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_get_thread_not_found(self, mock_redis):
|
def test_get_thread_not_found(self, mock_storage):
|
||||||
"""Test handling thread not found"""
|
"""Test handling thread not found"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
mock_client.get.return_value = None
|
mock_client.get.return_value = None
|
||||||
|
|
||||||
context = get_thread("12345678-1234-1234-1234-123456789012")
|
context = get_thread("12345678-1234-1234-1234-123456789012")
|
||||||
assert context is None
|
assert context is None
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_add_turn_success(self, mock_redis):
|
def test_add_turn_success(self, mock_storage):
|
||||||
"""Test adding a turn to existing thread"""
|
"""Test adding a turn to existing thread"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
test_uuid = "12345678-1234-1234-1234-123456789012"
|
test_uuid = "12345678-1234-1234-1234-123456789012"
|
||||||
|
|
||||||
@@ -111,11 +111,11 @@ class TestConversationMemory:
|
|||||||
mock_client.get.assert_called_once()
|
mock_client.get.assert_called_once()
|
||||||
mock_client.setex.assert_called_once()
|
mock_client.setex.assert_called_once()
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_add_turn_max_limit(self, mock_redis):
|
def test_add_turn_max_limit(self, mock_storage):
|
||||||
"""Test turn limit enforcement"""
|
"""Test turn limit enforcement"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
test_uuid = "12345678-1234-1234-1234-123456789012"
|
test_uuid = "12345678-1234-1234-1234-123456789012"
|
||||||
|
|
||||||
@@ -237,11 +237,11 @@ class TestConversationMemory:
|
|||||||
class TestConversationFlow:
|
class TestConversationFlow:
|
||||||
"""Test complete conversation flows simulating stateless MCP requests"""
|
"""Test complete conversation flows simulating stateless MCP requests"""
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_complete_conversation_cycle(self, mock_redis):
|
def test_complete_conversation_cycle(self, mock_storage):
|
||||||
"""Test a complete 5-turn conversation until limit reached"""
|
"""Test a complete 5-turn conversation until limit reached"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Simulate independent MCP request cycles
|
# Simulate independent MCP request cycles
|
||||||
|
|
||||||
@@ -341,13 +341,13 @@ class TestConversationFlow:
|
|||||||
success = add_turn(thread_id, "user", "This should be rejected")
|
success = add_turn(thread_id, "user", "This should be rejected")
|
||||||
assert success is False # CONVERSATION STOPS HERE
|
assert success is False # CONVERSATION STOPS HERE
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_invalid_continuation_id_error(self, mock_redis):
|
def test_invalid_continuation_id_error(self, mock_storage):
|
||||||
"""Test that invalid continuation IDs raise proper error for restart"""
|
"""Test that invalid continuation IDs raise proper error for restart"""
|
||||||
from server import reconstruct_thread_context
|
from server import reconstruct_thread_context
|
||||||
|
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
mock_client.get.return_value = None # Thread not found
|
mock_client.get.return_value = None # Thread not found
|
||||||
|
|
||||||
arguments = {"continuation_id": "invalid-uuid-12345", "prompt": "Continue conversation"}
|
arguments = {"continuation_id": "invalid-uuid-12345", "prompt": "Continue conversation"}
|
||||||
@@ -439,11 +439,11 @@ class TestConversationFlow:
|
|||||||
expected_remaining = MAX_CONVERSATION_TURNS - 1
|
expected_remaining = MAX_CONVERSATION_TURNS - 1
|
||||||
assert f"({expected_remaining} exchanges remaining)" in instructions
|
assert f"({expected_remaining} exchanges remaining)" in instructions
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_complete_conversation_with_dynamic_turns(self, mock_redis):
|
def test_complete_conversation_with_dynamic_turns(self, mock_storage):
|
||||||
"""Test complete conversation respecting MAX_CONVERSATION_TURNS dynamically"""
|
"""Test complete conversation respecting MAX_CONVERSATION_TURNS dynamically"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
thread_id = create_thread("chat", {"prompt": "Start conversation"})
|
thread_id = create_thread("chat", {"prompt": "Start conversation"})
|
||||||
|
|
||||||
@@ -495,16 +495,16 @@ class TestConversationFlow:
|
|||||||
success = add_turn(thread_id, "user", "This should fail")
|
success = add_turn(thread_id, "user", "This should fail")
|
||||||
assert success is False, f"Turn {MAX_CONVERSATION_TURNS + 1} should fail"
|
assert success is False, f"Turn {MAX_CONVERSATION_TURNS + 1} should fail"
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
|
@patch.dict(os.environ, {"GEMINI_API_KEY": "test-key", "OPENAI_API_KEY": ""}, clear=False)
|
||||||
def test_conversation_with_files_and_context_preservation(self, mock_redis):
|
def test_conversation_with_files_and_context_preservation(self, mock_storage):
|
||||||
"""Test complete conversation flow with file tracking and context preservation"""
|
"""Test complete conversation flow with file tracking and context preservation"""
|
||||||
from providers.registry import ModelProviderRegistry
|
from providers.registry import ModelProviderRegistry
|
||||||
|
|
||||||
ModelProviderRegistry.clear_cache()
|
ModelProviderRegistry.clear_cache()
|
||||||
|
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Start conversation with files
|
# Start conversation with files
|
||||||
thread_id = create_thread("analyze", {"prompt": "Analyze this codebase", "files": ["/project/src/"]})
|
thread_id = create_thread("analyze", {"prompt": "Analyze this codebase", "files": ["/project/src/"]})
|
||||||
@@ -648,11 +648,11 @@ class TestConversationFlow:
|
|||||||
|
|
||||||
assert turn_1_pos < turn_2_pos < turn_3_pos
|
assert turn_1_pos < turn_2_pos < turn_3_pos
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_stateless_request_isolation(self, mock_redis):
|
def test_stateless_request_isolation(self, mock_storage):
|
||||||
"""Test that each request cycle is independent but shares context via Redis"""
|
"""Test that each request cycle is independent but shares context via Redis"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Simulate two different "processes" accessing same thread
|
# Simulate two different "processes" accessing same thread
|
||||||
thread_id = "12345678-1234-1234-1234-123456789012"
|
thread_id = "12345678-1234-1234-1234-123456789012"
|
||||||
|
|||||||
@@ -93,12 +93,12 @@ class TestCrossToolContinuation:
|
|||||||
self.analysis_tool = MockAnalysisTool()
|
self.analysis_tool = MockAnalysisTool()
|
||||||
self.review_tool = MockReviewTool()
|
self.review_tool = MockReviewTool()
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_continuation_id_works_across_different_tools(self, mock_redis):
|
async def test_continuation_id_works_across_different_tools(self, mock_storage):
|
||||||
"""Test that a continuation_id from one tool can be used with another tool"""
|
"""Test that a continuation_id from one tool can be used with another tool"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Step 1: Analysis tool creates a conversation with continuation offer
|
# Step 1: Analysis tool creates a conversation with continuation offer
|
||||||
with patch.object(self.analysis_tool, "get_model_provider") as mock_get_provider:
|
with patch.object(self.analysis_tool, "get_model_provider") as mock_get_provider:
|
||||||
@@ -195,11 +195,11 @@ I'd be happy to review these security findings in detail if that would be helpfu
|
|||||||
assert second_turn["tool_name"] == "test_review" # New tool name
|
assert second_turn["tool_name"] == "test_review" # New tool name
|
||||||
assert "Critical security vulnerability confirmed" in second_turn["content"]
|
assert "Critical security vulnerability confirmed" in second_turn["content"]
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_cross_tool_conversation_history_includes_tool_names(self, mock_redis):
|
def test_cross_tool_conversation_history_includes_tool_names(self, mock_storage):
|
||||||
"""Test that conversation history properly shows which tool was used for each turn"""
|
"""Test that conversation history properly shows which tool was used for each turn"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Create a thread context with turns from different tools
|
# Create a thread context with turns from different tools
|
||||||
thread_context = ThreadContext(
|
thread_context = ThreadContext(
|
||||||
@@ -247,13 +247,13 @@ I'd be happy to review these security findings in detail if that would be helpfu
|
|||||||
assert "Review complete: 2 critical, 1 minor issue" in history
|
assert "Review complete: 2 critical, 1 minor issue" in history
|
||||||
assert "Deep analysis: Root cause identified" in history
|
assert "Deep analysis: Root cause identified" in history
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch("utils.conversation_memory.get_thread")
|
@patch("utils.conversation_memory.get_thread")
|
||||||
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
@patch.dict("os.environ", {"PYTEST_CURRENT_TEST": ""}, clear=False)
|
||||||
async def test_cross_tool_conversation_with_files_context(self, mock_get_thread, mock_redis):
|
async def test_cross_tool_conversation_with_files_context(self, mock_get_thread, mock_storage):
|
||||||
"""Test that file context is preserved across tool switches"""
|
"""Test that file context is preserved across tool switches"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Create existing context with files from analysis tool
|
# Create existing context with files from analysis tool
|
||||||
existing_context = ThreadContext(
|
existing_context = ThreadContext(
|
||||||
@@ -317,12 +317,12 @@ I'd be happy to review these security findings in detail if that would be helpfu
|
|||||||
analysis_turn = final_context["turns"][0] # First turn (analysis tool)
|
analysis_turn = final_context["turns"][0] # First turn (analysis tool)
|
||||||
assert analysis_turn["files"] == ["/src/auth.py", "/src/utils.py"]
|
assert analysis_turn["files"] == ["/src/auth.py", "/src/utils.py"]
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch("utils.conversation_memory.get_thread")
|
@patch("utils.conversation_memory.get_thread")
|
||||||
def test_thread_preserves_original_tool_name(self, mock_get_thread, mock_redis):
|
def test_thread_preserves_original_tool_name(self, mock_get_thread, mock_storage):
|
||||||
"""Test that the thread's original tool_name is preserved even when other tools contribute"""
|
"""Test that the thread's original tool_name is preserved even when other tools contribute"""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Create existing thread from analysis tool
|
# Create existing thread from analysis tool
|
||||||
existing_context = ThreadContext(
|
existing_context = ThreadContext(
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ class TestCustomProvider:
|
|||||||
|
|
||||||
def test_provider_initialization_missing_url(self):
|
def test_provider_initialization_missing_url(self):
|
||||||
"""Test CustomProvider raises error when URL is missing."""
|
"""Test CustomProvider raises error when URL is missing."""
|
||||||
|
with patch.dict(os.environ, {"CUSTOM_API_URL": ""}, clear=False):
|
||||||
with pytest.raises(ValueError, match="Custom API URL must be provided"):
|
with pytest.raises(ValueError, match="Custom API URL must be provided"):
|
||||||
CustomProvider(api_key="test-key")
|
CustomProvider(api_key="test-key")
|
||||||
|
|
||||||
|
|||||||
@@ -121,10 +121,10 @@ def helper_function():
|
|||||||
assert any(str(Path(f).resolve()) == expected_resolved for f in captured_files)
|
assert any(str(Path(f).resolve()) == expected_resolved for f in captured_files)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
@patch("providers.ModelProviderRegistry.get_provider_for_model")
|
@patch("providers.ModelProviderRegistry.get_provider_for_model")
|
||||||
async def test_conversation_continuation_with_directory_files(
|
async def test_conversation_continuation_with_directory_files(
|
||||||
self, mock_get_provider, mock_redis, tool, temp_directory_with_files
|
self, mock_get_provider, mock_storage, tool, temp_directory_with_files
|
||||||
):
|
):
|
||||||
"""Test that conversation continuation works correctly with directory expansion"""
|
"""Test that conversation continuation works correctly with directory expansion"""
|
||||||
# Setup mock Redis client with in-memory storage
|
# Setup mock Redis client with in-memory storage
|
||||||
@@ -140,7 +140,7 @@ def helper_function():
|
|||||||
|
|
||||||
mock_client.get.side_effect = mock_get
|
mock_client.get.side_effect = mock_get
|
||||||
mock_client.setex.side_effect = mock_setex
|
mock_client.setex.side_effect = mock_setex
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Setup mock provider
|
# Setup mock provider
|
||||||
mock_provider = create_mock_provider()
|
mock_provider = create_mock_provider()
|
||||||
@@ -196,8 +196,8 @@ def helper_function():
|
|||||||
# This test shows the fix is working - conversation continuation properly filters out
|
# This test shows the fix is working - conversation continuation properly filters out
|
||||||
# already-embedded files. The exact length depends on whether any new files are found.
|
# already-embedded files. The exact length depends on whether any new files are found.
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_get_conversation_embedded_files_with_expanded_files(self, mock_redis, tool, temp_directory_with_files):
|
def test_get_conversation_embedded_files_with_expanded_files(self, mock_storage, tool, temp_directory_with_files):
|
||||||
"""Test that get_conversation_embedded_files returns expanded files"""
|
"""Test that get_conversation_embedded_files returns expanded files"""
|
||||||
# Setup mock Redis client with in-memory storage
|
# Setup mock Redis client with in-memory storage
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
@@ -212,7 +212,7 @@ def helper_function():
|
|||||||
|
|
||||||
mock_client.get.side_effect = mock_get
|
mock_client.get.side_effect = mock_get
|
||||||
mock_client.setex.side_effect = mock_setex
|
mock_client.setex.side_effect = mock_setex
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
directory = temp_directory_with_files["directory"]
|
directory = temp_directory_with_files["directory"]
|
||||||
expected_files = temp_directory_with_files["files"]
|
expected_files = temp_directory_with_files["files"]
|
||||||
@@ -237,8 +237,8 @@ def helper_function():
|
|||||||
assert set(embedded_files) == set(expected_files)
|
assert set(embedded_files) == set(expected_files)
|
||||||
assert directory not in embedded_files
|
assert directory not in embedded_files
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_file_filtering_with_mixed_files_and_directories(self, mock_redis, tool, temp_directory_with_files):
|
def test_file_filtering_with_mixed_files_and_directories(self, mock_storage, tool, temp_directory_with_files):
|
||||||
"""Test file filtering when request contains both individual files and directories"""
|
"""Test file filtering when request contains both individual files and directories"""
|
||||||
# Setup mock Redis client with in-memory storage
|
# Setup mock Redis client with in-memory storage
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
@@ -253,7 +253,7 @@ def helper_function():
|
|||||||
|
|
||||||
mock_client.get.side_effect = mock_get
|
mock_client.get.side_effect = mock_get
|
||||||
mock_client.setex.side_effect = mock_setex
|
mock_client.setex.side_effect = mock_setex
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
directory = temp_directory_with_files["directory"]
|
directory = temp_directory_with_files["directory"]
|
||||||
python_file = temp_directory_with_files["python_file"]
|
python_file = temp_directory_with_files["python_file"]
|
||||||
|
|||||||
@@ -1,320 +0,0 @@
|
|||||||
"""
|
|
||||||
Integration tests for Docker path translation
|
|
||||||
|
|
||||||
These tests verify the actual behavior when running in a Docker-like environment
|
|
||||||
by creating temporary directories and testing the path translation logic.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import importlib
|
|
||||||
import os
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
# We'll reload the module to test different environment configurations
|
|
||||||
import utils.file_utils
|
|
||||||
|
|
||||||
|
|
||||||
def test_docker_path_translation_integration():
|
|
||||||
"""Test path translation in a simulated Docker environment"""
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
# Set up directories
|
|
||||||
host_workspace = Path(tmpdir) / "host_workspace"
|
|
||||||
host_workspace.mkdir()
|
|
||||||
container_workspace = Path(tmpdir) / "container_workspace"
|
|
||||||
container_workspace.mkdir()
|
|
||||||
|
|
||||||
# Create a test file structure
|
|
||||||
(host_workspace / "src").mkdir()
|
|
||||||
test_file = host_workspace / "src" / "test.py"
|
|
||||||
test_file.write_text("# test file")
|
|
||||||
|
|
||||||
# Set environment variables and reload the module
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
try:
|
|
||||||
os.environ["WORKSPACE_ROOT"] = str(host_workspace)
|
|
||||||
|
|
||||||
# Reload the modules to pick up new environment variables
|
|
||||||
# Need to reload security_config first since it sets WORKSPACE_ROOT
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
# Properly mock the CONTAINER_WORKSPACE
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace):
|
|
||||||
# Test the translation
|
|
||||||
from utils.file_utils import translate_path_for_environment
|
|
||||||
|
|
||||||
# This should translate the host path to container path
|
|
||||||
host_path = str(test_file)
|
|
||||||
result = translate_path_for_environment(host_path)
|
|
||||||
|
|
||||||
# Verify the translation worked
|
|
||||||
expected = str(container_workspace / "src" / "test.py")
|
|
||||||
assert result == expected
|
|
||||||
|
|
||||||
finally:
|
|
||||||
# Restore original environment
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
|
|
||||||
def test_docker_security_validation():
|
|
||||||
"""Test that path traversal attempts are properly blocked"""
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
# Set up directories
|
|
||||||
host_workspace = Path(tmpdir) / "workspace"
|
|
||||||
host_workspace.mkdir()
|
|
||||||
secret_dir = Path(tmpdir) / "secret"
|
|
||||||
secret_dir.mkdir()
|
|
||||||
secret_file = secret_dir / "password.txt"
|
|
||||||
secret_file.write_text("secret")
|
|
||||||
|
|
||||||
# Create a symlink inside workspace pointing to secret
|
|
||||||
symlink = host_workspace / "link_to_secret"
|
|
||||||
symlink.symlink_to(secret_file)
|
|
||||||
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
try:
|
|
||||||
os.environ["WORKSPACE_ROOT"] = str(host_workspace)
|
|
||||||
|
|
||||||
# Reload the modules
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
# Properly mock the CONTAINER_WORKSPACE
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE", Path("/workspace")):
|
|
||||||
from utils.file_utils import resolve_and_validate_path
|
|
||||||
|
|
||||||
# Trying to access the symlink should fail
|
|
||||||
with pytest.raises(PermissionError):
|
|
||||||
resolve_and_validate_path(str(symlink))
|
|
||||||
|
|
||||||
finally:
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
|
|
||||||
def test_no_docker_environment():
|
|
||||||
"""Test that paths are unchanged when Docker environment is not set"""
|
|
||||||
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
try:
|
|
||||||
# Clear Docker-related environment variables
|
|
||||||
os.environ.pop("WORKSPACE_ROOT", None)
|
|
||||||
|
|
||||||
# Reload the module
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
from utils.file_utils import translate_path_for_environment
|
|
||||||
|
|
||||||
# Path should remain unchanged
|
|
||||||
test_path = "/some/random/path.py"
|
|
||||||
assert translate_path_for_environment(test_path) == test_path
|
|
||||||
|
|
||||||
finally:
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
|
|
||||||
def test_review_changes_docker_path_translation():
|
|
||||||
"""Test that review_changes tool properly translates Docker paths"""
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
# Set up directories to simulate Docker mount
|
|
||||||
host_workspace = Path(tmpdir) / "host_workspace"
|
|
||||||
host_workspace.mkdir()
|
|
||||||
container_workspace = Path(tmpdir) / "container_workspace"
|
|
||||||
container_workspace.mkdir()
|
|
||||||
|
|
||||||
# Create a git repository in the container workspace
|
|
||||||
project_dir = container_workspace / "project"
|
|
||||||
project_dir.mkdir()
|
|
||||||
|
|
||||||
# Initialize git repo
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
subprocess.run(["git", "init"], cwd=project_dir, capture_output=True)
|
|
||||||
|
|
||||||
# Create a test file
|
|
||||||
test_file = project_dir / "test.py"
|
|
||||||
test_file.write_text("print('hello')")
|
|
||||||
|
|
||||||
# Stage the file
|
|
||||||
subprocess.run(["git", "add", "test.py"], cwd=project_dir, capture_output=True)
|
|
||||||
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
try:
|
|
||||||
# Simulate Docker environment
|
|
||||||
os.environ["WORKSPACE_ROOT"] = str(host_workspace)
|
|
||||||
|
|
||||||
# Reload the modules
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
# Properly mock the CONTAINER_WORKSPACE and reload precommit module
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace):
|
|
||||||
# Need to also patch it in the modules that import it
|
|
||||||
with patch("utils.security_config.CONTAINER_WORKSPACE", container_workspace):
|
|
||||||
# Import after patching to get updated environment
|
|
||||||
from tools.precommit import Precommit
|
|
||||||
|
|
||||||
# Create tool instance
|
|
||||||
tool = Precommit()
|
|
||||||
|
|
||||||
# Test path translation in prepare_prompt
|
|
||||||
request = tool.get_request_model()(
|
|
||||||
path=str(host_workspace / "project"), # Host path that needs translation
|
|
||||||
review_type="quick",
|
|
||||||
severity_filter="all",
|
|
||||||
)
|
|
||||||
|
|
||||||
# This should translate the path and find the git repository
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
result = asyncio.run(tool.prepare_prompt(request))
|
|
||||||
|
|
||||||
# Should find the repository (not raise an error about inaccessible path)
|
|
||||||
# If we get here without exception, the path was successfully translated
|
|
||||||
assert isinstance(result, str)
|
|
||||||
# The result should contain git diff information or indicate no changes
|
|
||||||
assert "No git repositories found" not in result or "changes" in result.lower()
|
|
||||||
|
|
||||||
finally:
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
|
|
||||||
def test_review_changes_docker_path_error():
|
|
||||||
"""Test that review_changes tool raises error for inaccessible paths"""
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
# Set up directories to simulate Docker mount
|
|
||||||
host_workspace = Path(tmpdir) / "host_workspace"
|
|
||||||
host_workspace.mkdir()
|
|
||||||
container_workspace = Path(tmpdir) / "container_workspace"
|
|
||||||
container_workspace.mkdir()
|
|
||||||
|
|
||||||
# Create a path outside the mounted workspace
|
|
||||||
outside_path = Path(tmpdir) / "outside_workspace"
|
|
||||||
outside_path.mkdir()
|
|
||||||
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
try:
|
|
||||||
# Simulate Docker environment
|
|
||||||
os.environ["WORKSPACE_ROOT"] = str(host_workspace)
|
|
||||||
|
|
||||||
# Reload the modules
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
# Properly mock the CONTAINER_WORKSPACE
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace):
|
|
||||||
with patch("utils.security_config.CONTAINER_WORKSPACE", container_workspace):
|
|
||||||
# Import after patching to get updated environment
|
|
||||||
from tools.precommit import Precommit
|
|
||||||
|
|
||||||
# Create tool instance
|
|
||||||
tool = Precommit()
|
|
||||||
|
|
||||||
# Test path translation with an inaccessible path
|
|
||||||
request = tool.get_request_model()(
|
|
||||||
path=str(outside_path), # Path outside the mounted workspace
|
|
||||||
review_type="quick",
|
|
||||||
severity_filter="all",
|
|
||||||
)
|
|
||||||
|
|
||||||
# This should raise a ValueError
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
with pytest.raises(ValueError) as exc_info:
|
|
||||||
asyncio.run(tool.prepare_prompt(request))
|
|
||||||
|
|
||||||
# Check the error message
|
|
||||||
assert "not accessible from within the Docker container" in str(exc_info.value)
|
|
||||||
assert "mounted workspace" in str(exc_info.value)
|
|
||||||
|
|
||||||
finally:
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
|
|
||||||
def test_double_translation_prevention():
|
|
||||||
"""Test that already-translated paths are not double-translated"""
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
# Set up directories
|
|
||||||
host_workspace = Path(tmpdir) / "host_workspace"
|
|
||||||
host_workspace.mkdir()
|
|
||||||
container_workspace = Path(tmpdir) / "container_workspace"
|
|
||||||
container_workspace.mkdir()
|
|
||||||
|
|
||||||
original_env = os.environ.copy()
|
|
||||||
try:
|
|
||||||
os.environ["WORKSPACE_ROOT"] = str(host_workspace)
|
|
||||||
|
|
||||||
# Reload the modules
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
# Properly mock the CONTAINER_WORKSPACE
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE", container_workspace):
|
|
||||||
from utils.file_utils import translate_path_for_environment
|
|
||||||
|
|
||||||
# Test 1: Normal translation
|
|
||||||
host_path = str(host_workspace / "src" / "main.py")
|
|
||||||
translated_once = translate_path_for_environment(host_path)
|
|
||||||
expected = str(container_workspace / "src" / "main.py")
|
|
||||||
assert translated_once == expected
|
|
||||||
|
|
||||||
# Test 2: Double translation should return the same path
|
|
||||||
translated_twice = translate_path_for_environment(translated_once)
|
|
||||||
assert translated_twice == translated_once
|
|
||||||
assert translated_twice == expected
|
|
||||||
|
|
||||||
# Test 3: Container workspace root should not be double-translated
|
|
||||||
root_path = str(container_workspace)
|
|
||||||
translated_root = translate_path_for_environment(root_path)
|
|
||||||
assert translated_root == root_path
|
|
||||||
|
|
||||||
finally:
|
|
||||||
os.environ.clear()
|
|
||||||
os.environ.update(original_env)
|
|
||||||
import utils.security_config
|
|
||||||
|
|
||||||
importlib.reload(utils.security_config)
|
|
||||||
importlib.reload(utils.file_utils)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
pytest.main([__file__, "-v"])
|
|
||||||
@@ -5,12 +5,10 @@ Test file protection mechanisms to ensure MCP doesn't scan:
|
|||||||
3. Excluded directories
|
3. Excluded directories
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
from utils.file_utils import (
|
from utils.file_utils import (
|
||||||
MCP_SIGNATURE_FILES,
|
|
||||||
expand_paths,
|
expand_paths,
|
||||||
get_user_home_directory,
|
get_user_home_directory,
|
||||||
is_home_directory_root,
|
is_home_directory_root,
|
||||||
@@ -21,25 +19,31 @@ from utils.file_utils import (
|
|||||||
class TestMCPDirectoryDetection:
|
class TestMCPDirectoryDetection:
|
||||||
"""Test MCP self-detection to prevent scanning its own code."""
|
"""Test MCP self-detection to prevent scanning its own code."""
|
||||||
|
|
||||||
def test_detect_mcp_directory_with_all_signatures(self, tmp_path):
|
def test_detect_mcp_directory_dynamically(self, tmp_path):
|
||||||
"""Test detection when all signature files are present."""
|
"""Test dynamic MCP directory detection based on script location."""
|
||||||
# Create a fake MCP directory with signature files
|
# The is_mcp_directory function now uses __file__ to detect MCP location
|
||||||
for sig_file in list(MCP_SIGNATURE_FILES)[:4]: # Use 4 files
|
# It checks if the given path is a subdirectory of the MCP server
|
||||||
if "/" in sig_file:
|
from pathlib import Path
|
||||||
(tmp_path / sig_file).parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
(tmp_path / sig_file).touch()
|
|
||||||
|
|
||||||
assert is_mcp_directory(tmp_path) is True
|
import utils.file_utils
|
||||||
|
|
||||||
def test_no_detection_with_few_signatures(self, tmp_path):
|
# Get the actual MCP server directory
|
||||||
"""Test no detection with only 1-2 signature files."""
|
mcp_server_dir = Path(utils.file_utils.__file__).parent.parent.resolve()
|
||||||
# Create only 2 signature files (less than threshold)
|
|
||||||
for sig_file in list(MCP_SIGNATURE_FILES)[:2]:
|
|
||||||
if "/" in sig_file:
|
|
||||||
(tmp_path / sig_file).parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
(tmp_path / sig_file).touch()
|
|
||||||
|
|
||||||
assert is_mcp_directory(tmp_path) is False
|
# Test that the MCP server directory itself is detected
|
||||||
|
assert is_mcp_directory(mcp_server_dir) is True
|
||||||
|
|
||||||
|
# Test that a subdirectory of MCP is also detected
|
||||||
|
if (mcp_server_dir / "tools").exists():
|
||||||
|
assert is_mcp_directory(mcp_server_dir / "tools") is True
|
||||||
|
|
||||||
|
def test_no_detection_on_non_mcp_directory(self, tmp_path):
|
||||||
|
"""Test no detection on directories outside MCP."""
|
||||||
|
# Any directory outside the MCP server should not be detected
|
||||||
|
non_mcp_dir = tmp_path / "some_other_project"
|
||||||
|
non_mcp_dir.mkdir()
|
||||||
|
|
||||||
|
assert is_mcp_directory(non_mcp_dir) is False
|
||||||
|
|
||||||
def test_no_detection_on_regular_directory(self, tmp_path):
|
def test_no_detection_on_regular_directory(self, tmp_path):
|
||||||
"""Test no detection on regular project directories."""
|
"""Test no detection on regular project directories."""
|
||||||
@@ -59,7 +63,11 @@ class TestMCPDirectoryDetection:
|
|||||||
|
|
||||||
def test_mcp_directory_excluded_from_scan(self, tmp_path):
|
def test_mcp_directory_excluded_from_scan(self, tmp_path):
|
||||||
"""Test that MCP directories are excluded during path expansion."""
|
"""Test that MCP directories are excluded during path expansion."""
|
||||||
# Create a project with MCP as subdirectory
|
# For this test, we need to mock is_mcp_directory since we can't
|
||||||
|
# actually create the MCP directory structure in tmp_path
|
||||||
|
from unittest.mock import patch as mock_patch
|
||||||
|
|
||||||
|
# Create a project with a subdirectory we'll pretend is MCP
|
||||||
project_root = tmp_path / "my_project"
|
project_root = tmp_path / "my_project"
|
||||||
project_root.mkdir()
|
project_root.mkdir()
|
||||||
|
|
||||||
@@ -67,19 +75,18 @@ class TestMCPDirectoryDetection:
|
|||||||
(project_root / "app.py").write_text("# My app")
|
(project_root / "app.py").write_text("# My app")
|
||||||
(project_root / "config.py").write_text("# Config")
|
(project_root / "config.py").write_text("# Config")
|
||||||
|
|
||||||
# Create MCP subdirectory
|
# Create a subdirectory that we'll mock as MCP
|
||||||
mcp_dir = project_root / "gemini-mcp-server"
|
fake_mcp_dir = project_root / "gemini-mcp-server"
|
||||||
mcp_dir.mkdir()
|
fake_mcp_dir.mkdir()
|
||||||
for sig_file in list(MCP_SIGNATURE_FILES)[:4]:
|
(fake_mcp_dir / "server.py").write_text("# MCP server")
|
||||||
if "/" in sig_file:
|
(fake_mcp_dir / "test.py").write_text("# Should not be included")
|
||||||
(mcp_dir / sig_file).parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
(mcp_dir / sig_file).write_text("# MCP file")
|
|
||||||
|
|
||||||
# Also add a regular file to MCP dir
|
# Mock is_mcp_directory to return True for our fake MCP dir
|
||||||
(mcp_dir / "test.py").write_text("# Should not be included")
|
def mock_is_mcp(path):
|
||||||
|
return str(path).endswith("gemini-mcp-server")
|
||||||
|
|
||||||
# Scan the project - use parent as SECURITY_ROOT to avoid workspace root check
|
# Scan the project with mocked MCP detection
|
||||||
with patch("utils.file_utils.SECURITY_ROOT", tmp_path):
|
with mock_patch("utils.file_utils.is_mcp_directory", side_effect=mock_is_mcp):
|
||||||
files = expand_paths([str(project_root)])
|
files = expand_paths([str(project_root)])
|
||||||
|
|
||||||
# Verify project files are included but MCP files are not
|
# Verify project files are included but MCP files are not
|
||||||
@@ -135,7 +142,6 @@ class TestHomeDirectoryProtection:
|
|||||||
"""Test that home directory root is excluded during path expansion."""
|
"""Test that home directory root is excluded during path expansion."""
|
||||||
with patch("utils.file_utils.get_user_home_directory") as mock_home:
|
with patch("utils.file_utils.get_user_home_directory") as mock_home:
|
||||||
mock_home.return_value = tmp_path
|
mock_home.return_value = tmp_path
|
||||||
with patch("utils.file_utils.SECURITY_ROOT", tmp_path):
|
|
||||||
# Try to scan home directory
|
# Try to scan home directory
|
||||||
files = expand_paths([str(tmp_path)])
|
files = expand_paths([str(tmp_path)])
|
||||||
# Should return empty as home root is skipped
|
# Should return empty as home root is skipped
|
||||||
@@ -145,32 +151,36 @@ class TestHomeDirectoryProtection:
|
|||||||
class TestUserHomeEnvironmentVariable:
|
class TestUserHomeEnvironmentVariable:
|
||||||
"""Test USER_HOME environment variable handling."""
|
"""Test USER_HOME environment variable handling."""
|
||||||
|
|
||||||
def test_user_home_from_env(self):
|
def test_user_home_from_pathlib(self):
|
||||||
"""Test USER_HOME is used when set."""
|
"""Test that get_user_home_directory uses Path.home()."""
|
||||||
test_home = "/Users/dockeruser"
|
|
||||||
with patch.dict(os.environ, {"USER_HOME": test_home}):
|
|
||||||
home = get_user_home_directory()
|
|
||||||
assert home == Path(test_home).resolve()
|
|
||||||
|
|
||||||
def test_fallback_to_workspace_root_in_docker(self):
|
|
||||||
"""Test fallback to WORKSPACE_ROOT in Docker when USER_HOME not set."""
|
|
||||||
with patch("utils.file_utils.WORKSPACE_ROOT", "/Users/realuser"):
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE") as mock_container:
|
|
||||||
mock_container.exists.return_value = True
|
|
||||||
# Clear USER_HOME to test fallback
|
|
||||||
with patch.dict(os.environ, {"USER_HOME": ""}, clear=False):
|
|
||||||
home = get_user_home_directory()
|
|
||||||
assert str(home) == "/Users/realuser"
|
|
||||||
|
|
||||||
def test_fallback_to_system_home(self):
|
|
||||||
"""Test fallback to system home when not in Docker."""
|
|
||||||
with patch.dict(os.environ, {}, clear=True):
|
|
||||||
with patch("utils.file_utils.CONTAINER_WORKSPACE") as mock_container:
|
|
||||||
mock_container.exists.return_value = False
|
|
||||||
with patch("pathlib.Path.home") as mock_home:
|
with patch("pathlib.Path.home") as mock_home:
|
||||||
mock_home.return_value = Path("/home/user")
|
mock_home.return_value = Path("/Users/testuser")
|
||||||
home = get_user_home_directory()
|
home = get_user_home_directory()
|
||||||
assert home == Path("/home/user")
|
assert home == Path("/Users/testuser")
|
||||||
|
|
||||||
|
def test_get_home_directory_uses_pathlib(self):
|
||||||
|
"""Test that get_user_home_directory always uses Path.home()."""
|
||||||
|
with patch("pathlib.Path.home") as mock_home:
|
||||||
|
mock_home.return_value = Path("/home/testuser")
|
||||||
|
home = get_user_home_directory()
|
||||||
|
assert home == Path("/home/testuser")
|
||||||
|
# Verify Path.home() was called
|
||||||
|
mock_home.assert_called_once()
|
||||||
|
|
||||||
|
def test_home_directory_on_different_platforms(self):
|
||||||
|
"""Test home directory detection on different platforms."""
|
||||||
|
# Test different platform home directories
|
||||||
|
test_homes = [
|
||||||
|
Path("/Users/john"), # macOS
|
||||||
|
Path("/home/ubuntu"), # Linux
|
||||||
|
Path("C:\\Users\\John"), # Windows
|
||||||
|
]
|
||||||
|
|
||||||
|
for test_home in test_homes:
|
||||||
|
with patch("pathlib.Path.home") as mock_home:
|
||||||
|
mock_home.return_value = test_home
|
||||||
|
home = get_user_home_directory()
|
||||||
|
assert home == test_home
|
||||||
|
|
||||||
|
|
||||||
class TestExcludedDirectories:
|
class TestExcludedDirectories:
|
||||||
@@ -198,7 +208,6 @@ class TestExcludedDirectories:
|
|||||||
src.mkdir()
|
src.mkdir()
|
||||||
(src / "utils.py").write_text("# Utils")
|
(src / "utils.py").write_text("# Utils")
|
||||||
|
|
||||||
with patch("utils.file_utils.SECURITY_ROOT", tmp_path):
|
|
||||||
files = expand_paths([str(project)])
|
files = expand_paths([str(project)])
|
||||||
|
|
||||||
file_names = [Path(f).name for f in files]
|
file_names = [Path(f).name for f in files]
|
||||||
@@ -226,7 +235,6 @@ class TestExcludedDirectories:
|
|||||||
# Create an allowed file
|
# Create an allowed file
|
||||||
(project / "index.js").write_text("// Index")
|
(project / "index.js").write_text("// Index")
|
||||||
|
|
||||||
with patch("utils.file_utils.SECURITY_ROOT", tmp_path):
|
|
||||||
files = expand_paths([str(project)])
|
files = expand_paths([str(project)])
|
||||||
|
|
||||||
file_names = [Path(f).name for f in files]
|
file_names = [Path(f).name for f in files]
|
||||||
@@ -254,10 +262,12 @@ class TestIntegrationScenarios:
|
|||||||
# MCP cloned inside the project
|
# MCP cloned inside the project
|
||||||
mcp = user_project / "tools" / "gemini-mcp-server"
|
mcp = user_project / "tools" / "gemini-mcp-server"
|
||||||
mcp.mkdir(parents=True)
|
mcp.mkdir(parents=True)
|
||||||
for sig_file in list(MCP_SIGNATURE_FILES)[:4]:
|
# Create typical MCP files
|
||||||
if "/" in sig_file:
|
(mcp / "server.py").write_text("# MCP server code")
|
||||||
(mcp / sig_file).parent.mkdir(parents=True, exist_ok=True)
|
(mcp / "config.py").write_text("# MCP config")
|
||||||
(mcp / sig_file).write_text("# MCP code")
|
tools_dir = mcp / "tools"
|
||||||
|
tools_dir.mkdir()
|
||||||
|
(tools_dir / "chat.py").write_text("# Chat tool")
|
||||||
(mcp / "LICENSE").write_text("MIT License")
|
(mcp / "LICENSE").write_text("MIT License")
|
||||||
(mcp / "README.md").write_text("# Gemini MCP")
|
(mcp / "README.md").write_text("# Gemini MCP")
|
||||||
|
|
||||||
@@ -266,7 +276,11 @@ class TestIntegrationScenarios:
|
|||||||
node_modules.mkdir()
|
node_modules.mkdir()
|
||||||
(node_modules / "package.json").write_text("{}")
|
(node_modules / "package.json").write_text("{}")
|
||||||
|
|
||||||
with patch("utils.file_utils.SECURITY_ROOT", tmp_path):
|
# Mock is_mcp_directory for this test
|
||||||
|
def mock_is_mcp(path):
|
||||||
|
return "gemini-mcp-server" in str(path)
|
||||||
|
|
||||||
|
with patch("utils.file_utils.is_mcp_directory", side_effect=mock_is_mcp):
|
||||||
files = expand_paths([str(user_project)])
|
files = expand_paths([str(user_project)])
|
||||||
|
|
||||||
file_paths = [str(f) for f in files]
|
file_paths = [str(f) for f in files]
|
||||||
@@ -278,23 +292,28 @@ class TestIntegrationScenarios:
|
|||||||
|
|
||||||
# MCP files should NOT be included
|
# MCP files should NOT be included
|
||||||
assert not any("gemini-mcp-server" in p for p in file_paths)
|
assert not any("gemini-mcp-server" in p for p in file_paths)
|
||||||
assert not any("zen_server.py" in p for p in file_paths)
|
assert not any("server.py" in p for p in file_paths)
|
||||||
|
|
||||||
# node_modules should NOT be included
|
# node_modules should NOT be included
|
||||||
assert not any("node_modules" in p for p in file_paths)
|
assert not any("node_modules" in p for p in file_paths)
|
||||||
|
|
||||||
def test_cannot_scan_above_workspace_root(self, tmp_path):
|
def test_security_without_workspace_root(self, tmp_path):
|
||||||
"""Test that we cannot scan outside the workspace root."""
|
"""Test that security still works with the new security model."""
|
||||||
workspace = tmp_path / "workspace"
|
# The system now relies on is_dangerous_path and is_home_directory_root
|
||||||
workspace.mkdir()
|
# for security protection
|
||||||
|
|
||||||
# Create a file in workspace
|
# Test that we can scan regular project directories
|
||||||
(workspace / "allowed.py").write_text("# Allowed")
|
project_dir = tmp_path / "my_project"
|
||||||
|
project_dir.mkdir()
|
||||||
|
(project_dir / "app.py").write_text("# App")
|
||||||
|
|
||||||
# Create a file outside workspace
|
files = expand_paths([str(project_dir)])
|
||||||
(tmp_path / "outside.py").write_text("# Outside")
|
assert len(files) == 1
|
||||||
|
assert "app.py" in files[0]
|
||||||
|
|
||||||
with patch("utils.file_utils.SECURITY_ROOT", workspace):
|
# Test that home directory root is still protected
|
||||||
# Try to expand paths outside workspace - should return empty list
|
with patch("utils.file_utils.get_user_home_directory") as mock_home:
|
||||||
|
mock_home.return_value = tmp_path
|
||||||
|
# Scanning home root should return empty
|
||||||
files = expand_paths([str(tmp_path)])
|
files = expand_paths([str(tmp_path)])
|
||||||
assert files == [] # Path outside workspace is skipped silently
|
assert files == []
|
||||||
|
|||||||
@@ -80,11 +80,11 @@ class TestImageSupportIntegration:
|
|||||||
expected = ["shared.png", "new_diagram.png", "middle.png", "old_diagram.png"]
|
expected = ["shared.png", "new_diagram.png", "middle.png", "old_diagram.png"]
|
||||||
assert image_list == expected
|
assert image_list == expected
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_add_turn_with_images(self, mock_redis):
|
def test_add_turn_with_images(self, mock_storage):
|
||||||
"""Test adding a conversation turn with images."""
|
"""Test adding a conversation turn with images."""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the Redis operations to return success
|
# Mock the Redis operations to return success
|
||||||
mock_client.set.return_value = True
|
mock_client.set.return_value = True
|
||||||
@@ -348,11 +348,11 @@ class TestImageSupportIntegration:
|
|||||||
importlib.reload(config)
|
importlib.reload(config)
|
||||||
ModelProviderRegistry._instance = None
|
ModelProviderRegistry._instance = None
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_cross_tool_image_context_preservation(self, mock_redis):
|
def test_cross_tool_image_context_preservation(self, mock_storage):
|
||||||
"""Test that images are preserved across different tools in conversation."""
|
"""Test that images are preserved across different tools in conversation."""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the Redis operations to return success
|
# Mock the Redis operations to return success
|
||||||
mock_client.set.return_value = True
|
mock_client.set.return_value = True
|
||||||
@@ -521,11 +521,11 @@ class TestImageSupportIntegration:
|
|||||||
result = tool._validate_image_limits(None, "test_model")
|
result = tool._validate_image_limits(None, "test_model")
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
@patch("utils.conversation_memory.get_redis_client")
|
@patch("utils.conversation_memory.get_storage")
|
||||||
def test_conversation_memory_thread_chaining_with_images(self, mock_redis):
|
def test_conversation_memory_thread_chaining_with_images(self, mock_storage):
|
||||||
"""Test that images work correctly with conversation thread chaining."""
|
"""Test that images work correctly with conversation thread chaining."""
|
||||||
mock_client = Mock()
|
mock_client = Mock()
|
||||||
mock_redis.return_value = mock_client
|
mock_storage.return_value = mock_client
|
||||||
|
|
||||||
# Mock the Redis operations to return success
|
# Mock the Redis operations to return success
|
||||||
mock_client.set.return_value = True
|
mock_client.set.return_value = True
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class TestIntelligentFallback:
|
|||||||
def test_prefers_openai_o3_mini_when_available(self):
|
def test_prefers_openai_o3_mini_when_available(self):
|
||||||
"""Test that o4-mini is preferred when OpenAI API key is available"""
|
"""Test that o4-mini is preferred when OpenAI API key is available"""
|
||||||
# Register only OpenAI provider for this test
|
# Register only OpenAI provider for this test
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
@@ -62,7 +62,7 @@ class TestIntelligentFallback:
|
|||||||
"""Test that OpenAI is preferred when both API keys are available"""
|
"""Test that OpenAI is preferred when both API keys are available"""
|
||||||
# Register both OpenAI and Gemini providers
|
# Register both OpenAI and Gemini providers
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
@@ -75,7 +75,7 @@ class TestIntelligentFallback:
|
|||||||
"""Test fallback behavior when no API keys are available"""
|
"""Test fallback behavior when no API keys are available"""
|
||||||
# Register providers but with no API keys available
|
# Register providers but with no API keys available
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
@@ -86,7 +86,7 @@ class TestIntelligentFallback:
|
|||||||
def test_available_providers_with_keys(self):
|
def test_available_providers_with_keys(self):
|
||||||
"""Test the get_available_providers_with_keys method"""
|
"""Test the get_available_providers_with_keys method"""
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False):
|
with patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False):
|
||||||
# Clear and register providers
|
# Clear and register providers
|
||||||
@@ -119,7 +119,7 @@ class TestIntelligentFallback:
|
|||||||
patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False),
|
patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-key", "GEMINI_API_KEY": ""}, clear=False),
|
||||||
):
|
):
|
||||||
# Register only OpenAI provider for this test
|
# Register only OpenAI provider for this test
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
|
|
||||||
|
|||||||
@@ -246,9 +246,9 @@ class TestLargePromptHandling:
|
|||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
output = json.loads(result[0].text)
|
output = json.loads(result[0].text)
|
||||||
# The precommit tool may return success or clarification_required depending on git state
|
# The precommit tool may return success or files_required_to_continue depending on git state
|
||||||
# The core fix ensures large prompts are detected at the right time
|
# The core fix ensures large prompts are detected at the right time
|
||||||
assert output["status"] in ["success", "clarification_required", "resend_prompt"]
|
assert output["status"] in ["success", "files_required_to_continue", "resend_prompt"]
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_debug_large_error_description(self, large_prompt):
|
async def test_debug_large_error_description(self, large_prompt):
|
||||||
@@ -298,13 +298,22 @@ class TestLargePromptHandling:
|
|||||||
)
|
)
|
||||||
mock_get_provider.return_value = mock_provider
|
mock_get_provider.return_value = mock_provider
|
||||||
|
|
||||||
# Mock the centralized file preparation method to avoid file system access
|
# Mock handle_prompt_file to verify prompt.txt is handled
|
||||||
|
with patch.object(tool, "handle_prompt_file") as mock_handle_prompt:
|
||||||
|
# Return the prompt content and updated files list (without prompt.txt)
|
||||||
|
mock_handle_prompt.return_value = ("Large prompt content from file", [other_file])
|
||||||
|
|
||||||
|
# Mock the centralized file preparation method
|
||||||
with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
|
with patch.object(tool, "_prepare_file_content_for_prompt") as mock_prepare_files:
|
||||||
mock_prepare_files.return_value = ("File content", [other_file])
|
mock_prepare_files.return_value = ("File content", [other_file])
|
||||||
|
|
||||||
await tool.execute({"prompt": "", "files": [temp_prompt_file, other_file]})
|
# Use a small prompt to avoid triggering size limit
|
||||||
|
await tool.execute({"prompt": "Test prompt", "files": [temp_prompt_file, other_file]})
|
||||||
|
|
||||||
# Verify prompt.txt was removed from files list
|
# Verify handle_prompt_file was called with the original files list
|
||||||
|
mock_handle_prompt.assert_called_once_with([temp_prompt_file, other_file])
|
||||||
|
|
||||||
|
# Verify _prepare_file_content_for_prompt was called with the updated files list (without prompt.txt)
|
||||||
mock_prepare_files.assert_called_once()
|
mock_prepare_files.assert_called_once()
|
||||||
files_arg = mock_prepare_files.call_args[0][0]
|
files_arg = mock_prepare_files.call_args[0][0]
|
||||||
assert len(files_arg) == 1
|
assert len(files_arg) == 1
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import pytest
|
|||||||
|
|
||||||
from providers.base import ProviderType
|
from providers.base import ProviderType
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
from utils.model_restrictions import ModelRestrictionService
|
from utils.model_restrictions import ModelRestrictionService
|
||||||
|
|
||||||
|
|
||||||
@@ -677,7 +677,7 @@ class TestAutoModeWithRestrictions:
|
|||||||
# Clear registry and register only OpenAI and Gemini providers
|
# Clear registry and register only OpenAI and Gemini providers
|
||||||
ModelProviderRegistry._instance = None
|
ModelProviderRegistry._instance = None
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
|
||||||
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
|
||||||
|
|||||||
@@ -195,7 +195,7 @@ class TestOldBehaviorSimulation:
|
|||||||
Verify that our fix provides comprehensive alias->target coverage.
|
Verify that our fix provides comprehensive alias->target coverage.
|
||||||
"""
|
"""
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
# Test real providers to ensure they implement our fix correctly
|
# Test real providers to ensure they implement our fix correctly
|
||||||
providers = [OpenAIModelProvider(api_key="test-key"), GeminiModelProvider(api_key="test-key")]
|
providers = [OpenAIModelProvider(api_key="test-key"), GeminiModelProvider(api_key="test-key")]
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import os
|
|||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from providers.base import ProviderType
|
from providers.base import ProviderType
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
|
|
||||||
class TestOpenAIProvider:
|
class TestOpenAIProvider:
|
||||||
|
|||||||
@@ -115,7 +115,7 @@ class TestPlannerTool:
|
|||||||
"""Test execute method for subsequent planning step."""
|
"""Test execute method for subsequent planning step."""
|
||||||
tool = PlannerTool()
|
tool = PlannerTool()
|
||||||
arguments = {
|
arguments = {
|
||||||
"step": "Set up Docker containers for each microservice",
|
"step": "Set up deployment configuration for each microservice",
|
||||||
"step_number": 2,
|
"step_number": 2,
|
||||||
"total_steps": 8,
|
"total_steps": 8,
|
||||||
"next_step_required": True,
|
"next_step_required": True,
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ Enhanced tests for precommit tool using mock storage to test real logic
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
@@ -50,21 +49,18 @@ class TestPrecommitToolWithMockStore:
|
|||||||
"""Test precommit tool with mock storage to validate actual logic"""
|
"""Test precommit tool with mock storage to validate actual logic"""
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def mock_redis(self):
|
def mock_storage(self):
|
||||||
"""Create mock Redis client"""
|
"""Create mock Redis client"""
|
||||||
return MockRedisClient()
|
return MockRedisClient()
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def tool(self, mock_redis, temp_repo):
|
def tool(self, mock_storage, temp_repo):
|
||||||
"""Create tool instance with mocked Redis"""
|
"""Create tool instance with mocked Redis"""
|
||||||
temp_dir, _ = temp_repo
|
temp_dir, _ = temp_repo
|
||||||
tool = Precommit()
|
tool = Precommit()
|
||||||
|
|
||||||
# Mock the Redis client getter and SECURITY_ROOT to allow access to temp files
|
# Mock the Redis client getter to use our mock storage
|
||||||
with (
|
with patch("utils.conversation_memory.get_storage", return_value=mock_storage):
|
||||||
patch("utils.conversation_memory.get_redis_client", return_value=mock_redis),
|
|
||||||
patch("utils.file_utils.SECURITY_ROOT", Path(temp_dir).resolve()),
|
|
||||||
):
|
|
||||||
yield tool
|
yield tool
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -112,7 +108,7 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
|||||||
shutil.rmtree(temp_dir)
|
shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_redis):
|
async def test_no_duplicate_file_content_in_prompt(self, tool, temp_repo, mock_storage):
|
||||||
"""Test that file content appears in expected locations
|
"""Test that file content appears in expected locations
|
||||||
|
|
||||||
This test validates our design decision that files can legitimately appear in both:
|
This test validates our design decision that files can legitimately appear in both:
|
||||||
@@ -145,12 +141,12 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
|||||||
# This is intentional and provides comprehensive context to the AI
|
# This is intentional and provides comprehensive context to the AI
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_conversation_memory_integration(self, tool, temp_repo, mock_redis):
|
async def test_conversation_memory_integration(self, tool, temp_repo, mock_storage):
|
||||||
"""Test that conversation memory works with mock storage"""
|
"""Test that conversation memory works with mock storage"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
# Mock conversation memory functions to use our mock redis
|
# Mock conversation memory functions to use our mock redis
|
||||||
with patch("utils.conversation_memory.get_redis_client", return_value=mock_redis):
|
with patch("utils.conversation_memory.get_storage", return_value=mock_storage):
|
||||||
# First request - should embed file content
|
# First request - should embed file content
|
||||||
PrecommitRequest(path=temp_dir, files=[config_path], prompt="First review")
|
PrecommitRequest(path=temp_dir, files=[config_path], prompt="First review")
|
||||||
|
|
||||||
@@ -173,7 +169,7 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
|||||||
assert len(files_to_embed_2) == 0, "Continuation should skip already embedded files"
|
assert len(files_to_embed_2) == 0, "Continuation should skip already embedded files"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_prompt_structure_integrity(self, tool, temp_repo, mock_redis):
|
async def test_prompt_structure_integrity(self, tool, temp_repo, mock_storage):
|
||||||
"""Test that the prompt structure is well-formed and doesn't have content duplication"""
|
"""Test that the prompt structure is well-formed and doesn't have content duplication"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
@@ -227,7 +223,7 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
|||||||
assert '__version__ = "1.0.0"' not in after_file_section
|
assert '__version__ = "1.0.0"' not in after_file_section
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_file_content_formatting(self, tool, temp_repo, mock_redis):
|
async def test_file_content_formatting(self, tool, temp_repo, mock_storage):
|
||||||
"""Test that file content is properly formatted without duplication"""
|
"""Test that file content is properly formatted without duplication"""
|
||||||
temp_dir, config_path = temp_repo
|
temp_dir, config_path = temp_repo
|
||||||
|
|
||||||
@@ -254,18 +250,18 @@ TEMPERATURE_ANALYTICAL = 0.2 # For code review, debugging
|
|||||||
assert file_content.count('__version__ = "1.0.0"') == 1
|
assert file_content.count('__version__ = "1.0.0"') == 1
|
||||||
|
|
||||||
|
|
||||||
def test_mock_redis_basic_operations():
|
def test_mock_storage_basic_operations():
|
||||||
"""Test that our mock Redis implementation works correctly"""
|
"""Test that our mock Redis implementation works correctly"""
|
||||||
mock_redis = MockRedisClient()
|
mock_storage = MockRedisClient()
|
||||||
|
|
||||||
# Test basic operations
|
# Test basic operations
|
||||||
assert mock_redis.get("nonexistent") is None
|
assert mock_storage.get("nonexistent") is None
|
||||||
assert mock_redis.exists("nonexistent") == 0
|
assert mock_storage.exists("nonexistent") == 0
|
||||||
|
|
||||||
mock_redis.set("test_key", "test_value")
|
mock_storage.set("test_key", "test_value")
|
||||||
assert mock_redis.get("test_key") == "test_value"
|
assert mock_storage.get("test_key") == "test_value"
|
||||||
assert mock_redis.exists("test_key") == 1
|
assert mock_storage.exists("test_key") == 1
|
||||||
|
|
||||||
assert mock_redis.delete("test_key") == 1
|
assert mock_storage.delete("test_key") == 1
|
||||||
assert mock_redis.get("test_key") is None
|
assert mock_storage.get("test_key") is None
|
||||||
assert mock_redis.delete("test_key") == 0 # Already deleted
|
assert mock_storage.delete("test_key") == 0 # Already deleted
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import pytest
|
|||||||
from providers import ModelProviderRegistry, ModelResponse
|
from providers import ModelProviderRegistry, ModelResponse
|
||||||
from providers.base import ProviderType
|
from providers.base import ProviderType
|
||||||
from providers.gemini import GeminiModelProvider
|
from providers.gemini import GeminiModelProvider
|
||||||
from providers.openai import OpenAIModelProvider
|
from providers.openai_provider import OpenAIModelProvider
|
||||||
|
|
||||||
|
|
||||||
class TestModelProviderRegistry:
|
class TestModelProviderRegistry:
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user