my-pal-mcp-server/simulator_tests/test_xai_models.py

#!/usr/bin/env python3
"""
X.AI GROK Model Tests
Tests that verify X.AI GROK functionality including:
- Model alias resolution (grok maps to Grok 4)
- GROK-4 and GROK-4.1 Fast Reasoning models work correctly
- Conversation continuity works with GROK models
- API integration and response validation
"""
from .base_test import BaseSimulatorTest


class XAIModelsTest(BaseSimulatorTest):
    """Test X.AI GROK model functionality and integration"""

    @property
    def test_name(self) -> str:
        return "xai_models"

    @property
    def test_description(self) -> str:
        return "X.AI GROK model functionality and integration"
    def run_test(self) -> bool:
        """Test X.AI GROK model functionality"""
        try:
            self.logger.info("Test: X.AI GROK model functionality and integration")

            # Check if the X.AI API key is configured and non-empty
            import os

            xai_key = os.environ.get("XAI_API_KEY", "")
            is_valid = bool(xai_key and xai_key != "your_xai_api_key_here" and xai_key.strip())
            if not is_valid:
                self.logger.info(" ⚠️ X.AI API key not configured or empty - skipping test")
                self.logger.info(" This test requires XAI_API_KEY to be set in .env with a valid key")
                return True  # Return True to indicate the test is skipped, not failed

            # Set up test files for later use
            self.setup_test_files()
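
            # Note: call_mcp_tool (inherited from BaseSimulatorTest) is assumed here
            # to return a (response_text, continuation_id) tuple, with a falsy
            # response signalling a failed call -- that is the contract every
            # check below relies on.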
            # Test 1: 'grok' alias (should map to grok-4)
            self.logger.info(" 1: Testing 'grok' alias (should map to grok-4)")
            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from GROK model!' and nothing else.",
                    "model": "grok",
                    "temperature": 0.1,
                },
            )
            if not response1:
                self.logger.error(" ❌ GROK alias test failed")
                return False
            self.logger.info(" ✅ GROK alias call completed")
            if continuation_id:
                self.logger.info(f" ✅ Got continuation_id: {continuation_id}")
            # Test 2: Direct grok-4.1-fast model name
            self.logger.info(" 2: Testing direct model name (grok-4.1-fast)")
            response2, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from GROK-4.1 Fast!' and nothing else.",
                    "model": "grok-4.1-fast",
                    "temperature": 0.1,
                },
            )
            if not response2:
                self.logger.error(" ❌ Direct GROK-4.1-fast model test failed")
                return False
            self.logger.info(" ✅ Direct GROK-4.1-fast model call completed")
            # Test 3: grok-4.1-fast-reasoning alias
            self.logger.info(" 3: Testing 'grok-4.1-fast-reasoning' alias")
            response3, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from GROK-4.1 Fast Reasoning alias!' and nothing else.",
                    "model": "grok-4.1-fast-reasoning",
                    "temperature": 0.1,
                },
            )
            if not response3:
                self.logger.error(" ❌ GROK-4.1-fast-reasoning alias test failed")
                return False
            self.logger.info(" ✅ GROK-4.1-fast-reasoning alias call completed")
            # Test 4: Conversation continuity with GROK models
            self.logger.info(" 4: Testing conversation continuity with GROK")
            response4, new_continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Remember this number: 87. What number did I just tell you?",
                    "model": "grok",
                    "temperature": 0.1,
                },
            )
            if not response4 or not new_continuation_id:
                self.logger.error(" ❌ Failed to start conversation with continuation_id")
                return False

            # Continue the conversation
            response5, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What was the number I told you earlier?",
                    "model": "grok",
                    "continuation_id": new_continuation_id,
                    "temperature": 0.1,
                },
            )
            if not response5:
                self.logger.error(" ❌ Failed to continue conversation")
                return False

            # Check if the model remembered the number
            if "87" in response5:
                self.logger.info(" ✅ Conversation continuity working with GROK")
            else:
                self.logger.warning(" ⚠️ Model may not have remembered the number")
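
            # A miss on the recall check above is deliberately a warning rather
            # than a failure: both calls already returned, so the continuation
            # transport worked even if the model's recall did not.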
            # Test 5: Validate X.AI API usage from logs
            self.logger.info(" 5: Validating X.AI API usage in logs")
            logs = self.get_recent_server_logs()
            log_lines = logs.split("\n")

            # Check for X.AI API calls
            xai_logs = [line for line in log_lines if "x.ai" in line.lower()]
            xai_api_logs = [line for line in log_lines if "api.x.ai" in line]
            grok_logs = [line for line in log_lines if "grok" in line.lower()]

            # Check for specific model resolution
            grok_resolution_logs = [
                line
                for line in log_lines
                if ("Resolved model" in line and "grok" in line.lower()) or ("grok" in line and "->" in line)
            ]

            # Check for X.AI provider usage
            xai_provider_logs = [line for line in log_lines if "XAI" in line or "X.AI" in line]

            # Log findings
            self.logger.info(f" X.AI-related logs: {len(xai_logs)}")
            self.logger.info(f" X.AI API logs: {len(xai_api_logs)}")
            self.logger.info(f" GROK-related logs: {len(grok_logs)}")
            self.logger.info(f" Model resolution logs: {len(grok_resolution_logs)}")
            self.logger.info(f" X.AI provider logs: {len(xai_provider_logs)}")

            # Sample log output for debugging
            if self.verbose and xai_logs:
                self.logger.debug(" 📋 Sample X.AI logs:")
                for log in xai_logs[:3]:
                    self.logger.debug(f" {log}")
            if self.verbose and grok_logs:
                self.logger.debug(" 📋 Sample GROK logs:")
                for log in grok_logs[:3]:
                    self.logger.debug(f" {log}")
            # Success criteria
            grok_mentioned = len(grok_logs) > 0
            api_used = len(xai_api_logs) > 0 or len(xai_logs) > 0
            provider_used = len(xai_provider_logs) > 0

            success_criteria = [
                ("GROK models mentioned in logs", grok_mentioned),
                ("X.AI API calls made", api_used),
                ("X.AI provider used", provider_used),
                ("All model calls succeeded", True),  # Already checked above; failures return early
                ("Conversation continuity works", True),  # Already tested above
            ]
            passed_criteria = sum(1 for _, passed in success_criteria if passed)
            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
            for criterion, passed in success_criteria:
                status = "✅" if passed else "❌"
                self.logger.info(f" {status} {criterion}")

            if passed_criteria >= 3:  # Require at least 3 of the 5 criteria
                self.logger.info(" ✅ X.AI GROK model tests passed")
                return True
            else:
                self.logger.error(" ❌ X.AI GROK model tests failed")
                return False
        except Exception as e:
            self.logger.error(f"X.AI GROK model test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()
def main():
    """Run the X.AI GROK model tests"""
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = XAIModelsTest(verbose=verbose)
    success = test.run_test()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
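

# Hypothetical invocation sketch: because of the relative import at the top,
# this module must run as part of its package. The module path below is assumed
# from the file layout shown here; --verbose is handled by main() above.
#
#   XAI_API_KEY=<your-key> python -m simulator_tests.test_xai_models --verbose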