my-pal-mcp-server/simulator_tests/test_xai_models.py

#!/usr/bin/env python3
"""
X.AI GROK Model Tests
Tests that verify X.AI GROK functionality including:
- Model alias resolution (grok maps to Grok 4)
- GROK-4 and GROK-4.1 Fast Reasoning models work correctly
- Conversation continuity works with GROK models
- API integration and response validation
"""
from .base_test import BaseSimulatorTest


class XAIModelsTest(BaseSimulatorTest):
    """Test X.AI GROK model functionality and integration"""

    @property
    def test_name(self) -> str:
        return "xai_models"

    @property
    def test_description(self) -> str:
        return "X.AI GROK model functionality and integration"
    def run_test(self) -> bool:
        """Test X.AI GROK model functionality"""
        try:
            self.logger.info("Test: X.AI GROK model functionality and integration")

            # Check if the X.AI API key is configured and non-empty
            import os

            xai_key = os.environ.get("XAI_API_KEY", "")
            is_valid = bool(xai_key and xai_key != "your_xai_api_key_here" and xai_key.strip())
            if not is_valid:
                self.logger.info(" ⚠️ X.AI API key not configured or empty - skipping test")
                self.logger.info(" This test requires XAI_API_KEY to be set in .env with a valid key")
                return True  # Return True to indicate the test is skipped, not failed

            # Set up test files for later use
            self.setup_test_files()
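
            # Note: call_mcp_tool (inherited from BaseSimulatorTest) is assumed here
            # to return a (response_text, continuation_id) tuple, with a falsy
            # response signalling a failed call -- that is the contract every
            # check below relies on.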
            # Test 1: 'grok' alias (should map to grok-4)
            self.logger.info(" 1: Testing 'grok' alias (should map to grok-4)")
            response1, continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from GROK model!' and nothing else.",
                    "model": "grok",
                    "temperature": 0.1,
                },
            )
            if not response1:
                self.logger.error(" ❌ GROK alias test failed")
                return False
            self.logger.info(" ✅ GROK alias call completed")
            if continuation_id:
                self.logger.info(f" ✅ Got continuation_id: {continuation_id}")
            # Test 2: Direct grok-4.1-fast model name
            self.logger.info(" 2: Testing direct model name (grok-4.1-fast)")
            response2, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from GROK-4.1 Fast!' and nothing else.",
                    "model": "grok-4.1-fast",
                    "temperature": 0.1,
                },
            )
            if not response2:
                self.logger.error(" ❌ Direct GROK-4.1-fast model test failed")
                return False
            self.logger.info(" ✅ Direct GROK-4.1-fast model call completed")
            # Test 3: grok-4.1-fast-reasoning alias
            self.logger.info(" 3: Testing 'grok-4.1-fast-reasoning' alias")
            response3, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Say 'Hello from GROK-4.1 Fast Reasoning alias!' and nothing else.",
                    "model": "grok-4.1-fast-reasoning",
                    "temperature": 0.1,
                },
            )
            if not response3:
                self.logger.error(" ❌ GROK-4.1-fast-reasoning alias test failed")
                return False
            self.logger.info(" ✅ GROK-4.1-fast-reasoning alias call completed")
            # Test 4: Conversation continuity with GROK models
            self.logger.info(" 4: Testing conversation continuity with GROK")
            response4, new_continuation_id = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "Remember this number: 87. What number did I just tell you?",
                    "model": "grok",
                    "temperature": 0.1,
                },
            )
            if not response4 or not new_continuation_id:
                self.logger.error(" ❌ Failed to start conversation with continuation_id")
                return False

            # Continue the conversation
            response5, _ = self.call_mcp_tool(
                "chat",
                {
                    "prompt": "What was the number I told you earlier?",
                    "model": "grok",
                    "continuation_id": new_continuation_id,
                    "temperature": 0.1,
                },
            )
            if not response5:
                self.logger.error(" ❌ Failed to continue conversation")
                return False

            # Check if the model remembered the number
            if "87" in response5:
                self.logger.info(" ✅ Conversation continuity working with GROK")
            else:
                self.logger.warning(" ⚠️ Model may not have remembered the number")
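
            # A miss on the recall check above is deliberately a warning rather
            # than a failure: both calls already returned, so the continuation
            # transport worked even if the model's recall did not.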
            # Test 5: Validate X.AI API usage from logs
            self.logger.info(" 5: Validating X.AI API usage in logs")
            logs = self.get_recent_server_logs()
            log_lines = logs.split("\n")

            # Check for X.AI API calls
            xai_logs = [line for line in log_lines if "x.ai" in line.lower()]
            xai_api_logs = [line for line in log_lines if "api.x.ai" in line]
            grok_logs = [line for line in log_lines if "grok" in line.lower()]

            # Check for specific model resolution
            grok_resolution_logs = [
                line
                for line in log_lines
                if ("Resolved model" in line and "grok" in line.lower()) or ("grok" in line and "->" in line)
            ]

            # Check for X.AI provider usage
            xai_provider_logs = [line for line in log_lines if "XAI" in line or "X.AI" in line]

            # Log findings
            self.logger.info(f" X.AI-related logs: {len(xai_logs)}")
            self.logger.info(f" X.AI API logs: {len(xai_api_logs)}")
            self.logger.info(f" GROK-related logs: {len(grok_logs)}")
            self.logger.info(f" Model resolution logs: {len(grok_resolution_logs)}")
            self.logger.info(f" X.AI provider logs: {len(xai_provider_logs)}")

            # Sample log output for debugging
            if self.verbose and xai_logs:
                self.logger.debug(" 📋 Sample X.AI logs:")
                for log in xai_logs[:3]:
                    self.logger.debug(f" {log}")
            if self.verbose and grok_logs:
                self.logger.debug(" 📋 Sample GROK logs:")
                for log in grok_logs[:3]:
                    self.logger.debug(f" {log}")
            # Success criteria
            grok_mentioned = len(grok_logs) > 0
            api_used = len(xai_api_logs) > 0 or len(xai_logs) > 0
            provider_used = len(xai_provider_logs) > 0

            success_criteria = [
                ("GROK models mentioned in logs", grok_mentioned),
                ("X.AI API calls made", api_used),
                ("X.AI provider used", provider_used),
                ("All model calls succeeded", True),  # Already checked above; failures return early
                ("Conversation continuity works", True),  # Already tested above
            ]
            passed_criteria = sum(1 for _, passed in success_criteria if passed)
            self.logger.info(f" Success criteria met: {passed_criteria}/{len(success_criteria)}")
            for criterion, passed in success_criteria:
                status = "✅" if passed else "❌"
                self.logger.info(f" {status} {criterion}")

            if passed_criteria >= 3:  # Require at least 3 of the 5 criteria
                self.logger.info(" ✅ X.AI GROK model tests passed")
                return True
            else:
                self.logger.error(" ❌ X.AI GROK model tests failed")
                return False
        except Exception as e:
            self.logger.error(f"X.AI GROK model test failed: {e}")
            return False
        finally:
            self.cleanup_test_files()
def main():
    """Run the X.AI GROK model tests"""
    import sys

    verbose = "--verbose" in sys.argv or "-v" in sys.argv
    test = XAIModelsTest(verbose=verbose)
    success = test.run_test()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
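

# Hypothetical invocation sketch: because of the relative import at the top,
# this module must run as part of its package. The module path below is assumed
# from the file layout shown here; --verbose is handled by main() above.
#
#   XAI_API_KEY=<your-key> python -m simulator_tests.test_xai_models --verbose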