- Fix o3-pro response parsing to use output_text convenience field
- Replace respx with custom httpx transport solution for better reliability
- Implement comprehensive PII sanitization to prevent secret exposure
- Add HTTP request/response recording with cassette format for testing
- Sanitize all existing cassettes to remove exposed API keys
- Update documentation to reflect new HTTP transport recorder
- Add test suite for PII sanitization and HTTP recording

This change:
1. Fixes timeout issues with o3-pro API calls (was 2+ minutes, now ~15-22 seconds)
2. Properly captures response content without httpx.ResponseNotRead exceptions
3. Preserves original HTTP response format including gzip compression
4. Prevents future secret exposure with automatic PII sanitization
5. Enables reliable replay testing for o3-pro interactions

Co-Authored-By: Claude <noreply@anthropic.com>
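For context on the respx replacement: the record/replay idea behind the new transport can be sketched roughly as below. This is a minimal illustration only, assuming a JSON cassette of request/response pairs; the class name, cassette layout, and sanitization pattern are hypothetical and not the repository's actual `http_transport_recorder` implementation.

```python
import json
import re
from pathlib import Path

import httpx


class RecordReplayTransport(httpx.BaseTransport):
    """Record real HTTP exchanges to a JSON cassette, or replay them from it."""

    # Hypothetical secret pattern; a real sanitizer would cover far more cases.
    _SECRET = re.compile(r"sk-[A-Za-z0-9_-]+")

    def __init__(self, cassette: Path):
        self.cassette = cassette
        self.recording = not cassette.exists()  # record on first run, replay after
        self.inner = httpx.HTTPTransport()
        self.entries = [] if self.recording else json.loads(cassette.read_text())

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        if self.recording:
            response = self.inner.handle_request(request)
            body = response.read()  # read eagerly to avoid ResponseNotRead later
            self.entries.append({
                "request": {"method": request.method, "url": str(request.url)},
                "response": {
                    "status": response.status_code,
                    "body": self._sanitize(body.decode("utf-8", "replace")),
                },
            })
            self.cassette.write_text(json.dumps(self.entries, indent=2))
            return httpx.Response(response.status_code, content=body,
                                  headers={"content-type": "application/json"})
        # Replay mode: serve the next recorded exchange, no network involved.
        entry = self.entries.pop(0)
        return httpx.Response(entry["response"]["status"],
                              content=entry["response"]["body"].encode(),
                              headers={"content-type": "application/json"})

    def _sanitize(self, text: str) -> str:
        # Scrub API keys so cassettes are safe to commit.
        return self._SECRET.sub("sk-REDACTED", text)
```

A client built as `httpx.Client(transport=RecordReplayTransport(path))` can then be handed to the OpenAI SDK through its `http_client` parameter, so the SDK still constructs real response objects from the recorded bytes.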
"""
|
|
Tests for o3-pro output_text parsing fix using respx response recording.
|
|
|
|
This test validates the fix that uses `response.output_text` convenience field
|
|
instead of manually parsing `response.output.content[].text`.
|
|
|
|
Uses respx to record real o3-pro API responses at the HTTP level while allowing
|
|
the OpenAI SDK to create real response objects that we can test.
|
|
|
|
RECORDING: To record new responses, delete the cassette file and run with real API keys.
|
|
"""
import json
import os
import unittest
from pathlib import Path

import pytest
from dotenv import load_dotenv

from providers import ModelProviderRegistry
from providers.base import ProviderType
from providers.openai_provider import OpenAIModelProvider
from tests.http_transport_recorder import TransportFactory
from tools.chat import ChatTool

# Load environment variables from .env file
load_dotenv()

# Use an absolute path for the cassette directory
cassette_dir = Path(__file__).parent / "openai_cassettes"
cassette_dir.mkdir(exist_ok=True)

@pytest.mark.no_mock_provider  # Disable provider mocking for this test
class TestO3ProOutputTextFix(unittest.IsolatedAsyncioTestCase):
    """Test o3-pro response parsing fix using HTTP transport recording/replay."""

    def setUp(self):
        """Set up the test by ensuring the OpenAI provider is registered."""
        # Manually register the OpenAI provider to ensure it's available
        ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

    async def test_o3_pro_uses_output_text_field(self):
        """Test that o3-pro parsing uses the output_text convenience field via ChatTool."""
        cassette_path = cassette_dir / "o3_pro_basic_math.json"

        # Skip if no API key is available and the cassette doesn't exist
        if not cassette_path.exists() and not os.getenv("OPENAI_API_KEY"):
            pytest.skip("Set real OPENAI_API_KEY to record cassettes")

        # Note the mode up front: the factory records when no cassette exists
        # and replays when one does
        recording = not cassette_path.exists()

        # Create transport (automatically selects record vs replay mode)
        transport = TransportFactory.create_transport(str(cassette_path))

        # Get provider and inject custom transport
        provider = ModelProviderRegistry.get_provider_for_model("o3-pro")
        if not provider:
            self.fail("OpenAI provider not available for o3-pro model")

        # Inject transport for this test, remembering any existing one
        original_transport = getattr(provider, "_test_transport", None)
        provider._test_transport = transport

        try:
            # Execute ChatTool test with the custom transport
            result = await self._execute_chat_tool_test()

            # Verify the response works correctly
            self._verify_chat_tool_response(result)

            # Verify the cassette was created/used
            if not cassette_path.exists():
                self.fail(f"Cassette should exist at {cassette_path}")

            print(f"✅ HTTP transport {'recorded' if recording else 'replayed'} o3-pro interaction")

        finally:
            # Restore the original transport (if any)
            if original_transport:
                provider._test_transport = original_transport
            elif hasattr(provider, "_test_transport"):
                delattr(provider, "_test_transport")

    async def _execute_chat_tool_test(self):
        """Execute the ChatTool with o3-pro and return the result."""
        chat_tool = ChatTool()
        arguments = {"prompt": "What is 2 + 2?", "model": "o3-pro", "temperature": 1.0}

        return await chat_tool.execute(arguments)

    def _verify_chat_tool_response(self, result):
        """Verify the ChatTool response contains the expected data."""
        # Verify we got a valid response
        self.assertIsNotNone(result, "Should get response from ChatTool")

        # Parse the result content (ChatTool returns MCP TextContent format)
        self.assertIsInstance(result, list, "ChatTool should return list of content")
        self.assertTrue(len(result) > 0, "Should have at least one content item")

        # Get the text content (result is a list of TextContent objects)
        content_item = result[0]
        self.assertEqual(content_item.type, "text", "First item should be text content")

        text_content = content_item.text
        self.assertTrue(len(text_content) > 0, "Should have text content")

        # Parse the JSON response from the chat tool
        try:
            response_data = json.loads(text_content)
        except json.JSONDecodeError:
            self.fail(f"Could not parse chat tool response as JSON: {text_content}")

        # Verify the response makes sense for the math question
        actual_content = response_data.get("content", "")
        self.assertIn("4", actual_content, "Should contain the answer '4'")

        # Verify metadata shows o3-pro was used
        metadata = response_data.get("metadata", {})
        self.assertEqual(metadata.get("model_used"), "o3-pro", "Should use o3-pro model")
        self.assertEqual(metadata.get("provider_used"), "openai", "Should use OpenAI provider")

        # Additional verification that the fix is working
        self.assertTrue(actual_content.strip(), "Content should not be empty")
        self.assertIsInstance(actual_content, str, "Content should be string")

        # Verify the successful status
        self.assertEqual(response_data.get("status"), "continuation_available", "Should have successful status")

if __name__ == "__main__":
|
|
print("🎥 OpenAI Response Recording Tests for O3-Pro Output Text Fix")
|
|
print("=" * 50)
|
|
print("RECORD MODE: Requires OPENAI_API_KEY - makes real API calls through ChatTool")
|
|
print("REPLAY MODE: Uses recorded HTTP responses - free and fast")
|
|
print("RECORDING: Delete .json files in tests/openai_cassettes/ to re-record")
|
|
print()
|
|
|
|
unittest.main()
|