Model definitions now support a new `allow_code_generation` flag, to be used only with higher-reasoning models such as GPT-5-Pro and Gemini 2.5 Pro. When `true`, the `chat` tool can request the external model to generate a full implementation, update, instructions, etc., and then share that implementation with the calling agent. This effectively lets us use more powerful models such as GPT-5-Pro (which are otherwise API-only or part of the $200 Pro plan within the ChatGPT app) to generate code or entire implementations for us.
178 lines · 7.0 KiB · Python
"""Integration test for ChatTool auto-mode using OpenAI o3/gpt models with cassette recording."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from providers.registry import ModelProviderRegistry
|
|
from providers.shared import ProviderType
|
|
from tests.transport_helpers import inject_transport
|
|
from tools.chat import ChatTool
|
|
|
|
# Recorded HTTP interactions live alongside this test module; the directory is
# created eagerly so a first recording run never fails on a missing folder.
CASSETTE_DIR = Path(__file__).with_name("openai_cassettes")
CASSETTE_DIR.mkdir(exist_ok=True)

# Cassette for the auto-mode "moon distance" exchange.
CASSETTE_PATH = CASSETTE_DIR / "chat_gpt5_moon_distance.json"
# Cassette for the two-turn continuation exchange.
CASSETTE_CONTINUATION_PATH = CASSETTE_DIR / "chat_gpt5_continuation.json"
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.no_mock_provider
async def test_chat_auto_mode_with_openai(monkeypatch, tmp_path):
    """Ensure ChatTool in auto mode selects gpt-5 via OpenAI and returns a valid response.

    Records a cassette on first run (requires a real OPENAI_API_KEY) and
    replays it on subsequent runs with a dummy key.
    """
    # Prepare environment so only OpenAI is available in auto mode.
    env_updates = {
        "DEFAULT_MODEL": "auto",
        "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", ""),
    }
    # Remove Gemini/XAI/OpenRouter keys to force OpenAI selection.
    keys_to_clear = ["GEMINI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]

    # Choose recording or replay mode based on cassette presence (same
    # structure as test_chat_openai_continuation for consistency).
    recording_mode = not CASSETTE_PATH.exists()
    if recording_mode:
        real_key = env_updates["OPENAI_API_KEY"].strip()
        if not real_key or real_key.startswith("dummy"):
            pytest.skip(
                "Cassette missing and OPENAI_API_KEY not configured. Provide a real key and re-run to record."
            )

    with monkeypatch.context() as m:
        m.setenv("DEFAULT_MODEL", env_updates["DEFAULT_MODEL"])
        m.setenv("OPENAI_ALLOWED_MODELS", "gpt-5")
        if recording_mode:
            m.setenv("OPENAI_API_KEY", env_updates["OPENAI_API_KEY"])
        else:
            # Replay mode uses a dummy key to keep secrets out of the cassette.
            m.setenv("OPENAI_API_KEY", "dummy-key-for-replay")
        for key in keys_to_clear:
            m.delenv(key, raising=False)

        # Reset registry and register only the OpenAI provider.
        ModelProviderRegistry.reset_for_testing()
        from providers.openai import OpenAIModelProvider

        ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

        # Inject HTTP transport (records or replays depending on cassette state).
        inject_transport(monkeypatch, CASSETTE_PATH)

        # Execute ChatTool request targeting gpt-5 directly (server normally resolves auto→model).
        chat_tool = ChatTool()
        working_directory = str(tmp_path)
        arguments = {
            "prompt": "Use chat with gpt5 and ask how far the moon is from earth.",
            "model": "gpt-5",
            "temperature": 1.0,
            "working_directory": working_directory,
        }

        result = await chat_tool.execute(arguments)

        # Validate response shape and content.
        assert result and result[0].type == "text"
        response_data = json.loads(result[0].text)

        assert response_data["status"] in {"success", "continuation_available"}
        metadata = response_data.get("metadata", {})
        assert metadata.get("provider_used") == "openai"
        assert metadata.get("model_used") in {"gpt-5", "gpt5"}
        assert "moon" in response_data["content"].lower()

        # Ensure cassette recorded for future replays.
        assert CASSETTE_PATH.exists()

    # Clean up registry state so subsequent tests start from a fresh registry
    # (previously omitted here, unlike test_chat_openai_continuation, which
    # leaked the OpenAI-only registration into later tests).
    ModelProviderRegistry.reset_for_testing()
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.no_mock_provider
async def test_chat_openai_continuation(monkeypatch, tmp_path):
    """Verify continuation_id workflow against gpt-5 using recorded OpenAI responses."""

    openai_key = os.getenv("OPENAI_API_KEY", "")
    foreign_keys = ["GEMINI_API_KEY", "XAI_API_KEY", "OPENROUTER_API_KEY"]

    # Record a fresh cassette only when none exists; otherwise replay it.
    must_record = not CASSETTE_CONTINUATION_PATH.exists()
    if must_record:
        trimmed_key = openai_key.strip()
        if not trimmed_key or trimmed_key.startswith("dummy"):
            pytest.skip("Continuation cassette missing and OPENAI_API_KEY not configured. Set a real key to record.")

    fixed_thread_id = uuid.UUID("95d60035-1aa3-4398-9936-fca71989d906")

    with monkeypatch.context() as patcher:
        patcher.setenv("DEFAULT_MODEL", "auto")
        patcher.setenv("OPENAI_ALLOWED_MODELS", "gpt-5")
        # Real key while recording; a dummy keeps secrets out of the cassette on replay.
        patcher.setenv("OPENAI_API_KEY", openai_key if must_record else "dummy-key-for-replay")
        for name in foreign_keys:
            patcher.delenv(name, raising=False)

        # Register only the OpenAI provider against a clean registry.
        ModelProviderRegistry.reset_for_testing()
        from providers.openai import OpenAIModelProvider

        ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)

        inject_transport(monkeypatch, CASSETTE_CONTINUATION_PATH)

        from utils import conversation_memory

        # Pin thread-id generation so the recorded exchange replays deterministically.
        patcher.setattr(conversation_memory.uuid, "uuid4", lambda: fixed_thread_id)

        tool = ChatTool()
        workdir = str(tmp_path)

        # Turn 1: ask a question and expect a continuation offer in return.
        first_reply = await tool.execute(
            {
                "prompt": "In one word, which sells better: iOS app or macOS app?",
                "model": "gpt-5",
                "temperature": 1.0,
                "working_directory": workdir,
            }
        )

        assert first_reply and first_reply[0].type == "text"
        first_payload = json.loads(first_reply[0].text)
        assert first_payload["status"] == "continuation_available"
        meta_one = first_payload.get("metadata", {})
        assert meta_one.get("provider_used") == "openai"
        assert meta_one.get("model_used") in {"gpt-5", "gpt5"}
        offer = first_payload.get("continuation_offer")
        assert offer is not None
        thread_ref = offer.get("continuation_id")
        assert thread_ref

        # Turn 2: continue the thread via continuation_id (same tool instance for clarity).
        second_reply = await tool.execute(
            {
                "prompt": "In one word then, SwiftUI or ReactNative?",
                "model": "gpt-5",
                "continuation_id": thread_ref,
                "temperature": 1.0,
                "working_directory": workdir,
            }
        )

        assert second_reply and second_reply[0].type == "text"
        second_payload = json.loads(second_reply[0].text)
        assert second_payload["status"] in {"success", "continuation_available"}
        meta_two = second_payload.get("metadata", {})
        assert meta_two.get("provider_used") == "openai"
        assert meta_two.get("model_used") in {"gpt-5", "gpt5"}
        assert meta_two.get("conversation_ready") is True
        assert second_payload.get("continuation_offer") is not None

        # The cassette must exist afterwards so future replays work.
        assert CASSETTE_CONTINUATION_PATH.exists()

    # Clean up registry state for subsequent tests.
    ModelProviderRegistry.reset_for_testing()
|