feat: support for codex as external CLI
fix: improved handling of MCP token limits when handling CLI output
This commit is contained in:
70
tests/test_clink_codex_agent.py
Normal file
70
tests/test_clink_codex_agent.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from clink.agents.base import CLIAgentError
|
||||
from clink.agents.codex import CodexAgent
|
||||
from clink.models import ResolvedCLIClient, ResolvedCLIRole
|
||||
|
||||
|
||||
class DummyProcess:
    """Stand-in for a subprocess handle that returns canned output.

    Provides a ``returncode`` attribute and an awaitable ``communicate``
    method, both driven entirely by the values given to the constructor.
    """

    def __init__(self, *, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):
        """Store the canned streams and exit status.

        Args:
            stdout: Bytes handed back as the first element of ``communicate``.
            stderr: Bytes handed back as the second element of ``communicate``.
            returncode: Exit status exposed through the ``returncode`` attribute.
        """
        self._stdout = stdout
        self._stderr = stderr
        self.returncode = returncode

    async def communicate(self, _input=None):
        """Return the canned ``(stdout, stderr)`` pair.

        Args:
            _input: Ignored. Defaults to ``None`` so the stub can also be
                awaited with no argument, matching the optional ``input``
                of ``asyncio.subprocess.Process.communicate``.

        Returns:
            The ``(stdout, stderr)`` tuple supplied at construction time.
        """
        return self._stdout, self._stderr
|
||||
|
||||
|
||||
@pytest.fixture()
def codex_agent():
    """Build a ``CodexAgent`` with a single ``default`` role.

    Returns:
        An ``(agent, role)`` tuple: the agent wraps a ``ResolvedCLIClient``
        named ``codex`` that uses the ``codex_jsonl`` parser, and ``role`` is
        the ``default`` role registered on that client.
    """
    default_role = ResolvedCLIRole(
        name="default",
        prompt_path=Path("systemprompts/clink/codex_default.txt").resolve(),
        role_args=[],
    )
    cli_client = ResolvedCLIClient(
        name="codex",
        executable=["codex"],
        internal_args=["exec"],
        config_args=["--json", "--dangerously-bypass-approvals-and-sandbox"],
        env={},
        timeout_seconds=30,
        parser="codex_jsonl",
        roles={"default": default_role},
        output_to_file=None,
        working_dir=None,
    )
    agent = CodexAgent(cli_client)
    return agent, default_role
|
||||
|
||||
|
||||
async def _run_agent_with_process(monkeypatch, agent, role, process):
    """Run *agent* with subprocess creation patched to yield *process*.

    ``asyncio.create_subprocess_exec`` is replaced through *monkeypatch* by a
    coroutine that ignores its arguments and returns *process*; the agent is
    then run with a fixed prompt and no files or images.

    Returns:
        Whatever ``agent.run`` returns.
    """

    async def _spawn_stub(*_args, **_kwargs):
        # Every spawn attempt gets the same pre-built process object.
        return process

    monkeypatch.setattr(asyncio, "create_subprocess_exec", _spawn_stub)
    outcome = await agent.run(role=role, prompt="do something", files=[], images=[])
    return outcome
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_codex_agent_recovers_jsonl(monkeypatch, codex_agent):
    """Valid JSONL on stdout is still parsed when the exit code is 124."""
    agent, role = codex_agent
    stdout = b"""
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello from Codex"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    failing_process = DummyProcess(stdout=stdout, returncode=124)

    result = await _run_agent_with_process(monkeypatch, agent, role, failing_process)

    # The non-zero exit status is reported alongside the recovered content.
    assert result.returncode == 124
    parsed = result.parsed
    assert "Hello from Codex" in parsed.content
    assert parsed.metadata["usage"]["output_tokens"] == 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_codex_agent_propagates_invalid_json(monkeypatch, codex_agent):
    """Non-JSON stdout with exit code 1 makes the agent raise ``CLIAgentError``."""
    agent, role = codex_agent
    broken_process = DummyProcess(stdout=b"not json", returncode=1)

    with pytest.raises(CLIAgentError):
        await _run_agent_with_process(monkeypatch, agent, role, broken_process)
|
||||
22
tests/test_clink_parsers.py
Normal file
22
tests/test_clink_parsers.py
Normal file
@@ -0,0 +1,22 @@
|
||||
import pytest
|
||||
|
||||
from clink.parsers.base import ParserError
|
||||
from clink.parsers.codex import CodexJSONLParser
|
||||
|
||||
|
||||
def test_codex_parser_success():
    """An agent message plus a usage event yields content and usage metadata."""
    stdout = """
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    parsed = CodexJSONLParser().parse(stdout=stdout, stderr="")

    assert parsed.content == "Hello"
    usage = parsed.metadata["usage"]
    assert usage["output_tokens"] == 5
|
||||
|
||||
|
||||
def test_codex_parser_requires_agent_message():
    """Output that contains only a ``turn.completed`` event raises ``ParserError``."""
    events_without_message = '{"type":"turn.completed"}'
    parser = CodexJSONLParser()

    with pytest.raises(ParserError):
        parser.parse(stdout=events_without_message, stderr="")
|
||||
@@ -5,7 +5,7 @@ import pytest
|
||||
from clink import get_registry
|
||||
from clink.agents import AgentOutput
|
||||
from clink.parsers.base import ParsedCLIResponse
|
||||
from tools.clink import CLinkTool
|
||||
from tools.clink import MAX_RESPONSE_CHARS, CLinkTool
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -55,9 +55,10 @@ async def test_clink_tool_execute(monkeypatch):
|
||||
def test_registry_lists_roles():
    """The registry lists both ``codex`` and ``gemini``, each with a ``default`` role."""
    registry = get_registry()

    available = set(registry.list_clients())
    # Subset check also covers the plain '"gemini" in clients' membership check.
    assert {"codex", "gemini"} <= available

    for cli_name in ("gemini", "codex"):
        assert "default" in registry.list_roles(cli_name)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -66,7 +67,7 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
|
||||
|
||||
async def fake_run(**kwargs):
|
||||
return AgentOutput(
|
||||
parsed=ParsedCLIResponse(content="Default CLI response", metadata={}),
|
||||
parsed=ParsedCLIResponse(content="Default CLI response", metadata={"events": ["foo"]}),
|
||||
sanitized_command=["gemini"],
|
||||
returncode=0,
|
||||
stdout='{"response": "Default CLI response"}',
|
||||
@@ -92,3 +93,87 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
|
||||
payload = json.loads(result[0].text)
|
||||
metadata = payload.get("metadata", {})
|
||||
assert metadata.get("cli_name") == tool._default_cli_name
|
||||
assert metadata.get("events_removed_for_normal") is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_clink_tool_truncates_large_output(monkeypatch):
    """Oversized output that contains a ``<SUMMARY>`` section is reduced to that summary."""
    tool = CLinkTool()

    summary_section = "<SUMMARY>This is the condensed summary.</SUMMARY>"
    # Content exceeds MAX_RESPONSE_CHARS before the summary tag is appended.
    long_text = "A" * (MAX_RESPONSE_CHARS + 500) + summary_section

    class DummyAgent:
        """Agent stub whose ``run`` always returns the oversized response."""

        async def run(self, **kwargs):
            oversized = ParsedCLIResponse(
                content=long_text,
                metadata={"events": ["event1", "event2"]},
            )
            return AgentOutput(
                parsed=oversized,
                sanitized_command=["codex"],
                returncode=0,
                stdout="{}",
                stderr="",
                duration_seconds=0.2,
                parser_name="codex_jsonl",
                output_file_content=None,
            )

    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())

    result = await tool.execute(
        {
            "prompt": "Summarize",
            "cli_name": tool._default_cli_name,
            "files": [],
            "images": [],
        }
    )

    payload = json.loads(result[0].text)
    assert payload["status"] in {"success", "continuation_available"}
    assert payload["content"].strip() == "This is the condensed summary."

    metadata = payload.get("metadata", {})
    assert metadata.get("output_summarized") is True
    assert metadata.get("events_removed_for_normal") is True
    assert metadata.get("output_original_length") == len(long_text)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_clink_tool_truncates_without_summary(monkeypatch):
    """Oversized output with no summary section is replaced by a limit notice."""
    tool = CLinkTool()

    # No <SUMMARY> tag anywhere in the content.
    long_text = "B" * (MAX_RESPONSE_CHARS + 1000)

    class DummyAgent:
        """Agent stub whose ``run`` always returns the oversized response."""

        async def run(self, **kwargs):
            oversized = ParsedCLIResponse(content=long_text, metadata={"events": ["event"]})
            return AgentOutput(
                parsed=oversized,
                sanitized_command=["codex"],
                returncode=0,
                stdout="{}",
                stderr="",
                duration_seconds=0.2,
                parser_name="codex_jsonl",
                output_file_content=None,
            )

    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())

    result = await tool.execute(
        {
            "prompt": "Summarize",
            "cli_name": tool._default_cli_name,
            "files": [],
            "images": [],
        }
    )

    payload = json.loads(result[0].text)
    assert payload["status"] in {"success", "continuation_available"}
    assert "exceeding the configured clink limit" in payload["content"]

    metadata = payload.get("metadata", {})
    assert metadata.get("output_truncated") is True
    assert metadata.get("events_removed_for_normal") is True
    assert metadata.get("output_original_length") == len(long_text)
|
||||
|
||||
Reference in New Issue
Block a user