feat: support for codex as external CLI

fix: improved handling of MCP token limits when processing CLI output
This commit is contained in:
Fahad
2025-10-06 00:39:00 +04:00
parent d052927bac
commit 561e4aaaa8
18 changed files with 480 additions and 31 deletions

View File

@@ -0,0 +1,70 @@
import asyncio
from pathlib import Path
import pytest
from clink.agents.base import CLIAgentError
from clink.agents.codex import CodexAgent
from clink.models import ResolvedCLIClient, ResolvedCLIRole
class DummyProcess:
    """Canned stand-in for the process object the tests patch into ``asyncio.create_subprocess_exec``.

    Stores fixed ``stdout``/``stderr`` payloads and a ``returncode`` so an agent
    can be exercised without spawning a real CLI.
    """

    def __init__(self, *, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):
        self._stdout = stdout
        self._stderr = stderr
        self.returncode = returncode

    async def communicate(self, _input=None):
        """Return the canned ``(stdout, stderr)`` pair.

        ``_input`` is accepted and ignored. It defaults to ``None`` to mirror
        ``asyncio.subprocess.Process.communicate(input=None)``, so the stub
        keeps working whether or not the caller supplies stdin data.
        """
        return self._stdout, self._stderr
@pytest.fixture()
def codex_agent():
    """Provide a ``(CodexAgent, role)`` pair built around a ``codex`` client definition.

    The role is named ``default`` and points at the resolved path of
    ``systemprompts/clink/codex_default.txt``; the client uses the
    ``codex_jsonl`` parser and a 30 second timeout.
    """
    default_role = ResolvedCLIRole(
        name="default",
        prompt_path=Path("systemprompts/clink/codex_default.txt").resolve(),
        role_args=[],
    )
    client_settings = {
        "name": "codex",
        "executable": ["codex"],
        "internal_args": ["exec"],
        "config_args": ["--json", "--dangerously-bypass-approvals-and-sandbox"],
        "env": {},
        "timeout_seconds": 30,
        "parser": "codex_jsonl",
        "roles": {"default": default_role},
        "output_to_file": None,
        "working_dir": None,
    }
    agent = CodexAgent(ResolvedCLIClient(**client_settings))
    return agent, default_role
async def _run_agent_with_process(monkeypatch, agent, role, process):
async def fake_create_subprocess_exec(*_args, **_kwargs):
return process
monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create_subprocess_exec)
return await agent.run(role=role, prompt="do something", files=[], images=[])
@pytest.mark.asyncio
async def test_codex_agent_recovers_jsonl(monkeypatch, codex_agent):
    """Well-formed JSONL on stdout is still parsed when the process exits with code 124."""
    agent, role = codex_agent
    jsonl_output = b"""
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello from Codex"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    failed_process = DummyProcess(returncode=124, stdout=jsonl_output)

    outcome = await _run_agent_with_process(monkeypatch, agent, role, failed_process)

    # The exit code is passed through untouched...
    assert outcome.returncode == 124
    # ...while the message text and usage figures come from the JSONL events.
    assert "Hello from Codex" in outcome.parsed.content
    assert outcome.parsed.metadata["usage"]["output_tokens"] == 5
@pytest.mark.asyncio
async def test_codex_agent_propagates_invalid_json(monkeypatch, codex_agent):
    """Non-JSON stdout combined with exit code 1 surfaces as ``CLIAgentError``."""
    agent, role = codex_agent
    broken_process = DummyProcess(returncode=1, stdout=b"not json")

    with pytest.raises(CLIAgentError):
        await _run_agent_with_process(monkeypatch, agent, role, broken_process)

View File

@@ -0,0 +1,22 @@
import pytest
from clink.parsers.base import ParserError
from clink.parsers.codex import CodexJSONLParser
def test_codex_parser_success():
    """The parser returns the agent message text and keeps the usage figures in metadata."""
    jsonl_output = """
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    codex_parser = CodexJSONLParser()

    result = codex_parser.parse(stdout=jsonl_output, stderr="")

    assert result.content == "Hello"
    assert result.metadata["usage"]["output_tokens"] == 5
def test_codex_parser_requires_agent_message():
    """Output holding only a ``turn.completed`` event is rejected with ``ParserError``."""
    turn_only_output = '{"type":"turn.completed"}'
    codex_parser = CodexJSONLParser()

    with pytest.raises(ParserError):
        codex_parser.parse(stdout=turn_only_output, stderr="")

View File

@@ -5,7 +5,7 @@ import pytest
from clink import get_registry
from clink.agents import AgentOutput
from clink.parsers.base import ParsedCLIResponse
from tools.clink import CLinkTool
from tools.clink import MAX_RESPONSE_CHARS, CLinkTool
@pytest.mark.asyncio
@@ -55,9 +55,10 @@ async def test_clink_tool_execute(monkeypatch):
def test_registry_lists_roles():
    """The registry lists both the ``codex`` and ``gemini`` clients, each with a ``default`` role."""
    registry = get_registry()

    clients = registry.list_clients()
    # A single subset check covers both CLIs, so no separate
    # ``"gemini" in clients`` assertion is needed.
    assert {"codex", "gemini"}.issubset(clients)

    roles = registry.list_roles("gemini")
    assert "default" in roles
    assert "default" in registry.list_roles("codex")
@pytest.mark.asyncio
@@ -66,7 +67,7 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
async def fake_run(**kwargs):
return AgentOutput(
parsed=ParsedCLIResponse(content="Default CLI response", metadata={}),
parsed=ParsedCLIResponse(content="Default CLI response", metadata={"events": ["foo"]}),
sanitized_command=["gemini"],
returncode=0,
stdout='{"response": "Default CLI response"}',
@@ -92,3 +93,87 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
payload = json.loads(result[0].text)
metadata = payload.get("metadata", {})
assert metadata.get("cli_name") == tool._default_cli_name
assert metadata.get("events_removed_for_normal") is True
@pytest.mark.asyncio
async def test_clink_tool_truncates_large_output(monkeypatch):
    """Output longer than ``MAX_RESPONSE_CHARS`` that ends in a ``<SUMMARY>`` section is replaced by the summary.

    The stub agent returns the oversized text together with an ``events``
    metadata list; the test checks the returned content, the summarization
    flags, and that the original length is recorded.
    """
    tool = CLinkTool()
    oversized_text = "A" * (MAX_RESPONSE_CHARS + 500) + "<SUMMARY>This is the condensed summary.</SUMMARY>"

    class DummyAgent:
        async def run(self, **kwargs):
            # Every run yields the same oversized codex result.
            return AgentOutput(
                parsed=ParsedCLIResponse(content=oversized_text, metadata={"events": ["event1", "event2"]}),
                sanitized_command=["codex"],
                returncode=0,
                stdout="{}",
                stderr="",
                duration_seconds=0.2,
                parser_name="codex_jsonl",
                output_file_content=None,
            )

    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())

    response = await tool.execute(
        {
            "prompt": "Summarize",
            "cli_name": tool._default_cli_name,
            "files": [],
            "images": [],
        }
    )
    payload = json.loads(response[0].text)

    assert payload["status"] in {"success", "continuation_available"}
    assert payload["content"].strip() == "This is the condensed summary."

    meta = payload.get("metadata", {})
    assert meta.get("output_summarized") is True
    assert meta.get("events_removed_for_normal") is True
    assert meta.get("output_original_length") == len(oversized_text)
@pytest.mark.asyncio
async def test_clink_tool_truncates_without_summary(monkeypatch):
    """Output longer than ``MAX_RESPONSE_CHARS`` with no ``<SUMMARY>`` section is reported as truncated.

    The returned content must mention that the clink limit was exceeded, and
    the metadata must carry the truncation flags and the original length.
    """
    tool = CLinkTool()
    oversized_text = "B" * (MAX_RESPONSE_CHARS + 1000)

    class DummyAgent:
        async def run(self, **kwargs):
            # Every run yields the same oversized codex result without a summary.
            return AgentOutput(
                parsed=ParsedCLIResponse(content=oversized_text, metadata={"events": ["event"]}),
                sanitized_command=["codex"],
                returncode=0,
                stdout="{}",
                stderr="",
                duration_seconds=0.2,
                parser_name="codex_jsonl",
                output_file_content=None,
            )

    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())

    response = await tool.execute(
        {
            "prompt": "Summarize",
            "cli_name": tool._default_cli_name,
            "files": [],
            "images": [],
        }
    )
    payload = json.loads(response[0].text)

    assert payload["status"] in {"success", "continuation_available"}
    assert "exceeding the configured clink limit" in payload["content"]

    meta = payload.get("metadata", {})
    assert meta.get("output_truncated") is True
    assert meta.get("events_removed_for_normal") is True
    assert meta.get("output_original_length") == len(oversized_text)