feat: support for codex as external CLI

fix: improved handling of MCP token limits when processing CLI output
2025-10-06 00:39:00 +04:00
parent d052927bac
commit 561e4aaaa8
18 changed files with 480 additions and 31 deletions
--- a/tests/test_clink_codex_agent.py
+++ b/tests/test_clink_codex_agent.py
@@ -0,0 +1,70 @@
+import asyncio
+from pathlib import Path
+
+import pytest
+
+from clink.agents.base import CLIAgentError
+from clink.agents.codex import CodexAgent
+from clink.models import ResolvedCLIClient, ResolvedCLIRole
+
+
+class DummyProcess:  # Test double for a subprocess: canned stdout/stderr bytes plus a fixed returncode.
+    def __init__(self, *, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):  # all arguments are keyword-only
+        self._stdout = stdout
+        self._stderr = stderr
+        self.returncode = returncode  # stored as a public attribute
+
+    async def communicate(self, _input):  # the input argument is accepted but never used
+        return self._stdout, self._stderr  # always the canned (stdout, stderr) pair
+
+
+@pytest.fixture()
+def codex_agent():  # Fixture returning a (CodexAgent, role) tuple built around a hand-constructed "codex" client.
+    prompt_path = Path("systemprompts/clink/codex_default.txt").resolve()  # relative path: resolved against the current working directory
+    role = ResolvedCLIRole(name="default", prompt_path=prompt_path, role_args=[])
+    client = ResolvedCLIClient(
+        name="codex",
+        executable=["codex"],
+        internal_args=["exec"],
+        config_args=["--json", "--dangerously-bypass-approvals-and-sandbox"],
+        env={},
+        timeout_seconds=30,
+        parser="codex_jsonl",  # presumably selects the Codex JSONL parser by name -- confirm against the parser registry
+        roles={"default": role},
+        output_to_file=None,
+        working_dir=None,
+    )
+    return CodexAgent(client), role  # tests unpack this as `agent, role`
+
+
+async def _run_agent_with_process(monkeypatch, agent, role, process):  # Run `agent` with subprocess creation patched to hand back `process`.
+    async def fake_create_subprocess_exec(*_args, **_kwargs):  # accepts and ignores whatever arguments it is given
+        return process
+
+    monkeypatch.setattr(asyncio, "create_subprocess_exec", fake_create_subprocess_exec)  # patches the attribute on the asyncio module itself
+    return await agent.run(role=role, prompt="do something", files=[], images=[])  # fixed prompt, no files or images
+
+
+@pytest.mark.asyncio
+async def test_codex_agent_recovers_jsonl(monkeypatch, codex_agent):  # Well-formed JSONL on stdout should still yield a parsed result when the exit code is non-zero.
+    agent, role = codex_agent
+    stdout = b"""
+{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello from Codex"}}
+{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
+"""
+    process = DummyProcess(stdout=stdout, returncode=124)  # non-zero exit code paired with well-formed JSONL
+    result = await _run_agent_with_process(monkeypatch, agent, role, process)
+
+    assert result.returncode == 124  # the process exit code is reported unchanged
+    assert "Hello from Codex" in result.parsed.content  # text carried by the agent_message item
+    assert result.parsed.metadata["usage"]["output_tokens"] == 5  # matches the usage in the turn.completed line
+
+
+@pytest.mark.asyncio
+async def test_codex_agent_propagates_invalid_json(monkeypatch, codex_agent):  # Non-JSON stdout with a failing exit code must raise CLIAgentError.
+    agent, role = codex_agent
+    stdout = b"not json"
+    process = DummyProcess(stdout=stdout, returncode=1)
+
+    with pytest.raises(CLIAgentError):  # the error must surface from the run itself
+        await _run_agent_with_process(monkeypatch, agent, role, process)
--- a/tests/test_clink_parsers.py
+++ b/tests/test_clink_parsers.py
@@ -0,0 +1,22 @@
+import pytest
+
+from clink.parsers.base import ParserError
+from clink.parsers.codex import CodexJSONLParser
+
+
+def test_codex_parser_success():  # An agent_message item plus a turn.completed usage record parse into content and metadata.
+    parser = CodexJSONLParser()
+    stdout = """
+{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello"}}
+{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
+"""
+    parsed = parser.parse(stdout=stdout, stderr="")
+    assert parsed.content == "Hello"  # exactly the agent_message text
+    assert parsed.metadata["usage"]["output_tokens"] == 5  # matches the usage in the turn.completed line
+
+
+def test_codex_parser_requires_agent_message():  # Output without any agent_message item must be rejected with ParserError.
+    parser = CodexJSONLParser()
+    stdout = '{"type":"turn.completed"}'  # a lone turn.completed event, no agent_message item
+    with pytest.raises(ParserError):
+        parser.parse(stdout=stdout, stderr="")
--- a/tests/test_clink_tool.py
+++ b/tests/test_clink_tool.py
@@ -5,7 +5,7 @@ import pytest
 from clink import get_registry
 from clink.agents import AgentOutput
 from clink.parsers.base import ParsedCLIResponse
-from tools.clink import CLinkTool
+from tools.clink import MAX_RESPONSE_CHARS, CLinkTool


@pytest.mark.asyncio
@@ -55,9 +55,10 @@ async def test_clink_tool_execute(monkeypatch):
 def test_registry_lists_roles():
    registry = get_registry()
    clients = registry.list_clients()
-    assert "gemini" in clients
+    assert {"codex", "gemini"}.issubset(set(clients))
    roles = registry.list_roles("gemini")
    assert "default" in roles
+    assert "default" in registry.list_roles("codex")


@pytest.mark.asyncio
@@ -66,7 +67,7 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):

    async def fake_run(**kwargs):
        return AgentOutput(
-            parsed=ParsedCLIResponse(content="Default CLI response", metadata={}),
+            parsed=ParsedCLIResponse(content="Default CLI response", metadata={"events": ["foo"]}),
            sanitized_command=["gemini"],
            returncode=0,
            stdout='{"response": "Default CLI response"}',
@@ -92,3 +93,87 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
    payload = json.loads(result[0].text)
    metadata = payload.get("metadata", {})
    assert metadata.get("cli_name") == tool._default_cli_name
+    assert metadata.get("events_removed_for_normal") is True
+
+
+@pytest.mark.asyncio
+async def test_clink_tool_truncates_large_output(monkeypatch):  # Oversized output that ends with a <SUMMARY> block should come back as just the summary text.
+    tool = CLinkTool()
+
+    summary_section = "<SUMMARY>This is the condensed summary.</SUMMARY>"
+    long_text = "A" * (MAX_RESPONSE_CHARS + 500) + summary_section  # longer than MAX_RESPONSE_CHARS, summary appended at the end
+
+    async def fake_run(**kwargs):  # ignores its arguments and returns a canned AgentOutput
+        return AgentOutput(
+            parsed=ParsedCLIResponse(content=long_text, metadata={"events": ["event1", "event2"]}),
+            sanitized_command=["codex"],
+            returncode=0,
+            stdout="{}",
+            stderr="",
+            duration_seconds=0.2,
+            parser_name="codex_jsonl",
+            output_file_content=None,
+        )
+
+    class DummyAgent:  # stub agent whose run() delegates to fake_run
+        async def run(self, **kwargs):
+            return await fake_run(**kwargs)
+
+    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())  # every client gets the stub agent
+
+    arguments = {
+        "prompt": "Summarize",
+        "cli_name": tool._default_cli_name,
+        "files": [],
+        "images": [],
+    }
+
+    result = await tool.execute(arguments)
+    payload = json.loads(result[0].text)  # the first result item's text is a JSON document
+    assert payload["status"] in {"success", "continuation_available"}  # either status is accepted
+    assert payload["content"].strip() == "This is the condensed summary."  # only the text inside the <SUMMARY> tags
+    metadata = payload.get("metadata", {})
+    assert metadata.get("output_summarized") is True
+    assert metadata.get("events_removed_for_normal") is True  # presumably refers to the "events" list supplied in the parsed metadata -- confirm in tools.clink
+    assert metadata.get("output_original_length") == len(long_text)  # length of the full, untrimmed content
+
+
+@pytest.mark.asyncio
+async def test_clink_tool_truncates_without_summary(monkeypatch):  # Oversized output with no <SUMMARY> block should be reported as truncated.
+    tool = CLinkTool()
+
+    long_text = "B" * (MAX_RESPONSE_CHARS + 1000)  # longer than MAX_RESPONSE_CHARS and contains no summary tags
+
+    async def fake_run(**kwargs):  # ignores its arguments and returns a canned AgentOutput
+        return AgentOutput(
+            parsed=ParsedCLIResponse(content=long_text, metadata={"events": ["event"]}),
+            sanitized_command=["codex"],
+            returncode=0,
+            stdout="{}",
+            stderr="",
+            duration_seconds=0.2,
+            parser_name="codex_jsonl",
+            output_file_content=None,
+        )
+
+    class DummyAgent:  # stub agent whose run() delegates to fake_run
+        async def run(self, **kwargs):
+            return await fake_run(**kwargs)
+
+    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())  # every client gets the stub agent
+
+    arguments = {
+        "prompt": "Summarize",
+        "cli_name": tool._default_cli_name,
+        "files": [],
+        "images": [],
+    }
+
+    result = await tool.execute(arguments)
+    payload = json.loads(result[0].text)  # the first result item's text is a JSON document
+    assert payload["status"] in {"success", "continuation_available"}  # either status is accepted
+    assert "exceeding the configured clink limit" in payload["content"]  # the returned content must mention the limit
+    metadata = payload.get("metadata", {})
+    assert metadata.get("output_truncated") is True
+    assert metadata.get("events_removed_for_normal") is True  # presumably refers to the "events" list supplied in the parsed metadata -- confirm in tools.clink
+    assert metadata.get("output_original_length") == len(long_text)  # length of the full, untrimmed content