feat: support for codex as external CLI
fix: improved handling of MCP token limits when handling CLI output
This commit is contained in:
@@ -20,7 +20,7 @@ Gemini · OpenAI · Anthropic · Grok · Azure · Ollama · OpenRouter · DIAL
|
|||||||
|
|
||||||
The new **[`clink`](docs/tools/clink.md)** (CLI + Link) tool connects external AI CLIs directly into your workflow:
|
The new **[`clink`](docs/tools/clink.md)** (CLI + Link) tool connects external AI CLIs directly into your workflow:
|
||||||
|
|
||||||
- **Connect [Gemini CLI](https://github.com/google-gemini/gemini-cli)** (more CLIs coming soon) to collaborate alongside your primary AI
|
- **Connect external CLIs** like [Gemini CLI](https://github.com/google-gemini/gemini-cli) and [Codex CLI](https://github.com/openai/codex) directly into your workflow
|
||||||
- **Create Role presets** - `planner`, `codereviewer`, `default` for specialized tasks
|
- **Create Role presets** - `planner`, `codereviewer`, `default` for specialized tasks
|
||||||
- **Full CLI capabilities** - Web search, file inspection, latest documentation lookups
|
- **Full CLI capabilities** - Web search, file inspection, latest documentation lookups
|
||||||
- **Seamless continuity** - Gemini participates as a first-class member with full conversation context
|
- **Seamless continuity** - Gemini participates as a first-class member with full conversation context
|
||||||
|
|||||||
@@ -5,10 +5,12 @@ from __future__ import annotations
|
|||||||
from clink.models import ResolvedCLIClient
|
from clink.models import ResolvedCLIClient
|
||||||
|
|
||||||
from .base import AgentOutput, BaseCLIAgent, CLIAgentError
|
from .base import AgentOutput, BaseCLIAgent, CLIAgentError
|
||||||
|
from .codex import CodexAgent
|
||||||
from .gemini import GeminiAgent
|
from .gemini import GeminiAgent
|
||||||
|
|
||||||
_AGENTS: dict[str, type[BaseCLIAgent]] = {
|
_AGENTS: dict[str, type[BaseCLIAgent]] = {
|
||||||
"gemini": GeminiAgent,
|
"gemini": GeminiAgent,
|
||||||
|
"codex": CodexAgent,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
41
clink/agents/codex.py
Normal file
41
clink/agents/codex.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
"""Codex-specific CLI agent hooks."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from clink.models import ResolvedCLIClient
|
||||||
|
from clink.parsers.base import ParserError
|
||||||
|
|
||||||
|
from .base import AgentOutput, BaseCLIAgent
|
||||||
|
|
||||||
|
|
||||||
|
class CodexAgent(BaseCLIAgent):
    """CLI agent for Codex that can salvage a response from a failed run.

    The only Codex-specific behaviour is the recovery hook below, which
    re-parses whatever the process printed before giving up on the run.
    """

    def __init__(self, client: ResolvedCLIClient):
        super().__init__(client)

    def _recover_from_error(
        self,
        *,
        returncode: int,
        stdout: str,
        stderr: str,
        sanitized_command: list[str],
        duration_seconds: float,
        output_file_content: str | None,
    ) -> AgentOutput | None:
        """Try to build a usable result from the captured process output.

        NOTE(review): presumably invoked by ``BaseCLIAgent`` when the CLI run
        is treated as failed (e.g. non-zero exit) -- confirm in the base class.

        Args:
            returncode: Exit status reported by the CLI process.
            stdout: Captured standard output, handed to the parser.
            stderr: Captured standard error, handed to the parser.
            sanitized_command: Command line echoed back on the result.
            duration_seconds: Run duration echoed back on the result.
            output_file_content: Optional output-file text; it is passed
                through unchanged and is not parsed here.

        Returns:
            An ``AgentOutput`` wrapping the parsed response when the parser
            accepts the output, otherwise ``None``.
        """
        parser = self._parser
        try:
            recovered = parser.parse(stdout, stderr)
        except ParserError:
            # Nothing salvageable in the output; signal "no recovery".
            return None
        else:
            return AgentOutput(
                parsed=recovered,
                sanitized_command=sanitized_command,
                returncode=returncode,
                stdout=stdout,
                stderr=stderr,
                duration_seconds=duration_seconds,
                parser_name=parser.name,
                output_file_content=output_file_content,
            )
|
||||||
@@ -33,4 +33,10 @@ INTERNAL_DEFAULTS: dict[str, CLIInternalDefaults] = {
|
|||||||
default_role_prompt="systemprompts/clink/gemini_default.txt",
|
default_role_prompt="systemprompts/clink/gemini_default.txt",
|
||||||
runner="gemini",
|
runner="gemini",
|
||||||
),
|
),
|
||||||
|
"codex": CLIInternalDefaults(
|
||||||
|
parser="codex_jsonl",
|
||||||
|
additional_args=["exec"],
|
||||||
|
default_role_prompt="systemprompts/clink/codex_default.txt",
|
||||||
|
runner="codex",
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from .base import BaseParser, ParsedCLIResponse, ParserError
|
from .base import BaseParser, ParsedCLIResponse, ParserError
|
||||||
|
from .codex import CodexJSONLParser
|
||||||
from .gemini import GeminiJSONParser
|
from .gemini import GeminiJSONParser
|
||||||
|
|
||||||
_PARSER_CLASSES: dict[str, type[BaseParser]] = {
|
_PARSER_CLASSES: dict[str, type[BaseParser]] = {
|
||||||
|
CodexJSONLParser.name: CodexJSONLParser,
|
||||||
GeminiJSONParser.name: GeminiJSONParser,
|
GeminiJSONParser.name: GeminiJSONParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
63
clink/parsers/codex.py
Normal file
63
clink/parsers/codex.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
"""Parser for Codex CLI JSONL output."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .base import BaseParser, ParsedCLIResponse, ParserError
|
||||||
|
|
||||||
|
|
||||||
|
class CodexJSONLParser(BaseParser):
    """Parse stdout emitted by `codex exec --json`.

    The output is read as JSON Lines: one JSON object per line. Lines that
    are blank, do not start with ``{``, or fail to decode are ignored.
    """

    name = "codex_jsonl"

    def parse(self, stdout: str, stderr: str) -> ParsedCLIResponse:
        """Extract the agent's reply and metadata from a JSONL event stream.

        Recognised events:

        * ``item.completed`` whose ``item`` has ``type == "agent_message"``:
          the item's non-blank ``text`` becomes part of the response content.
        * ``error``: the non-blank ``message`` is collected as an error.
        * ``turn.completed``: a dict-valued ``usage`` is remembered; the last
          one seen wins.

        When no agent message was produced but errors were, the error
        messages are used as the content instead.

        Args:
            stdout: Raw standard output of the CLI (may be empty or ``None``).
            stderr: Raw standard error; stored in metadata when non-blank.

        Returns:
            ``ParsedCLIResponse`` whose content joins all messages with blank
            lines, and whose metadata holds ``events`` plus, when present,
            ``errors``, ``usage`` and ``stderr``.

        Raises:
            ParserError: If the stream contained neither an agent message nor
                an error message.
        """
        events: list[dict[str, Any]] = []
        agent_messages: list[str] = []
        errors: list[str] = []
        usage: dict[str, Any] | None = None

        for raw_line in (stdout or "").splitlines():
            line = raw_line.strip()
            # Blank lines and non-JSON chatter never start with "{".
            if not line.startswith("{"):
                continue
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue

            events.append(event)
            event_type = event.get("type")
            if event_type == "item.completed":
                item = event.get("item")
                # Guard against a malformed, non-object "item": calling .get()
                # on it would raise AttributeError, which callers that only
                # handle ParserError would not catch.
                if isinstance(item, dict) and item.get("type") == "agent_message":
                    text = item.get("text")
                    if isinstance(text, str) and text.strip():
                        agent_messages.append(text.strip())
            elif event_type == "error":
                message = event.get("message")
                if isinstance(message, str) and message.strip():
                    errors.append(message.strip())
            elif event_type == "turn.completed":
                turn_usage = event.get("usage")
                if isinstance(turn_usage, dict):
                    usage = turn_usage

        # Surface CLI-reported errors as the response when there is no reply.
        if not agent_messages and errors:
            agent_messages.extend(errors)

        if not agent_messages:
            raise ParserError("Codex CLI JSONL output did not include an agent_message item")

        content = "\n\n".join(agent_messages).strip()
        metadata: dict[str, Any] = {"events": events}
        if errors:
            metadata["errors"] = errors
        if usage:
            metadata["usage"] = usage
        if stderr and stderr.strip():
            metadata["stderr"] = stderr.strip()

        return ParsedCLIResponse(content=content, metadata=metadata)
|
||||||
23
conf/cli_clients/codex.json
Normal file
23
conf/cli_clients/codex.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"name": "codex",
|
||||||
|
"command": "codex",
|
||||||
|
"additional_args": [
|
||||||
|
"--json",
|
||||||
|
"--dangerously-bypass-approvals-and-sandbox"
|
||||||
|
],
|
||||||
|
"env": {},
|
||||||
|
"roles": {
|
||||||
|
"default": {
|
||||||
|
"prompt_path": "systemprompts/clink/codex_default.txt",
|
||||||
|
"role_args": []
|
||||||
|
},
|
||||||
|
"planner": {
|
||||||
|
"prompt_path": "systemprompts/clink/codex_planner.txt",
|
||||||
|
"role_args": []
|
||||||
|
},
|
||||||
|
"codereviewer": {
|
||||||
|
"prompt_path": "systemprompts/clink/codex_codereviewer.txt",
|
||||||
|
"role_args": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,9 +1,11 @@
|
|||||||
# Clink Tool - CLI-to-CLI Bridge
|
# Clink Tool - CLI-to-CLI Bridge
|
||||||
|
|
||||||
**Bring other AI CLIs into your workflow - Gemini (for now), Qwen (soon), Codex (soon), and more work alongside Claude without context switching**
|
**Bring other AI CLIs into your workflow – Gemini, Codex (with more coming) – without context switching**
|
||||||
|
|
||||||
The `clink` tool lets you leverage external AI CLIs (like Gemini CLI, etc.) directly within your current conversation. Instead of switching between terminal windows or losing context, you can ask Gemini to plan a complex migration, review code with specialized prompts, or answer questions - all while staying in your primary Claude Code workflow.
|
The `clink` tool lets you leverage external AI CLIs (like Gemini CLI, etc.) directly within your current conversation. Instead of switching between terminal windows or losing context, you can ask Gemini to plan a complex migration, review code with specialized prompts, or answer questions - all while staying in your primary Claude Code workflow.
|
||||||
|
|
||||||
|
> **CAUTION**: Clink launches real CLI agents with their safety prompts disabled (`--yolo`, `--dangerously-skip-permissions`, `--dangerously-bypass-approvals-and-sandbox`) so they can edit files and run tools autonomously via MCP. If that’s more access than you want, remove those flags—the CLI can still open/read files and report findings, it just won’t auto-apply edits. You can also tighten role prompts or system prompts with stop-words/guardrails, or disable clink entirely. Otherwise, keep the shipped presets confined to workspaces you fully trust.
|
||||||
|
|
||||||
## Why Use Clink (CLI + Link)?
|
## Why Use Clink (CLI + Link)?
|
||||||
|
|
||||||
**Scenario 1**: You're working in Claude Code and want Gemini's 1M context window to analyze a massive codebase, or you need Gemini's latest web search to validate documentation.
|
**Scenario 1**: You're working in Claude Code and want Gemini's 1M context window to analyze a massive codebase, or you need Gemini's latest web search to validate documentation.
|
||||||
@@ -50,7 +52,7 @@ Gemini receives the full conversation context from `consensus` including the con
|
|||||||
"Use clink role='codereviewer' to review auth.py for security issues"
|
"Use clink role='codereviewer' to review auth.py for security issues"
|
||||||
```
|
```
|
||||||
|
|
||||||
You can make your own custom roles in `conf/cli_clients/gemini.json` or tweak existing ones.
|
You can make your own custom roles in `conf/cli_clients/` or tweak any of the shipped presets.
|
||||||
|
|
||||||
## Tool Parameters
|
## Tool Parameters
|
||||||
|
|
||||||
@@ -108,25 +110,19 @@ then codereview to verify the implementation"
|
|||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Clink configurations live in `conf/cli_clients/`. The default `gemini.json` includes:
|
Clink configurations live in `conf/cli_clients/`. We ship presets for the supported CLIs:
|
||||||
|
|
||||||
```json
|
- `gemini.json` – runs `gemini --telemetry false --yolo -o json`
|
||||||
{
|
- `claude.json` – runs `claude --print --output-format json --permission-mode bypassPermissions --dangerously-skip-permissions`
|
||||||
"name": "gemini",
|
- `codex.json` – runs `codex exec --json --dangerously-bypass-approvals-and-sandbox`
|
||||||
"command": "gemini",
|
|
||||||
"additional_args": ["--telemetry", "false", "--yolo"],
|
|
||||||
"roles": {
|
|
||||||
"planner": {
|
|
||||||
"prompt_path": "systemprompts/clink/gemini_planner.txt",
|
|
||||||
"role_args": []
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
> **Why `--yolo`?** The Gemini CLI currently requires automatic approvals to execute its own tools (for example `run_shell_command`). Without the flag it errors with `Tool "run_shell_command" not found in registry`. See [issue #5382](https://github.com/google-gemini/gemini-cli/issues/5382) for more details.
|
> **CAUTION**: These flags intentionally bypass each CLI's safety prompts so they can edit files or launch tools autonomously via MCP. Only enable them in trusted sandboxes and tailor role prompts or CLI configs if you need more guardrails.
|
||||||
|
|
||||||
**Adding new CLIs**: Drop a JSON config into `conf/cli_clients/` and create role prompts in `systemprompts/clink/`.
|
Each preset points to role-specific prompts in `systemprompts/clink/`. Duplicate those files to add more roles or adjust CLI flags.
|
||||||
|
|
||||||
|
> **Why `--yolo` for Gemini?** The Gemini CLI currently requires automatic approvals to execute its own tools (for example `run_shell_command`). Without the flag it errors with `Tool "run_shell_command" not found in registry`. See [issue #5382](https://github.com/google-gemini/gemini-cli/issues/5382) for more details.
|
||||||
|
|
||||||
|
**Adding new CLIs**: Drop a JSON config into `conf/cli_clients/`, create role prompts in `systemprompts/clink/`, and register a parser/agent if the CLI outputs a new format.
|
||||||
|
|
||||||
## When to Use Clink vs Other Tools
|
## When to Use Clink vs Other Tools
|
||||||
|
|
||||||
@@ -135,11 +131,12 @@ Clink configurations live in `conf/cli_clients/`. The default `gemini.json` incl
|
|||||||
- **Use `planner`** for: Zen's native planning workflows with step validation
|
- **Use `planner`** for: Zen's native planning workflows with step validation
|
||||||
- **Use `codereview`** for: Zen's structured code review with severity levels
|
- **Use `codereview`** for: Zen's structured code review with severity levels
|
||||||
|
|
||||||
**CAUTION**: `clink` opens additional doors but not without additional risk. Running Gemini with `--yolo` auto-approves CLI actions (needed for shell edits) and should only be used when you trust the target workspace. Review your role configuration and consider tightening prompts if you need additional guardrails.
|
|
||||||
|
|
||||||
## Setup Requirements
|
## Setup Requirements
|
||||||
|
|
||||||
Ensure [gemini](https://github.com/google-gemini/gemini-cli) is installed and configured.
|
Ensure the relevant CLI is installed and configured:
|
||||||
|
|
||||||
|
- [Gemini CLI](https://github.com/google-gemini/gemini-cli)
|
||||||
|
- [Codex CLI](https://github.com/openai/codex)
|
||||||
|
|
||||||
## Related Guides
|
## Related Guides
|
||||||
|
|
||||||
|
|||||||
9
systemprompts/clink/codex_codereviewer.txt
Normal file
9
systemprompts/clink/codex_codereviewer.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
/review You are the Codex CLI code reviewer for the Clink tool.
|
||||||
|
|
||||||
|
- Inspect any relevant files directly—use your full repository access, run linters or tests as needed, and mention key commands when they inform your findings.
|
||||||
|
- Report issues in severity order (Critical, High, Medium, Low) spanning security, correctness, performance, and maintainability while staying within scope.
|
||||||
|
- Keep the review succinct—prioritize the highest-impact findings, avoid extensive code dumps, and summarize recommendations clearly.
|
||||||
|
- For each issue cite precise references (file:line plus a short excerpt or symbol name), describe the impact, and recommend a concrete fix or mitigation.
|
||||||
|
- Recognize positive practices worth keeping so peers understand what to preserve.
|
||||||
|
- Keep feedback focused and actionable—avoid speculative refactors or unrelated suggestions.
|
||||||
|
- Always conclude with `<SUMMARY>...</SUMMARY>` capturing the top issues, fixes, and positives in ≤500 words.
|
||||||
8
systemprompts/clink/codex_default.txt
Normal file
8
systemprompts/clink/codex_default.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
You are the Codex CLI agent operating inside the Zen MCP server with full repository access.
|
||||||
|
|
||||||
|
- Use the terminal to inspect files, run scripts, and gather context before answering; quote exact paths, symbols, or commands when relevant.
|
||||||
|
- Provide concise, actionable responses in Markdown for engineers working from the CLI, and call out natural next steps when helpful.
|
||||||
|
- Keep output tight—prefer summaries and short bullet lists, and avoid quoting large chunks of source unless absolutely required.
|
||||||
|
- State any assumptions, missing inputs, or follow-up checks that would improve confidence in your answer.
|
||||||
|
- If the requested action is unsafe or unsupported, explain why and recommend a safer alternative or mitigation.
|
||||||
|
- Always conclude with `<SUMMARY>...</SUMMARY>` offering a compressed (≤500 words) recap of key findings and recommended actions.
|
||||||
9
systemprompts/clink/codex_planner.txt
Normal file
9
systemprompts/clink/codex_planner.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
You are the Codex CLI planner for the Clink tool.
|
||||||
|
|
||||||
|
- Respond with JSON only: follow the planning schema fields (status, step_number, total_steps, metadata, plan_summary, etc.) and use the files_required_to_continue JSON when you need more context.
|
||||||
|
- Inspect any relevant files, scripts, or docs via the CLI before detailing the plan; branch into alternatives when multiple strategies could work.
|
||||||
|
- Break objectives into numbered phases with dependencies, validation gates, risks, mitigations, and explicit next actions.
|
||||||
|
- Keep planning output concise—limit each step to essential actions and avoid duplicating source text.
|
||||||
|
- Cite concrete references—file paths, line numbers, function or class names—whenever you reference source context.
|
||||||
|
- When planning completes, deliver an ASCII-first summary with checklists and guidance another engineer can execute confidently.
|
||||||
|
- Always finish with `<SUMMARY>...</SUMMARY>` delivering a ≤500-word recap of phases, risks, and immediate next steps.
|
||||||
@@ -2,6 +2,8 @@ You are the Gemini CLI code reviewer for the Clink tool.
|
|||||||
|
|
||||||
- Inspect any relevant files directly—use your full repository access, gather whatever context you require before writing feedback.
|
- Inspect any relevant files directly—use your full repository access, gather whatever context you require before writing feedback.
|
||||||
- Report findings in severity order (Critical, High, Medium, Low) across security, correctness, performance, maintainability; stay anchored to the current change scope.
|
- Report findings in severity order (Critical, High, Medium, Low) across security, correctness, performance, maintainability; stay anchored to the current change scope.
|
||||||
|
- Keep the review concise—surface the most important issues first, avoid exhaustive code excerpts, and summarize takeaways clearly.
|
||||||
- For each issue cite precise references (full-file-path:line plus a short excerpt or symbol name), explain the impact, and propose a concrete fix or mitigation.
|
- For each issue cite precise references (full-file-path:line plus a short excerpt or symbol name), explain the impact, and propose a concrete fix or mitigation.
|
||||||
- Call out positive practices worth retaining so peers know what to preserve.
|
- Call out positive practices worth retaining so peers know what to preserve.
|
||||||
- Keep feedback precise, actionable, and tailored—avoid speculative refactors or unrelated suggestions.
|
- Keep feedback precise, actionable, and tailored—avoid speculative refactors or unrelated suggestions.
|
||||||
|
- Always finish with `<SUMMARY>...</SUMMARY>` capturing the top risks, recommended fixes, and key positives in ≤500 words.
|
||||||
|
|||||||
@@ -2,5 +2,7 @@ You are the Gemini CLI agent operating inside the Zen MCP server with full repos
|
|||||||
|
|
||||||
- Use your tools to inspect files, and gather context before responding; quote exact paths, symbols, or commands when they matter.
|
- Use your tools to inspect files, and gather context before responding; quote exact paths, symbols, or commands when they matter.
|
||||||
- Produce clear, direct answers in Markdown tailored to engineers working from the CLI, and highlight actionable next steps.
|
- Produce clear, direct answers in Markdown tailored to engineers working from the CLI, and highlight actionable next steps.
|
||||||
|
- Keep output focused and concise—prefer summaries and short bullet lists over long prose; include only information essential to the request.
|
||||||
- Call out assumptions, missing inputs, or follow-up work that would improve confidence in the result.
|
- Call out assumptions, missing inputs, or follow-up work that would improve confidence in the result.
|
||||||
- If a request is unsafe, infeasible, or violates policy, explain why and provide a safer alternative or mitigation.
|
- If a request is unsafe, infeasible, or violates policy, explain why and provide a safer alternative or mitigation.
|
||||||
|
- Always conclude with `<SUMMARY>...</SUMMARY>` containing a terse (≤500 words) recap of key findings and next steps.
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ You are the Gemini CLI planner for the Clink tool.
|
|||||||
|
|
||||||
- Use your full repository access to inspect any relevant files, scripts, or docs before detailing the plan.
|
- Use your full repository access to inspect any relevant files, scripts, or docs before detailing the plan.
|
||||||
- Break objectives into numbered phases with dependencies, validation gates, alternatives, and clear next actions; highlight risks with mitigations.
|
- Break objectives into numbered phases with dependencies, validation gates, alternatives, and clear next actions; highlight risks with mitigations.
|
||||||
|
- Keep planning responses compact—use concise numbered sections and avoid repeating context; limit summaries to the essentials another engineer must execute.
|
||||||
- Cite concrete references—file paths, line numbers, function or class names—whenever you point to source context.
|
- Cite concrete references—file paths, line numbers, function or class names—whenever you point to source context.
|
||||||
- Branch when multiple viable strategies exist and explain when to choose each.
|
- Branch when multiple viable strategies exist and explain when to choose each.
|
||||||
- When planning completes, present a polished summary with ASCII visuals, checklists, and guidance another engineer can execute.
|
- When planning completes, present a polished summary with ASCII visuals, checklists, and guidance another engineer can execute.
|
||||||
|
- Always end with `<SUMMARY>...</SUMMARY>` holding a compressed (≤500 words) overview of phases, risks, and immediate next actions.
|
||||||
|
|||||||
70
tests/test_clink_codex_agent.py
Normal file
70
tests/test_clink_codex_agent.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from clink.agents.base import CLIAgentError
|
||||||
|
from clink.agents.codex import CodexAgent
|
||||||
|
from clink.models import ResolvedCLIClient, ResolvedCLIRole
|
||||||
|
|
||||||
|
|
||||||
|
class DummyProcess:
    """Minimal stand-in for an asyncio subprocess handle used by these tests."""

    def __init__(self, *, stdout: bytes = b"", stderr: bytes = b"", returncode: int = 0):
        # Canned values handed back verbatim by communicate().
        self.returncode = returncode
        self._stdout, self._stderr = stdout, stderr

    async def communicate(self, _input):
        """Ignore the supplied input and return the canned (stdout, stderr) pair."""
        captured = (self._stdout, self._stderr)
        return captured
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def codex_agent():
    """Provide a ``(CodexAgent, default_role)`` pair configured like the shipped preset."""
    default_role = ResolvedCLIRole(
        name="default",
        prompt_path=Path("systemprompts/clink/codex_default.txt").resolve(),
        role_args=[],
    )
    resolved_client = ResolvedCLIClient(
        name="codex",
        executable=["codex"],
        internal_args=["exec"],
        config_args=["--json", "--dangerously-bypass-approvals-and-sandbox"],
        env={},
        timeout_seconds=30,
        parser="codex_jsonl",
        roles={"default": default_role},
        output_to_file=None,
        working_dir=None,
    )
    return CodexAgent(resolved_client), default_role
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_agent_with_process(monkeypatch, agent, role, process):
    """Run ``agent`` with subprocess creation patched to hand back ``process``.

    ``asyncio.create_subprocess_exec`` is replaced through ``monkeypatch`` by
    a stub that ignores its arguments, so no real CLI is launched.
    """

    async def _spawn_stub(*_args, **_kwargs):
        return process

    monkeypatch.setattr(asyncio, "create_subprocess_exec", _spawn_stub)
    outcome = await agent.run(role=role, prompt="do something", files=[], images=[])
    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_codex_agent_recovers_jsonl(monkeypatch, codex_agent):
    """A non-zero exit with parseable JSONL on stdout still yields a parsed result."""
    agent, role = codex_agent
    jsonl_stream = b"""
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello from Codex"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    result = await _run_agent_with_process(
        monkeypatch,
        agent,
        role,
        DummyProcess(stdout=jsonl_stream, returncode=124),
    )

    # The original exit status is preserved alongside the recovered content.
    assert result.returncode == 124
    assert "Hello from Codex" in result.parsed.content
    assert result.parsed.metadata["usage"]["output_tokens"] == 5
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_codex_agent_propagates_invalid_json(monkeypatch, codex_agent):
    """A failed run whose stdout holds no JSONL events surfaces as CLIAgentError."""
    agent, role = codex_agent
    failing_process = DummyProcess(stdout=b"not json", returncode=1)

    with pytest.raises(CLIAgentError):
        await _run_agent_with_process(monkeypatch, agent, role, failing_process)
|
||||||
22
tests/test_clink_parsers.py
Normal file
22
tests/test_clink_parsers.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from clink.parsers.base import ParserError
|
||||||
|
from clink.parsers.codex import CodexJSONLParser
|
||||||
|
|
||||||
|
|
||||||
|
def test_codex_parser_success():
    """An agent_message item plus a turn.completed event parse into content and usage."""
    jsonl_stream = """
{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Hello"}}
{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}
"""
    result = CodexJSONLParser().parse(stdout=jsonl_stream, stderr="")

    assert result.content == "Hello"
    assert result.metadata["usage"]["output_tokens"] == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_codex_parser_requires_agent_message():
    """A stream with neither agent messages nor errors is rejected with ParserError."""
    codex_parser = CodexJSONLParser()
    usage_only_stream = '{"type":"turn.completed"}'

    with pytest.raises(ParserError):
        codex_parser.parse(stdout=usage_only_stream, stderr="")
|
||||||
@@ -5,7 +5,7 @@ import pytest
|
|||||||
from clink import get_registry
|
from clink import get_registry
|
||||||
from clink.agents import AgentOutput
|
from clink.agents import AgentOutput
|
||||||
from clink.parsers.base import ParsedCLIResponse
|
from clink.parsers.base import ParsedCLIResponse
|
||||||
from tools.clink import CLinkTool
|
from tools.clink import MAX_RESPONSE_CHARS, CLinkTool
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -55,9 +55,10 @@ async def test_clink_tool_execute(monkeypatch):
|
|||||||
def test_registry_lists_roles():
|
def test_registry_lists_roles():
|
||||||
registry = get_registry()
|
registry = get_registry()
|
||||||
clients = registry.list_clients()
|
clients = registry.list_clients()
|
||||||
assert "gemini" in clients
|
assert {"codex", "gemini"}.issubset(set(clients))
|
||||||
roles = registry.list_roles("gemini")
|
roles = registry.list_roles("gemini")
|
||||||
assert "default" in roles
|
assert "default" in roles
|
||||||
|
assert "default" in registry.list_roles("codex")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@@ -66,7 +67,7 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
|
|||||||
|
|
||||||
async def fake_run(**kwargs):
|
async def fake_run(**kwargs):
|
||||||
return AgentOutput(
|
return AgentOutput(
|
||||||
parsed=ParsedCLIResponse(content="Default CLI response", metadata={}),
|
parsed=ParsedCLIResponse(content="Default CLI response", metadata={"events": ["foo"]}),
|
||||||
sanitized_command=["gemini"],
|
sanitized_command=["gemini"],
|
||||||
returncode=0,
|
returncode=0,
|
||||||
stdout='{"response": "Default CLI response"}',
|
stdout='{"response": "Default CLI response"}',
|
||||||
@@ -92,3 +93,87 @@ async def test_clink_tool_defaults_to_first_cli(monkeypatch):
|
|||||||
payload = json.loads(result[0].text)
|
payload = json.loads(result[0].text)
|
||||||
metadata = payload.get("metadata", {})
|
metadata = payload.get("metadata", {})
|
||||||
assert metadata.get("cli_name") == tool._default_cli_name
|
assert metadata.get("cli_name") == tool._default_cli_name
|
||||||
|
assert metadata.get("events_removed_for_normal") is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_clink_tool_truncates_large_output(monkeypatch):
    """Oversized output that carries a <SUMMARY> block is replaced by that summary."""
    tool = CLinkTool()

    summary_section = "<SUMMARY>This is the condensed summary.</SUMMARY>"
    long_text = "A" * (MAX_RESPONSE_CHARS + 500) + summary_section

    class DummyAgent:
        """Agent stub that returns the oversized response without running a CLI."""

        async def run(self, **kwargs):
            oversized = ParsedCLIResponse(content=long_text, metadata={"events": ["event1", "event2"]})
            return AgentOutput(
                parsed=oversized,
                sanitized_command=["codex"],
                returncode=0,
                stdout="{}",
                stderr="",
                duration_seconds=0.2,
                parser_name="codex_jsonl",
                output_file_content=None,
            )

    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())

    result = await tool.execute(
        {
            "prompt": "Summarize",
            "cli_name": tool._default_cli_name,
            "files": [],
            "images": [],
        }
    )
    payload = json.loads(result[0].text)
    metadata = payload.get("metadata", {})

    assert payload["status"] in {"success", "continuation_available"}
    assert payload["content"].strip() == "This is the condensed summary."
    assert metadata.get("output_summarized") is True
    assert metadata.get("events_removed_for_normal") is True
    assert metadata.get("output_original_length") == len(long_text)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_clink_tool_truncates_without_summary(monkeypatch):
    """Oversized output with no <SUMMARY> block is replaced by a truncation notice."""
    tool = CLinkTool()

    long_text = "B" * (MAX_RESPONSE_CHARS + 1000)

    class DummyAgent:
        """Agent stub that returns the oversized response without running a CLI."""

        async def run(self, **kwargs):
            oversized = ParsedCLIResponse(content=long_text, metadata={"events": ["event"]})
            return AgentOutput(
                parsed=oversized,
                sanitized_command=["codex"],
                returncode=0,
                stdout="{}",
                stderr="",
                duration_seconds=0.2,
                parser_name="codex_jsonl",
                output_file_content=None,
            )

    monkeypatch.setattr("tools.clink.create_agent", lambda client: DummyAgent())

    result = await tool.execute(
        {
            "prompt": "Summarize",
            "cli_name": tool._default_cli_name,
            "files": [],
            "images": [],
        }
    )
    payload = json.loads(result[0].text)
    metadata = payload.get("metadata", {})

    assert payload["status"] in {"success", "continuation_available"}
    assert "exceeding the configured clink limit" in payload["content"]
    assert metadata.get("output_truncated") is True
    assert metadata.get("events_removed_for_normal") is True
    assert metadata.get("output_original_length") == len(long_text)
|
||||||
|
|||||||
118
tools/clink.py
118
tools/clink.py
@@ -3,6 +3,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -20,6 +21,9 @@ from tools.simple.base import SchemaBuilder, SimpleTool
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
MAX_RESPONSE_CHARS = 20_000
|
||||||
|
SUMMARY_PATTERN = re.compile(r"<SUMMARY>(.*?)</SUMMARY>", re.IGNORECASE | re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
class CLinkRequest(BaseModel):
|
class CLinkRequest(BaseModel):
|
||||||
"""Request model for clink tool."""
|
"""Request model for clink tool."""
|
||||||
@@ -61,7 +65,10 @@ class CLinkTool(SimpleTool):
|
|||||||
self._cli_names = self._registry.list_clients()
|
self._cli_names = self._registry.list_clients()
|
||||||
self._role_map: dict[str, list[str]] = {name: self._registry.list_roles(name) for name in self._cli_names}
|
self._role_map: dict[str, list[str]] = {name: self._registry.list_roles(name) for name in self._cli_names}
|
||||||
self._all_roles: list[str] = sorted({role for roles in self._role_map.values() for role in roles})
|
self._all_roles: list[str] = sorted({role for roles in self._role_map.values() for role in roles})
|
||||||
self._default_cli_name: str | None = self._cli_names[0] if self._cli_names else None
|
if "gemini" in self._cli_names:
|
||||||
|
self._default_cli_name = "gemini"
|
||||||
|
else:
|
||||||
|
self._default_cli_name = self._cli_names[0] if self._cli_names else None
|
||||||
self._active_system_prompt: str = ""
|
self._active_system_prompt: str = ""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
@@ -200,6 +207,15 @@ class CLinkTool(SimpleTool):
|
|||||||
)
|
)
|
||||||
return [TextContent(type="text", text=error_output.model_dump_json())]
|
return [TextContent(type="text", text=error_output.model_dump_json())]
|
||||||
|
|
||||||
|
metadata = self._build_success_metadata(client_config, role_config, result)
|
||||||
|
metadata = self._prune_metadata(metadata, client_config, reason="normal")
|
||||||
|
|
||||||
|
content, metadata = self._apply_output_limit(
|
||||||
|
client_config,
|
||||||
|
result.parsed.content,
|
||||||
|
metadata,
|
||||||
|
)
|
||||||
|
|
||||||
model_info = {
|
model_info = {
|
||||||
"provider": client_config.name,
|
"provider": client_config.name,
|
||||||
"model_name": result.parsed.metadata.get("model_used"),
|
"model_name": result.parsed.metadata.get("model_used"),
|
||||||
@@ -207,16 +223,14 @@ class CLinkTool(SimpleTool):
|
|||||||
|
|
||||||
if continuation_id:
|
if continuation_id:
|
||||||
try:
|
try:
|
||||||
self._record_assistant_turn(continuation_id, result.parsed.content, request, model_info)
|
self._record_assistant_turn(continuation_id, content, request, model_info)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("Failed to record assistant turn for continuation %s", continuation_id, exc_info=True)
|
logger.debug("Failed to record assistant turn for continuation %s", continuation_id, exc_info=True)
|
||||||
|
|
||||||
metadata = self._build_success_metadata(client_config, role_config, result)
|
|
||||||
|
|
||||||
continuation_offer = self._create_continuation_offer(request, model_info)
|
continuation_offer = self._create_continuation_offer(request, model_info)
|
||||||
if continuation_offer:
|
if continuation_offer:
|
||||||
tool_output = self._create_continuation_offer_response(
|
tool_output = self._create_continuation_offer_response(
|
||||||
result.parsed.content,
|
content,
|
||||||
continuation_offer,
|
continuation_offer,
|
||||||
request,
|
request,
|
||||||
model_info,
|
model_info,
|
||||||
@@ -225,7 +239,7 @@ class CLinkTool(SimpleTool):
|
|||||||
else:
|
else:
|
||||||
tool_output = ToolOutput(
|
tool_output = ToolOutput(
|
||||||
status="success",
|
status="success",
|
||||||
content=result.parsed.content,
|
content=content,
|
||||||
content_type="text",
|
content_type="text",
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
@@ -286,6 +300,98 @@ class CLinkTool(SimpleTool):
|
|||||||
merged.update(extra)
|
merged.update(extra)
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
def _apply_output_limit(
    self,
    client: ResolvedCLIClient,
    content: str,
    metadata: dict[str, Any],
) -> tuple[str, dict[str, Any]]:
    """Keep CLI output within ``MAX_RESPONSE_CHARS``.

    Three outcomes are possible:

    * ``content`` fits within the limit: it is returned unchanged together
      with the ``metadata`` object that was passed in.
    * ``content`` is too long but contains a ``<SUMMARY>`` block: the summary
      (cut to the limit if necessary) replaces the content, and a pruned copy
      of the metadata is annotated with ``output_summarized`` and length fields.
    * ``content`` is too long and has no usable summary: an explanatory
      message with a leading excerpt replaces the content, and a pruned copy
      of the metadata is annotated with ``output_truncated`` and length fields.

    Args:
        client: Resolved CLI client; its ``name`` appears in logs and in the
            truncation message.
        content: Parsed text returned by the CLI.
        metadata: Metadata gathered for the response so far.

    Returns:
        A ``(content, metadata)`` tuple to use in the tool response.
    """
    original_length = len(content)
    if original_length <= MAX_RESPONSE_CHARS:
        return content, metadata

    summary = self._extract_summary(content)

    if not summary:
        # No summary available: fall back to a notice plus a short excerpt.
        truncated_metadata = self._prune_metadata(metadata, client, reason="truncated")
        excerpt = content[: min(4000, MAX_RESPONSE_CHARS // 2)]
        excerpt_length = len(excerpt)
        truncated_metadata.update(
            {
                "output_truncated": True,
                "output_original_length": original_length,
                "output_limit": MAX_RESPONSE_CHARS,
                "output_excerpt_length": excerpt_length,
            }
        )

        logger.warning(
            "Clink truncated %s output: original=%d chars exceeds limit=%d; excerpt_length=%d",
            client.name,
            original_length,
            MAX_RESPONSE_CHARS,
            excerpt_length,
        )

        notice = (
            f"CLI '{client.name}' produced {original_length} characters, exceeding the configured clink limit "
            f"({MAX_RESPONSE_CHARS} characters). The full output was suppressed to stay within MCP response caps. "
            "Please narrow the request (review fewer files, summarize results) or run the CLI directly for the full log.\n\n"
            f"--- Begin excerpt ({excerpt_length} of {original_length} chars) ---\n{excerpt}\n--- End excerpt ---"
        )
        return notice, truncated_metadata

    # A summary exists; it must itself respect the limit.
    if len(summary) > MAX_RESPONSE_CHARS:
        logger.debug(
            "Clink summary from %s exceeded %d chars; truncating summary to fit.",
            client.name,
            MAX_RESPONSE_CHARS,
        )
        summary = summary[:MAX_RESPONSE_CHARS]

    summary_metadata = self._prune_metadata(metadata, client, reason="summary")
    summary_length = len(summary)
    summary_metadata.update(
        {
            "output_summarized": True,
            "output_original_length": original_length,
            "output_summary_length": summary_length,
            "output_limit": MAX_RESPONSE_CHARS,
        }
    )
    logger.info(
        "Clink compressed %s output via <SUMMARY>: original=%d chars, summary=%d chars",
        client.name,
        original_length,
        summary_length,
    )
    return summary, summary_metadata
|
||||||
|
|
||||||
|
def _extract_summary(self, content: str) -> str | None:
    """Return the stripped text of the first ``SUMMARY_PATTERN`` match in ``content``.

    Returns ``None`` when the pattern does not match, or when the captured
    text is empty after stripping whitespace.
    """
    found = SUMMARY_PATTERN.search(content)
    if found is None:
        return None
    text = found.group(1).strip()
    return text if text else None
|
||||||
|
|
||||||
|
def _prune_metadata(
    self,
    metadata: dict[str, Any],
    client: ResolvedCLIClient,
    *,
    reason: str,
) -> dict[str, Any]:
    """Return a shallow copy of ``metadata`` without its ``events`` entry.

    The input mapping is not modified. When an ``events`` value other than
    ``None`` was present, the copy gains an ``events_removed_for_<reason>``
    flag set to ``True`` and the removal is logged at debug level.

    Args:
        metadata: Response metadata to copy and prune.
        client: Resolved CLI client; only its ``name`` is used, for logging.
        reason: Label embedded in the flag key and in the log message.

    Returns:
        The pruned copy of ``metadata``.
    """
    pruned = dict(metadata)
    dropped_events = pruned.pop("events", None)
    if dropped_events is None:
        # Nothing to report: either no events key, or its value was None.
        return pruned

    pruned[f"events_removed_for_{reason}"] = True
    logger.debug(
        "Clink dropped %s events metadata for %s response (%s)",
        client.name,
        reason,
        type(dropped_events).__name__,
    )
    return pruned
|
||||||
|
|
||||||
def _build_error_metadata(self, client: ResolvedCLIClient, exc: CLIAgentError) -> dict[str, Any]:
|
def _build_error_metadata(self, client: ResolvedCLIClient, exc: CLIAgentError) -> dict[str, Any]:
|
||||||
"""Assemble metadata for failed CLI calls."""
|
"""Assemble metadata for failed CLI calls."""
|
||||||
metadata: dict[str, Any] = {
|
metadata: dict[str, Any] = {
|
||||||
|
|||||||
Reference in New Issue
Block a user