Files
my-pal-mcp-server/scripts/sync_zen_models.py
2026-04-01 23:48:16 +02:00

238 lines
8.3 KiB
Python

#!/usr/bin/env python3
import argparse
import json
import sys
from pathlib import Path
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from utils.env import get_env
from utils.file_utils import read_json_file
# Repository root: scripts/ sits one level below it.
ROOT = Path(__file__).resolve().parents[1]
# Default destination for the generated live manifest.
DEFAULT_OUTPUT = ROOT / "conf" / "zen_models_live.json"
# Hand-maintained manifest whose per-model values override inferred defaults.
DEFAULT_CURATED = ROOT / "conf" / "zen_models.json"
# OpenCode Zen live model-listing endpoint.
ZEN_MODELS_URL = "https://opencode.ai/zen/v1/models"
def fetch_zen_models(url: str, api_key: str) -> dict:
    """Download the live Zen model catalogue from *url* and return it as a dict.

    Raises ValueError when the endpoint returns JSON that is not an object;
    urllib errors, timeouts, and JSON decode errors propagate to the caller.
    """
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {api_key}",
        "User-Agent": "pal-mcp-server/zen-model-sync",
    }
    req = Request(url, headers=headers)
    with urlopen(req, timeout=30) as response:
        # Decode using the charset the server declares, defaulting to UTF-8.
        encoding = response.headers.get_content_charset("utf-8")
        body = response.read().decode(encoding)
    parsed = json.loads(body)
    if isinstance(parsed, dict):
        return parsed
    raise ValueError("Zen models payload must be a JSON object")
def load_curated_models(path: Path) -> dict[str, dict]:
    """Read the curated manifest at *path* and index its entries by model name.

    A missing file, a falsy document, or a malformed "models" section all
    degrade to an empty mapping instead of raising. Entries without a
    non-empty string "model_name" are skipped; later duplicates win.
    """
    if not path.exists():
        return {}
    document = read_json_file(str(path)) or {}
    entries = document.get("models", [])
    if not isinstance(entries, list):
        return {}
    return {
        entry["model_name"]: dict(entry)
        for entry in entries
        if isinstance(entry, dict)
        and isinstance(entry.get("model_name"), str)
        and entry.get("model_name")
    }
def _infer_defaults_from_model_name(model_name: str) -> dict:
lower_name = model_name.lower()
defaults = {
"aliases": [],
"context_window": 200000,
"max_output_tokens": 64000,
"supports_extended_thinking": False,
"supports_json_mode": True,
"supports_function_calling": True,
"supports_images": False,
"max_image_size_mb": 0.0,
"supports_temperature": True,
"temperature_constraint": "range",
"description": f"OpenCode Zen live model: {model_name}",
"intelligence_score": 10,
"allow_code_generation": False,
}
if lower_name.startswith("claude-"):
defaults.update(
{
"supports_extended_thinking": True,
"supports_images": True,
"max_image_size_mb": 20.0,
}
)
if "-4-6" in lower_name:
defaults.update({"context_window": 1000000, "max_output_tokens": 128000})
elif lower_name.startswith("gemini-"):
defaults.update(
{
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_extended_thinking": True,
"supports_images": True,
"max_image_size_mb": 20.0,
}
)
elif lower_name.startswith("gpt-"):
defaults.update(
{
"context_window": 400000,
"max_output_tokens": 128000,
"supports_extended_thinking": True,
"supports_images": True,
"max_image_size_mb": 20.0,
}
)
if "5.4" in lower_name:
defaults["context_window"] = 1050000 if "-pro" in lower_name or lower_name == "gpt-5.4" else 400000
if lower_name in {
"gpt-5.4",
"gpt-5.4-pro",
"gpt-5.4-mini",
"gpt-5.4-nano",
"gpt-5.3-codex",
"gpt-5.3-codex-spark",
"gpt-5.2",
"gpt-5.2-codex",
"gpt-5.1",
"gpt-5.1-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
"gpt-5",
"gpt-5-codex",
}:
defaults["use_openai_response_api"] = True
return defaults
def convert_model(model_data: dict, curated_models: dict[str, dict]) -> dict | None:
    """Translate one live API entry into a PAL manifest record.

    Curated manifest values win over inferred defaults field by field.
    Returns None when the entry carries no usable string "id".
    """
    name = model_data.get("id")
    if not (isinstance(name, str) and name):
        return None
    curated = curated_models.get(name, {})
    inferred = _infer_defaults_from_model_name(name)

    def pick(field: str):
        # Curated value if present, otherwise the inferred default.
        return curated.get(field, inferred[field])

    record = {
        "model_name": name,
        "aliases": [],
        "context_window": int(pick("context_window")),
        "max_output_tokens": int(pick("max_output_tokens")),
        "supports_extended_thinking": bool(pick("supports_extended_thinking")),
        "supports_json_mode": bool(pick("supports_json_mode")),
        "supports_function_calling": bool(pick("supports_function_calling")),
        "supports_images": bool(pick("supports_images")),
        "max_image_size_mb": float(pick("max_image_size_mb")),
        "supports_temperature": bool(pick("supports_temperature")),
        "temperature_constraint": pick("temperature_constraint"),
        "description": curated.get("description")
        or f"Generated baseline metadata for OpenCode Zen model {name}.",
        "intelligence_score": int(pick("intelligence_score")),
        "allow_code_generation": bool(pick("allow_code_generation")),
    }
    # Emit the response-API flag only when the curated manifest sets it
    # explicitly (even to False) or the inference marked the model for it.
    if curated.get("use_openai_response_api") is not None or inferred.get("use_openai_response_api"):
        record["use_openai_response_api"] = bool(curated.get("use_openai_response_api", True))
    return record
def build_output_document(source: dict, source_url: str, curated_models: dict[str, dict]) -> dict:
    """Assemble the generated manifest document from the raw API *source* payload.

    Non-dict entries and entries that fail conversion are dropped; surviving
    records are sorted by model name so regenerated files diff cleanly.
    """
    records = []
    for raw_entry in source.get("data", []):
        if not isinstance(raw_entry, dict):
            continue
        record = convert_model(raw_entry, curated_models)
        if record:
            records.append(record)
    readme = {
        "description": "Generated baseline OpenCode Zen catalogue for PAL MCP Server.",
        "source": source_url,
        "usage": "Generated by scripts/sync_zen_models.py. Curated overrides belong in conf/zen_models.json.",
        "field_notes": "Entries are conservative discovery data. Curated manifest values override these at runtime.",
    }
    return {
        "_README": readme,
        "models": sorted(records, key=lambda rec: rec["model_name"]),
    }
def write_output(path: Path, document: dict) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", encoding="utf-8", newline="\n") as handle:
json.dump(document, handle, indent=2, ensure_ascii=False)
handle.write("\n")
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line options for the sync script."""
    parser = argparse.ArgumentParser(
        description="Sync OpenCode Zen live model catalogue into PAL config.",
    )
    parser.add_argument("--url", default=ZEN_MODELS_URL, help="Zen models endpoint")
    parser.add_argument(
        "--output",
        default=str(DEFAULT_OUTPUT),
        help="Path to the generated live Zen manifest",
    )
    parser.add_argument(
        "--curated",
        default=str(DEFAULT_CURATED),
        help="Path to the curated Zen manifest used for metadata enrichment",
    )
    return parser.parse_args()
def main() -> int:
    """Entry point: fetch the live catalogue and write the generated manifest.

    Returns 0 on success, 1 when the API key is missing or any step of the
    fetch/convert/write pipeline fails.
    """
    args = parse_args()
    api_key = get_env("ZEN_API_KEY")
    if not api_key:
        print("Failed to sync Zen models: ZEN_API_KEY is not set", file=sys.stderr)
        return 1
    output_path = Path(args.output)
    try:
        curated = load_curated_models(Path(args.curated))
        live_payload = fetch_zen_models(args.url, api_key)
        document = build_output_document(live_payload, args.url, curated)
        write_output(output_path, document)
    except (HTTPError, URLError, TimeoutError, ValueError, json.JSONDecodeError) as exc:
        print(f"Failed to sync Zen models: {exc}", file=sys.stderr)
        return 1
    print(f"Wrote {len(document['models'])} Zen models to {output_path}")
    return 0
if __name__ == "__main__":
    # Propagate main()'s status code as the process exit code.
    raise SystemExit(main())