From 860c0d6c2d27365d5b35a4b29a373408f9cec1ed Mon Sep 17 00:00:00 2001 From: liuwuyu118 Date: Tue, 13 Jan 2026 11:17:17 +0800 Subject: [PATCH] fix: add image response support Convert Google's inlineData format to Anthropic's image format: - response-converter.js: Handle inlineData in non-streaming responses - sse-parser.js: Parse inlineData for thinking models - sse-streamer.js: Stream inlineData as image content blocks Co-Authored-By: Claude --- src/cloudcode/sse-parser.js | 7 ++++++- src/cloudcode/sse-streamer.js | 33 ++++++++++++++++++++++++++++++++ src/format/response-converter.js | 10 ++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/cloudcode/sse-parser.js b/src/cloudcode/sse-parser.js index f1b2551..041baf1 100644 --- a/src/cloudcode/sse-parser.js +++ b/src/cloudcode/sse-parser.js @@ -89,6 +89,11 @@ export async function parseThinkingSSEResponse(response, originalModel) { if (!part.text) continue; flushThinking(); accumulatedText += part.text; + } else if (part.inlineData) { + // Handle image content + flushThinking(); + flushText(); + finalParts.push(part); } } } catch (e) { @@ -105,7 +110,7 @@ export async function parseThinkingSSEResponse(response, originalModel) { usageMetadata }; - const partTypes = finalParts.map(p => p.thought ? 'thought' : (p.functionCall ? 'functionCall' : 'text')); + const partTypes = finalParts.map(p => p.thought ? 'thought' : (p.functionCall ? 'functionCall' : (p.inlineData ? 'inlineData' : 'text'))); logger.debug('[CloudCode] Response received (SSE), part types:', partTypes); if (finalParts.some(p => p.thought)) { const thinkingPart = finalParts.find(p => p.thought); diff --git a/src/cloudcode/sse-streamer.js b/src/cloudcode/sse-streamer.js index 3fe80a1..9d72bc0 100644 --- a/src/cloudcode/sse-streamer.js +++ b/src/cloudcode/sse-streamer.js @@ -209,6 +209,39 @@ export async function* streamSSEResponse(response, originalModel) { partial_json: JSON.stringify(part.functionCall.args || {}) } }; + } else if (part.inlineData) { + // Handle image content from Google format + if (currentBlockType === 'thinking' && currentThinkingSignature) { + yield { + type: 'content_block_delta', + index: blockIndex, + delta: { type: 'signature_delta', signature: currentThinkingSignature } + }; + currentThinkingSignature = ''; + } + if (currentBlockType !== null) { + yield { type: 'content_block_stop', index: blockIndex }; + blockIndex++; + } + currentBlockType = 'image'; + + // Emit image block as a complete block + yield { + type: 'content_block_start', + index: blockIndex, + content_block: { + type: 'image', + source: { + type: 'base64', + media_type: part.inlineData.mimeType, + data: part.inlineData.data + } + } + }; + + yield { type: 'content_block_stop', index: blockIndex }; + blockIndex++; + currentBlockType = null; } } diff --git a/src/format/response-converter.js b/src/format/response-converter.js index c58d57c..082825c 100644 --- a/src/format/response-converter.js +++ b/src/format/response-converter.js @@ -71,6 +71,16 @@ export function convertGoogleToAnthropic(googleResponse, model) { anthropicContent.push(toolUseBlock); hasToolCalls = true; + } else if (part.inlineData) { + // Handle image content from Google format + anthropicContent.push({ + type: 'image', + source: { + type: 'base64', + media_type: part.inlineData.mimeType, + data: part.inlineData.data + } + }); } }