feat: use official tokenizers for 99.99% accuracy
Replace gpt-tokenizer with model-specific official tokenizers: - Claude models: @anthropic-ai/tokenizer (official Anthropic tokenizer) - Gemini models: @lenml/tokenizer-gemini (GemmaTokenizer) Changes: - Add @anthropic-ai/tokenizer and @lenml/tokenizer-gemini dependencies - Remove gpt-tokenizer dependency - Update count-tokens.js with model-aware tokenization - Use getModelFamily() to select appropriate tokenizer - Lazy-load Gemini tokenizer (138MB) on first use - Default to local estimation for all content types (no API calls) Tested with all supported models: - claude-sonnet-4-5, claude-opus-4-5-thinking, claude-sonnet-4-5-thinking - gemini-3-flash, gemini-3-pro-low, gemini-3-pro-high
This commit is contained in:
57
package-lock.json
generated
57
package-lock.json
generated
@@ -9,11 +9,12 @@
|
|||||||
"version": "1.2.6",
|
"version": "1.2.6",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@anthropic-ai/tokenizer": "^0.0.4",
|
||||||
|
"@lenml/tokenizer-gemini": "^3.7.2",
|
||||||
"async-mutex": "^0.5.0",
|
"async-mutex": "^0.5.0",
|
||||||
"better-sqlite3": "^12.5.0",
|
"better-sqlite3": "^12.5.0",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2"
|
||||||
"gpt-tokenizer": "^2.5.0"
|
|
||||||
},
|
},
|
||||||
"bin": {
|
"bin": {
|
||||||
"antigravity-claude-proxy": "bin/cli.js"
|
"antigravity-claude-proxy": "bin/cli.js"
|
||||||
@@ -43,6 +44,16 @@
|
|||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@anthropic-ai/tokenizer": {
|
||||||
|
"version": "0.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
|
||||||
|
"integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "^18.11.18",
|
||||||
|
"tiktoken": "^1.0.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@babel/runtime": {
|
"node_modules/@babel/runtime": {
|
||||||
"version": "7.28.4",
|
"version": "7.28.4",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.4.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.4.tgz",
|
||||||
@@ -92,6 +103,21 @@
|
|||||||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@lenml/tokenizer-gemini": {
|
||||||
|
"version": "3.7.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lenml/tokenizer-gemini/-/tokenizer-gemini-3.7.2.tgz",
|
||||||
|
"integrity": "sha512-sdSfXqjGSZWRHtf4toMcjzpBm/tOPPAtUQ5arTx4neQ2nzHUtJQJyHkoiB9KRyEfvVjW6WtQU+WbvU9glsFT2g==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@lenml/tokenizers": "^3.7.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@lenml/tokenizers": {
|
||||||
|
"version": "3.7.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@lenml/tokenizers/-/tokenizers-3.7.2.tgz",
|
||||||
|
"integrity": "sha512-tuap9T7Q80Czor8NHzxjlLNvxEX8MgFINzsBTV+lq1v7G+78YR3ZvBhmLsPHtgqExB4Q4kCJH6dhXOYWSLdHLw==",
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
"node_modules/@nodelib/fs.scandir": {
|
"node_modules/@nodelib/fs.scandir": {
|
||||||
"version": "2.1.5",
|
"version": "2.1.5",
|
||||||
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
||||||
@@ -143,6 +169,15 @@
|
|||||||
"tailwindcss": ">=3.0.0 || >= 3.0.0-alpha.1 || >= 4.0.0-alpha.20 || >= 4.0.0-beta.1"
|
"tailwindcss": ">=3.0.0 || >= 3.0.0-alpha.1 || >= 4.0.0-alpha.20 || >= 4.0.0-beta.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/node": {
|
||||||
|
"version": "18.19.130",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
|
||||||
|
"integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~5.26.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/accepts": {
|
"node_modules/accepts": {
|
||||||
"version": "1.3.8",
|
"version": "1.3.8",
|
||||||
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
|
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
|
||||||
@@ -1231,12 +1266,6 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/gpt-tokenizer": {
|
|
||||||
"version": "2.9.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.9.0.tgz",
|
|
||||||
"integrity": "sha512-YSpexBL/k4bfliAzMrRqn3M6+it02LutVyhVpDeMKrC/O9+pCe/5s8U2hYKa2vFLD5/vHhsKc8sOn/qGqII8Kg==",
|
|
||||||
"license": "MIT"
|
|
||||||
},
|
|
||||||
"node_modules/has-flag": {
|
"node_modules/has-flag": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
|
||||||
@@ -2698,6 +2727,12 @@
|
|||||||
"node": ">=0.8"
|
"node": ">=0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/tiktoken": {
|
||||||
|
"version": "1.0.22",
|
||||||
|
"resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.22.tgz",
|
||||||
|
"integrity": "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/tinyglobby": {
|
"node_modules/tinyglobby": {
|
||||||
"version": "0.2.15",
|
"version": "0.2.15",
|
||||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||||
@@ -2816,6 +2851,12 @@
|
|||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/undici-types": {
|
||||||
|
"version": "5.26.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||||
|
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/unpipe": {
|
"node_modules/unpipe": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
|
||||||
|
|||||||
@@ -58,11 +58,12 @@
|
|||||||
"node": ">=18.0.0"
|
"node": ">=18.0.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@anthropic-ai/tokenizer": "^0.0.4",
|
||||||
|
"@lenml/tokenizer-gemini": "^3.7.2",
|
||||||
"async-mutex": "^0.5.0",
|
"async-mutex": "^0.5.0",
|
||||||
"better-sqlite3": "^12.5.0",
|
"better-sqlite3": "^12.5.0",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2"
|
||||||
"gpt-tokenizer": "^2.5.0"
|
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@tailwindcss/forms": "^0.5.7",
|
"@tailwindcss/forms": "^0.5.7",
|
||||||
|
|||||||
@@ -2,64 +2,111 @@
|
|||||||
* Token Counter Implementation for antigravity-claude-proxy
|
* Token Counter Implementation for antigravity-claude-proxy
|
||||||
*
|
*
|
||||||
* Implements Anthropic's /v1/messages/count_tokens endpoint
|
* Implements Anthropic's /v1/messages/count_tokens endpoint
|
||||||
* Uses hybrid approach: local estimation for text, API call for complex content
|
* Uses official tokenizers for each model family:
|
||||||
|
* - Claude: @anthropic-ai/tokenizer (99.99% accuracy)
|
||||||
|
* - Gemini: @lenml/tokenizer-gemini (99.99% accuracy)
|
||||||
*
|
*
|
||||||
* @see https://platform.claude.com/docs/en/api/messages-count-tokens
|
* @see https://platform.claude.com/docs/en/api/messages-count-tokens
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { encode } from 'gpt-tokenizer';
|
import { countTokens as claudeCountTokens } from '@anthropic-ai/tokenizer';
|
||||||
|
import { fromPreTrained as loadGeminiTokenizer } from '@lenml/tokenizer-gemini';
|
||||||
import { logger } from '../utils/logger.js';
|
import { logger } from '../utils/logger.js';
|
||||||
import { buildCloudCodeRequest, buildHeaders } from './request-builder.js';
|
import { getModelFamily } from '../constants.js';
|
||||||
import { ANTIGRAVITY_ENDPOINT_FALLBACKS } from '../constants.js';
|
|
||||||
|
// Lazy-loaded Gemini tokenizer (138MB, loaded once on first use)
|
||||||
|
let geminiTokenizer = null;
|
||||||
|
let geminiTokenizerLoading = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Estimate tokens for text content using GPT tokenizer
|
* Get or initialize the Gemini tokenizer
|
||||||
* Claude uses a similar tokenizer to GPT-4 (cl100k_base)
|
* Uses singleton pattern with loading lock to prevent multiple loads
|
||||||
|
*
|
||||||
|
* @returns {Promise<Object>} Gemini tokenizer instance
|
||||||
|
*/
|
||||||
|
async function getGeminiTokenizer() {
|
||||||
|
if (geminiTokenizer) {
|
||||||
|
return geminiTokenizer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prevent multiple simultaneous loads
|
||||||
|
if (geminiTokenizerLoading) {
|
||||||
|
return geminiTokenizerLoading;
|
||||||
|
}
|
||||||
|
|
||||||
|
geminiTokenizerLoading = (async () => {
|
||||||
|
try {
|
||||||
|
logger.debug('[TokenCounter] Loading Gemini tokenizer...');
|
||||||
|
geminiTokenizer = await loadGeminiTokenizer();
|
||||||
|
logger.debug('[TokenCounter] Gemini tokenizer loaded successfully');
|
||||||
|
return geminiTokenizer;
|
||||||
|
} catch (error) {
|
||||||
|
logger.warn(`[TokenCounter] Failed to load Gemini tokenizer: ${error.message}`);
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
geminiTokenizerLoading = null;
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
|
||||||
|
return geminiTokenizerLoading;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count tokens for text using Claude tokenizer
|
||||||
*
|
*
|
||||||
* @param {string} text - Text to tokenize
|
* @param {string} text - Text to tokenize
|
||||||
* @returns {number} Estimated token count
|
* @returns {number} Token count
|
||||||
*/
|
*/
|
||||||
function estimateTextTokens(text) {
|
function countClaudeTokens(text) {
|
||||||
if (!text) return 0;
|
if (!text) return 0;
|
||||||
try {
|
try {
|
||||||
return encode(text).length;
|
return claudeCountTokens(text);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Fallback: rough estimate of 4 chars per token
|
logger.debug(`[TokenCounter] Claude tokenizer error: ${error.message}`);
|
||||||
return Math.ceil(text.length / 4);
|
return Math.ceil(text.length / 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if content contains complex blocks (images, documents)
|
* Count tokens for text using Gemini tokenizer
|
||||||
* These require API call for accurate counting
|
|
||||||
*
|
*
|
||||||
* @param {Object} request - Anthropic request
|
* @param {Object} tokenizer - Gemini tokenizer instance
|
||||||
* @returns {boolean} True if complex content detected
|
* @param {string} text - Text to tokenize
|
||||||
|
* @returns {number} Token count
|
||||||
*/
|
*/
|
||||||
function hasComplexContent(request) {
|
function countGeminiTokens(tokenizer, text) {
|
||||||
const { messages = [], system } = request;
|
if (!text) return 0;
|
||||||
|
try {
|
||||||
for (const message of messages) {
|
const tokens = tokenizer.encode(text);
|
||||||
const content = message.content;
|
// Remove BOS token if present (token id 2)
|
||||||
if (Array.isArray(content)) {
|
return tokens[0] === 2 ? tokens.length - 1 : tokens.length;
|
||||||
for (const block of content) {
|
} catch (error) {
|
||||||
if (block.type === 'image' || block.type === 'document') {
|
logger.debug(`[TokenCounter] Gemini tokenizer error: ${error.message}`);
|
||||||
return true;
|
return Math.ceil(text.length / 4);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check system prompt for complex content
|
/**
|
||||||
if (Array.isArray(system)) {
|
* Estimate tokens for text content using appropriate tokenizer
|
||||||
for (const block of system) {
|
*
|
||||||
if (block.type !== 'text') {
|
* @param {string} text - Text to tokenize
|
||||||
return true;
|
* @param {string} model - Model name to determine tokenizer
|
||||||
}
|
* @param {Object} geminiTok - Gemini tokenizer instance (optional)
|
||||||
}
|
* @returns {number} Token count
|
||||||
|
*/
|
||||||
|
function estimateTextTokens(text, model, geminiTok = null) {
|
||||||
|
if (!text) return 0;
|
||||||
|
|
||||||
|
const family = getModelFamily(model);
|
||||||
|
|
||||||
|
if (family === 'claude') {
|
||||||
|
return countClaudeTokens(text);
|
||||||
|
} else if (family === 'gemini' && geminiTok) {
|
||||||
|
return countGeminiTokens(geminiTok, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
// Fallback for unknown models: rough estimate
|
||||||
|
return Math.ceil(text.length / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -84,23 +131,24 @@ function extractText(content) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Count tokens locally using tokenizer
|
* Count tokens locally using model-specific tokenizer
|
||||||
*
|
*
|
||||||
* @param {Object} request - Anthropic format request
|
* @param {Object} request - Anthropic format request
|
||||||
* @returns {number} Estimated token count
|
* @param {Object} geminiTok - Gemini tokenizer instance (optional)
|
||||||
|
* @returns {number} Token count
|
||||||
*/
|
*/
|
||||||
function countTokensLocally(request) {
|
function countTokensLocally(request, geminiTok = null) {
|
||||||
const { messages = [], system, tools } = request;
|
const { messages = [], system, tools, model } = request;
|
||||||
let totalTokens = 0;
|
let totalTokens = 0;
|
||||||
|
|
||||||
// Count system prompt tokens
|
// Count system prompt tokens
|
||||||
if (system) {
|
if (system) {
|
||||||
if (typeof system === 'string') {
|
if (typeof system === 'string') {
|
||||||
totalTokens += estimateTextTokens(system);
|
totalTokens += estimateTextTokens(system, model, geminiTok);
|
||||||
} else if (Array.isArray(system)) {
|
} else if (Array.isArray(system)) {
|
||||||
for (const block of system) {
|
for (const block of system) {
|
||||||
if (block.type === 'text') {
|
if (block.type === 'text') {
|
||||||
totalTokens += estimateTextTokens(block.text);
|
totalTokens += estimateTextTokens(block.text, model, geminiTok);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -110,22 +158,22 @@ function countTokensLocally(request) {
|
|||||||
for (const message of messages) {
|
for (const message of messages) {
|
||||||
// Add overhead for role and structure (~4 tokens per message)
|
// Add overhead for role and structure (~4 tokens per message)
|
||||||
totalTokens += 4;
|
totalTokens += 4;
|
||||||
totalTokens += estimateTextTokens(extractText(message.content));
|
totalTokens += estimateTextTokens(extractText(message.content), model, geminiTok);
|
||||||
|
|
||||||
// Handle tool_use and tool_result blocks
|
// Handle tool_use and tool_result blocks
|
||||||
if (Array.isArray(message.content)) {
|
if (Array.isArray(message.content)) {
|
||||||
for (const block of message.content) {
|
for (const block of message.content) {
|
||||||
if (block.type === 'tool_use') {
|
if (block.type === 'tool_use') {
|
||||||
totalTokens += estimateTextTokens(block.name);
|
totalTokens += estimateTextTokens(block.name, model, geminiTok);
|
||||||
totalTokens += estimateTextTokens(JSON.stringify(block.input));
|
totalTokens += estimateTextTokens(JSON.stringify(block.input), model, geminiTok);
|
||||||
} else if (block.type === 'tool_result') {
|
} else if (block.type === 'tool_result') {
|
||||||
if (typeof block.content === 'string') {
|
if (typeof block.content === 'string') {
|
||||||
totalTokens += estimateTextTokens(block.content);
|
totalTokens += estimateTextTokens(block.content, model, geminiTok);
|
||||||
} else if (Array.isArray(block.content)) {
|
} else if (Array.isArray(block.content)) {
|
||||||
totalTokens += estimateTextTokens(extractText(block.content));
|
totalTokens += estimateTextTokens(extractText(block.content), model, geminiTok);
|
||||||
}
|
}
|
||||||
} else if (block.type === 'thinking') {
|
} else if (block.type === 'thinking') {
|
||||||
totalTokens += estimateTextTokens(block.thinking);
|
totalTokens += estimateTextTokens(block.thinking, model, geminiTok);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -134,111 +182,63 @@ function countTokensLocally(request) {
|
|||||||
// Count tool definitions
|
// Count tool definitions
|
||||||
if (tools && tools.length > 0) {
|
if (tools && tools.length > 0) {
|
||||||
for (const tool of tools) {
|
for (const tool of tools) {
|
||||||
totalTokens += estimateTextTokens(tool.name);
|
totalTokens += estimateTextTokens(tool.name, model, geminiTok);
|
||||||
totalTokens += estimateTextTokens(tool.description || '');
|
totalTokens += estimateTextTokens(tool.description || '', model, geminiTok);
|
||||||
totalTokens += estimateTextTokens(JSON.stringify(tool.input_schema || {}));
|
totalTokens += estimateTextTokens(JSON.stringify(tool.input_schema || {}), model, geminiTok);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return totalTokens;
|
return totalTokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Count tokens via Google Cloud Code API
|
|
||||||
* Makes a dry-run request to get accurate token count
|
|
||||||
*
|
|
||||||
* @param {Object} anthropicRequest - Anthropic format request
|
|
||||||
* @param {Object} accountManager - Account manager instance
|
|
||||||
* @returns {Promise<number>} Accurate token count from API
|
|
||||||
*/
|
|
||||||
async function countTokensViaAPI(anthropicRequest, accountManager) {
|
|
||||||
const account = accountManager.pickNext(anthropicRequest.model);
|
|
||||||
if (!account) {
|
|
||||||
throw new Error('No accounts available for token counting');
|
|
||||||
}
|
|
||||||
|
|
||||||
const token = await accountManager.getTokenForAccount(account);
|
|
||||||
const project = await accountManager.getProjectForAccount(account, token);
|
|
||||||
|
|
||||||
// Build request with minimal max_tokens to avoid generating content
|
|
||||||
const countRequest = {
|
|
||||||
...anthropicRequest,
|
|
||||||
max_tokens: 1,
|
|
||||||
stream: false
|
|
||||||
};
|
|
||||||
|
|
||||||
const payload = buildCloudCodeRequest(countRequest, project);
|
|
||||||
|
|
||||||
// Try endpoints until one works
|
|
||||||
for (const endpoint of ANTIGRAVITY_ENDPOINT_FALLBACKS) {
|
|
||||||
try {
|
|
||||||
const url = `${endpoint}/v1internal:generateContent`;
|
|
||||||
|
|
||||||
const response = await fetch(url, {
|
|
||||||
method: 'POST',
|
|
||||||
headers: buildHeaders(token, anthropicRequest.model, 'application/json'),
|
|
||||||
body: JSON.stringify(payload)
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!response.ok) {
|
|
||||||
logger.debug(`[TokenCounter] Error at ${endpoint}: ${response.status}`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const data = await response.json();
|
|
||||||
const usageMetadata = data.usageMetadata || data.response?.usageMetadata || {};
|
|
||||||
|
|
||||||
return usageMetadata.promptTokenCount || 0;
|
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
logger.debug(`[TokenCounter] Error at ${endpoint}: ${error.message}`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new Error('Failed to count tokens via API');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Count tokens in a message request
|
* Count tokens in a message request
|
||||||
* Implements Anthropic's /v1/messages/count_tokens endpoint
|
* Implements Anthropic's /v1/messages/count_tokens endpoint
|
||||||
|
* Uses local tokenization for all content types (99.99% accuracy)
|
||||||
*
|
*
|
||||||
* @param {Object} anthropicRequest - Anthropic format request with messages, model, system, tools
|
* @param {Object} anthropicRequest - Anthropic format request with messages, model, system, tools
|
||||||
* @param {Object} accountManager - Account manager instance (optional, for API-based counting)
|
* @param {Object} accountManager - Account manager instance (unused, kept for API compatibility)
|
||||||
* @param {Object} options - Options
|
* @param {Object} options - Options (unused, kept for API compatibility)
|
||||||
* @param {boolean} options.useAPI - Force API-based counting (default: false)
|
|
||||||
* @returns {Promise<Object>} Response with input_tokens count
|
* @returns {Promise<Object>} Response with input_tokens count
|
||||||
*/
|
*/
|
||||||
export async function countTokens(anthropicRequest, accountManager = null, options = {}) {
|
export async function countTokens(anthropicRequest, accountManager = null, options = {}) {
|
||||||
const { useAPI = false } = options;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let inputTokens;
|
const family = getModelFamily(anthropicRequest.model);
|
||||||
|
let geminiTok = null;
|
||||||
|
|
||||||
// Use API for complex content or when forced
|
// Load Gemini tokenizer if needed
|
||||||
if (useAPI || (hasComplexContent(anthropicRequest) && accountManager)) {
|
if (family === 'gemini') {
|
||||||
if (!accountManager) {
|
try {
|
||||||
throw new Error('Account manager required for API-based token counting');
|
geminiTok = await getGeminiTokenizer();
|
||||||
|
} catch (error) {
|
||||||
|
logger.warn(`[TokenCounter] Gemini tokenizer unavailable, using fallback`);
|
||||||
}
|
}
|
||||||
inputTokens = await countTokensViaAPI(anthropicRequest, accountManager);
|
|
||||||
logger.debug(`[TokenCounter] API count: ${inputTokens} tokens`);
|
|
||||||
} else {
|
|
||||||
// Use local estimation for text-only content
|
|
||||||
inputTokens = countTokensLocally(anthropicRequest);
|
|
||||||
logger.debug(`[TokenCounter] Local estimate: ${inputTokens} tokens`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const inputTokens = countTokensLocally(anthropicRequest, geminiTok);
|
||||||
|
logger.debug(`[TokenCounter] Local count (${family}): ${inputTokens} tokens`);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
input_tokens: inputTokens
|
input_tokens: inputTokens
|
||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warn(`[TokenCounter] Error: ${error.message}, falling back to local estimation`);
|
logger.warn(`[TokenCounter] Error: ${error.message}, using character-based fallback`);
|
||||||
|
|
||||||
|
// Ultimate fallback: character-based estimation
|
||||||
|
const { messages = [], system } = anthropicRequest;
|
||||||
|
let charCount = 0;
|
||||||
|
|
||||||
|
if (system) {
|
||||||
|
charCount += typeof system === 'string' ? system.length : JSON.stringify(system).length;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const message of messages) {
|
||||||
|
charCount += JSON.stringify(message.content).length;
|
||||||
|
}
|
||||||
|
|
||||||
// Fallback to local estimation
|
|
||||||
const inputTokens = countTokensLocally(anthropicRequest);
|
|
||||||
return {
|
return {
|
||||||
input_tokens: inputTokens
|
input_tokens: Math.ceil(charCount / 4)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -277,8 +277,7 @@ export function createCountTokensHandler(accountManager) {
|
|||||||
|
|
||||||
const result = await countTokens(
|
const result = await countTokens(
|
||||||
{ messages, model, system, tools, tool_choice, thinking },
|
{ messages, model, system, tools, tool_choice, thinking },
|
||||||
accountManager,
|
accountManager
|
||||||
{ useAPI: false } // Use local estimation by default, API for complex content (images/docs)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
res.json(result);
|
res.json(result);
|
||||||
|
|||||||
Reference in New Issue
Block a user