refactor: clean up token counting

2025-10-02 11:35:29 +04:00
parent 14a35afa1d
commit 7fe9fc49f8
4 changed files with 43 additions and 61 deletions
--- a/providers/base.py
+++ b/providers/base.py
@@ -73,10 +73,24 @@ class ModelProvider(ABC):
        """
        pass

-    @abstractmethod
    def count_tokens(self, text: str, model_name: str) -> int:
-        """Count tokens for the given text using the specified model's tokenizer."""
-        pass
+        """Estimate token usage for a piece of text.
+
+        Default heuristic: returns 0 for empty text, otherwise
+        ``max(1, len(text) // 4)``. The model name is resolved through
+        ``_resolve_model_name`` and used only in the debug log. Providers
+        that expose a more accurate tokenizer can override this method.
+        """
+
+        resolved_model = self._resolve_model_name(model_name)
+
+        if not text:
+            return 0
+
+        # Rough estimate (~4 chars/token for English); never below 1 for non-empty text
+        estimated = max(1, len(text) // 4)
+        logger.debug("Estimating %s tokens for model %s via character heuristic", estimated, resolved_model)
+        return estimated

    @abstractmethod
    def get_provider_type(self) -> ProviderType: