feat: added `intelligence_score` to the model capabilities schema; a 1-20 number that can be specified to influence the sort order of models presented to the CLI in auto selection mode (see the sorting sketch after this list)

fix: re-introduced the model definition into the schema, but more judiciously: only a summary is generated per tool. This is required to ensure the CLI calls and uses the correct model
fix: removed the `model` param from tools where it wasn't needed
fix: enforced adherence to `*_ALLOWED_MODELS` by advertising only the allowed models to the CLI
fix: removed duplicates across providers when passing canonical names back to the CLI; the first enabled provider wins
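
The first and last items above amount to a small piece of selection logic. A minimal sketch of how it could look, assuming a hypothetical `ModelEntry` type and a `providers` list already ordered by enabled priority (none of these names are taken from the repository):

```python
from dataclasses import dataclass


@dataclass
class ModelEntry:
    canonical_name: str
    provider: str
    intelligence_score: int  # 1 (lowest) to 20 (highest), per the new schema field


def models_for_cli(
    providers: list[tuple[str, list[ModelEntry]]],
    allowed: set[str] | None = None,
) -> list[ModelEntry]:
    """De-duplicate and order the models advertised to the CLI (illustrative only)."""
    chosen: dict[str, ModelEntry] = {}
    for _provider_name, models in providers:  # iterate providers in enabled order
        for model in models:
            # honour *_ALLOWED_MODELS by never advertising disallowed models
            if allowed is not None and model.canonical_name not in allowed:
                continue
            # first enabled provider to expose a canonical name wins
            chosen.setdefault(model.canonical_name, model)
    # higher intelligence_score sorts first in auto selection mode
    return sorted(chosen.values(), key=lambda m: m.intelligence_score, reverse=True)
```

A `setdefault` keyed on the canonical name gives the "first enabled provider wins" behaviour without extra bookkeeping; the actual implementation in this commit may differ.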
Author: Fahad
Date: 2025-10-02 21:43:44 +04:00
Parent: e78fe35a1b
Commit: 6cab9e56fc
22 changed files with 525 additions and 110 deletions


@@ -122,8 +122,8 @@ class TestConsensusTool:
# relevant_files should be present as it's used by consensus
assert "relevant_files" in schema["properties"]
- # model field should be present for Gemini compatibility (consensus uses 'models' as well)
- assert "model" in schema["properties"]
+ # model field should NOT be present as consensus uses 'models' field instead
+ assert "model" not in schema["properties"]
# Verify workflow fields that should NOT be present
assert "files_checked" not in schema["properties"]


@@ -26,6 +26,7 @@ from utils.conversation_memory import (
get_conversation_image_list,
get_thread,
)
+ from utils.model_context import ModelContext
@pytest.mark.no_mock_provider
@@ -180,17 +181,18 @@ class TestImageSupportIntegration:
try:
# Test with an invalid model name that doesn't exist in any provider
- result = tool._validate_image_limits(small_images, "non-existent-model-12345")
+ # Use model_context parameter name (not positional)
+ result = tool._validate_image_limits(small_images, model_context=ModelContext("non-existent-model-12345"))
# Should return error because model not available or doesn't support images
assert result is not None
assert result["status"] == "error"
assert "is not available" in result["content"] or "does not support image processing" in result["content"]
# Test that empty/None images always pass regardless of model
- result = tool._validate_image_limits([], "any-model")
+ result = tool._validate_image_limits([], model_context=ModelContext("gemini-2.5-pro"))
assert result is None
- result = tool._validate_image_limits(None, "any-model")
+ result = tool._validate_image_limits(None, model_context=ModelContext("gemini-2.5-pro"))
assert result is None
finally:
@@ -215,7 +217,7 @@ class TestImageSupportIntegration:
small_image_path = temp_file.name
# Test with the default model from test environment (gemini-2.5-flash)
- result = tool._validate_image_limits([small_image_path], "gemini-2.5-flash")
+ result = tool._validate_image_limits([small_image_path], ModelContext("gemini-2.5-flash"))
assert result is None # Should pass for Gemini models
# Create 150MB image (over typical limits)
@@ -223,7 +225,7 @@ class TestImageSupportIntegration:
temp_file.write(b"\x00" * (150 * 1024 * 1024)) # 150MB
large_image_path = temp_file.name
- result = tool._validate_image_limits([large_image_path], "gemini-2.5-flash")
+ result = tool._validate_image_limits([large_image_path], ModelContext("gemini-2.5-flash"))
# Large images should fail validation
assert result is not None
assert result["status"] == "error"
@@ -429,14 +431,14 @@ class TestImageSupportIntegration:
images = [data_url]
# Test with a dummy model that doesn't exist in any provider
result = tool._validate_image_limits(images, "test-dummy-model-name")
result = tool._validate_image_limits(images, ModelContext("test-dummy-model-name"))
# Should return error because model not available or doesn't support images
assert result is not None
assert result["status"] == "error"
assert "is not available" in result["content"] or "does not support image processing" in result["content"]
# Test with another non-existent model to check error handling
result = tool._validate_image_limits(images, "another-dummy-model")
result = tool._validate_image_limits(images, ModelContext("another-dummy-model"))
# Should return error because model not available
assert result is not None
assert result["status"] == "error"
@@ -446,11 +448,11 @@ class TestImageSupportIntegration:
tool = ChatTool()
# Empty list should not fail validation (no need for provider setup)
- result = tool._validate_image_limits([], "test_model")
+ result = tool._validate_image_limits([], ModelContext("gemini-2.5-pro"))
assert result is None
# None should not fail validation (no need for provider setup)
- result = tool._validate_image_limits(None, "test_model")
+ result = tool._validate_image_limits(None, ModelContext("gemini-2.5-pro"))
assert result is None
@patch("utils.conversation_memory.get_storage")


@@ -70,11 +70,25 @@ class TestListModelsRestrictions(unittest.TestCase):
config = MagicMock()
config.model_name = "anthropic/claude-opus-4-20240229"
config.context_window = 200000
+ config.get_effective_capability_rank.return_value = 90 # High rank for Opus
return config
elif "sonnet" in model_name.lower():
config = MagicMock()
config.model_name = "anthropic/claude-sonnet-4-20240229"
config.context_window = 200000
+ config.get_effective_capability_rank.return_value = 80 # Lower rank for Sonnet
return config
elif "deepseek" in model_name.lower():
config = MagicMock()
config.model_name = "deepseek/deepseek-r1-0528:free"
config.context_window = 100000
config.get_effective_capability_rank.return_value = 70
return config
elif "qwen" in model_name.lower():
config = MagicMock()
config.model_name = "qwen/qwen3-235b-a22b-04-28:free"
config.context_window = 100000
config.get_effective_capability_rank.return_value = 60
return config
return None # No config for models without aliases
@@ -90,6 +104,9 @@ class TestListModelsRestrictions(unittest.TestCase):
mock_get_provider.side_effect = get_provider_side_effect
+ # Ensure registry is cleared before test
+ ModelProviderRegistry._registry = {}
# Mock available models
mock_get_models.return_value = {
"gemini-2.5-flash": ProviderType.GOOGLE,
@@ -131,6 +148,9 @@ class TestListModelsRestrictions(unittest.TestCase):
# Parse the output
lines = result.split("\n")
+ # Debug: print the actual result for troubleshooting
+ # print(f"DEBUG: Full result:\n{result}")
# Check that OpenRouter section exists
openrouter_section_found = False
openrouter_models = []
@@ -141,15 +161,18 @@ class TestListModelsRestrictions(unittest.TestCase):
openrouter_section_found = True
elif "Available Models" in line and openrouter_section_found:
in_openrouter_section = True
- elif in_openrouter_section and line.strip().startswith("- "):
- # Extract model name from various line formats:
- # - `model-name` → `full-name` (context)
- # - `model-name`
- line_content = line.strip()[2:] # Remove "- "
- if "`" in line_content:
- # Extract content between first pair of backticks
- model_name = line_content.split("`")[1]
- openrouter_models.append(model_name)
+ elif in_openrouter_section:
+ # Check for lines with model names in backticks
+ # Format: - `model-name` (score X)
+ if line.strip().startswith("- ") and "`" in line:
+ # Extract model name between backticks
+ parts = line.split("`")
+ if len(parts) >= 2:
+ model_name = parts[1]
+ openrouter_models.append(model_name)
# Stop parsing when we hit the next section
elif "##" in line and in_openrouter_section:
break
self.assertTrue(openrouter_section_found, "OpenRouter section not found")
self.assertEqual(


@@ -174,6 +174,7 @@ class TestOpenRouterAutoMode:
mock_config = Mock()
mock_config.is_custom = False
mock_config.aliases = [] # Empty list of aliases
+ mock_config.get_effective_capability_rank = Mock(return_value=50) # Add ranking method
return mock_config
return None
@@ -220,6 +221,7 @@ class TestOpenRouterAutoMode:
# Mock the resolve method to return model configs with aliases
mock_model_config = Mock()
mock_model_config.aliases = [] # Empty aliases for simplicity
+ mock_model_config.get_effective_capability_rank = Mock(return_value=50) # Add ranking method
mock_registry.resolve.return_value = mock_model_config
ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider)