Rebranding, refactoring, renaming, cleanup, updated docs

Fahad
2025-06-12 10:40:43 +04:00
parent 9a55ca8898
commit fb66825bf6
55 changed files with 1048 additions and 1474 deletions


@@ -10,9 +10,8 @@ This test validates that:
"""
import datetime
-import subprocess
import re
-from typing import Dict, List, Tuple
+import subprocess
from .base_test import BaseSimulatorTest
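The typing change above follows PEP 585: from Python 3.9 onward the builtin containers can be parameterized directly, so the `typing.Dict`/`List`/`Tuple` aliases become unnecessary. A minimal before/after illustration (not from this repo):

from typing import Dict, List  # pre-3.9 spelling

def old_style(logs: str) -> List[Dict[str, int]]:
    return []

def new_style(logs: str) -> list[dict[str, int]]:  # 3.9+ builtin generics
    return []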
@@ -33,7 +32,7 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
try:
cmd = ["docker", "exec", self.container_name, "tail", "-n", "300", "/tmp/mcp_server.log"]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
return result.stdout
else:
@@ -43,13 +42,13 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
self.logger.error(f"Failed to get server logs: {e}")
return ""
-def extract_conversation_usage_logs(self, logs: str) -> List[Dict[str, int]]:
+def extract_conversation_usage_logs(self, logs: str) -> list[dict[str, int]]:
"""Extract actual conversation token usage from server logs"""
usage_logs = []
# Look for conversation debug logs that show actual usage
-lines = logs.split('\n')
+lines = logs.split("\n")
for i, line in enumerate(lines):
if "[CONVERSATION_DEBUG] Token budget calculation:" in line:
# Found start of token budget log, extract the following lines
@@ -57,47 +56,47 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
for j in range(1, 8): # Next 7 lines contain the usage details
if i + j < len(lines):
detail_line = lines[i + j]
# Parse Total capacity: 1,048,576
if "Total capacity:" in detail_line:
-match = re.search(r'Total capacity:\s*([\d,]+)', detail_line)
+match = re.search(r"Total capacity:\s*([\d,]+)", detail_line)
if match:
-usage['total_capacity'] = int(match.group(1).replace(',', ''))
+usage["total_capacity"] = int(match.group(1).replace(",", ""))
# Parse Content allocation: 838,860
elif "Content allocation:" in detail_line:
-match = re.search(r'Content allocation:\s*([\d,]+)', detail_line)
+match = re.search(r"Content allocation:\s*([\d,]+)", detail_line)
if match:
-usage['content_allocation'] = int(match.group(1).replace(',', ''))
-# Parse Conversation tokens: 12,345
+usage["content_allocation"] = int(match.group(1).replace(",", ""))
+# Parse Conversation tokens: 12,345
elif "Conversation tokens:" in detail_line:
-match = re.search(r'Conversation tokens:\s*([\d,]+)', detail_line)
+match = re.search(r"Conversation tokens:\s*([\d,]+)", detail_line)
if match:
-usage['conversation_tokens'] = int(match.group(1).replace(',', ''))
+usage["conversation_tokens"] = int(match.group(1).replace(",", ""))
# Parse Remaining tokens: 825,515
elif "Remaining tokens:" in detail_line:
-match = re.search(r'Remaining tokens:\s*([\d,]+)', detail_line)
+match = re.search(r"Remaining tokens:\s*([\d,]+)", detail_line)
if match:
-usage['remaining_tokens'] = int(match.group(1).replace(',', ''))
+usage["remaining_tokens"] = int(match.group(1).replace(",", ""))
if usage: # Only add if we found some usage data
usage_logs.append(usage)
return usage_logs
-def extract_conversation_token_usage(self, logs: str) -> List[int]:
+def extract_conversation_token_usage(self, logs: str) -> list[int]:
"""Extract conversation token usage from logs"""
usage_values = []
# Look for conversation token usage logs
-pattern = r'Conversation history token usage:\s*([\d,]+)'
+pattern = r"Conversation history token usage:\s*([\d,]+)"
matches = re.findall(pattern, logs)
for match in matches:
-usage_values.append(int(match.replace(',', '')))
+usage_values.append(int(match.replace(",", "")))
return usage_values
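Both extractors share one idiom: capture a comma-grouped number with `([\d,]+)`, then strip the separators before `int()`. A standalone sketch against log lines shaped like the ones quoted in the comments (sample values are illustrative):

import re

sample = """[CONVERSATION_DEBUG] Token budget calculation:
Total capacity: 1,048,576
Conversation tokens: 12,345
successfully embedded 1 files (146 tokens)"""

def parse_count(text: str, label: str):
    """Return the integer after `label:`, tolerating thousands separators."""
    match = re.search(rf"{label}:\s*([\d,]+)", text)
    return int(match.group(1).replace(",", "")) if match else None

print(parse_count(sample, "Total capacity"))       # 1048576
print(parse_count(sample, "Conversation tokens"))  # 12345

# The embedding logs use a different shape: "(<n> tokens)"
embed = re.search(r"\((\d+) tokens\)", sample)
print(int(embed.group(1)) if embed else None)      # 146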
def run_test(self) -> bool:
@@ -111,11 +110,11 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
# Create additional test files for this test - make them substantial enough to see token differences
file1_content = """def fibonacci(n):
'''Calculate fibonacci number recursively
This is a classic recursive algorithm that demonstrates
the exponential time complexity of naive recursion.
For large values of n, this becomes very slow.
Time complexity: O(2^n)
Space complexity: O(n) due to call stack
'''
@@ -125,10 +124,10 @@ class TokenAllocationValidationTest(BaseSimulatorTest):
def factorial(n):
'''Calculate factorial using recursion
More efficient than fibonacci as each value
is calculated only once.
Time complexity: O(n)
Space complexity: O(n) due to call stack
'''
@@ -157,14 +156,14 @@ if __name__ == "__main__":
for i in range(10):
print(f" F({i}) = {fibonacci(i)}")
"""
file2_content = """class Calculator:
'''Advanced calculator class with error handling and logging'''
def __init__(self):
self.history = []
self.last_result = 0
def add(self, a, b):
'''Addition with history tracking'''
result = a + b
@@ -172,7 +171,7 @@ if __name__ == "__main__":
self.history.append(operation)
self.last_result = result
return result
def multiply(self, a, b):
'''Multiplication with history tracking'''
result = a * b
@@ -180,20 +179,20 @@ if __name__ == "__main__":
self.history.append(operation)
self.last_result = result
return result
def divide(self, a, b):
'''Division with error handling and history tracking'''
if b == 0:
error_msg = f"Division by zero error: {a} / {b}"
self.history.append(error_msg)
raise ValueError("Cannot divide by zero")
result = a / b
operation = f"{a} / {b} = {result}"
self.history.append(operation)
self.last_result = result
return result
def power(self, base, exponent):
'''Exponentiation with history tracking'''
result = base ** exponent
@@ -201,11 +200,11 @@ if __name__ == "__main__":
self.history.append(operation)
self.last_result = result
return result
def get_history(self):
'''Return calculation history'''
return self.history.copy()
def clear_history(self):
'''Clear calculation history'''
self.history.clear()
@@ -215,32 +214,32 @@ if __name__ == "__main__":
if __name__ == "__main__":
calc = Calculator()
print("=== Calculator Demo ===")
# Perform various calculations
print(f"Addition: {calc.add(10, 20)}")
print(f"Multiplication: {calc.multiply(5, 8)}")
print(f"Division: {calc.divide(100, 4)}")
print(f"Power: {calc.power(2, 8)}")
print("\\nCalculation History:")
for operation in calc.get_history():
print(f" {operation}")
print(f"\\nLast result: {calc.last_result}")
"""
# Create test files
file1_path = self.create_additional_test_file("math_functions.py", file1_content)
file2_path = self.create_additional_test_file("calculator.py", file2_content)
# Track continuation IDs to validate each step generates new ones
continuation_ids = []
# Step 1: Initial chat with first file
self.logger.info(" Step 1: Initial chat with file1 - checking token allocation")
-step1_start_time = datetime.datetime.now()
+datetime.datetime.now()
response1, continuation_id1 = self.call_mcp_tool(
"chat",
{
@@ -260,31 +259,33 @@ if __name__ == "__main__":
# Get logs and analyze file processing (Step 1 is new conversation, no conversation debug logs expected)
logs_step1 = self.get_recent_server_logs()
# For Step 1, check for file embedding logs instead of conversation usage
file_embedding_logs_step1 = [
-line for line in logs_step1.split('\n')
-if 'successfully embedded' in line and 'files' in line and 'tokens' in line
+line
+for line in logs_step1.split("\n")
+if "successfully embedded" in line and "files" in line and "tokens" in line
]
if not file_embedding_logs_step1:
self.logger.error(" ❌ Step 1: No file embedding logs found")
return False
# Extract file token count from embedding logs
step1_file_tokens = 0
for log in file_embedding_logs_step1:
# Look for pattern like "successfully embedded 1 files (146 tokens)"
import re
-match = re.search(r'\((\d+) tokens\)', log)
+match = re.search(r"\((\d+) tokens\)", log)
if match:
step1_file_tokens = int(match.group(1))
break
self.logger.info(f" 📊 Step 1 File Processing - Embedded files: {step1_file_tokens:,} tokens")
# Validate that file1 is actually mentioned in the embedding logs (check for actual filename)
-file1_mentioned = any('math_functions.py' in log for log in file_embedding_logs_step1)
+file1_mentioned = any("math_functions.py" in log for log in file_embedding_logs_step1)
if not file1_mentioned:
# Debug: show what files were actually found in the logs
self.logger.debug(" 📋 Files found in embedding logs:")
@@ -300,8 +301,10 @@ if __name__ == "__main__":
# Continue test - the important thing is that files were processed
# Step 2: Different tool continuing same conversation - should build conversation history
self.logger.info(" Step 2: Analyze tool continuing chat conversation - checking conversation history buildup")
self.logger.info(
" Step 2: Analyze tool continuing chat conversation - checking conversation history buildup"
)
response2, continuation_id2 = self.call_mcp_tool(
"analyze",
{
@@ -314,12 +317,12 @@ if __name__ == "__main__":
)
if not response2 or not continuation_id2:
self.logger.error(" ❌ Step 2 failed - no response or continuation ID")
self.logger.error(" ❌ Step 2 failed - no response or continuation ID")
return False
self.logger.info(f" ✅ Step 2 completed with continuation_id: {continuation_id2[:8]}...")
continuation_ids.append(continuation_id2)
# Validate that we got a different continuation ID
if continuation_id2 == continuation_id1:
self.logger.error(" ❌ Step 2: Got same continuation ID as Step 1 - continuation not working")
@@ -328,33 +331,37 @@ if __name__ == "__main__":
# Get logs and analyze token usage
logs_step2 = self.get_recent_server_logs()
usage_step2 = self.extract_conversation_usage_logs(logs_step2)
if len(usage_step2) < 2:
self.logger.warning(f" ⚠️ Step 2: Only found {len(usage_step2)} conversation usage logs, expected at least 2")
# Debug: Look for any CONVERSATION_DEBUG logs
conversation_debug_lines = [line for line in logs_step2.split('\n') if 'CONVERSATION_DEBUG' in line]
self.logger.warning(
f" ⚠️ Step 2: Only found {len(usage_step2)} conversation usage logs, expected at least 2"
)
# Debug: Look for any CONVERSATION_DEBUG logs
conversation_debug_lines = [line for line in logs_step2.split("\n") if "CONVERSATION_DEBUG" in line]
self.logger.debug(f" 📋 Found {len(conversation_debug_lines)} CONVERSATION_DEBUG lines in step 2")
if conversation_debug_lines:
self.logger.debug(" 📋 Recent CONVERSATION_DEBUG lines:")
for line in conversation_debug_lines[-10:]: # Show last 10
self.logger.debug(f" {line}")
# If we have at least 1 usage log, continue with adjusted expectations
if len(usage_step2) >= 1:
self.logger.info(" 📋 Continuing with single usage log for analysis")
else:
self.logger.error(" ❌ No conversation usage logs found at all")
return False
latest_usage_step2 = usage_step2[-1] # Get most recent usage
self.logger.info(f" 📊 Step 2 Token Usage - Total Capacity: {latest_usage_step2.get('total_capacity', 0):,}, "
f"Conversation: {latest_usage_step2.get('conversation_tokens', 0):,}, "
f"Remaining: {latest_usage_step2.get('remaining_tokens', 0):,}")
self.logger.info(
f" 📊 Step 2 Token Usage - Total Capacity: {latest_usage_step2.get('total_capacity', 0):,}, "
f"Conversation: {latest_usage_step2.get('conversation_tokens', 0):,}, "
f"Remaining: {latest_usage_step2.get('remaining_tokens', 0):,}"
)
# Step 3: Continue conversation with additional file - should show increased token usage
self.logger.info(" Step 3: Continue conversation with file1 + file2 - checking token growth")
response3, continuation_id3 = self.call_mcp_tool(
"chat",
{
@@ -376,26 +383,30 @@ if __name__ == "__main__":
# Get logs and analyze final token usage
logs_step3 = self.get_recent_server_logs()
usage_step3 = self.extract_conversation_usage_logs(logs_step3)
self.logger.info(f" 📋 Found {len(usage_step3)} total conversation usage logs")
if len(usage_step3) < 3:
self.logger.warning(f" ⚠️ Step 3: Only found {len(usage_step3)} conversation usage logs, expected at least 3")
self.logger.warning(
f" ⚠️ Step 3: Only found {len(usage_step3)} conversation usage logs, expected at least 3"
)
# Let's check if we have at least some logs to work with
if len(usage_step3) == 0:
self.logger.error(" ❌ No conversation usage logs found at all")
# Debug: show some recent logs
-recent_lines = logs_step3.split('\n')[-50:]
+recent_lines = logs_step3.split("\n")[-50:]
self.logger.debug(" 📋 Recent log lines:")
for line in recent_lines:
if line.strip() and "CONVERSATION_DEBUG" in line:
self.logger.debug(f" {line}")
return False
latest_usage_step3 = usage_step3[-1] # Get most recent usage
self.logger.info(f" 📊 Step 3 Token Usage - Total Capacity: {latest_usage_step3.get('total_capacity', 0):,}, "
f"Conversation: {latest_usage_step3.get('conversation_tokens', 0):,}, "
f"Remaining: {latest_usage_step3.get('remaining_tokens', 0):,}")
self.logger.info(
f" 📊 Step 3 Token Usage - Total Capacity: {latest_usage_step3.get('total_capacity', 0):,}, "
f"Conversation: {latest_usage_step3.get('conversation_tokens', 0):,}, "
f"Remaining: {latest_usage_step3.get('remaining_tokens', 0):,}"
)
# Validation: Check token processing and conversation history
self.logger.info(" 📋 Validating token processing and conversation history...")
@@ -405,14 +416,14 @@ if __name__ == "__main__":
step2_remaining = 0
step3_conversation = 0
step3_remaining = 0
if len(usage_step2) > 0:
-step2_conversation = latest_usage_step2.get('conversation_tokens', 0)
-step2_remaining = latest_usage_step2.get('remaining_tokens', 0)
+step2_conversation = latest_usage_step2.get("conversation_tokens", 0)
+step2_remaining = latest_usage_step2.get("remaining_tokens", 0)
if len(usage_step3) >= len(usage_step2) + 1: # Should have one more log than step2
-step3_conversation = latest_usage_step3.get('conversation_tokens', 0)
-step3_remaining = latest_usage_step3.get('remaining_tokens', 0)
+step3_conversation = latest_usage_step3.get("conversation_tokens", 0)
+step3_remaining = latest_usage_step3.get("remaining_tokens", 0)
else:
# Use step2 values as fallback
step3_conversation = step2_conversation
@@ -421,62 +432,78 @@ if __name__ == "__main__":
# Validation criteria
criteria = []
# 1. Step 1 should have processed files successfully
step1_processed_files = step1_file_tokens > 0
criteria.append(("Step 1 processed files successfully", step1_processed_files))
# 2. Step 2 should have conversation history (if continuation worked)
-step2_has_conversation = step2_conversation > 0 if len(usage_step2) > 0 else True  # Pass if no logs (might be different issue)
+step2_has_conversation = (
+step2_conversation > 0 if len(usage_step2) > 0 else True
+)  # Pass if no logs (might be different issue)
step2_has_remaining = step2_remaining > 0 if len(usage_step2) > 0 else True
criteria.append(("Step 2 has conversation history", step2_has_conversation))
criteria.append(("Step 2 has remaining tokens", step2_has_remaining))
# 3. Step 3 should show conversation growth
-step3_has_conversation = step3_conversation >= step2_conversation if len(usage_step3) > len(usage_step2) else True
+step3_has_conversation = (
+step3_conversation >= step2_conversation if len(usage_step3) > len(usage_step2) else True
+)
criteria.append(("Step 3 maintains conversation history", step3_has_conversation))
# 4. Check that we got some conversation usage logs for continuation calls
has_conversation_logs = len(usage_step3) > 0
criteria.append(("Found conversation usage logs", has_conversation_logs))
# 5. Validate unique continuation IDs per response
unique_continuation_ids = len(set(continuation_ids)) == len(continuation_ids)
criteria.append(("Each response generated unique continuation ID", unique_continuation_ids))
# 6. Validate continuation IDs were different from each step
-step_ids_different = len(continuation_ids) == 3 and continuation_ids[0] != continuation_ids[1] and continuation_ids[1] != continuation_ids[2]
+step_ids_different = (
+len(continuation_ids) == 3
+and continuation_ids[0] != continuation_ids[1]
+and continuation_ids[1] != continuation_ids[2]
+)
criteria.append(("All continuation IDs are different", step_ids_different))
# Log detailed analysis
self.logger.info(f" 📊 Token Processing Analysis:")
self.logger.info(" 📊 Token Processing Analysis:")
self.logger.info(f" Step 1 - File tokens: {step1_file_tokens:,} (new conversation)")
self.logger.info(f" Step 2 - Conversation: {step2_conversation:,}, Remaining: {step2_remaining:,}")
self.logger.info(f" Step 3 - Conversation: {step3_conversation:,}, Remaining: {step3_remaining:,}")
# Log continuation ID analysis
self.logger.info(f" 📊 Continuation ID Analysis:")
self.logger.info(" 📊 Continuation ID Analysis:")
self.logger.info(f" Step 1 ID: {continuation_ids[0][:8]}... (generated)")
self.logger.info(f" Step 2 ID: {continuation_ids[1][:8]}... (generated from Step 1)")
self.logger.info(f" Step 3 ID: {continuation_ids[2][:8]}... (generated from Step 2)")
# Check for file mentions in step 3 (should include both files)
# Look for file processing in conversation memory logs and tool embedding logs
-file2_mentioned_step3 = any('calculator.py' in log for log in logs_step3.split('\n') if ('embedded' in log.lower() and ('conversation' in log.lower() or 'tool' in log.lower())))
-file1_still_mentioned_step3 = any('math_functions.py' in log for log in logs_step3.split('\n') if ('embedded' in log.lower() and ('conversation' in log.lower() or 'tool' in log.lower())))
-self.logger.info(f" 📊 File Processing in Step 3:")
+file2_mentioned_step3 = any(
+"calculator.py" in log
+for log in logs_step3.split("\n")
+if ("embedded" in log.lower() and ("conversation" in log.lower() or "tool" in log.lower()))
+)
+file1_still_mentioned_step3 = any(
+"math_functions.py" in log
+for log in logs_step3.split("\n")
+if ("embedded" in log.lower() and ("conversation" in log.lower() or "tool" in log.lower()))
+)
+self.logger.info(" 📊 File Processing in Step 3:")
self.logger.info(f" File1 (math_functions.py) mentioned: {file1_still_mentioned_step3}")
self.logger.info(f" File2 (calculator.py) mentioned: {file2_mentioned_step3}")
-# Add file increase validation
+# Add file increase validation
step3_file_increase = file2_mentioned_step3 # New file should be visible
criteria.append(("Step 3 shows new file being processed", step3_file_increase))
# Check validation criteria
passed_criteria = sum(1 for _, passed in criteria if passed)
total_criteria = len(criteria)
self.logger.info(f" 📊 Validation criteria: {passed_criteria}/{total_criteria}")
for criterion, passed in criteria:
status = "" if passed else ""
@@ -484,15 +511,11 @@ if __name__ == "__main__":
# Check for file embedding logs
file_embedding_logs = [
-line for line in logs_step3.split('\n')
-if 'tool embedding' in line and 'files' in line
-]
-conversation_logs = [
-line for line in logs_step3.split('\n')
-if 'conversation history' in line.lower()
+line for line in logs_step3.split("\n") if "tool embedding" in line and "files" in line
]
+conversation_logs = [line for line in logs_step3.split("\n") if "conversation history" in line.lower()]
self.logger.info(f" 📊 File embedding logs: {len(file_embedding_logs)}")
self.logger.info(f" 📊 Conversation history logs: {len(conversation_logs)}")
@@ -516,13 +539,13 @@ if __name__ == "__main__":
def main():
"""Run the token allocation validation test"""
import sys
verbose = "--verbose" in sys.argv or "-v" in sys.argv
test = TokenAllocationValidationTest(verbose=verbose)
success = test.run_test()
sys.exit(0 if success else 1)
if __name__ == "__main__":
-main()
+main()
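Usage note: the entry point reads `--verbose`/`-v` straight from `sys.argv` and converts the boolean test result into a process exit code. Assuming the file is runnable as a script (the path and import below are illustrative, not confirmed by the diff), it can be driven like this:

# From a shell (exit code 0 on success, 1 on failure):
#   python simulator_tests/test_token_allocation.py --verbose
#
# Or programmatically; the module path here is hypothetical:
from simulator_tests.test_token_allocation import TokenAllocationValidationTest

test = TokenAllocationValidationTest(verbose=True)
raise SystemExit(0 if test.run_test() else 1)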