fixed all remaining issues with the session manager

This commit is contained in:
2026-01-18 23:28:49 +01:00
parent 0243cfc250
commit 2f5464e1d2
11 changed files with 4040 additions and 101 deletions

View File

@@ -0,0 +1,248 @@
"""
Resource Management and Monitoring Utilities
Provides validation, enforcement, and monitoring of container resource limits
to prevent resource exhaustion attacks and ensure fair resource allocation.
"""
import logging
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, Optional, Tuple

import psutil
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class ResourceLimits:
    """Container resource limits configuration.

    Attributes:
        memory_limit: Memory cap as text: a whole number optionally followed
            by a ``g``, ``m`` or ``k`` suffix (case-insensitive), e.g.
            ``"4g"`` or ``"512m"``. A bare number is accepted as well.
        cpu_quota: CPU quota in microseconds.
        cpu_period: CPU period in microseconds.
    """

    memory_limit: str  # e.g., "4g", "512m"
    cpu_quota: int  # CPU quota in microseconds
    cpu_period: int  # CPU period in microseconds

    def validate(self) -> Tuple[bool, str]:
        """Validate resource limits configuration.

        Checks run in order (memory format, quota, period, quota vs. period)
        and the first failure is reported.

        Returns:
            ``(True, "Valid")`` when every check passes, otherwise
            ``(False, reason)`` describing the first failing check.
        """
        memory_error = self._memory_limit_error()
        if memory_error is not None:
            return False, memory_error

        # Validate CPU quota and period
        if self.cpu_quota <= 0:
            return False, f"CPU quota must be positive, got {self.cpu_quota}"
        if self.cpu_period <= 0:
            return False, f"CPU period must be positive, got {self.cpu_period}"
        # The quota/period ratio is capped at 1 by this check.
        if self.cpu_quota > self.cpu_period:
            return (
                False,
                f"CPU quota ({self.cpu_quota}) cannot exceed CPU period ({self.cpu_period})",
            )
        return True, "Valid"

    def _memory_limit_error(self) -> Optional[str]:
        """Return a reason string if ``memory_limit`` is malformed, else ``None``."""
        format_error = (
            f"Invalid memory limit format: {self.memory_limit}. "
            "Use format like '4g', '512m', '256k'"
        )
        # A non-string value cannot be checked for a suffix; report it as a
        # format problem instead of failing with AttributeError.
        if not isinstance(self.memory_limit, str):
            return format_error

        text = self.memory_limit.lower()
        amount = text[:-1] if text.endswith(("g", "m", "k")) else text
        # Checking only the suffix would accept "g", "abcg", "-5m" or "1.5g";
        # the part before the suffix must be plain ASCII digits.
        if not (amount.isascii() and amount.isdigit()):
            return format_error
        if int(amount) == 0:
            return f"Memory limit must be positive, got {self.memory_limit}"
        return None

    def to_docker_limits(self) -> Dict[str, Any]:
        """Convert to Docker container limits format.

        Returns:
            A dict with the keys ``mem_limit``, ``cpu_quota`` and
            ``cpu_period`` holding this instance's values unchanged.
        """
        return {
            "mem_limit": self.memory_limit,
            "cpu_quota": self.cpu_quota,
            "cpu_period": self.cpu_period,
        }
class ResourceMonitor:
    """Monitor system and container resource usage."""

    # Usage fraction at or above which an alert is "critical" instead of "warning".
    _CRITICAL_USAGE = 0.95

    def __init__(self):
        # Neither attribute is read by the methods defined in this class.
        self._last_check = datetime.now()
        self._alerts_sent = set()  # Track alerts to prevent spam

    def get_system_resources(self) -> Dict[str, Any]:
        """Get current system resource usage.

        Returns:
            A dict with ``memory_percent`` and ``cpu_percent`` as fractions
            (the psutil percentage divided by 100), ``memory_used_gb`` and
            ``memory_total_gb`` (bytes divided by 1024**3) and ``cpu_count``.
            An empty dict is returned if reading the values fails; the
            failure is logged as a warning.
        """
        try:
            memory = psutil.virtual_memory()
            # interval=1: psutil samples CPU usage over a one-second window,
            # so this call blocks for that long (see the psutil docs).
            cpu = psutil.cpu_percent(interval=1)
            return {
                "memory_percent": memory.percent / 100.0,
                "memory_used_gb": memory.used / (1024**3),
                "memory_total_gb": memory.total / (1024**3),
                "cpu_percent": cpu / 100.0,
                "cpu_count": psutil.cpu_count(),
            }
        except Exception as e:
            # Best effort: callers receive {} and treat missing values as 0.
            logger.warning("Failed to get system resources: %s", e)
            return {}

    @staticmethod
    def _build_alert(
        kind: str, label: str, usage: float, threshold: float
    ) -> Dict[str, Any]:
        """Build one alert record for a resource whose usage reached its threshold."""
        is_critical = usage >= ResourceMonitor._CRITICAL_USAGE
        return {
            "type": kind,
            "level": "critical" if is_critical else "warning",
            "message": f"System {label} usage at {usage:.1%}",
            "current": usage,
            "threshold": threshold,
        }

    def check_resource_limits(
        self, limits: "ResourceLimits", warning_thresholds: Dict[str, float]
    ) -> Dict[str, Any]:
        """Check if system resources are approaching limits.

        Args:
            limits: Accepted for interface compatibility; this method does
                not consult it and compares system-wide usage only.
            warning_thresholds: Usage fractions keyed by ``"memory"`` and
                ``"cpu"``. Missing keys default to 0.8 and 0.9 respectively.

        Returns:
            A dict with ``system_resources`` (the raw reading), ``alerts``
            (memory alert first, then CPU) and ``timestamp``.
        """
        system_resources = self.get_system_resources()

        # (alert type / threshold key, label used in the message,
        #  key in the reading, default threshold)
        checks = (
            ("memory", "memory", "memory_percent", 0.8),
            ("cpu", "CPU", "cpu_percent", 0.9),
        )
        alerts = []
        for kind, label, reading_key, default_threshold in checks:
            # A missing reading counts as 0 usage, so it never raises an alert.
            usage = system_resources.get(reading_key, 0)
            threshold = warning_thresholds.get(kind, default_threshold)
            if usage >= threshold:
                alerts.append(self._build_alert(kind, label, usage, threshold))

        return {
            "system_resources": system_resources,
            "alerts": alerts,
            "timestamp": datetime.now(),
        }

    def should_throttle_sessions(self, resource_check: Dict) -> Tuple[bool, str]:
        """Determine if new sessions should be throttled based on resource usage.

        Args:
            resource_check: A result dict as produced by
                ``check_resource_limits``; only its ``alerts`` list is read.

        Returns:
            ``(True, reason)`` when there is at least one critical alert or
            at least two warning alerts, otherwise ``(False, "Resources OK")``.
        """
        alerts = resource_check.get("alerts", [])

        # Critical alerts always throttle
        critical_alerts = [a for a in alerts if a["level"] == "critical"]
        if critical_alerts:
            return (
                True,
                f"Critical resource usage: {[a['message'] for a in critical_alerts]}",
            )

        # Multiple warnings also throttle
        warning_alerts = [a for a in alerts if a["level"] == "warning"]
        if len(warning_alerts) >= 2:
            return (
                True,
                f"Multiple resource warnings: {[a['message'] for a in warning_alerts]}",
            )
        return False, "Resources OK"
class ResourceValidator:
    """Validate and parse resource limit configurations."""

    # Unit suffix -> (multiplier in bytes, unit label returned to the caller).
    _MEMORY_UNITS = {
        "g": (1024**3, "GB"),
        "m": (1024**2, "MB"),
        "k": (1024, "KB"),
    }
    # Limits above this many bytes (32 * 1024**3) are accepted but logged.
    _HIGH_MEMORY_BYTES = 32 * (1024**3)
    # Limits below this many bytes (128 * 1024**2) are accepted but logged.
    _LOW_MEMORY_BYTES = 128 * (1024**2)

    @staticmethod
    def parse_memory_limit(memory_str: str) -> Tuple[int, str]:
        """Parse a memory limit string into a byte count and a unit label.

        Args:
            memory_str: A whole number optionally followed by ``g``, ``m`` or
                ``k`` (case-insensitive, surrounding whitespace ignored).
                A value without a suffix is taken as bytes.

        Returns:
            ``(bytes_val, unit)`` where ``unit`` is ``"GB"``, ``"MB"``,
            ``"KB"`` or ``"bytes"``.

        Raises:
            ValueError: If the value is empty, not a whole number, or not
                positive.
            TypeError: If a non-empty value that is not a string is passed.
        """
        if not memory_str:
            raise ValueError("Memory limit cannot be empty")
        if not isinstance(memory_str, str):
            raise TypeError(
                f"Memory limit must be a string, got {type(memory_str).__name__}"
            )

        text = memory_str.lower().strip()
        # Re-check after stripping so whitespace-only input gets the same
        # clear message instead of an int('') parsing error.
        if not text:
            raise ValueError("Memory limit cannot be empty")

        suffix = text[-1]
        if suffix in ResourceValidator._MEMORY_UNITS:
            multiplier, unit = ResourceValidator._MEMORY_UNITS[suffix]
            amount = text[:-1]
        else:
            # Assume bytes if no unit
            multiplier, unit = 1, "bytes"
            amount = text

        try:
            bytes_val = int(amount) * multiplier
        except ValueError:
            raise ValueError(
                f"Invalid memory limit: {memory_str!r}. "
                "Use format like '4g', '512m', '256k'"
            ) from None

        if bytes_val <= 0:
            raise ValueError(f"Memory limit must be positive, got {bytes_val}")

        # Reasonable limits check
        if bytes_val > ResourceValidator._HIGH_MEMORY_BYTES:
            logger.warning("Very high memory limit: %s bytes", bytes_val)
        return bytes_val, unit

    @staticmethod
    def validate_resource_config(
        config: Dict[str, Any],
    ) -> Tuple[bool, str, Optional["ResourceLimits"]]:
        """Validate complete resource configuration.

        Args:
            config: Mapping that may contain ``memory_limit``, ``cpu_quota``
                and ``cpu_period``. Missing keys default to ``"4g"``,
                ``100000`` and ``100000``.

        Returns:
            ``(True, "Configuration valid", limits)`` on success, otherwise
            ``(False, reason, None)``.
        """
        memory_limit = config.get("memory_limit", "4g")
        # A non-string value would fail later with AttributeError, which the
        # handler below does not catch; reject it up front instead.
        if not isinstance(memory_limit, str):
            return (
                False,
                "Invalid configuration: memory_limit must be a string like '4g', "
                f"got {type(memory_limit).__name__}",
                None,
            )

        try:
            limits = ResourceLimits(
                memory_limit=memory_limit,
                cpu_quota=config.get("cpu_quota", 100000),
                cpu_period=config.get("cpu_period", 100000),
            )
            valid, message = limits.validate()
            if not valid:
                return False, message, None

            # Additional validation
            memory_bytes, _ = ResourceValidator.parse_memory_limit(limits.memory_limit)

            # Warn about potentially problematic configurations
            if memory_bytes < ResourceValidator._LOW_MEMORY_BYTES:
                logger.warning("Very low memory limit may cause container instability")
            return True, "Configuration valid", limits
        except (ValueError, TypeError) as e:
            return False, f"Invalid configuration: {e}", None
# Global instances
# Shared ResourceMonitor used by the module-level check_system_resources() and
# should_throttle_sessions() helpers; created once when this module is imported.
resource_monitor = ResourceMonitor()
def _int_from_env(name: str, default: str) -> int:
    """Read environment variable *name* as an int, naming it in any error."""
    raw = os.getenv(name, default)
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Resource configuration error: {name} must be an integer, got {raw!r}"
        ) from None


def get_resource_limits() -> "ResourceLimits":
    """Get validated resource limits from environment.

    Reads ``CONTAINER_MEMORY_LIMIT`` (default ``"4g"``),
    ``CONTAINER_CPU_QUOTA`` (default ``100000``) and
    ``CONTAINER_CPU_PERIOD`` (default ``100000``), then validates them with
    ``ResourceValidator.validate_resource_config``.

    Returns:
        The validated ``ResourceLimits``.

    Raises:
        ValueError: If a CPU variable is not an integer or the resulting
            configuration fails validation.
    """
    config = {
        "memory_limit": os.getenv("CONTAINER_MEMORY_LIMIT", "4g"),
        # Parsed through the helper so a malformed value names the variable
        # instead of surfacing a bare "invalid literal for int()".
        "cpu_quota": _int_from_env("CONTAINER_CPU_QUOTA", "100000"),
        "cpu_period": _int_from_env("CONTAINER_CPU_PERIOD", "100000"),
    }
    valid, message, limits = ResourceValidator.validate_resource_config(config)
    if not valid or limits is None:
        raise ValueError(f"Resource configuration error: {message}")
    logger.info(
        "Using resource limits: memory=%s, cpu_quota=%s",
        limits.memory_limit,
        limits.cpu_quota,
    )
    return limits
def _threshold_from_env(name: str, default: str) -> float:
    """Read environment variable *name* as a float, naming it in any error."""
    raw = os.getenv(name, default)
    try:
        return float(raw)
    except ValueError:
        raise ValueError(
            f"Resource configuration error: {name} must be a number, got {raw!r}"
        ) from None


def check_system_resources() -> Dict[str, Any]:
    """Check current system resource status.

    Loads the resource limits from the environment, reads the warning
    thresholds from ``MEMORY_WARNING_THRESHOLD`` (default ``0.8``) and
    ``CPU_WARNING_THRESHOLD`` (default ``0.9``), and passes both to the
    shared ``resource_monitor``.

    Returns:
        The dict returned by ``resource_monitor.check_resource_limits``.

    Raises:
        ValueError: If the limits configuration is invalid or a threshold
            variable is not a number.
    """
    limits = get_resource_limits()
    warning_thresholds = {
        "memory": _threshold_from_env("MEMORY_WARNING_THRESHOLD", "0.8"),
        "cpu": _threshold_from_env("CPU_WARNING_THRESHOLD", "0.9"),
    }
    return resource_monitor.check_resource_limits(limits, warning_thresholds)
def should_throttle_sessions() -> Tuple[bool, str]:
    """Report whether new sessions should be held back for lack of resources.

    Takes a fresh reading via ``check_system_resources()`` and hands it to
    the shared ``resource_monitor`` for the throttling decision.

    Returns:
        The ``(throttle, reason)`` pair produced by
        ``resource_monitor.should_throttle_sessions``.
    """
    return resource_monitor.should_throttle_sessions(check_system_resources())