"""Health endpoints: overall service health and container health statistics.

Routes registered on ``router``:

* ``GET /health/container`` - container health statistics.
* ``GET /health/container/{session_id}`` - statistics and recent history for
  one session's container.
* ``GET /health`` - aggregated service health report.

Route handlers intentionally carry no return annotations: FastAPI interprets
a handler's return annotation as its response model, which would change how
the returned dictionaries are validated and serialized.
"""

from datetime import datetime

from fastapi import APIRouter, HTTPException

from config import (
    CONTAINER_MEMORY_LIMIT,
    CONTAINER_CPU_QUOTA,
    CONTAINER_CPU_PERIOD,
    MAX_CONCURRENT_SESSIONS,
    USE_ASYNC_DOCKER,
    USE_DATABASE_STORAGE,
)
from session_manager import session_manager
from host_ip_detector import async_get_host_ip
from resource_manager import check_system_resources
from http_pool import get_connection_pool_stats
from database import get_database_stats
from container_health import get_container_health_stats, get_container_health_history
from logging_config import get_logger

logger = get_logger(__name__)

router = APIRouter(tags=["health"])


@router.get("/health/container")
async def get_container_health():
    """Return the result of ``get_container_health_stats()`` called with no arguments."""
    return get_container_health_stats()


@router.get("/health/container/{session_id}")
async def get_session_container_health(session_id: str):
    """Return container health statistics and recent history for one session.

    Args:
        session_id: Identifier looked up through ``session_manager.get_session``.

    Returns:
        A dict with the session id, the session's ``container_id``, the
        ``session_<id>`` entry of the health statistics (``{}`` when absent)
        and up to 20 history entries.

    Raises:
        HTTPException: 404 when the session manager returns no session.
    """
    session = await session_manager.get_session(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")

    stats = get_container_health_stats(session_id)
    history = get_container_health_history(session_id, limit=20)
    return {
        "session_id": session_id,
        "container_id": session.container_id,
        "stats": stats.get(f"session_{session_id}", {}),
        "recent_history": history,
    }


async def _check_docker():
    """Ping the Docker service; return the ping result, or ``False`` if it raises."""
    try:
        return await session_manager.docker_service.ping()
    except Exception as e:
        logger.warning(f"Docker health check failed: {e}")
        return False


async def _detect_host_ip():
    """Return ``(ok, host_ip)``; ``(False, None)`` when detection raises."""
    try:
        return True, await async_get_host_ip()
    except Exception as e:
        logger.warning(f"Host IP detection failed: {e}")
        return False, None


def _collect_resource_status():
    """Return the system resource report, or ``{"error": ...}`` if it raises."""
    try:
        return check_system_resources()
    except Exception as e:
        logger.warning("Resource monitoring failed", extra={"error": str(e)})
        return {"error": str(e)}


async def _collect_http_pool_stats():
    """Return HTTP connection pool statistics, or ``{"error": ...}`` if it raises."""
    try:
        return await get_connection_pool_stats()
    except Exception as e:
        logger.warning("HTTP pool stats failed", extra={"error": str(e)})
        return {"error": str(e)}


async def _collect_database_status():
    """Return database statistics; ``{}`` when database storage is disabled."""
    if not USE_DATABASE_STORAGE:
        return {}
    try:
        return await get_database_stats()
    except Exception as e:
        logger.warning("Database stats failed", extra={"error": str(e)})
        return {"status": "error", "error": str(e)}


def _collect_container_health():
    """Return container health statistics, or ``{"error": ...}`` if it raises."""
    try:
        return get_container_health_stats()
    except Exception as e:
        logger.warning("Container health stats failed", extra={"error": str(e)})
        return {"error": str(e)}


def _derive_status(docker_ok, host_ip_ok, http_healthy, resource_alerts):
    """Map probe outcomes to ``"unhealthy"``, ``"degraded"`` or ``"healthy"``.

    ``"unhealthy"`` when any alert dict has ``level == "critical"`` or any of
    the three core probes failed; ``"degraded"`` when only non-critical
    alerts exist; ``"healthy"`` otherwise.
    """
    has_critical_alert = any(
        isinstance(alert, dict) and alert.get("level") == "critical"
        for alert in resource_alerts
    )
    if has_critical_alert or not (docker_ok and host_ip_ok and http_healthy):
        return "unhealthy"
    if resource_alerts:
        return "degraded"
    return "healthy"


@router.get("/health")
async def health_check():
    """Return an aggregated health report for the service.

    Probes run sequentially: Docker ping, host IP detection, system
    resources, HTTP connection pool, database (only when
    ``USE_DATABASE_STORAGE`` is set) and container health. A probe that
    raises is logged as a warning and reported in the payload rather than
    propagated.

    Returns:
        A dict with the overall ``status`` plus per-probe details. The
        ``database`` and ``container_health`` keys are present only when the
        corresponding probe produced a non-empty result.
    """
    docker_ok = await _check_docker()
    host_ip_ok, detected_host_ip = await _detect_host_ip()
    resource_status = _collect_resource_status()
    http_pool_stats = await _collect_http_pool_stats()
    database_status = await _collect_database_status()
    container_health_stats = _collect_container_health()

    if isinstance(resource_status, dict):
        # ``or []`` also covers an explicit ``"alerts": None`` entry, which
        # would otherwise make the alert scan raise TypeError.
        resource_alerts = resource_status.get("alerts") or []
        system_resources = resource_status.get("system_resources", {})
    else:
        resource_alerts = []
        system_resources = {}

    http_healthy = (
        isinstance(http_pool_stats, dict)
        and http_pool_stats.get("status") == "healthy"
    )

    active_sessions = sum(
        1 for s in session_manager.sessions.values() if s.status == "running"
    )

    health_data = {
        "status": _derive_status(
            docker_ok, host_ip_ok, http_healthy, resource_alerts
        ),
        "docker": docker_ok,
        "docker_mode": "async" if USE_ASYNC_DOCKER else "sync",
        "host_ip_detection": host_ip_ok,
        "detected_host_ip": detected_host_ip,
        "http_connection_pool": http_pool_stats,
        "storage_backend": "database" if USE_DATABASE_STORAGE else "json_file",
        "active_sessions": active_sessions,
        "resource_limits": {
            "memory_limit": CONTAINER_MEMORY_LIMIT,
            "cpu_quota": CONTAINER_CPU_QUOTA,
            "cpu_period": CONTAINER_CPU_PERIOD,
            "max_concurrent_sessions": MAX_CONCURRENT_SESSIONS,
        },
        "system_resources": system_resources,
        "resource_alerts": resource_alerts,
        # NOTE(review): naive local time (no UTC offset), kept unchanged so
        # the emitted string format stays the same.
        "timestamp": datetime.now().isoformat(),
    }

    if USE_DATABASE_STORAGE and database_status:
        health_data["database"] = database_status
    if container_health_stats:
        health_data["container_health"] = container_health_stats

    return health_data