150 lines
4.6 KiB
Python
150 lines
4.6 KiB
Python
from datetime import datetime
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
|
|
from config import (
|
|
CONTAINER_MEMORY_LIMIT,
|
|
CONTAINER_CPU_QUOTA,
|
|
CONTAINER_CPU_PERIOD,
|
|
MAX_CONCURRENT_SESSIONS,
|
|
USE_ASYNC_DOCKER,
|
|
USE_DATABASE_STORAGE,
|
|
)
|
|
from session_manager import session_manager
|
|
from host_ip_detector import async_get_host_ip
|
|
from resource_manager import check_system_resources
|
|
from http_pool import get_connection_pool_stats
|
|
from database import get_database_stats
|
|
from container_health import get_container_health_stats, get_container_health_history
|
|
from logging_config import get_logger
|
|
|
|
# Module-level logger, obtained from logging_config.get_logger for this module's name.
logger = get_logger(__name__)
|
|
|
|
# Router the health endpoints in this module are registered on; created with the "health" tag.
router = APIRouter(tags=["health"])
|
|
|
|
|
|
@router.get("/health/container")
async def get_container_health():
    """Return the container health statistics.

    Serves ``GET /health/container`` by passing through whatever
    ``get_container_health_stats()`` returns when called without arguments.
    """
    return get_container_health_stats()
|
|
|
|
|
|
@router.get("/health/container/{session_id}")
async def get_session_container_health(session_id: str):
    """Return container health details for one session.

    Args:
        session_id: Session identifier taken from the URL path.

    Returns:
        A dict with the keys ``session_id``, ``container_id`` (read from the
        session object), ``stats`` (the ``session_<session_id>`` entry of the
        health stats, or an empty dict when that key is absent) and
        ``recent_history`` (history requested with ``limit=20``).

    Raises:
        HTTPException: 404 when the session manager returns no session for
            ``session_id``.
    """
    current_session = await session_manager.get_session(session_id)
    if not current_session:
        raise HTTPException(status_code=404, detail="Session not found")

    # Both lookups happen before the response is assembled, stats first.
    health_stats = get_container_health_stats(session_id)
    recent_entries = get_container_health_history(session_id, limit=20)

    # Fill the payload key by key so values are evaluated in response order.
    payload = {"session_id": session_id}
    payload["container_id"] = current_session.container_id
    payload["stats"] = health_stats.get(f"session_{session_id}", {})
    payload["recent_history"] = recent_entries
    return payload
|
|
|
|
|
|
@router.get("/health")
async def health_check():
    """Probe each subsystem and report the overall service health.

    Every probe runs inside its own ``try`` block: a failure is logged as a
    warning and recorded in the response instead of aborting the remaining
    checks.

    Returns:
        A dict describing the service. Its ``status`` is:

        * ``"unhealthy"`` when a resource alert has level ``"critical"``, or
          when the Docker ping, host IP detection or HTTP pool check did not
          succeed;
        * ``"degraded"`` when resource alerts exist but none of the above
          applies;
        * ``"healthy"`` otherwise.

        The ``database`` key is present only when database storage is enabled
        and stats were produced; ``container_health`` is present only when
        container health stats are non-empty.
    """
    # --- Docker daemon -----------------------------------------------------
    try:
        docker_ok = await session_manager.docker_service.ping()
    except Exception as e:
        logger.warning(f"Docker health check failed: {e}")
        docker_ok = False

    # --- Host IP detection -------------------------------------------------
    try:
        detected_host_ip = await async_get_host_ip()
    except Exception as e:
        logger.warning(f"Host IP detection failed: {e}")
        detected_host_ip = None
        host_ip_ok = False
    else:
        host_ip_ok = True

    # --- System resources --------------------------------------------------
    try:
        resource_status = check_system_resources()
    except Exception as e:
        logger.warning("Resource monitoring failed", extra={"error": str(e)})
        resource_status = {"error": str(e)}

    # --- HTTP connection pool ----------------------------------------------
    try:
        http_pool_stats = await get_connection_pool_stats()
    except Exception as e:
        logger.warning("HTTP pool stats failed", extra={"error": str(e)})
        http_pool_stats = {"error": str(e)}

    # --- Database (only queried when database storage is enabled) ----------
    database_status = {}
    if USE_DATABASE_STORAGE:
        try:
            database_status = await get_database_stats()
        except Exception as e:
            logger.warning("Database stats failed", extra={"error": str(e)})
            database_status = {"status": "error", "error": str(e)}

    # --- Container health ----------------------------------------------------
    try:
        container_health_stats = get_container_health_stats()
    except Exception as e:
        logger.warning("Container health stats failed", extra={"error": str(e)})
        container_health_stats = {"error": str(e)}

    # --- Derive the overall status -------------------------------------------
    resources_is_dict = isinstance(resource_status, dict)
    resource_alerts = resource_status.get("alerts", []) if resources_is_dict else []
    has_critical_alert = any(
        isinstance(alert, dict) and alert.get("level") == "critical"
        for alert in resource_alerts
    )
    http_healthy = (
        isinstance(http_pool_stats, dict)
        and http_pool_stats.get("status") == "healthy"
    )

    if has_critical_alert or not docker_ok or not host_ip_ok or not http_healthy:
        status = "unhealthy"
    elif resource_alerts:
        status = "degraded"
    else:
        status = "healthy"

    # --- Assemble the response -----------------------------------------------
    running_sessions = sum(
        1 for s in session_manager.sessions.values() if s.status == "running"
    )
    system_resources = (
        resource_status.get("system_resources", {}) if resources_is_dict else {}
    )

    health_data = {
        "status": status,
        "docker": docker_ok,
        "docker_mode": "async" if USE_ASYNC_DOCKER else "sync",
        "host_ip_detection": host_ip_ok,
        "detected_host_ip": detected_host_ip,
        "http_connection_pool": http_pool_stats,
        "storage_backend": "database" if USE_DATABASE_STORAGE else "json_file",
        "active_sessions": running_sessions,
        "resource_limits": {
            "memory_limit": CONTAINER_MEMORY_LIMIT,
            "cpu_quota": CONTAINER_CPU_QUOTA,
            "cpu_period": CONTAINER_CPU_PERIOD,
            "max_concurrent_sessions": MAX_CONCURRENT_SESSIONS,
        },
        "system_resources": system_resources,
        "resource_alerts": resource_alerts,
        "timestamp": datetime.now().isoformat(),
    }

    # Optional sections are attached only when they carry data.
    if USE_DATABASE_STORAGE and database_status:
        health_data["database"] = database_status
    if container_health_stats:
        health_data["container_health"] = container_health_stats

    return health_data
|