refactored the big main.py file
This commit is contained in:
410
session-manager/session_manager.py
Normal file
410
session-manager/session_manager.py
Normal file
@@ -0,0 +1,410 @@
|
||||
import os
|
||||
import uuid
|
||||
import json
|
||||
import asyncio
|
||||
import shutil
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
from config import (
|
||||
SESSIONS_DIR,
|
||||
SESSIONS_FILE,
|
||||
CONTAINER_IMAGE,
|
||||
MAX_CONCURRENT_SESSIONS,
|
||||
SESSION_TIMEOUT_MINUTES,
|
||||
USE_ASYNC_DOCKER,
|
||||
USE_DATABASE_STORAGE,
|
||||
)
|
||||
from models import SessionData
|
||||
from docker_service import DockerService
|
||||
from database import SessionModel
|
||||
from resource_manager import get_resource_limits, should_throttle_sessions
|
||||
from session_auth import generate_session_auth_token, cleanup_expired_auth_tokens
|
||||
from logging_config import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class SessionManager:
    """Owns the lifecycle of opencode container sessions.

    Keeps an in-memory session table and mirrors it either to a JSON file
    or to the database, depending on USE_DATABASE_STORAGE.
    """

    def __init__(self, docker_service: Optional[DockerService] = None):
        """Initialize the manager.

        Args:
            docker_service: Optional pre-built DockerService (handy for
                tests); when omitted, one is created honoring USE_ASYNC_DOCKER.
        """
        # Use the injected service when given, otherwise build a default one.
        self.docker_service = docker_service or DockerService(use_async=USE_ASYNC_DOCKER)

        # The in-memory cache exists in both modes; only the JSON backend
        # hydrates it eagerly here (the database backend is populated later
        # via _load_sessions_from_database).
        self.sessions: Dict[str, SessionData] = {}
        if USE_DATABASE_STORAGE:
            logger.info("Session storage initialized", extra={"backend": "database"})
        else:
            self._load_sessions_from_file()
            logger.info("Session storage initialized", extra={"backend": "json_file"})

        # Imported lazily — presumably to break an import cycle with
        # container_health; TODO confirm.
        from container_health import get_container_health_monitor

        self.health_monitor = get_container_health_monitor()

        logger.info(
            "SessionManager initialized",
            extra={
                "docker_service_type": type(self.docker_service).__name__,
                "storage_backend": "database" if USE_DATABASE_STORAGE else "json_file",
            },
        )
|
||||
|
||||
def _load_sessions_from_file(self):
|
||||
if SESSIONS_FILE.exists():
|
||||
try:
|
||||
with open(SESSIONS_FILE, "r") as f:
|
||||
data = json.load(f)
|
||||
for session_id, session_dict in data.items():
|
||||
session_dict["created_at"] = datetime.fromisoformat(
|
||||
session_dict["created_at"]
|
||||
)
|
||||
session_dict["last_accessed"] = datetime.fromisoformat(
|
||||
session_dict["last_accessed"]
|
||||
)
|
||||
self.sessions[session_id] = SessionData(**session_dict)
|
||||
logger.info(
|
||||
"Sessions loaded from JSON file",
|
||||
extra={"count": len(self.sessions)},
|
||||
)
|
||||
except (json.JSONDecodeError, KeyError) as e:
|
||||
logger.warning("Could not load sessions file", extra={"error": str(e)})
|
||||
self.sessions = {}
|
||||
|
||||
async def _load_sessions_from_database(self):
|
||||
try:
|
||||
db_sessions = await SessionModel.get_sessions_by_status("running")
|
||||
db_sessions.extend(await SessionModel.get_sessions_by_status("creating"))
|
||||
|
||||
self.sessions = {}
|
||||
for session_dict in db_sessions:
|
||||
session_data = SessionData(**session_dict)
|
||||
self.sessions[session_dict["session_id"]] = session_data
|
||||
|
||||
logger.info(
|
||||
"Sessions loaded from database", extra={"count": len(self.sessions)}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to load sessions from database", extra={"error": str(e)}
|
||||
)
|
||||
self.sessions = {}
|
||||
|
||||
def _save_sessions(self):
|
||||
SESSIONS_DIR.mkdir(exist_ok=True)
|
||||
data = {}
|
||||
for session_id, session in self.sessions.items():
|
||||
data[session_id] = session.dict()
|
||||
|
||||
with open(SESSIONS_FILE, "w") as f:
|
||||
json.dump(data, f, indent=2, default=str)
|
||||
|
||||
def _generate_session_id(self) -> str:
|
||||
return str(uuid.uuid4()).replace("-", "")[:16]
|
||||
|
||||
def _get_available_port(self) -> int:
|
||||
used_ports = {s.port for s in self.sessions.values() if s.port}
|
||||
port = 8081
|
||||
while port in used_ports:
|
||||
port += 1
|
||||
return port
|
||||
|
||||
def _check_container_limits(self) -> bool:
|
||||
active_sessions = sum(
|
||||
1 for s in self.sessions.values() if s.status in ["creating", "running"]
|
||||
)
|
||||
return active_sessions < MAX_CONCURRENT_SESSIONS
|
||||
|
||||
async def _async_check_container_limits(self) -> bool:
|
||||
return self._check_container_limits()
|
||||
|
||||
    async def create_session(self) -> SessionData:
        """Allocate a new session and launch its container in the background.

        Returns the SessionData immediately with status "creating"; a
        fire-and-forget task (async or sync docker path, per
        USE_ASYNC_DOCKER) later flips it to "running" or "error".

        Raises:
            HTTPException: 429 when the MAX_CONCURRENT_SESSIONS cap is
                reached; 503 when should_throttle_sessions() reports
                system resource pressure.
        """
        # Capacity gate. Both branches currently run the same check — the
        # async variant is just a wrapper around the sync one.
        if USE_ASYNC_DOCKER:
            limits_ok = await self._async_check_container_limits()
        else:
            limits_ok = self._check_container_limits()

        if not limits_ok:
            raise HTTPException(
                status_code=429,
                detail=f"Maximum concurrent sessions ({MAX_CONCURRENT_SESSIONS}) reached",
            )

        # System-wide throttle (resource pressure), independent of the cap.
        should_throttle, reason = should_throttle_sessions()
        if should_throttle:
            raise HTTPException(
                status_code=503,
                detail=f"System resource constraints prevent new sessions: {reason}",
            )

        session_id = self._generate_session_id()
        container_name = f"opencode-{session_id}"
        host_dir = str(SESSIONS_DIR / session_id)
        # NOTE(review): port selection is not atomic with container start;
        # two concurrent create_session calls could race on the same port.
        port = self._get_available_port()

        Path(host_dir).mkdir(parents=True, exist_ok=True)

        # Per-session auth token, injected into the container environment
        # by the start-container path.
        auth_token = generate_session_auth_token(session_id)

        session = SessionData(
            session_id=session_id,
            container_name=container_name,
            host_dir=host_dir,
            port=port,
            auth_token=auth_token,
            created_at=datetime.now(),
            last_accessed=datetime.now(),
            status="creating",
        )

        self.sessions[session_id] = session

        if USE_DATABASE_STORAGE:
            try:
                await SessionModel.create_session(
                    {
                        "session_id": session_id,
                        "container_name": container_name,
                        "host_dir": host_dir,
                        "port": port,
                        "auth_token": auth_token,
                        "status": "creating",
                    }
                )
                logger.info(
                    "Session created in database", extra={"session_id": session_id}
                )
            except Exception as e:
                # Best-effort: a DB failure is logged but the in-memory
                # session still proceeds.
                logger.error(
                    "Failed to create session in database",
                    extra={"session_id": session_id, "error": str(e)},
                )

        # Fire-and-forget: container startup continues after we return.
        if USE_ASYNC_DOCKER:
            asyncio.create_task(self._start_container_async(session))
        else:
            asyncio.create_task(self._start_container_sync(session))

        return session
|
||||
|
||||
async def _start_container_async(self, session: SessionData):
|
||||
try:
|
||||
resource_limits = get_resource_limits()
|
||||
|
||||
logger.info(
|
||||
f"Starting container {session.container_name} with resource limits: "
|
||||
f"memory={resource_limits.memory_limit}, cpu_quota={resource_limits.cpu_quota}"
|
||||
)
|
||||
|
||||
container_info = await self.docker_service.create_container(
|
||||
name=session.container_name,
|
||||
image=CONTAINER_IMAGE,
|
||||
volumes={session.host_dir: {"bind": "/app/somedir", "mode": "rw"}},
|
||||
ports={"8080": session.port},
|
||||
environment={
|
||||
"MCP_SERVER": os.getenv("MCP_SERVER", ""),
|
||||
"OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", ""),
|
||||
"ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", ""),
|
||||
"GOOGLE_API_KEY": os.getenv("GOOGLE_API_KEY", ""),
|
||||
"SESSION_AUTH_TOKEN": session.auth_token or "",
|
||||
"SESSION_ID": session.session_id,
|
||||
},
|
||||
network_mode="bridge",
|
||||
mem_limit=resource_limits.memory_limit,
|
||||
cpu_quota=resource_limits.cpu_quota,
|
||||
cpu_period=resource_limits.cpu_period,
|
||||
tmpfs={
|
||||
"/tmp": "rw,noexec,nosuid,size=100m",
|
||||
"/var/tmp": "rw,noexec,nosuid,size=50m",
|
||||
},
|
||||
)
|
||||
|
||||
await self.docker_service.start_container(container_info.container_id)
|
||||
|
||||
session.container_id = container_info.container_id
|
||||
session.status = "running"
|
||||
|
||||
self.sessions[session.session_id] = session
|
||||
|
||||
if USE_DATABASE_STORAGE:
|
||||
try:
|
||||
await SessionModel.update_session(
|
||||
session.session_id,
|
||||
{
|
||||
"container_id": container_info.container_id,
|
||||
"status": "running",
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to update session in database",
|
||||
extra={"session_id": session.session_id, "error": str(e)},
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Container started successfully",
|
||||
extra={
|
||||
"session_id": session.session_id,
|
||||
"container_name": session.container_name,
|
||||
"container_id": container_info.container_id,
|
||||
"port": session.port,
|
||||
},
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
session.status = "error"
|
||||
self._save_sessions()
|
||||
logger.error(f"Failed to start container {session.container_name}: {e}")
|
||||
|
||||
async def _start_container_sync(self, session: SessionData):
|
||||
try:
|
||||
resource_limits = get_resource_limits()
|
||||
|
||||
logger.info(
|
||||
f"Starting container {session.container_name} with resource limits: "
|
||||
f"memory={resource_limits.memory_limit}, cpu_quota={resource_limits.cpu_quota}"
|
||||
)
|
||||
|
||||
container_info = await self.docker_service.create_container(
|
||||
name=session.container_name,
|
||||
image=CONTAINER_IMAGE,
|
||||
volumes={session.host_dir: {"bind": "/app/somedir", "mode": "rw"}},
|
||||
ports={"8080": session.port},
|
||||
environment={
|
||||
"MCP_SERVER": os.getenv("MCP_SERVER", ""),
|
||||
"OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", ""),
|
||||
"ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", ""),
|
||||
"GOOGLE_API_KEY": os.getenv("GOOGLE_API_KEY", ""),
|
||||
"SESSION_AUTH_TOKEN": session.auth_token or "",
|
||||
"SESSION_ID": session.session_id,
|
||||
},
|
||||
network_mode="bridge",
|
||||
mem_limit=resource_limits.memory_limit,
|
||||
cpu_quota=resource_limits.cpu_quota,
|
||||
cpu_period=resource_limits.cpu_period,
|
||||
tmpfs={
|
||||
"/tmp": "rw,noexec,nosuid,size=100m",
|
||||
"/var/tmp": "rw,noexec,nosuid,size=50m",
|
||||
},
|
||||
)
|
||||
|
||||
session.container_id = container_info.container_id
|
||||
session.status = "running"
|
||||
|
||||
self.sessions[session.session_id] = session
|
||||
|
||||
if USE_DATABASE_STORAGE:
|
||||
try:
|
||||
await SessionModel.update_session(
|
||||
session.session_id,
|
||||
{
|
||||
"container_id": container_info.container_id,
|
||||
"status": "running",
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to update session in database",
|
||||
extra={"session_id": session.session_id, "error": str(e)},
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Container started successfully",
|
||||
extra={
|
||||
"session_id": session.session_id,
|
||||
"container_name": session.container_name,
|
||||
"container_id": container_info.container_id,
|
||||
"port": session.port,
|
||||
},
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
session.status = "error"
|
||||
self._save_sessions()
|
||||
logger.error(f"Failed to start container {session.container_name}: {e}")
|
||||
|
||||
async def get_session(self, session_id: str) -> Optional[SessionData]:
|
||||
session = self.sessions.get(session_id)
|
||||
if session:
|
||||
session.last_accessed = datetime.now()
|
||||
if USE_DATABASE_STORAGE:
|
||||
try:
|
||||
await SessionModel.update_session(
|
||||
session_id, {"last_accessed": datetime.now()}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to update session access time in database",
|
||||
extra={"session_id": session_id, "error": str(e)},
|
||||
)
|
||||
return session
|
||||
|
||||
if USE_DATABASE_STORAGE:
|
||||
try:
|
||||
db_session = await SessionModel.get_session(session_id)
|
||||
if db_session:
|
||||
session_data = SessionData(**db_session)
|
||||
self.sessions[session_id] = session_data
|
||||
logger.debug(
|
||||
"Session loaded from database", extra={"session_id": session_id}
|
||||
)
|
||||
return session_data
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to load session from database",
|
||||
extra={"session_id": session_id, "error": str(e)},
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
    async def list_sessions(self) -> List[SessionData]:
        """Return all sessions currently in the in-memory table."""
        return list(self.sessions.values())

    async def list_containers_async(self, all: bool = False) -> List:
        """List containers via the docker service.

        Args:
            all: When True, include non-running containers as well (mirrors
                the docker API flag; note it shadows the builtin ``all``).
        """
        return await self.docker_service.list_containers(all=all)
|
||||
|
||||
    async def cleanup_expired_sessions(self):
        """Stop, remove, and forget sessions idle past SESSION_TIMEOUT_MINUTES.

        Best-effort: container and directory cleanup failures are logged but
        do not prevent the session from being dropped from the table. Also
        prunes expired session auth tokens at the end.
        """
        now = datetime.now()
        expired_sessions = []

        for session_id, session in self.sessions.items():
            if now - session.last_accessed > timedelta(minutes=SESSION_TIMEOUT_MINUTES):
                expired_sessions.append(session_id)

                # Tear down the container by name; failures are tolerated
                # (the container may already be gone or never have started).
                try:
                    await self.docker_service.stop_container(
                        session.container_name, timeout=10
                    )
                    await self.docker_service.remove_container(session.container_name)
                    logger.info(f"Cleaned up container {session.container_name}")
                except Exception as e:
                    logger.error(
                        f"Error cleaning up container {session.container_name}: {e}"
                    )

                # Remove the session's working directory from disk.
                try:
                    shutil.rmtree(session.host_dir)
                    logger.info(f"Removed session directory {session.host_dir}")
                except OSError as e:
                    logger.error(
                        f"Error removing session directory {session.host_dir}: {e}"
                    )

        # Deletion is deferred so the dict is not mutated while iterating.
        for session_id in expired_sessions:
            del self.sessions[session_id]

        if expired_sessions:
            # NOTE(review): only the JSON file is flushed here; when
            # USE_DATABASE_STORAGE is enabled, the database rows for expired
            # sessions appear to be left behind — confirm whether that is
            # intended or handled elsewhere.
            self._save_sessions()
            logger.info(f"Cleaned up {len(expired_sessions)} expired sessions")

        expired_tokens = cleanup_expired_auth_tokens()
        if expired_tokens > 0:
            logger.info(f"Cleaned up {expired_tokens} expired authentication tokens")
|
||||
|
||||
|
||||
# Module-level singleton; importing modules share this one instance.
session_manager = SessionManager()
|
||||
Reference in New Issue
Block a user