Refactored the large monolithic main.py into separate, focused modules

This commit is contained in:
2026-02-03 00:17:26 +01:00
parent 5e1cb64a81
commit 9281c0e02a
5 changed files with 582 additions and 1284 deletions

96
session-manager/app.py Normal file
View File

@@ -0,0 +1,96 @@
import asyncio
from contextlib import asynccontextmanager
from fastapi import FastAPI
from config import USE_ASYNC_DOCKER, USE_DATABASE_STORAGE
from session_manager import session_manager
from async_docker_client import get_async_docker_client
from http_pool import init_http_pool, shutdown_http_pool
from database import init_database, shutdown_database, run_migrations
from container_health import (
start_container_health_monitoring,
stop_container_health_monitoring,
)
from logging_config import get_logger, init_logging
from routes import sessions_router, auth_router, health_router, proxy_router
init_logging()
logger = get_logger(__name__)
_use_database_storage = USE_DATABASE_STORAGE
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan context: wire up storage, container health monitoring
    and the periodic session-cleanup task on startup; tear them down on
    shutdown.

    Falls back from database storage to the JSON-file backend if database
    initialization fails (flips the module-level `_use_database_storage`).
    """
    global _use_database_storage
    logger.info("Starting Session Management Service")
    await init_http_pool()

    if _use_database_storage:
        try:
            await init_database()
            await run_migrations()
            await session_manager._load_sessions_from_database()
            logger.info("Database initialized and sessions loaded")
        except Exception as e:
            logger.error("Database initialization failed", extra={"error": str(e)})
            # Degrade gracefully to the JSON-file backend instead of refusing
            # to boot.
            if _use_database_storage:
                logger.warning("Falling back to JSON file storage")
                _use_database_storage = False
                session_manager._load_sessions_from_file()

    try:
        docker_client = None
        if USE_ASYNC_DOCKER:
            # NOTE(review): the underlying client object is captured here and
            # used after the `async with` block exits — confirm that
            # get_async_docker_client() keeps it alive past the context.
            async with get_async_docker_client() as client:
                docker_client = client._docker if hasattr(client, "_docker") else None
        else:
            docker_client = session_manager.docker_service
        await start_container_health_monitoring(session_manager, docker_client)
        logger.info("Container health monitoring started")
    except Exception as e:
        logger.error(
            "Failed to start container health monitoring", extra={"error": str(e)}
        )

    async def cleanup_task():
        # Sweep for expired sessions every 5 minutes, forever.
        # Fix: one failing sweep must not kill the loop — the original let any
        # exception from cleanup_expired_sessions() terminate the task silently.
        while True:
            try:
                await session_manager.cleanup_expired_sessions()
            except Exception as e:
                logger.error("Session cleanup sweep failed", extra={"error": str(e)})
            await asyncio.sleep(300)

    cleanup_coro = asyncio.create_task(cleanup_task())

    yield  # ---- application serves requests here ----

    logger.info("Shutting down Session Management Service")
    cleanup_coro.cancel()
    # Fix: await the cancelled task so it is fully torn down before the event
    # loop closes (the original left it dangling after cancel()).
    try:
        await cleanup_coro
    except asyncio.CancelledError:
        pass
    await shutdown_http_pool()
    try:
        await stop_container_health_monitoring()
        logger.info("Container health monitoring stopped")
    except Exception as e:
        logger.error(
            "Error stopping container health monitoring", extra={"error": str(e)}
        )
    if _use_database_storage:
        await shutdown_database()
# ASGI application instance; the lifespan context above handles all
# startup/shutdown wiring (storage, health monitoring, cleanup task).
app = FastAPI(
    title="Lovdata Chat Session Manager",
    description="Manages isolated OpenCode containers for Norwegian legal research sessions",
    version="1.0.0",
    lifespan=lifespan,
)
# Mount the route groups split out of the former monolithic main.py.
app.include_router(sessions_router)
app.include_router(auth_router)
app.include_router(health_router)
app.include_router(proxy_router)

47
session-manager/config.py Normal file
View File

@@ -0,0 +1,47 @@
"""
Configuration module for Session Management Service
Centralized configuration loading from environment variables with defaults.
"""
import os
from pathlib import Path
# Session storage configuration
SESSIONS_DIR = Path("/app/sessions")
SESSIONS_FILE = Path("/app/sessions/sessions.json")
# Container configuration
CONTAINER_IMAGE = os.getenv("CONTAINER_IMAGE", "lovdata-opencode:latest")
# Resource limits - configurable via environment variables with defaults
CONTAINER_MEMORY_LIMIT = os.getenv(
"CONTAINER_MEMORY_LIMIT", "4g"
) # Memory limit per container
CONTAINER_CPU_QUOTA = int(
os.getenv("CONTAINER_CPU_QUOTA", "100000")
) # CPU quota (100000 = 1 core)
CONTAINER_CPU_PERIOD = int(
os.getenv("CONTAINER_CPU_PERIOD", "100000")
) # CPU period (microseconds)
# Session management
MAX_CONCURRENT_SESSIONS = int(
os.getenv("MAX_CONCURRENT_SESSIONS", "3")
) # Max concurrent sessions
SESSION_TIMEOUT_MINUTES = int(
os.getenv("SESSION_TIMEOUT_MINUTES", "60")
) # Auto-cleanup timeout
# Resource monitoring thresholds
MEMORY_WARNING_THRESHOLD = float(
os.getenv("MEMORY_WARNING_THRESHOLD", "0.8")
) # 80% memory usage
CPU_WARNING_THRESHOLD = float(
os.getenv("CPU_WARNING_THRESHOLD", "0.9")
) # 90% CPU usage
# Feature flags
USE_ASYNC_DOCKER = os.getenv("USE_ASYNC_DOCKER", "true").lower() == "true"
USE_DATABASE_STORAGE = os.getenv("USE_DATABASE_STORAGE", "true").lower() == "true"

File diff suppressed because it is too large Load Diff

24
session-manager/models.py Normal file
View File

@@ -0,0 +1,24 @@
"""
Data models for Session Management Service
Pydantic models for session data and API request/response schemas.
"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel
class SessionData(BaseModel):
"""Represents a user session with its associated container"""
session_id: str
container_name: str
container_id: Optional[str] = None
host_dir: str
port: Optional[int] = None
auth_token: Optional[str] = None # Authentication token for the session
created_at: datetime
last_accessed: datetime
status: str = "creating" # creating, running, stopped, error

View File

@@ -0,0 +1,410 @@
import os
import uuid
import json
import asyncio
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Optional, List
from fastapi import HTTPException
from config import (
SESSIONS_DIR,
SESSIONS_FILE,
CONTAINER_IMAGE,
MAX_CONCURRENT_SESSIONS,
SESSION_TIMEOUT_MINUTES,
USE_ASYNC_DOCKER,
USE_DATABASE_STORAGE,
)
from models import SessionData
from docker_service import DockerService
from database import SessionModel
from resource_manager import get_resource_limits, should_throttle_sessions
from session_auth import generate_session_auth_token, cleanup_expired_auth_tokens
from logging_config import get_logger
logger = get_logger(__name__)
class SessionManager:
    """Manages user sessions and the Docker containers that back them.

    Sessions live in an in-memory dict (``self.sessions``) and are mirrored to
    either a database (USE_DATABASE_STORAGE) or a JSON file on disk.  All
    database writes are best-effort: failures are logged and the in-memory
    state stays authoritative.
    """

    def __init__(self, docker_service: Optional[DockerService] = None):
        """Build the manager; a DockerService may be injected (e.g. for tests)."""
        if docker_service:
            self.docker_service = docker_service
        else:
            self.docker_service = DockerService(use_async=USE_ASYNC_DOCKER)
        self.sessions: Dict[str, SessionData] = {}
        if USE_DATABASE_STORAGE:
            # Database sessions are loaded later, on the event loop, via
            # _load_sessions_from_database() (called from the app lifespan).
            logger.info("Session storage initialized", extra={"backend": "database"})
        else:
            self._load_sessions_from_file()
            logger.info("Session storage initialized", extra={"backend": "json_file"})
        # Imported here rather than at module top — presumably to break a
        # circular import with container_health (TODO confirm).
        from container_health import get_container_health_monitor

        self.health_monitor = get_container_health_monitor()
        logger.info(
            "SessionManager initialized",
            extra={
                "docker_service_type": type(self.docker_service).__name__,
                "storage_backend": "database" if USE_DATABASE_STORAGE else "json_file",
            },
        )

    def _load_sessions_from_file(self):
        """Populate ``self.sessions`` from the JSON file, tolerating a bad file."""
        if SESSIONS_FILE.exists():
            try:
                with open(SESSIONS_FILE, "r") as f:
                    data = json.load(f)
                for session_id, session_dict in data.items():
                    # Timestamps are serialized as ISO strings; revive them.
                    session_dict["created_at"] = datetime.fromisoformat(
                        session_dict["created_at"]
                    )
                    session_dict["last_accessed"] = datetime.fromisoformat(
                        session_dict["last_accessed"]
                    )
                    self.sessions[session_id] = SessionData(**session_dict)
                logger.info(
                    "Sessions loaded from JSON file",
                    extra={"count": len(self.sessions)},
                )
            except (json.JSONDecodeError, KeyError) as e:
                # Corrupt/partial file: start empty instead of failing at boot.
                logger.warning("Could not load sessions file", extra={"error": str(e)})
                self.sessions = {}

    async def _load_sessions_from_database(self):
        """Populate ``self.sessions`` with active sessions from the database.

        Only "running" and "creating" sessions are restored; any failure
        leaves the manager with an empty session map.
        """
        try:
            db_sessions = await SessionModel.get_sessions_by_status("running")
            db_sessions.extend(await SessionModel.get_sessions_by_status("creating"))
            self.sessions = {}
            for session_dict in db_sessions:
                session_data = SessionData(**session_dict)
                self.sessions[session_dict["session_id"]] = session_data
            logger.info(
                "Sessions loaded from database", extra={"count": len(self.sessions)}
            )
        except Exception as e:
            logger.error(
                "Failed to load sessions from database", extra={"error": str(e)}
            )
            self.sessions = {}

    def _save_sessions(self):
        """Persist the in-memory session map to the JSON file."""
        SESSIONS_DIR.mkdir(exist_ok=True)
        # default=str serializes the datetime fields as ISO strings, which
        # _load_sessions_from_file() revives with fromisoformat().
        data = {sid: session.dict() for sid, session in self.sessions.items()}
        with open(SESSIONS_FILE, "w") as f:
            json.dump(data, f, indent=2, default=str)

    def _generate_session_id(self) -> str:
        """Return a random 16-character hex session id."""
        # uuid4().hex is exactly str(uuid4()) with the dashes stripped.
        return uuid.uuid4().hex[:16]

    def _get_available_port(self) -> int:
        """Return the lowest host port >= 8081 not used by a known session."""
        used_ports = {s.port for s in self.sessions.values() if s.port}
        port = 8081
        while port in used_ports:
            port += 1
        return port

    def _check_container_limits(self) -> bool:
        """Return True while another session may start under the concurrency cap."""
        active_sessions = sum(
            1 for s in self.sessions.values() if s.status in ["creating", "running"]
        )
        return active_sessions < MAX_CONCURRENT_SESSIONS

    async def _async_check_container_limits(self) -> bool:
        """Async facade over _check_container_limits for the async code path."""
        return self._check_container_limits()

    async def create_session(self) -> SessionData:
        """Create a new session and launch its container in the background.

        Returns the session immediately with status "creating"; the container
        start is scheduled as a background task.

        Raises:
            HTTPException(429): concurrent-session limit reached.
            HTTPException(503): host resources are too constrained.
        """
        if USE_ASYNC_DOCKER:
            limits_ok = await self._async_check_container_limits()
        else:
            limits_ok = self._check_container_limits()
        if not limits_ok:
            raise HTTPException(
                status_code=429,
                detail=f"Maximum concurrent sessions ({MAX_CONCURRENT_SESSIONS}) reached",
            )
        should_throttle, reason = should_throttle_sessions()
        if should_throttle:
            raise HTTPException(
                status_code=503,
                detail=f"System resource constraints prevent new sessions: {reason}",
            )
        session_id = self._generate_session_id()
        container_name = f"opencode-{session_id}"
        host_dir = str(SESSIONS_DIR / session_id)
        port = self._get_available_port()
        Path(host_dir).mkdir(parents=True, exist_ok=True)
        auth_token = generate_session_auth_token(session_id)
        session = SessionData(
            session_id=session_id,
            container_name=container_name,
            host_dir=host_dir,
            port=port,
            auth_token=auth_token,
            created_at=datetime.now(),
            last_accessed=datetime.now(),
            status="creating",
        )
        self.sessions[session_id] = session
        if USE_DATABASE_STORAGE:
            try:
                await SessionModel.create_session(
                    {
                        "session_id": session_id,
                        "container_name": container_name,
                        "host_dir": host_dir,
                        "port": port,
                        "auth_token": auth_token,
                        "status": "creating",
                    }
                )
                logger.info(
                    "Session created in database", extra={"session_id": session_id}
                )
            except Exception as e:
                logger.error(
                    "Failed to create session in database",
                    extra={"session_id": session_id, "error": str(e)},
                )
        # Container startup is slow; run it in the background so the API can
        # return the "creating" session right away.
        if USE_ASYNC_DOCKER:
            asyncio.create_task(self._start_container_async(session))
        else:
            asyncio.create_task(self._start_container_sync(session))
        return session

    async def _provision_container(self, session: SessionData, explicit_start: bool):
        """Create (and optionally start) the container for *session*.

        Shared implementation for both Docker code paths — fix: the original
        _start_container_async/_start_container_sync were near-identical
        ~55-line copies differing only in the explicit start call.  On any
        failure the session is marked "error" and state is flushed to disk.
        """
        try:
            resource_limits = get_resource_limits()
            logger.info(
                f"Starting container {session.container_name} with resource limits: "
                f"memory={resource_limits.memory_limit}, cpu_quota={resource_limits.cpu_quota}"
            )
            container_info = await self.docker_service.create_container(
                name=session.container_name,
                image=CONTAINER_IMAGE,
                volumes={session.host_dir: {"bind": "/app/somedir", "mode": "rw"}},
                ports={"8080": session.port},
                environment={
                    "MCP_SERVER": os.getenv("MCP_SERVER", ""),
                    "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", ""),
                    "ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", ""),
                    "GOOGLE_API_KEY": os.getenv("GOOGLE_API_KEY", ""),
                    "SESSION_AUTH_TOKEN": session.auth_token or "",
                    "SESSION_ID": session.session_id,
                },
                network_mode="bridge",
                mem_limit=resource_limits.memory_limit,
                cpu_quota=resource_limits.cpu_quota,
                cpu_period=resource_limits.cpu_period,
                # noexec/nosuid scratch space only, with hard size caps.
                tmpfs={
                    "/tmp": "rw,noexec,nosuid,size=100m",
                    "/var/tmp": "rw,noexec,nosuid,size=50m",
                },
            )
            if explicit_start:
                await self.docker_service.start_container(container_info.container_id)
            session.container_id = container_info.container_id
            session.status = "running"
            self.sessions[session.session_id] = session
            if USE_DATABASE_STORAGE:
                try:
                    await SessionModel.update_session(
                        session.session_id,
                        {
                            "container_id": container_info.container_id,
                            "status": "running",
                        },
                    )
                except Exception as e:
                    logger.error(
                        "Failed to update session in database",
                        extra={"session_id": session.session_id, "error": str(e)},
                    )
            logger.info(
                "Container started successfully",
                extra={
                    "session_id": session.session_id,
                    "container_name": session.container_name,
                    "container_id": container_info.container_id,
                    "port": session.port,
                },
            )
        except Exception as e:
            session.status = "error"
            self._save_sessions()
            logger.error(f"Failed to start container {session.container_name}: {e}")

    async def _start_container_async(self, session: SessionData):
        """Async Docker path: create the container, then start it explicitly."""
        await self._provision_container(session, explicit_start=True)

    async def _start_container_sync(self, session: SessionData):
        """Sync Docker path: create only — presumably this driver starts the
        container as part of create_container (TODO confirm)."""
        await self._provision_container(session, explicit_start=False)

    async def get_session(self, session_id: str) -> Optional[SessionData]:
        """Return the session for *session_id*, touching last_accessed.

        Falls back to a database lookup on a cache miss when the database
        backend is enabled; returns None when the session is unknown.
        """
        session = self.sessions.get(session_id)
        if session:
            session.last_accessed = datetime.now()
            if USE_DATABASE_STORAGE:
                try:
                    await SessionModel.update_session(
                        session_id, {"last_accessed": datetime.now()}
                    )
                except Exception as e:
                    logger.warning(
                        "Failed to update session access time in database",
                        extra={"session_id": session_id, "error": str(e)},
                    )
            return session
        if USE_DATABASE_STORAGE:
            try:
                db_session = await SessionModel.get_session(session_id)
                if db_session:
                    session_data = SessionData(**db_session)
                    self.sessions[session_id] = session_data
                    logger.debug(
                        "Session loaded from database", extra={"session_id": session_id}
                    )
                    return session_data
            except Exception as e:
                logger.error(
                    "Failed to load session from database",
                    extra={"session_id": session_id, "error": str(e)},
                )
        return None

    async def list_sessions(self) -> List[SessionData]:
        """Return all sessions currently known in memory."""
        return list(self.sessions.values())

    async def list_containers_async(self, all: bool = False) -> List:
        """Proxy to the docker service's container listing."""
        return await self.docker_service.list_containers(all=all)

    async def cleanup_expired_sessions(self):
        """Stop and remove containers for sessions idle past the timeout.

        Also prunes expired per-session authentication tokens.  Each cleanup
        step is best-effort so one failure does not block the others.
        """
        now = datetime.now()
        expired_sessions = []
        for session_id, session in self.sessions.items():
            if now - session.last_accessed > timedelta(minutes=SESSION_TIMEOUT_MINUTES):
                expired_sessions.append(session_id)
                try:
                    await self.docker_service.stop_container(
                        session.container_name, timeout=10
                    )
                    await self.docker_service.remove_container(session.container_name)
                    logger.info(f"Cleaned up container {session.container_name}")
                except Exception as e:
                    logger.error(
                        f"Error cleaning up container {session.container_name}: {e}"
                    )
                try:
                    shutil.rmtree(session.host_dir)
                    logger.info(f"Removed session directory {session.host_dir}")
                except OSError as e:
                    logger.error(
                        f"Error removing session directory {session.host_dir}: {e}"
                    )
                if USE_DATABASE_STORAGE:
                    # Fix: mark the session stopped in the database; otherwise
                    # it stays "running" there and would be resurrected by
                    # _load_sessions_from_database() after a service restart.
                    try:
                        await SessionModel.update_session(
                            session_id, {"status": "stopped"}
                        )
                    except Exception as e:
                        logger.warning(
                            "Failed to mark expired session stopped in database",
                            extra={"session_id": session_id, "error": str(e)},
                        )
        for session_id in expired_sessions:
            del self.sessions[session_id]
        if expired_sessions:
            self._save_sessions()
            logger.info(f"Cleaned up {len(expired_sessions)} expired sessions")
        expired_tokens = cleanup_expired_auth_tokens()
        if expired_tokens > 0:
            logger.info(f"Cleaned up {expired_tokens} expired authentication tokens")
# Module-level singleton shared by the FastAPI app, routes and lifespan hooks.
session_manager = SessionManager()