fix: session stability improvements
- Fix docker client initialization bug in app.py (context manager was closing client)
- Add restart_session() method to preserve session IDs during container restarts
- Add 60-second startup grace period before health checking new sessions
- Fix _stop_container and _get_container_info to use docker_service API consistently
- Disable mDNS in Dockerfile to prevent Bonjour service name conflicts
- Remove old container before restart to free port bindings
This commit is contained in:
@@ -367,6 +367,86 @@ class SessionManager:
|
||||
async def list_sessions(self) -> List[SessionData]:
    """Return a new list holding every value currently in ``self.sessions``."""
    snapshot = [*self.sessions.values()]
    return snapshot
|
||||
|
||||
async def restart_session(self, session_id: str) -> Optional[SessionData]:
    """Restart a session's container while preserving the session ID.

    Unlike create_session(), this reuses the existing session data and only
    creates a new container, maintaining session ID continuity. The old
    container is stopped and removed first to free up the port.

    Args:
        session_id: Identifier of the session whose container is restarted.

    Returns:
        The updated session object (status ``"starting"``, ``container_id``
        cleared, fresh ``container_name``), or ``None`` when no session with
        that ID is found. The new container is started in a background task,
        so this method returns before the container start has finished.
    """
    session = await self.get_session(session_id)
    if not session:
        logger.error(
            "Cannot restart session: not found",
            extra={"session_id": session_id},
        )
        return None

    old_container_id = session.container_id
    logger.info(
        "Restarting session container",
        extra={"session_id": session_id, "old_container_id": old_container_id},
    )

    # Stop and remove old container to free up the port. Both steps are
    # best-effort: a failure is logged and the restart carries on.
    if old_container_id and self.docker_service:
        try:
            logger.info(
                "Stopping old container for restart",
                extra={"session_id": session_id, "container_id": old_container_id},
            )
            await self.docker_service.stop_container(old_container_id)
        except Exception as e:
            logger.warning(
                "Failed to stop old container (may already be stopped)",
                extra={
                    "session_id": session_id,
                    "container_id": old_container_id,
                    "error": str(e),
                },
            )

        try:
            logger.info(
                "Removing old container for restart",
                extra={"session_id": session_id, "container_id": old_container_id},
            )
            await self.docker_service.remove_container(old_container_id, force=True)
        except Exception as e:
            logger.warning(
                "Failed to remove old container",
                extra={
                    "session_id": session_id,
                    "container_id": old_container_id,
                    "error": str(e),
                },
            )

    # Generate new container name for the restart
    new_container_name = f"opencode-{session_id}-{uuid.uuid4().hex[:8]}"
    session.container_name = new_container_name
    session.container_id = None  # Clear old container_id
    session.status = "starting"

    # Update session in store before starting container
    self.sessions[session_id] = session

    if USE_DATABASE_STORAGE:
        try:
            await SessionModel.update_session(
                session_id,
                {
                    "container_name": new_container_name,
                    "container_id": None,
                    "status": "starting",
                },
            )
        except Exception as e:
            # A persistence failure is logged but does not abort the restart.
            logger.error(
                "Failed to update session in database during restart",
                extra={"session_id": session_id, "error": str(e)},
            )

    # Start new container for this session
    if USE_ASYNC_DOCKER:
        start_coro = self._start_container_async(session)
    else:
        start_coro = self._start_container_sync(session)
    start_task = asyncio.create_task(start_coro)

    # The event loop only keeps weak references to tasks, so a task nobody
    # else references can be garbage-collected before it completes. Hold a
    # strong reference on the instance until the task is done.
    pending_tasks = getattr(self, "_restart_tasks", None)
    if pending_tasks is None:
        pending_tasks = set()
        self._restart_tasks = pending_tasks
    pending_tasks.add(start_task)
    start_task.add_done_callback(pending_tasks.discard)

    return session
|
||||
|
||||
async def list_containers_async(self, all: bool = False) -> List:
    """Await ``self.docker_service.list_containers`` and return its result.

    Args:
        all: Forwarded unchanged as the ``all`` keyword argument of the
            docker service call.
    """
    containers = await self.docker_service.list_containers(all=all)
    return containers
|
||||
|
||||
|
||||
Reference in New Issue
Block a user