fix: session stability improvements

- Fix docker client initialization bug in app.py (context manager was closing client)
- Add restart_session() method to preserve session IDs during container restarts
- Add 60-second startup grace period before health checking new sessions
- Fix _stop_container and _get_container_info to use docker_service API consistently
- Disable mDNS in Dockerfile to prevent Bonjour service name conflicts
- Remove old container before restart to free port bindings
This commit is contained in:
2026-02-04 19:10:03 +01:00
parent 05aa70c4af
commit 69d18cc494
5 changed files with 138 additions and 41 deletions

View File

@@ -367,6 +367,86 @@ class SessionManager:
async def list_sessions(self) -> List[SessionData]:
    """Return a new list holding every value currently in ``self.sessions``."""
    return [*self.sessions.values()]
async def restart_session(self, session_id: str) -> Optional[SessionData]:
    """Restart a session's container while preserving the session ID.

    Unlike create_session(), this reuses the existing session data
    and only creates a new container, maintaining session ID continuity.
    This method removes the old container to free up the port.

    Stopping and removing the old container are both best-effort: a
    failure in either step is logged as a warning and the restart
    continues. The new container is started in a background task, so the
    session returned here is still in the ``"starting"`` state.

    Args:
        session_id: Identifier of the session whose container is restarted.

    Returns:
        The updated session (new ``container_name``, ``container_id`` set
        to ``None``, ``status`` set to ``"starting"``), or ``None`` when no
        session with ``session_id`` exists.
    """
    session = await self.get_session(session_id)
    if not session:
        logger.error(
            "Cannot restart session: not found",
            extra={"session_id": session_id},
        )
        return None

    old_container_id = session.container_id
    logger.info(
        "Restarting session container",
        extra={"session_id": session_id, "old_container_id": old_container_id},
    )

    # Stop and remove old container to free up the port
    if old_container_id and self.docker_service:
        try:
            logger.info(
                "Stopping old container for restart",
                extra={"session_id": session_id, "container_id": old_container_id},
            )
            await self.docker_service.stop_container(old_container_id)
        except Exception as e:
            # Best-effort: the container may already be stopped.
            logger.warning(
                "Failed to stop old container (may already be stopped)",
                extra={"session_id": session_id, "container_id": old_container_id, "error": str(e)},
            )
        try:
            logger.info(
                "Removing old container for restart",
                extra={"session_id": session_id, "container_id": old_container_id},
            )
            await self.docker_service.remove_container(old_container_id, force=True)
        except Exception as e:
            # Best-effort: keep going so the session still gets a new container.
            logger.warning(
                "Failed to remove old container",
                extra={"session_id": session_id, "container_id": old_container_id, "error": str(e)},
            )

    # Generate new container name for the restart
    new_container_name = f"opencode-{session_id}-{uuid.uuid4().hex[:8]}"
    session.container_name = new_container_name
    session.container_id = None  # Clear old container_id
    session.status = "starting"

    # Update session in store before starting container
    self.sessions[session_id] = session
    if USE_DATABASE_STORAGE:
        try:
            await SessionModel.update_session(
                session_id,
                {
                    "container_name": new_container_name,
                    "container_id": None,
                    "status": "starting",
                },
            )
        except Exception as e:
            # The in-memory store is already updated; a database failure
            # is logged but does not abort the restart.
            logger.error(
                "Failed to update session in database during restart",
                extra={"session_id": session_id, "error": str(e)},
            )

    # Start new container for this session.
    # NOTE(review): both starters are handed to asyncio.create_task, so both
    # must be coroutine functions -- confirm for _start_container_sync.
    if USE_ASYNC_DOCKER:
        start_coro = self._start_container_async(session)
    else:
        start_coro = self._start_container_sync(session)

    # The event loop keeps only weak references to tasks. Hold a strong
    # reference until the task finishes so it cannot be garbage-collected
    # part-way through starting the container.
    pending_tasks = getattr(self, "_restart_tasks", None)
    if pending_tasks is None:
        pending_tasks = self._restart_tasks = set()
    start_task = asyncio.create_task(start_coro)
    pending_tasks.add(start_task)
    start_task.add_done_callback(pending_tasks.discard)

    return session
async def list_containers_async(self, all: bool = False) -> List:
    """Return the result of ``self.docker_service.list_containers``.

    Args:
        all: Forwarded unchanged as the ``all`` keyword argument of
            ``docker_service.list_containers``.

    Returns:
        Whatever the awaited ``list_containers`` call produces.
    """
    containers = await self.docker_service.list_containers(all=all)
    return containers