Files
netboot/files/setup-node-storage
Torbjørn Lindahl 492cc8abbc Add K3s agent setup with NVMe-backed persistent storage
Bind-mount K3s agent data, node identity, and kubelet dirs from
NVMe so container image cache and node registration survive reboots
on the diskless netboot nodes. Includes K3s binary download, agent
systemd service, DHCP hostname resolution, and open-iscsi for
Longhorn iSCSI support.
2026-03-01 19:11:12 +01:00

237 lines
7.7 KiB
Bash

#!/bin/bash
# Setup local NVMe storage for K3s node
# Runs at boot via systemd service
#
# Logic:
# - No NVMe: exit cleanly
# - No partition table: auto-format (new drive)
# - Has our labels: mount and exit (already configured)
# - Has other partitions: prompt with 120s timeout (safety)
set -euo pipefail

# --- Configuration -----------------------------------------------------------
# Everything below is a constant. It is marked readonly so that a stray
# assignment later in the script cannot silently retarget a destructive
# operation (wipefs/parted/mkfs) at a different disk or label.

# Whole-disk device that is partitioned, formatted and mounted.
readonly DEVICE="/dev/nvme0n1"

# End offset of the first partition and start offset of the second one, as
# handed to parted's mkpart.
readonly CONTAINERD_SIZE="75GiB"

# Filesystem labels; existing storage is found and mounted through them.
readonly CONTAINERD_LABEL="containerd"
readonly LONGHORN_LABEL="longhorn"

# Mount points of the two partitions.
readonly CONTAINERD_MOUNT="/var/lib/containerd"
readonly LONGHORN_MOUNT="/var/lib/longhorn"

# File written into the root of each freshly formatted filesystem.
readonly MARKER_FILE=".netboot-storage"

# Seconds to wait for console confirmation before leaving a foreign drive alone.
readonly PROMPT_TIMEOUT=120

# Colors for console output (escape sequences, expanded by echo -e / printf %b)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'
# Log to both console and journald.
#
# _storage_emit is the shared implementation behind log/warn/error.
# Arguments:
#   $1  - color escape sequence used for the "[storage]" prefix
#   $2  - syslog priority passed to logger -p ("" keeps logger's default)
#   $3+ - message words, joined with single spaces
# Outputs: one prefixed line on stdout, plus one syslog record via logger.
# Returns: always 0. A missing or failing logger must not abort the script
#          under `set -e`; the console line is still printed.
_storage_emit() {
  local color=$1 priority=$2
  shift 2

  local -a logger_args=(-t setup-node-storage)
  if [[ -n "$priority" ]]; then
    logger_args+=(-p "$priority")
  fi

  # %b expands the escapes stored in the color variables; the message itself
  # goes through %s so backslashes in it are printed literally.
  printf '%b[storage]%b %s\n' "$color" "$NC" "$*"

  # Best effort: syslog delivery is optional, the console line is not.
  logger "${logger_args[@]}" -- "$*" || true
}

# Informational message (green prefix, default syslog priority).
log() { _storage_emit "$GREEN" "" "$@"; }

# Warning (yellow prefix, syslog priority "warning").
warn() { _storage_emit "$YELLOW" warning "$@"; }

# Error (red prefix, syslog priority "err").
error() { _storage_emit "$RED" err "$@"; }
# Nodes without the NVMe drive have nothing to set up: report it and finish
# successfully so the boot sequence is not marked as failed.
[ -b "$DEVICE" ] || {
  log "No NVMe device found at $DEVICE - skipping storage setup"
  exit 0
}

# Capacity in decimal gigabytes, used only for the log line below.
# lsblk flags: -b bytes, -d whole disk only, -n no header, -o SIZE single column.
DEVICE_SIZE=$(lsblk -bdn -o SIZE "$DEVICE" | awk '{ printf "%.0fGB", $1 / 1e9 }')
log "Found NVMe: $DEVICE ($DEVICE_SIZE)"
# Print the device path of partition number $2 on whole-disk device $1.
#
# Linux puts a "p" between disk name and partition number whenever the disk
# name itself ends in a digit (nvme0n1 -> nvme0n1p1, mmcblk0 -> mmcblk0p1),
# and appends the number directly otherwise (sda -> sda1). Testing the last
# character covers every such disk type, not only names containing "nvme".
#
# Arguments: $1 - whole-disk device path, $2 - partition number
# Outputs:   the partition device path on stdout
partition_path() {
  local disk=$1 number=$2
  if [[ "$disk" == *[0-9] ]]; then
    printf '%s\n' "${disk}p${number}"
  else
    printf '%s\n' "${disk}${number}"
  fi
}

# Partitions used by the rest of the script: 1 = containerd, 2 = longhorn.
PART1=$(partition_path "$DEVICE" 1)
PART2=$(partition_path "$DEVICE" 2)
# Mount the two already-formatted data partitions and set up the K3s bind
# mounts on top of them.
#
# Globals:  CONTAINERD_MOUNT, LONGHORN_MOUNT, CONTAINERD_LABEL, LONGHORN_LABEL (read)
# Returns:  0 when everything is mounted, 1 when a labelled partition could
#           not be mounted (an error is logged first).
mount_storage() {
  log "Mounting existing storage..."
  mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"

  # Partitions are looked up by filesystem label. A mount point that is
  # already active is left alone, so running the script again is harmless.
  if mountpoint -q "$CONTAINERD_MOUNT"; then
    :
  elif ! mount -L "$CONTAINERD_LABEL" "$CONTAINERD_MOUNT"; then
    error "Failed to mount containerd partition"
    return 1
  fi

  if mountpoint -q "$LONGHORN_MOUNT"; then
    :
  elif ! mount -L "$LONGHORN_LABEL" "$LONGHORN_MOUNT"; then
    error "Failed to mount longhorn partition"
    return 1
  fi

  # K3s persistence: agent data, node identity and kubelet directories are
  # bind mounted from the NVMe so the node does not have to re-register
  # after a reboot.
  setup_k3s_persistence

  # Report the size column of df's last output line for each mount.
  log "Storage mounted:"
  log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -n 1 | awk '{ print $2 }')"
  log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -n 1 | awk '{ print $2 }')"
  return 0
}
# Setup K3s persistence directories.
# Bind mounts directories that live on the containerd partition over the K3s
# and kubelet paths, so their contents survive reboots.
#
# Globals:  CONTAINERD_MOUNT (read)
# Returns:  0 on success; 1 if a directory cannot be created or a bind mount
#           fails (the failing bind mount is logged via error).
setup_k3s_persistence() {
  # "<directory under $CONTAINERD_MOUNT>:<path it is bind mounted on>"
  local -a bind_pairs=(
    # K3s agent data (containerd, kubelet certs, etc.)
    # Uses overlayfs internally, so must be on real filesystem, not overlay
    "k3s-agent:/var/lib/rancher/k3s/agent"
    # K3s node identity (password file)
    # Must persist across reboots or node will be rejected
    "k3s-node:/etc/rancher/node"
    # Kubelet data (pod volumes, projected tokens, etc.)
    # Must be on NVMe so kubelet reports real disk capacity, not the 2G tmpfs overlay
    "kubelet:/var/lib/kubelet"
  )

  local pair source_dir target_dir
  for pair in "${bind_pairs[@]}"; do
    source_dir="$CONTAINERD_MOUNT/${pair%%:*}"
    target_dir="${pair#*:}"

    mkdir -p "$source_dir" "$target_dir" || return 1

    # An existing mount on the target is kept as is, which makes the function
    # safe to call again.
    if ! mountpoint -q "$target_dir"; then
      if ! mount --bind "$source_dir" "$target_dir"; then
        error "Failed to bind mount $source_dir on $target_dir"
        return 1
      fi
      log " $target_dir: bind mount to NVMe"
    fi
  done
}
# Wipe $DEVICE, create the two-partition layout, format, mount and mark it.
#
# DESTRUCTIVE: all existing signatures and the partition table on $DEVICE are
# removed. Callers are responsible for deciding that this is wanted.
#
# Globals:  DEVICE, PART1, PART2, CONTAINERD_SIZE, CONTAINERD_LABEL,
#           LONGHORN_LABEL, CONTAINERD_MOUNT, LONGHORN_MOUNT, MARKER_FILE (read)
# Exits:    terminates the whole script with status 1 when the new partition
#           devices do not show up.
format_storage() {
  log "Partitioning $DEVICE..."
  wipefs -af "$DEVICE"
  parted -s "$DEVICE" mklabel gpt
  # Partition 1 spans 1MiB..CONTAINERD_SIZE, partition 2 takes the rest.
  parted -s "$DEVICE" mkpart primary ext4 1MiB "$CONTAINERD_SIZE"
  parted -s "$DEVICE" mkpart primary ext4 "$CONTAINERD_SIZE" 100%

  # Tell kernel to re-read partition table and wait for udev
  partprobe "$DEVICE"
  udevadm settle --timeout=10

  # Verify partitions appeared
  if ! [[ -b "$PART1" && -b "$PART2" ]]; then
    error "Partitions not found after partprobe: $PART1, $PART2"
    exit 1
  fi

  # wipefs above only clears signatures of the whole-disk device; an old
  # filesystem that happened to start at the same offset is still visible
  # inside the new partition. -F keeps mkfs from stopping to ask about it.
  # NOTE(review): the confirmation prompt is mke2fs behaviour on a terminal -
  # confirm against the mke2fs version shipped in the netboot image.
  log "Formatting ${PART1} as ext4 (containerd, ${CONTAINERD_SIZE})..."
  mkfs.ext4 -F -L "$CONTAINERD_LABEL" -q "$PART1"
  log "Formatting ${PART2} as ext4 (longhorn, remaining)..."
  mkfs.ext4 -F -L "$LONGHORN_LABEL" -q "$PART2"

  # Mount the new partitions
  mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"
  mount "$PART1" "$CONTAINERD_MOUNT"
  mount "$PART2" "$LONGHORN_MOUNT"

  # Create marker files with metadata; a later boot looks for this file on
  # the containerd partition.
  local mount_point
  for mount_point in "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"; do
    printf '%s\n' \
      "# Netboot storage marker - DO NOT DELETE" \
      "formatted_date=$(date -Iseconds)" \
      "formatted_by=setup-node-storage" \
      "hostname=$(hostname)" \
      "device=$DEVICE" \
      >"${mount_point}/${MARKER_FILE}"
  done

  # K3s persistence: bind mount agent data and node identity from NVMe
  setup_k3s_persistence

  log "Storage formatted and mounted successfully"
  log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
  log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
}
# Check whether a disk carries a partition table.
#
# Method 1: blkid prints a PTTYPE value for a partitioned disk; empty output
#           (or a blkid failure) is treated as "nothing found".
# Method 2: fallback on parted's exit status - `parted print` succeeding is
#           taken as proof that a disk label was read. Only the exit status is
#           used, never the message text, so the check does not depend on locale.
#
# Arguments: $1 - device to inspect (optional, defaults to $DEVICE)
# Returns:   0 if a partition table was detected, 1 otherwise
has_partition_table() {
  local device=${1:-$DEVICE}
  local pttype

  # A non-zero blkid status is handled explicitly so it cannot abort the
  # script under `set -e` when this function is called outside a conditional.
  pttype=$(blkid -o value -s PTTYPE "$device" 2>/dev/null) || pttype=""
  if [[ -n "$pttype" ]]; then
    return 0 # has partition table
  fi

  # Fallback: check if parted can read it
  if parted -s "$device" print &>/dev/null; then
    return 0 # has partition table
  fi

  return 1 # no partition table
}
# A drive without any partition table is treated as factory fresh: there is
# nothing on it to lose, so it is formatted without asking and the script is
# done.
has_partition_table || {
  log "Empty drive detected (no partition table) - auto-formatting..."
  format_storage
  exit 0
}
# The drive is partitioned - find out whether the layout is ours.
# Both filesystem labels must be resolvable before anything is mounted.
if blkid -L "$CONTAINERD_LABEL" >/dev/null 2>&1 && blkid -L "$LONGHORN_LABEL" >/dev/null 2>&1; then
  # Belt and suspenders: look for the marker file on the containerd
  # filesystem. It is mounted on a throwaway directory and unmounted again
  # right away, so no stray mount is left behind.
  TEMP_MOUNT=$(mktemp -d)
  if mount -L "$CONTAINERD_LABEL" "$TEMP_MOUNT" 2>/dev/null; then
    if [ -f "${TEMP_MOUNT}/${MARKER_FILE}" ]; then
      marker_state=present
    else
      marker_state=missing
    fi
    umount "$TEMP_MOUNT"
    rmdir "$TEMP_MOUNT"

    case "$marker_state" in
      present)
        log "Storage already configured (found labels and marker)"
        ;;
      missing)
        # Labels match but the marker is gone - still most likely ours.
        warn "Found labels but no marker file - assuming configured"
        ;;
    esac
    mount_storage
    exit 0
  fi
  # The probe mount failed: drop the temp dir and continue to the prompt below.
  rmdir "$TEMP_MOUNT" 2>/dev/null || true
fi
# Has partitions but not ours - this could contain data!
# Show the current layout and require an explicit confirmation on the console
# before anything is destroyed. Every path other than a clear "yes" leaves
# the drive untouched and exits successfully.
warn "NVMe has existing partitions but no netboot labels."
warn "This drive may contain important data!"
echo ""
lsblk "$DEVICE"
echo ""
# Prompt on console with timeout
echo -e "${CYAN}========================================${NC}"
echo -e "${CYAN} Press ENTER within ${PROMPT_TIMEOUT}s to format ${NC}"
echo -e "${CYAN} Or wait to skip (safe default) ${NC}"
echo -e "${CYAN}========================================${NC}"
echo ""
response=""
if read -r -t "$PROMPT_TIMEOUT" -p "Format $DEVICE? [press ENTER to confirm] " response; then
  echo ""
  # Only a bare ENTER or an explicit yes confirms. Anything else the operator
  # typed (e.g. "n") must never be read as consent to wipe the drive.
  case "$response" in
    "" | [yY] | [yY][eE][sS])
      warn "Formatting in 5 seconds... Ctrl+C to abort"
      sleep 5
      format_storage
      ;;
    *)
      warn "Answer '$response' is not a confirmation - skipping storage setup (drive left untouched)"
      exit 0
      ;;
  esac
else
  # In the else branch $? still holds read's status: bash reports a timeout
  # as a value above 128, anything else means no input could be read (EOF,
  # stdin not attached to a console).
  read_status=$?
  echo ""
  if [ "$read_status" -gt 128 ]; then
    warn "Timeout - skipping storage setup (drive left untouched)"
  else
    warn "No console input available - skipping storage setup (drive left untouched)"
  fi
  warn "To format manually, reboot and press ENTER when prompted"
  exit 0
fi