#!/bin/bash
#
# Setup local NVMe storage for K3s node
# Runs at boot via systemd service
#
# Change note carried over from the description that accompanied this file:
#   Bind-mount K3s agent data, node identity, and kubelet dirs from NVMe so
#   container image cache and node registration survive reboots on the
#   diskless netboot nodes. Includes K3s binary download, agent systemd
#   service, DHCP hostname resolution, and open-iscsi for Longhorn iSCSI
#   support.
#   NOTE(review): only the bind-mount part is visible in this script; the
#   other items presumably live elsewhere in that change - confirm.
#
# Logic:
#   - No NVMe: exit cleanly
#   - No partition table: auto-format (new drive)
#   - Has our labels: mount and exit (already configured)
#   - Has other partitions: prompt with 120s timeout (safety)

# Abort on unhandled errors, unset variables, and failures inside pipelines.
set -euo pipefail

# Target drive and layout. These are never reassigned, so they are readonly
# to catch accidental writes (this script wipes $DEVICE).
readonly DEVICE="/dev/nvme0n1"
readonly CONTAINERD_SIZE="75GiB"        # end offset of partition 1, as passed to parted
readonly CONTAINERD_LABEL="containerd"  # ext4 label of partition 1
readonly LONGHORN_LABEL="longhorn"      # ext4 label of partition 2
readonly CONTAINERD_MOUNT="/var/lib/containerd"
readonly LONGHORN_MOUNT="/var/lib/longhorn"
readonly MARKER_FILE=".netboot-storage" # written to each filesystem after formatting
readonly PROMPT_TIMEOUT=120             # seconds to wait for console confirmation

# Colors for console output (escape sequences are interpreted by `echo -e`)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# Log to both console and journald
#
# Shared implementation for log/warn/error.
# Arguments:
#   $1 - color escape sequence for the "[storage]" tag
#   $2 - syslog priority for logger(1); empty means logger's default
#   $3.. - message words (joined with spaces, like "$*")
# Outputs: "[storage] <message>" on stdout, plus a syslog/journald record
#          tagged "setup-node-storage".
_storage_log() {
  local color=$1 priority=$2
  shift 2

  echo -e "${color}[storage]${NC:-} $*"

  # Forwarding to the system log is best-effort on purpose: the script runs
  # under `set -e`, and a missing logger binary or an unavailable log socket
  # must not abort storage setup. Errors from logger are therefore discarded.
  if [[ -n "$priority" ]]; then
    logger -t setup-node-storage -p "$priority" "$*" 2>/dev/null || true
  else
    logger -t setup-node-storage "$*" 2>/dev/null || true
  fi
}

# Informational message (green tag, default syslog priority).
log() { _storage_log "${GREEN:-}" "" "$@"; }

# Warning message (yellow tag, syslog priority "warning").
warn() { _storage_log "${YELLOW:-}" "warning" "$@"; }

# Error message (red tag, syslog priority "err"). Does not exit by itself.
error() { _storage_log "${RED:-}" "err" "$@"; }

# Check if NVMe exists - nodes without the drive simply skip storage setup
if [[ ! -b "$DEVICE" ]]; then
  log "No NVMe device found at $DEVICE - skipping storage setup"
  exit 0
fi

# Size in decimal gigabytes; used only for the log line below.
DEVICE_SIZE=$(lsblk -b -d -n -o SIZE "$DEVICE" | awk '{printf "%.0fGB", $1/1000000000}')
log "Found NVMe: $DEVICE ($DEVICE_SIZE)"

# Get partition names.
# Linux puts a "p" between the disk name and the partition number whenever
# the disk name itself ends in a digit (nvme0n1 -> nvme0n1p1, mmcblk0 ->
# mmcblk0p1), and no separator otherwise (sda -> sda1). Keying on the
# trailing digit instead of the substring "nvme" gives the same result for
# NVMe namespaces and stays correct for other device types.
if [[ "$DEVICE" =~ [0-9]$ ]]; then
  PART1="${DEVICE}p1"
  PART2="${DEVICE}p2"
else
  PART1="${DEVICE}1"
  PART2="${DEVICE}2"
fi

# Function to mount existing storage
#
# Mounts the two labelled filesystems (unless something is already mounted
# at the target), sets up the K3s bind mounts, and logs the resulting sizes.
# Globals (read): CONTAINERD_LABEL, LONGHORN_LABEL, CONTAINERD_MOUNT,
#                 LONGHORN_MOUNT
# Returns: 0 on success, 1 if either filesystem could not be mounted.
mount_storage() {
  log "Mounting existing storage..."

  mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"

  # mountpoint -q makes the function safe to call when already mounted.
  if ! mountpoint -q "$CONTAINERD_MOUNT"; then
    if ! mount -L "$CONTAINERD_LABEL" "$CONTAINERD_MOUNT"; then
      error "Failed to mount containerd partition"
      return 1
    fi
  fi

  if ! mountpoint -q "$LONGHORN_MOUNT"; then
    if ! mount -L "$LONGHORN_LABEL" "$LONGHORN_MOUNT"; then
      error "Failed to mount longhorn partition"
      return 1
    fi
  fi

  # K3s persistence: bind mount agent data and node identity from NVMe
  # This allows the node to survive reboots without re-registering
  setup_k3s_persistence

  # Report the filesystem sizes (second column of the last df line).
  log "Storage mounted:"
  log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
  log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
  return 0
}

# Bind-mount one directory that lives on the NVMe onto a system path.
# Both directories are created if missing; nothing is done when the target
# is already a mount point.
# Arguments: $1 - backing directory on the NVMe, $2 - path to mount it on
_bind_from_nvme() {
  local backing_dir=$1 target_dir=$2

  mkdir -p "$backing_dir" "$target_dir"
  if ! mountpoint -q "$target_dir"; then
    mount --bind "$backing_dir" "$target_dir"
    log " $target_dir: bind mount to NVMe"
  fi
}

# Setup K3s persistence directories
# Bind mounts NVMe directories to k3s paths so node identity survives reboots
# Globals (read): CONTAINERD_MOUNT (must already be mounted)
setup_k3s_persistence() {
  # K3s agent data (containerd, kubelet certs, etc.)
  # Uses overlayfs internally, so must be on real filesystem, not overlay
  _bind_from_nvme "$CONTAINERD_MOUNT/k3s-agent" "/var/lib/rancher/k3s/agent"

  # K3s node identity (password file)
  # Must persist across reboots or node will be rejected
  _bind_from_nvme "$CONTAINERD_MOUNT/k3s-node" "/etc/rancher/node"

  # Kubelet data (pod volumes, projected tokens, etc.)
  # Must be on NVMe so kubelet reports real disk capacity, not the 2G tmpfs overlay
  _bind_from_nvme "$CONTAINERD_MOUNT/kubelet" "/var/lib/kubelet"
}

# Function to format the drive
#
# DESTRUCTIVE: wipes $DEVICE, creates a GPT with two partitions
# (1MiB..$CONTAINERD_SIZE and $CONTAINERD_SIZE..100%), formats both as ext4
# with the containerd/longhorn labels, mounts them, writes a marker file to
# each, and sets up the K3s bind mounts.
# Globals (read): DEVICE, PART1, PART2, CONTAINERD_SIZE, CONTAINERD_LABEL,
#                 LONGHORN_LABEL, CONTAINERD_MOUNT, LONGHORN_MOUNT, MARKER_FILE
# Exits the script with status 1 if the partition devices do not appear.
format_storage() {
  local mount_point

  log "Partitioning $DEVICE..."

  wipefs -af "$DEVICE"
  parted -s "$DEVICE" mklabel gpt
  parted -s "$DEVICE" mkpart primary ext4 1MiB "$CONTAINERD_SIZE"
  parted -s "$DEVICE" mkpart primary ext4 "$CONTAINERD_SIZE" 100%

  # Tell kernel to re-read partition table and wait for udev
  partprobe "$DEVICE"
  udevadm settle --timeout=10

  # Verify partitions appeared
  if [[ ! -b "$PART1" || ! -b "$PART2" ]]; then
    error "Partitions not found after partprobe: $PART1, $PART2"
    exit 1
  fi

  # Report the configured size rather than a hard-coded figure so the message
  # cannot drift from what parted was actually told to create.
  log "Formatting ${PART1} as ext4 (containerd, ${CONTAINERD_SIZE})..."
  mkfs.ext4 -L "$CONTAINERD_LABEL" -q "$PART1"

  log "Formatting ${PART2} as ext4 (longhorn, remaining)..."
  mkfs.ext4 -L "$LONGHORN_LABEL" -q "$PART2"

  # Mount the new partitions
  mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"
  mount "$PART1" "$CONTAINERD_MOUNT"
  mount "$PART2" "$LONGHORN_MOUNT"

  # Create marker files with metadata
  # (heredoc body and terminator must stay at column 0: plain <<EOF does not
  # strip indentation)
  for mount_point in "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"; do
    cat > "${mount_point}/${MARKER_FILE}" <<EOF
# Netboot storage marker - DO NOT DELETE
formatted_date=$(date -Iseconds)
formatted_by=setup-node-storage
hostname=$(hostname)
device=$DEVICE
EOF
  done

  # K3s persistence: bind mount agent data and node identity from NVMe
  setup_k3s_persistence

  log "Storage formatted and mounted successfully"
  log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
  log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
}

# Check for partition table
# Method 1: blkid prints a PTTYPE value only when it detects a partition table
# Method 2: fallback - exit status of `parted print` (the output is
#           discarded, so the check does not depend on the message locale)
# Globals (read): DEVICE
# Returns: 0 if either method recognises something on the device, 1 otherwise.
has_partition_table() {
  local pttype

  # blkid exits non-zero when it finds nothing; that is an expected answer
  # here, not an error, so keep it from tripping `set -e` when this function
  # is called outside an `if`/`!` context.
  pttype=$(blkid -o value -s PTTYPE "$DEVICE" 2>/dev/null) || true
  if [[ -n "$pttype" ]]; then
    return 0 # has partition table
  fi

  # Fallback: check if parted can read it
  if parted -s "$DEVICE" print &>/dev/null; then
    return 0 # has partition table
  fi

  return 1 # no partition table
}

# --- Decision flow -----------------------------------------------------------

if ! has_partition_table; then
  # No partition table - this is a fresh drive, auto-format
  log "Empty drive detected (no partition table) - auto-formatting..."
  format_storage
  exit 0
fi

# Has partition table - check if it's ours
if blkid -L "$CONTAINERD_LABEL" &>/dev/null && blkid -L "$LONGHORN_LABEL" &>/dev/null; then
  # Check for marker file (belt and suspenders)
  # Create temp mount to check marker without leaving dangling mount
  TEMP_MOUNT=$(mktemp -d)
  if mount -L "$CONTAINERD_LABEL" "$TEMP_MOUNT" 2>/dev/null; then
    marker_found=0
    if [[ -f "${TEMP_MOUNT}/${MARKER_FILE}" ]]; then
      marker_found=1
    fi

    # Release the probe mount before doing anything else.
    umount "$TEMP_MOUNT"
    rmdir "$TEMP_MOUNT"

    if (( marker_found )); then
      log "Storage already configured (found labels and marker)"
    else
      # Has our labels but no marker - probably ours, mount it
      warn "Found labels but no marker file - assuming configured"
    fi
    mount_storage
    exit 0
  fi
  # Probe mount failed: clean up the temp dir (best-effort) and fall through
  # to the interactive prompt below.
  rmdir "$TEMP_MOUNT" 2>/dev/null || true
fi

# Has partitions but not ours - this could contain data!
warn "NVMe has existing partitions but no netboot labels."
warn "This drive may contain important data!"
echo ""
lsblk "$DEVICE"
echo ""

# Prompt on console with timeout
echo -e "${CYAN}========================================${NC}"
echo -e "${CYAN} Press ENTER within ${PROMPT_TIMEOUT}s to format ${NC}"
echo -e "${CYAN} Or wait to skip (safe default) ${NC}"
echo -e "${CYAN}========================================${NC}"
echo ""

# read fails on timeout and also on EOF (e.g. no console attached to stdin),
# so every non-interactive path lands in the "skip" branch.
if read -r -t "$PROMPT_TIMEOUT" -p "Format $DEVICE? [press ENTER to confirm] " response; then
  echo ""
  case "$response" in
    "" | [yY] | [yY][eE][sS])
      warn "Formatting in 5 seconds... Ctrl+C to abort"
      sleep 5
      format_storage
      ;;
    *)
      # Anything other than a bare ENTER (or an explicit yes) is treated as
      # "no": typing "n" must never wipe the drive.
      warn "Answer '$response' is not a confirmation - skipping storage setup (drive left untouched)"
      exit 0
      ;;
  esac
else
  echo ""
  warn "Timeout - skipping storage setup (drive left untouched)"
  warn "To format manually, reboot and press ENTER when prompted"
  exit 0
fi