- setup-node-storage service auto-partitions NVMe for containerd/longhorn - Root password encrypted with sops/age, decrypted during build - Fix SSH host key permissions (0600) so sshd actually starts - Disable SSH socket activation for reliable boot - Add OPERATIONS.md with runbook - Makefile tracks source dependencies
196 lines
6.1 KiB
Bash
196 lines
6.1 KiB
Bash
#!/bin/bash
#
# Setup local NVMe storage for K3s node
# Runs at boot via systemd service
#
# Logic:
#   - No NVMe: exit cleanly
#   - No partition table: auto-format (new drive)
#   - Has our labels: mount and exit (already configured)
#   - Has other partitions: prompt with 120s timeout (safety)

set -euo pipefail

# Configuration. Everything below is a constant for the lifetime of the
# script, so it is marked readonly to catch accidental reassignment.
readonly DEVICE="/dev/nvme0n1"              # whole-disk block device to manage
readonly CONTAINERD_SIZE="75GiB"            # end offset of the first partition (passed to parted)
readonly CONTAINERD_LABEL="containerd"      # filesystem label of partition 1
readonly LONGHORN_LABEL="longhorn"          # filesystem label of partition 2
readonly CONTAINERD_MOUNT="/var/lib/containerd"
readonly LONGHORN_MOUNT="/var/lib/longhorn"
readonly MARKER_FILE=".netboot-storage"     # written to each filesystem after formatting
readonly PROMPT_TIMEOUT=120                 # seconds to wait for console confirmation

# Colors for console output (stored as backslash escapes, expanded by printf %b)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Log to both console and journald.
#
# Each helper prints "[storage] <message>" on stdout with a colored tag and
# forwards the same message to syslog via logger(1).
#
# - The message is printed with %s, so backslashes in it are shown literally
#   instead of being interpreted as escape sequences.
# - The logger call is best-effort: under `set -e` a failing logger would
#   otherwise make the helper return non-zero and abort the whole script
#   just because a log line could not be delivered.
#
# Arguments: $* - message text
# Returns:   0
log() {
  printf '%b[storage]%b %s\n' "$GREEN" "$NC" "$*"
  logger -t setup-node-storage "$*" || true
}

warn() {
  printf '%b[storage]%b %s\n' "$YELLOW" "$NC" "$*"
  logger -t setup-node-storage -p warning "$*" || true
}

error() {
  printf '%b[storage]%b %s\n' "$RED" "$NC" "$*"
  logger -t setup-node-storage -p err "$*" || true
}

# Check if NVMe exists; a node without the drive is a valid configuration,
# so this is a clean exit rather than an error.
if [[ ! -b "$DEVICE" ]]; then
  log "No NVMe device found at $DEVICE - skipping storage setup"
  exit 0
fi

# The size is only used in the log line below, so a failed lookup must not
# abort the script (set -e + pipefail would otherwise exit here).
DEVICE_SIZE=$(lsblk -b -d -n -o SIZE "$DEVICE" \
  | awk '{printf "%.0fGB", $1/1000000000}') || DEVICE_SIZE=""
DEVICE_SIZE=${DEVICE_SIZE:-unknown size}
log "Found NVMe: $DEVICE ($DEVICE_SIZE)"

# Get partition names. Disks whose name ends in a digit (nvme0n1, mmcblk0,
# loop0, ...) use a 'p' separator before the partition number; others
# (sda, vdb, ...) append the number directly. The explicit nvme match is
# kept so every path that previously got the 'p' form still does.
if [[ "$DEVICE" == *"nvme"* || "$DEVICE" =~ [0-9]$ ]]; then
  PART1="${DEVICE}p1"
  PART2="${DEVICE}p2"
else
  PART1="${DEVICE}1"
  PART2="${DEVICE}2"
fi

#######################################
# Mount the already-formatted partitions by filesystem label.
# A mount point that is already mounted is left alone.
# Globals:   CONTAINERD_MOUNT, LONGHORN_MOUNT, CONTAINERD_LABEL,
#            LONGHORN_LABEL (read)
# Outputs:   progress and the size of each mounted filesystem via log/error
# Returns:   0 on success, 1 if either partition could not be mounted
#######################################
mount_storage() {
  local target

  log "Mounting existing storage..."

  mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"

  # Only attempt the mount when nothing is mounted there yet
  if ! mountpoint -q "$CONTAINERD_MOUNT" \
    && ! mount -L "$CONTAINERD_LABEL" "$CONTAINERD_MOUNT"; then
    error "Failed to mount containerd partition"
    return 1
  fi

  if ! mountpoint -q "$LONGHORN_MOUNT" \
    && ! mount -L "$LONGHORN_LABEL" "$LONGHORN_MOUNT"; then
    error "Failed to mount longhorn partition"
    return 1
  fi

  log "Storage mounted:"
  # Second column of the last df line is the filesystem size
  for target in "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"; do
    log "  $target: $(df -h "$target" | tail -1 | awk '{print $2}')"
  done
  return 0
}

#######################################
# Wipe DEVICE, create a two-partition GPT layout, format both partitions as
# ext4, mount them and drop a marker file on each.
# DESTRUCTIVE: all existing signatures on DEVICE are erased.
# Globals:   DEVICE, PART1, PART2, CONTAINERD_SIZE, CONTAINERD_LABEL,
#            LONGHORN_LABEL, CONTAINERD_MOUNT, LONGHORN_MOUNT,
#            MARKER_FILE (read)
# Outputs:   progress via log/error
# Returns:   0 on success; exits the script with status 1 if the new
#            partitions do not show up as block devices
#######################################
format_storage() {
  local mount_point

  log "Partitioning $DEVICE..."

  wipefs -af "$DEVICE"
  parted -s "$DEVICE" mklabel gpt
  # Partition 1 ends where partition 2 begins; partition 2 takes the rest
  parted -s "$DEVICE" mkpart primary ext4 1MiB "$CONTAINERD_SIZE"
  parted -s "$DEVICE" mkpart primary ext4 "$CONTAINERD_SIZE" 100%

  # Tell kernel to re-read partition table and wait for udev
  partprobe "$DEVICE"
  udevadm settle --timeout=10

  # Verify partitions appeared
  if [[ ! -b "$PART1" || ! -b "$PART2" ]]; then
    error "Partitions not found after partprobe: $PART1, $PART2"
    exit 1
  fi

  # -F: the decision to wipe was already made above. Without it mke2fs may
  # stop and ask for confirmation when it finds a stale filesystem signature
  # inside a freshly created partition and is attached to a terminal.
  # The size in the message comes from the configuration rather than being
  # hard-coded, so it stays correct if CONTAINERD_SIZE changes.
  log "Formatting ${PART1} as ext4 (containerd, ${CONTAINERD_SIZE})..."
  mkfs.ext4 -F -L "$CONTAINERD_LABEL" -q "$PART1"

  log "Formatting ${PART2} as ext4 (longhorn, remaining)..."
  mkfs.ext4 -F -L "$LONGHORN_LABEL" -q "$PART2"

  # Mount the new partitions
  mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"
  mount "$PART1" "$CONTAINERD_MOUNT"
  mount "$PART2" "$LONGHORN_MOUNT"

  # Create marker files with metadata
  for mount_point in "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"; do
    cat > "${mount_point}/${MARKER_FILE}" <<EOF
# Netboot storage marker - DO NOT DELETE
formatted_date=$(date -Iseconds)
formatted_by=setup-node-storage
hostname=$(hostname)
device=$DEVICE
EOF
  done

  log "Storage formatted and mounted successfully"
  # Second column of the last df line is the filesystem size
  for mount_point in "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"; do
    log "  $mount_point: $(df -h "$mount_point" | tail -1 | awk '{print $2}')"
  done
}

#######################################
# Check whether DEVICE carries a partition table.
#   Method 1: blkid reports a non-empty PTTYPE.
#   Method 2 (fallback): `parted print` exits successfully on the device.
# Globals:   DEVICE (read)
# Returns:   0 if a partition table was detected, 1 otherwise
#######################################
has_partition_table() {
  local pttype

  # blkid exits non-zero when it has no PTTYPE to report. That is an
  # expected answer here, not an error, so it must not trip `set -e` if
  # this function is ever called outside an if/&&/|| context.
  pttype=$(blkid -o value -s PTTYPE "$DEVICE" 2>/dev/null) || true
  if [[ -n "$pttype" ]]; then
    return 0 # has partition table
  fi

  # Fallback: check if parted can read it (exit status only, output discarded)
  if parted -s "$DEVICE" print &>/dev/null; then
    return 0 # has partition table
  fi

  return 1 # no partition table
}

# ---------------------------------------------------------------------------
# Main flow
# ---------------------------------------------------------------------------

if ! has_partition_table; then
  # No partition table - this is a fresh drive, auto-format
  log "Empty drive detected (no partition table) - auto-formatting..."
  format_storage
  exit 0
fi

# Has partition table - check if it's ours
if blkid -L "$CONTAINERD_LABEL" &>/dev/null && blkid -L "$LONGHORN_LABEL" &>/dev/null; then
  # Check for marker file (belt and suspenders)
  # Create temp mount to check marker without leaving dangling mount
  TEMP_MOUNT=$(mktemp -d)
  if mount -L "$CONTAINERD_LABEL" "$TEMP_MOUNT" 2>/dev/null; then
    if [[ -f "${TEMP_MOUNT}/${MARKER_FILE}" ]]; then
      marker_found=yes
    else
      marker_found=no
    fi
    umount "$TEMP_MOUNT"
    rmdir "$TEMP_MOUNT"

    if [[ "$marker_found" == yes ]]; then
      log "Storage already configured (found labels and marker)"
    else
      # Has our labels but no marker - probably ours, mount it
      warn "Found labels but no marker file - assuming configured"
    fi
    mount_storage
    exit 0
  fi
  rmdir "$TEMP_MOUNT" 2>/dev/null || true

  # Both labels exist but the containerd filesystem could not be mounted.
  # Say so explicitly instead of claiming the labels are missing.
  warn "Found netboot labels but could not mount the '$CONTAINERD_LABEL' partition to verify it."
else
  # Has partitions but not ours - this could contain data!
  warn "NVMe has existing partitions but no netboot labels."
fi

warn "This drive may contain important data!"
echo ""
lsblk "$DEVICE"
echo ""

# Prompt on console with timeout
echo -e "${CYAN}========================================${NC}"
echo -e "${CYAN}  Press ENTER within ${PROMPT_TIMEOUT}s to format    ${NC}"
echo -e "${CYAN}  Or wait to skip (safe default)        ${NC}"
echo -e "${CYAN}========================================${NC}"
echo ""

# read fails on timeout and on EOF (e.g. no console attached); both are
# treated as "do not touch the drive".
if ! read -r -t "$PROMPT_TIMEOUT" -p "Format $DEVICE? [press ENTER to confirm] " response; then
  echo ""
  warn "Timeout - skipping storage setup (drive left untouched)"
  warn "To format manually, reboot and press ENTER when prompted"
  exit 0
fi
echo ""

# Only a bare ENTER or an explicit yes confirms. Anything else (for example
# an operator typing "n" and pressing ENTER) must not wipe the drive.
case "$response" in
  "" | [yY] | [yY][eE][sS]) ;;
  *)
    warn "Unexpected input '$response' - not formatting (drive left untouched)"
    exit 0
    ;;
esac

warn "Formatting in 5 seconds... Ctrl+C to abort"
sleep 5
format_storage