Add K3s agent setup with NVMe-backed persistent storage
Bind-mount K3s agent data, node identity, and kubelet dirs from NVMe so container image cache and node registration survive reboots on the diskless netboot nodes. Includes K3s binary download, agent systemd service, DHCP hostname resolution, and open-iscsi for Longhorn iSCSI support.
This commit is contained in:
4
files/k3s-agent.env
Normal file
4
files/k3s-agent.env
Normal file
@@ -0,0 +1,4 @@
|
||||
# K3s agent configuration
|
||||
# Server URL and token for cluster join.
# The token is a cluster secret: keep this file readable by root only and do not publish it.
|
||||
K3S_URL="https://192.168.100.1:6443"
|
||||
K3S_TOKEN="K106e2ea6914f7a019d1222c1fdd19c5065978377364701f60eb1f2a585e8c3924b::server:0a15c4d7a13df65b066f5b8eff710ecd"
|
||||
25
files/k3s-agent.service
Normal file
25
files/k3s-agent.service
Normal file
@@ -0,0 +1,25 @@
|
||||
[Unit]
Description=Lightweight Kubernetes (K3s Agent)
Documentation=https://k3s.io
# Start only after the network is up, the NVMe bind mounts exist and the
# DHCP-provided hostname has been applied.
After=network-online.target setup-node-storage.service set-hostname-from-dhcp.service
Wants=network-online.target
Requires=setup-node-storage.service set-hostname-from-dhcp.service

[Service]
# NOTE(review): the upstream K3s installer is believed to generate Type=exec
# for agent units; confirm this K3s version's agent sends READY=1, otherwise
# the unit stays in "activating" (TimeoutStartSec=0 disables the start timeout).
Type=notify
# Leading "-" makes the environment file optional.
EnvironmentFile=-/etc/rancher/k3s/k3s-agent.env
# Leading "-" ignores modprobe failures so a failed module load does not
# prevent the agent from starting.
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/k3s agent
# Only the main k3s process is killed on stop/restart; other processes in the
# unit's control group are left running.
KillMode=process
Delegate=yes
LimitNOFILE=1048576
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
TimeoutStartSec=0
Restart=always
RestartSec=5s

[Install]
WantedBy=multi-user.target
|
||||
78
files/k3s-join
Normal file
78
files/k3s-join
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/bin/bash
|
||||
# K3s agent join script for netboot nodes
|
||||
# Fetches token from server and starts k3s agent
|
||||
#
|
||||
# Runs at boot via k3s-join.service
|
||||
|
||||
set -euo pipefail

# Connection settings. Each value can be overridden from the environment;
# the defaults are the values this script has always used.
readonly K3S_SERVER="${K3S_SERVER:-192.168.100.1}"
readonly K3S_URL="${K3S_URL:-https://${K3S_SERVER}:6443}"
readonly TOKEN_URL="${TOKEN_URL:-http://${K3S_SERVER}:8800/k3s-token}"
# Retry policy shared by the network wait and the token fetch.
readonly MAX_RETRIES="${MAX_RETRIES:-30}"
readonly RETRY_DELAY="${RETRY_DELAY:-10}"

# Colors for console output (escape sequences are expanded when printed)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'
|
||||
|
||||
# Logging helpers: print a coloured, tagged line on stderr and mirror the
# message to syslog via logger(1).
# - stderr (not stdout) so that functions whose stdout is captured with $(...)
#   can log without corrupting their result.
# - the message is printed with %s so backslashes in it are not interpreted.
# - a failing logger must not abort the script under `set -e`.
log() {
  printf '%b[k3s-join]%b %s\n' "$GREEN" "$NC" "$*" >&2
  logger -t k3s-join "$*" || true
}

warn() {
  printf '%b[k3s-join]%b %s\n' "$YELLOW" "$NC" "$*" >&2
  logger -t k3s-join -p warning "$*" || true
}

error() {
  printf '%b[k3s-join]%b %s\n' "$RED" "$NC" "$*" >&2
  logger -t k3s-join -p err "$*" || true
}
|
||||
|
||||
# Wait for network to be ready
|
||||
wait_for_network() {
  # Block until a host answers a single ICMP echo request.
  # Globals:   K3S_SERVER, MAX_RETRIES, RETRY_DELAY (read)
  # Arguments: $1 - host to probe (optional, defaults to $K3S_SERVER)
  # Returns:   0 once the host answers, 1 after MAX_RETRIES failed probes
  local host="${1:-$K3S_SERVER}"
  local attempts=0

  until ping -c1 -W1 "$host" &>/dev/null; do
    attempts=$((attempts + 1))
    if ((attempts >= MAX_RETRIES)); then
      error "Network not available after $MAX_RETRIES attempts"
      return 1
    fi
    warn "Waiting for network... ($attempts/$MAX_RETRIES)"
    sleep "$RETRY_DELAY"
  done

  log "Network is up"
}
|
||||
|
||||
# Fetch join token from server
|
||||
fetch_token() {
  # Fetch the cluster join token over HTTP, retrying until it is available.
  # Globals:   TOKEN_URL, MAX_RETRIES, RETRY_DELAY (read)
  # Outputs:   the token, and nothing else, on stdout
  # Returns:   0 on success, 1 after MAX_RETRIES failed attempts
  local attempts=0
  local token=""

  while [[ -z "$token" ]]; do
    # "|| true" keeps a failed request from aborting under `set -e`; the
    # timeouts keep a stalled connection from hanging the boot indefinitely.
    token=$(curl -sf --connect-timeout 5 --max-time 15 "$TOKEN_URL" 2>/dev/null || true)
    if [[ -z "$token" ]]; then
      attempts=$((attempts + 1))
      # Callers capture stdout as the token, so diagnostics are forced to
      # stderr here regardless of where the logging helpers write.
      if ((attempts >= MAX_RETRIES)); then
        error "Failed to fetch token after $MAX_RETRIES attempts" >&2
        return 1
      fi
      warn "Waiting for token... ($attempts/$MAX_RETRIES)" >&2
      sleep "$RETRY_DELAY"
    fi
  done

  printf '%s\n' "$token"
}
|
||||
|
||||
# Main
|
||||
log "Starting K3s agent join process"

wait_for_network

log "Fetching join token from $TOKEN_URL"
# Test the fetch inside the condition: under `set -e` a bare failing
# assignment would exit before the error message could be printed.
if ! K3S_TOKEN=$(fetch_token) || [[ -z "$K3S_TOKEN" ]]; then
  error "Failed to get token, exiting"
  exit 1
fi
log "Token acquired"

log "Starting K3s agent (server: $K3S_URL)"
# Hand the server URL and token to k3s through the environment variables it
# reads (K3S_URL / K3S_TOKEN) so the secret does not appear in the process
# argument list.
export K3S_URL K3S_TOKEN
# exec replaces this shell so that k3s becomes the service's main process.
exec /usr/local/bin/k3s agent \
  --node-name="$(hostname)"
|
||||
26
files/k3s-join.service
Normal file
26
files/k3s-join.service
Normal file
@@ -0,0 +1,26 @@
|
||||
[Unit]
Description=K3s Agent Join Service
Documentation=file:///usr/local/bin/k3s-join

# Run after network, storage and hostname are ready.
# k3s-join registers the node under $(hostname), so the DHCP hostname must be
# applied first; the storage unit must have succeeded so K3s data is on NVMe.
After=network-online.target setup-node-storage.service set-hostname-from-dhcp.service
Wants=network-online.target
Requires=setup-node-storage.service set-hostname-from-dhcp.service
# NOTE(review): k3s-join starts "k3s agent" without an external runtime
# endpoint; confirm a separate containerd.service really exists on the node
# image, otherwise this requirement prevents the unit from starting.
Requires=containerd.service
# NOTE(review): k3s-agent.service also runs "k3s agent"; confirm that only one
# of the two units is enabled on a node.

[Service]
Type=exec
ExecStart=/usr/local/bin/k3s-join
Restart=on-failure
RestartSec=30
# Same process handling as k3s-agent.service: only the main process is killed
# on stop/restart, and the control group is delegated to the service.
KillMode=process
Delegate=yes

# Environment
Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# Resource limits (raised, matching k3s-agent.service)
LimitNOFILE=1048576
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity

[Install]
WantedBy=multi-user.target
|
||||
28
files/set-hostname-from-dhcp
Normal file
28
files/set-hostname-from-dhcp
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
# Set hostname from DHCP lease
# Runs before k3s-agent to ensure proper node name
#
# Polls the systemd-networkd lease files once per second for up to MAX_WAIT
# seconds and applies the first HOSTNAME= value found. Exits 0 even when no
# hostname is found, so boot continues with the default hostname.

set -euo pipefail

readonly LEASE_DIR="/run/systemd/netif/leases"
readonly MAX_WAIT=60

# Print a tagged message and mirror it to syslog; a failing logger must not
# abort the script under `set -e`.
log() {
  echo "[hostname] $*"
  logger -t set-hostname "$*" || true
}

# Print the first HOSTNAME= value found in any lease file (nothing if none).
find_dhcp_hostname() {
  local lease name
  for lease in "$LEASE_DIR"/*; do
    # An unmatched glob stays literal; -f filters that case out.
    [[ -f "$lease" ]] || continue
    # -m1: take only the first matching line so the value is a single line.
    name=$(grep -m1 -oP '^HOSTNAME=\K.*' "$lease" 2>/dev/null || true)
    if [[ -n "$name" ]]; then
      printf '%s\n' "$name"
      return 0
    fi
  done
  return 0
}

main() {
  # A dedicated variable is used instead of HOSTNAME, which bash itself sets.
  local elapsed dhcp_hostname

  # Wait for DHCP lease
  for ((elapsed = 0; elapsed < MAX_WAIT; elapsed++)); do
    dhcp_hostname=$(find_dhcp_hostname)
    if [[ -n "$dhcp_hostname" ]]; then
      log "Found hostname in DHCP lease: $dhcp_hostname"
      hostnamectl set-hostname "$dhcp_hostname"
      log "Hostname set to: $(hostname)"
      exit 0
    fi
    sleep 1
  done

  log "Warning: No DHCP hostname found after ${MAX_WAIT}s, using default"
  exit 0
}

main "$@"
|
||||
15
files/set-hostname-from-dhcp.service
Normal file
15
files/set-hostname-from-dhcp.service
Normal file
@@ -0,0 +1,15 @@
|
||||
[Unit]
Description=Set hostname from DHCP lease
Documentation=file:///usr/local/bin/set-hostname-from-dhcp
After=network-online.target systemd-networkd.service
Wants=network-online.target
# Both K3s units depend on the final hostname: k3s-agent.service orders itself
# after this unit and k3s-join passes $(hostname) as the node name.
Before=k3s-agent.service k3s-join.service

[Service]
Type=oneshot
ExecStart=/usr/local/bin/set-hostname-from-dhcp
# Stay "active" after the script exits so dependent units see it as satisfied.
RemainAfterExit=yes
# The script waits at most 60 s for a lease; 90 s leaves headroom.
TimeoutStartSec=90

[Install]
WantedBy=multi-user.target
|
||||
@@ -69,12 +69,50 @@ mount_storage() {
|
||||
}
|
||||
fi
|
||||
|
||||
# K3s persistence: bind mount agent data and node identity from NVMe
|
||||
# This allows the node to survive reboots without re-registering
|
||||
setup_k3s_persistence
|
||||
|
||||
log "Storage mounted:"
|
||||
log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
|
||||
log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
|
||||
return 0
|
||||
}
|
||||
|
||||
# Setup K3s persistence directories
|
||||
# Bind mounts NVMe directories to k3s paths so node identity survives reboots
|
||||
setup_k3s_persistence() {
  # Bind-mount the NVMe-backed directories under $CONTAINERD_MOUNT onto the
  # paths K3s and the kubelet use. Safe to call repeatedly: paths that are
  # already mount points are left alone.
  # Globals: CONTAINERD_MOUNT (read)

  # K3s agent data (containerd, kubelet certs, etc.)
  # Uses overlayfs internally, so must be on real filesystem, not overlay
  _k3s_bind_persist "$CONTAINERD_MOUNT/k3s-agent" "/var/lib/rancher/k3s/agent"

  # K3s node identity (password file)
  # Must persist across reboots or node will be rejected
  _k3s_bind_persist "$CONTAINERD_MOUNT/k3s-node" "/etc/rancher/node"

  # Kubelet data (pod volumes, projected tokens, etc.)
  # Must be on NVMe so kubelet reports real disk capacity, not the 2G tmpfs overlay
  _k3s_bind_persist "$CONTAINERD_MOUNT/kubelet" "/var/lib/kubelet"
}

# Bind-mount one backing directory onto a target path unless it is already a
# mount point. Creates both directories first.
# Arguments: $1 - backing directory on NVMe, $2 - target path
_k3s_bind_persist() {
  local backing=$1
  local target=$2

  mkdir -p "$backing" "$target"
  if ! mountpoint -q "$target"; then
    mount --bind "$backing" "$target"
    log " $target: bind mount to NVMe"
  fi
}
|
||||
|
||||
# Function to format the drive
|
||||
format_storage() {
|
||||
log "Partitioning $DEVICE..."
|
||||
@@ -116,6 +154,9 @@ device=$DEVICE
|
||||
EOF
|
||||
done
|
||||
|
||||
# K3s persistence: bind mount agent data and node identity from NVMe
|
||||
setup_k3s_persistence
|
||||
|
||||
log "Storage formatted and mounted successfully"
|
||||
log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
|
||||
log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
|
||||
|
||||
Reference in New Issue
Block a user