Compare commits
5 Commits
a927b69aad
...
382d08e056
| Author | SHA1 | Date | |
|---|---|---|---|
| 382d08e056 | |||
| 91aab1fce6 | |||
| 492cc8abbc | |||
| 3f191d8f93 | |||
| 258d1ecc60 |
3
.sops.yaml
Normal file
3
.sops.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
creation_rules:
|
||||
- path_regex: secrets/.*\.yaml$
|
||||
age: age1gausnystsln7fpenw7arw7x79xe22z22697jnauj38npy0usayqqxqc7td2y
|
||||
17
Makefile
17
Makefile
@@ -1,9 +1,19 @@
|
||||
.PHONY: build deploy clean help
|
||||
# Command-style targets that never correspond to a file on disk.
# 'build' is a recipe-less alias for $(BUILD_ARTIFACT); declaring it phony keeps
# a stray file or directory named "build" from being mistaken for the target.
# The artifact itself is still only rebuilt when its prerequisites are newer.
.PHONY: build deploy clean help check-nas all
|
||||
|
||||
NAS_HOST=phoenix
|
||||
NAS_PATH=/srv/netboot
|
||||
# Absolute directory containing this Makefile (no trailing slash).
# Simply expanded (:=) and built from make's own functions, so it is computed
# once at parse time instead of forking `dirname` on every reference.
SCRIPT_DIR := $(patsubst %/,%,$(dir $(realpath $(firstword $(MAKEFILE_LIST)))))
|
||||
|
||||
# Source files that trigger a rebuild
|
||||
BUILD_SOURCES := $(SCRIPT_DIR)/build-image.sh \
|
||||
$(wildcard $(SCRIPT_DIR)/initramfs/*) \
|
||||
$(wildcard $(SCRIPT_DIR)/initramfs/*/*) \
|
||||
$(wildcard $(SCRIPT_DIR)/files/*) \
|
||||
$(wildcard $(SCRIPT_DIR)/secrets/*.yaml)
|
||||
|
||||
# Build artifact (used as target for dependency tracking)
|
||||
BUILD_ARTIFACT := $(SCRIPT_DIR)/http/filesystem.squashfs
|
||||
|
||||
help:
|
||||
@echo "Netboot image build and deployment"
|
||||
@echo ""
|
||||
@@ -23,7 +33,8 @@ check-nas:
|
||||
@echo "Checking NAS connectivity..."
|
||||
@ping -c 1 $(NAS_HOST) > /dev/null 2>&1 && echo "✓ NAS is reachable" || (echo "✗ Cannot reach $(NAS_HOST)"; exit 1)
|
||||
|
||||
build:
|
||||
# Build depends on source files - only rebuilds if sources changed
|
||||
$(BUILD_ARTIFACT): $(BUILD_SOURCES)
|
||||
@echo "Building netboot image..."
|
||||
@echo "This will take 15-30 minutes..."
|
||||
sudo $(SCRIPT_DIR)/build-image.sh
|
||||
@@ -32,6 +43,8 @@ build:
|
||||
@echo "Artifacts ready in $(SCRIPT_DIR)/http/"
|
||||
@du -sh $(SCRIPT_DIR)/http/*
|
||||
|
||||
build: $(BUILD_ARTIFACT)
|
||||
|
||||
deploy: check-nas
|
||||
@echo "Deploying to NAS ($(NAS_HOST):$(NAS_PATH))..."
|
||||
@echo "Syncing http/ directory..."
|
||||
|
||||
333
OPERATIONS.md
Normal file
333
OPERATIONS.md
Normal file
@@ -0,0 +1,333 @@
|
||||
# Netboot Operations Guide
|
||||
|
||||
This document covers day-to-day operations for the netboot K3s cluster system.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
```bash
|
||||
# Build new image (15-30 min, requires sudo)
|
||||
cd /home/lindahl/git/netboot
|
||||
sudo ./build-image.sh
|
||||
make deploy
|
||||
|
||||
# Rebuild initramfs only (faster, ~2 min)
|
||||
sudo ./rebuild-initramfs.sh
|
||||
make deploy
|
||||
|
||||
# SSH to a node
|
||||
ssh root@192.168.100.51
|
||||
|
||||
# Check node storage
|
||||
ssh root@192.168.100.51 "lsblk && df -h /var/lib/containerd /var/lib/longhorn"
|
||||
```
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌─────────────────┐ HTTP (8800) ┌──────────────────┐
|
||||
│ Phoenix NAS │◄────────────────────►│ K3s Nodes │
|
||||
│ 192.168.100.1 │ │ 192.168.100.5x │
|
||||
├─────────────────┤ ├──────────────────┤
|
||||
│ /srv/netboot/ │ │ RAM (overlay) │
|
||||
│ http/ │ │ └─ / (root) │
|
||||
│ vmlinuz │ │ NVMe (persistent)│
|
||||
│ initrd-netboot.img │ ├─ containerd │
|
||||
│ filesystem.squashfs │ └─ longhorn │
|
||||
│ boot.ipxe │ └──────────────────┘
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
**Boot sequence:**
|
||||
1. Node PXE boots → loads iPXE
|
||||
2. iPXE fetches `boot.ipxe` from phoenix
|
||||
3. Downloads kernel + initramfs
|
||||
4. Initramfs downloads squashfs root over HTTP
|
||||
5. Mounts squashfs read-only with tmpfs overlay
|
||||
6. `setup-node-storage.service` partitions/mounts local NVMe
|
||||
7. System starts, K3s joins cluster
|
||||
|
||||
## Building Images
|
||||
|
||||
### Full Build
|
||||
|
||||
Builds everything from scratch: debootstrap, packages, initramfs, squashfs.
|
||||
|
||||
```bash
|
||||
cd /home/lindahl/git/netboot
|
||||
sudo ./build-image.sh
|
||||
make deploy
|
||||
```
|
||||
|
||||
**Time:** 15-30 minutes
|
||||
**When to use:** Package changes, kernel updates, major configuration changes
|
||||
|
||||
### Initramfs-Only Rebuild
|
||||
|
||||
Faster rebuild when only changing boot/network logic.
|
||||
|
||||
```bash
|
||||
sudo ./rebuild-initramfs.sh
|
||||
make deploy
|
||||
```
|
||||
|
||||
**Time:** ~2 minutes
|
||||
**When to use:** Changes to `initramfs/` scripts or hooks
|
||||
|
||||
### Verify Build
|
||||
|
||||
Check that all components are present and valid:
|
||||
|
||||
```bash
|
||||
./verify-image.sh
|
||||
```
|
||||
|
||||
## Secret Management
|
||||
|
||||
Secrets are encrypted with [sops](https://github.com/getsops/sops) using age encryption. The age private key lives on phoenix, so decrypting and editing secrets is done there over SSH (see the commands below).
|
||||
|
||||
### Encrypted Files
|
||||
|
||||
| File | Contents |
|
||||
|------|----------|
|
||||
| `secrets/netboot.sops.yaml` | Root password hash for console login |
|
||||
|
||||
### Viewing Secrets
|
||||
|
||||
```bash
|
||||
# From any machine with SSH access to phoenix
|
||||
cat secrets/netboot.sops.yaml | ssh phoenix "sops -d --input-type yaml --output-type yaml /dev/stdin"
|
||||
```
|
||||
|
||||
### Updating Root Password
|
||||
|
||||
1. Generate new password hash:
|
||||
```bash
|
||||
ssh phoenix "echo 'newpassword' | openssl passwd -6 -stdin"
|
||||
```
|
||||
|
||||
2. Update the encrypted file:
|
||||
```bash
|
||||
ssh phoenix "cd /path/to/netboot && sops secrets/netboot.sops.yaml"
|
||||
# Edit root_password_hash value, save
|
||||
```
|
||||
|
||||
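Or recreate entirely (the `--age` recipient must be the same public key as the `age:` entry in `.sops.yaml`; verify the key below against that file before running, because a mistyped recipient produces a file that phoenix cannot decrypt):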
|
||||
```bash
|
||||
NEW_HASH=$(ssh phoenix "echo 'newpassword' | openssl passwd -6 -stdin")
|
||||
ssh phoenix "echo 'root_password_hash: \"$NEW_HASH\"' | sops --input-type yaml --output-type yaml -e --age age1gausnystsln7fpenw7arw7x79xe22z697jnauj38npy0usayqqxqc7td2y /dev/stdin" > secrets/netboot.sops.yaml
|
||||
```
|
||||
|
||||
3. Rebuild and deploy:
|
||||
```bash
|
||||
sudo ./build-image.sh
|
||||
make deploy
|
||||
```
|
||||
|
||||
4. Reboot nodes to pick up new password
|
||||
|
||||
### Adding New Secrets
|
||||
|
||||
Edit `.sops.yaml` to add new file patterns, then create encrypted files on phoenix:
|
||||
|
||||
```bash
|
||||
ssh phoenix "sops secrets/newfile.sops.yaml"
|
||||
```
|
||||
|
||||
## Node Storage Setup
|
||||
|
||||
Local NVMe is automatically partitioned on first boot by `setup-node-storage.service`.
|
||||
|
||||
### Partition Layout
|
||||
|
||||
| Partition | Size | Label | Mount Point | Purpose |
|
||||
|-----------|------|-------|-------------|---------|
|
||||
| nvme0n1p1 | 75GiB | containerd | /var/lib/containerd | Container images, plus K3s agent, node identity and kubelet data (bind-mounted) |
|
||||
| nvme0n1p2 | Remaining | longhorn | /var/lib/longhorn | Distributed storage |
|
||||
|
||||
### Automatic Behavior
|
||||
|
||||
| Drive State | Action |
|
||||
|-------------|--------|
|
||||
| No partition table | Auto-format (no prompt) |
|
||||
| Has our labels (containerd/longhorn) | Mount silently |
|
||||
| Has unknown partitions | Prompt on tty1, 120s timeout, skip if no response |
|
||||
|
||||
### Manual Intervention
|
||||
|
||||
If a node has an unknown drive and you want to format it:
|
||||
|
||||
1. Connect to physical console (tty1)
|
||||
2. Reboot the node
|
||||
3. Press ENTER when prompted (within 120 seconds)
|
||||
4. Wait 5 seconds (abort window)
|
||||
5. Drive is formatted and mounted
|
||||
|
||||
### Checking Storage Status
|
||||
|
||||
```bash
|
||||
# On node
|
||||
journalctl -u setup-node-storage
|
||||
cat /var/lib/containerd/.netboot-storage # marker file with metadata
|
||||
lsblk /dev/nvme0n1
|
||||
df -h /var/lib/containerd /var/lib/longhorn
|
||||
```
|
||||
|
||||
## SSH Access
|
||||
|
||||
### Authorized Keys
|
||||
|
||||
Keys are baked into the image at build time. Current keys:
|
||||
|
||||
| Key | Source |
|
||||
|-----|--------|
|
||||
| `ssh-ed25519 AAAAC3...y1J` | lindahl@lindahl-Legion-5-Pro-16ACH6H |
|
||||
| `ssh-ed25519 AAAA...0tX` | lindahl@phoenix.home |
|
||||
|
||||
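To add/remove keys, edit the block in `build-image.sh` that writes `/root/.ssh/authorized_keys` (search for `authorized_keys`; line numbers shift as the script changes).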
|
||||
|
||||
### Console Access
|
||||
|
||||
Root password is set for physical console login only. SSH remains pubkey-only.
|
||||
|
||||
```bash
|
||||
# Physical console or IPMI
|
||||
login: root
|
||||
Password: <from secrets/netboot.sops.yaml>
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Node Won't Boot
|
||||
|
||||
1. Check phoenix HTTP server:
|
||||
```bash
|
||||
ssh phoenix "curl -I http://localhost:8800/boot.ipxe"
|
||||
ssh phoenix "ls -lh /srv/netboot/http/"
|
||||
```
|
||||
|
||||
2. Check nginx is running:
|
||||
```bash
|
||||
ssh phoenix "systemctl status nginx"
|
||||
```
|
||||
|
||||
3. Verify image integrity:
|
||||
```bash
|
||||
./verify-image.sh
|
||||
```
|
||||
|
||||
### Node Boots But No Network
|
||||
|
||||
1. Check if initramfs has network driver:
|
||||
```bash
|
||||
lsinitramfs http/initrd-netboot.img | grep -E "r8169|r8125"
|
||||
```
|
||||
|
||||
2. Check kernel cmdline includes `ip=dhcp`:
|
||||
```bash
|
||||
cat http/boot.ipxe
|
||||
```
|
||||
|
||||
### Storage Not Mounting
|
||||
|
||||
1. Check service status:
|
||||
```bash
|
||||
ssh root@node "systemctl status setup-node-storage"
|
||||
ssh root@node "journalctl -u setup-node-storage"
|
||||
```
|
||||
|
||||
2. Check if NVMe exists:
|
||||
```bash
|
||||
ssh root@node "lsblk"
|
||||
```
|
||||
|
||||
3. Check labels:
|
||||
```bash
|
||||
ssh root@node "blkid -L containerd && blkid -L longhorn"
|
||||
```
|
||||
|
||||
### Overlay Filling Up
|
||||
|
||||
The root overlay is only 2GB. If it fills:
|
||||
|
||||
```bash
|
||||
# Check what's using space
|
||||
ssh root@node "du -sh /var/* | sort -h"
|
||||
|
||||
# Temporary files should go to NVMe or tmpfs mounts
|
||||
# /tmp, /var/tmp, /var/log are separate tmpfs
|
||||
```
|
||||
|
||||
## File Reference
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `build-image.sh` | Main build script |
|
||||
| `rebuild-initramfs.sh` | Quick initramfs rebuild |
|
||||
| `verify-image.sh` | Validate built image |
|
||||
| `Makefile` | Build/deploy automation |
|
||||
| `initramfs/` | Custom initramfs config for mkinitramfs |
|
||||
| `initramfs/scripts/netboot` | HTTP root download and overlay mount |
|
||||
| `files/setup-node-storage` | NVMe partitioning script |
|
||||
| `files/setup-node-storage.service` | Systemd unit for storage setup |
|
||||
| `secrets/netboot.sops.yaml` | Encrypted root password |
|
||||
| `.sops.yaml` | Sops encryption config |
|
||||
| `http/boot.ipxe` | iPXE boot configuration |
|
||||
|
||||
## Network Configuration
|
||||
|
||||
### IP Address Layout
|
||||
|
||||
| Range | Purpose |
|
||||
|-------|---------|
|
||||
| .1 | phoenix (gateway, DHCP, HTTP) |
|
||||
| .2-.19 | Reserved (future infrastructure) |
|
||||
| .20-.29 | Infrastructure devices |
|
||||
| .50-.59 | Static K3s nodes |
|
||||
| .60-.100 | Dynamic DHCP pool |
|
||||
|
||||
### Static Assignments
|
||||
|
||||
| Host | IP | MAC | Role |
|
||||
|------|-----|-----|------|
|
||||
| phoenix | 192.168.100.1 | - | NAS, HTTP server, DHCP |
|
||||
| usw-flex-2 | 192.168.100.21 | 94:2a:6f:4c:fc:72 | Managed switch |
|
||||
| k3s-node-01 | 192.168.100.51 | 78:55:36:04:e7:c8 | K3s worker |
|
||||
| k3s-node-02 | 192.168.100.52 | 78:55:36:04:e7:1d | K3s worker |
|
||||
|
||||
HTTP server: `http://192.168.100.1:8800/`
|
||||
|
||||
### DHCP Reservations
|
||||
|
||||
Static IP assignments are configured in `/etc/dnsmasq.d/pxe-netboot.conf` on phoenix:
|
||||
|
||||
```
|
||||
dhcp-range=192.168.100.60,192.168.100.100,12h
|
||||
|
||||
# Static DHCP reservations for K3s nodes
|
||||
dhcp-host=78:55:36:04:e7:c8,192.168.100.51,k3s-node-01
|
||||
dhcp-host=78:55:36:04:e7:1d,192.168.100.52,k3s-node-02
|
||||
|
||||
# Infrastructure
|
||||
dhcp-host=94:2a:6f:4c:fc:72,192.168.100.21,usw-flex-2
|
||||
```
|
||||
|
||||
To add a new node:
|
||||
|
||||
1. Boot the node once to get its MAC (check leases):
|
||||
```bash
|
||||
ssh phoenix "cat /var/lib/misc/dnsmasq.leases"
|
||||
```
|
||||
|
||||
2. Add reservation:
|
||||
```bash
|
||||
ssh phoenix "sudo tee -a /etc/dnsmasq.d/pxe-netboot.conf << EOF
|
||||
dhcp-host=XX:XX:XX:XX:XX:XX,192.168.100.5X,k3s-node-0X
|
||||
EOF"
|
||||
```
|
||||
|
||||
3. Restart dnsmasq:
|
||||
```bash
|
||||
ssh phoenix "sudo systemctl restart dnsmasq"
|
||||
```
|
||||
|
||||
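To change the boot server IP, edit `http/boot.ipxe` and `initramfs/scripts/netboot`. The K3s server address is set separately via `K3S_URL` in `files/k3s-agent.env`, which also points at 192.168.100.1.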
|
||||
101
build-image.sh
101
build-image.sh
@@ -14,6 +14,22 @@ VERSION=$(date +%Y%m%d-%H%M)
|
||||
|
||||
echo "Building netboot image version $VERSION"
|
||||
|
||||
# Decrypt secrets from phoenix (requires SSH access as the invoking user, not root)
|
||||
echo "Decrypting secrets from phoenix..."
|
||||
SECRETS_FILE="$SCRIPT_DIR/secrets/netboot.sops.yaml"
|
||||
SUDO_USER_HOME=$(getent passwd "${SUDO_USER:-$USER}" | cut -d: -f6)
|
||||
if [ -f "$SECRETS_FILE" ]; then
|
||||
# Run SSH as the original user (not root) to use their SSH keys
|
||||
ROOT_PW_HASH=$(sudo -u "${SUDO_USER:-$USER}" bash -c "cat '$SECRETS_FILE' | ssh phoenix 'sops -d --input-type yaml --output-type yaml /dev/stdin'" | grep root_password_hash | cut -d' ' -f2)
|
||||
if [ -z "$ROOT_PW_HASH" ]; then
|
||||
echo "WARNING: Failed to decrypt root password, console login will be disabled"
|
||||
ROOT_PW_HASH="*"
|
||||
fi
|
||||
else
|
||||
echo "WARNING: No secrets file found at $SECRETS_FILE, console login will be disabled"
|
||||
ROOT_PW_HASH="*"
|
||||
fi
|
||||
|
||||
# Clean previous build - unmount any stray mounts first
|
||||
if [ -d "$BUILD_DIR/rootfs" ]; then
|
||||
echo "Cleaning up previous build mounts..."
|
||||
@@ -40,6 +56,17 @@ debootstrap --arch=amd64 --variant=minbase --components=main,universe,multiverse
|
||||
noble $BUILD_DIR/rootfs \
|
||||
http://archive.ubuntu.com/ubuntu
|
||||
|
||||
# Write root password hash to temp file for chroot to read
|
||||
# Use /root/ not /tmp/ because systemd installation may mount tmpfs over /tmp
|
||||
mkdir -p "$BUILD_DIR/rootfs/root"
|
||||
if [ -n "$ROOT_PW_HASH" ] && [ "$ROOT_PW_HASH" != "*" ]; then
|
||||
echo "$ROOT_PW_HASH" > "$BUILD_DIR/rootfs/root/.pw_hash"
|
||||
echo "Root password hash written to rootfs"
|
||||
else
|
||||
echo "*" > "$BUILD_DIR/rootfs/root/.pw_hash"
|
||||
echo "WARNING: No valid password hash, console login will be disabled"
|
||||
fi
|
||||
|
||||
# Chroot and configure
|
||||
cat << 'CHROOT_SCRIPT' > $BUILD_DIR/rootfs/setup.sh
|
||||
#!/bin/bash
|
||||
@@ -103,13 +130,20 @@ apt-get install -y \
|
||||
conntrack \
|
||||
socat \
|
||||
ethtool \
|
||||
nfs-common
|
||||
nfs-common \
|
||||
open-iscsi
|
||||
|
||||
# Container runtime prerequisites
|
||||
apt-get install -y \
|
||||
containerd \
|
||||
runc
|
||||
|
||||
# Vulkan drivers for GPU compute workloads (ollama, llama.cpp)
|
||||
apt-get install -y \
|
||||
mesa-vulkan-drivers \
|
||||
libvulkan1 \
|
||||
vulkan-tools
|
||||
|
||||
# Useful tools
|
||||
apt-get install -y \
|
||||
htop \
|
||||
@@ -118,7 +152,10 @@ apt-get install -y \
|
||||
less \
|
||||
rsync \
|
||||
git \
|
||||
squashfs-tools
|
||||
squashfs-tools \
|
||||
parted \
|
||||
fdisk \
|
||||
gdisk
|
||||
|
||||
# Clean up
|
||||
apt-get clean
|
||||
@@ -126,8 +163,9 @@ rm -rf /var/lib/apt/lists/*
|
||||
rm -rf /tmp/*
|
||||
rm -rf /var/tmp/*
|
||||
|
||||
# Configure hostname (will be overridden by netplan)
|
||||
echo "k3s-node" > /etc/hostname
|
||||
# Don't set static hostname - let DHCP provide it via networkd
|
||||
# Empty /etc/hostname allows transient hostname from DHCP
|
||||
echo "" > /etc/hostname
|
||||
|
||||
# Configure network with netplan
|
||||
cat > /etc/netplan/01-netcfg.yaml <<EOF
|
||||
@@ -148,11 +186,19 @@ EOF
|
||||
systemctl enable systemd-networkd
|
||||
systemctl enable systemd-resolved
|
||||
|
||||
# Configure SSH
|
||||
# Configure SSH - disable socket activation, use traditional daemon
|
||||
sed -i 's/#PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||
sed -i 's/#PubkeyAuthentication.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config
|
||||
# Disable socket activation (Ubuntu 24.04 default) and use traditional sshd
|
||||
systemctl disable ssh.socket 2>/dev/null || true
|
||||
rm -f /etc/systemd/system/ssh.service.requires/ssh.socket 2>/dev/null || true
|
||||
rm -f /etc/systemd/system/sockets.target.wants/ssh.socket 2>/dev/null || true
|
||||
systemctl enable ssh
|
||||
|
||||
# Fix SSH host key permissions (must be 0600 for private keys, sshd refuses otherwise)
|
||||
chmod 600 /etc/ssh/ssh_host_*_key
|
||||
chmod 644 /etc/ssh/ssh_host_*_key.pub
|
||||
|
||||
# Create SSH directory for root
|
||||
mkdir -p /root/.ssh
|
||||
chmod 700 /root/.ssh
|
||||
@@ -165,8 +211,10 @@ SSHKEY
|
||||
|
||||
chmod 600 /root/.ssh/authorized_keys
|
||||
|
||||
# Disable password authentication completely
|
||||
echo "root:*" | chpasswd -e
|
||||
# Set root password from decrypted hash (for console login only)
|
||||
ROOT_PW_HASH=$(cat /root/.pw_hash)
|
||||
echo "root:$ROOT_PW_HASH" | chpasswd -e
|
||||
rm -f /root/.pw_hash
|
||||
|
||||
# Configure tmpfs mounts for ephemeral data
|
||||
cat >> /etc/fstab <<FSTAB
|
||||
@@ -233,6 +281,45 @@ cp "$INITRAMFS_CONFIG/modules" "$BUILD_DIR/rootfs/etc/initramfs-tools/"
|
||||
cp -r "$INITRAMFS_CONFIG/hooks/"* "$BUILD_DIR/rootfs/usr/share/initramfs-tools/hooks/"
|
||||
cp -r "$INITRAMFS_CONFIG/scripts/"* "$BUILD_DIR/rootfs/usr/share/initramfs-tools/scripts/"
|
||||
|
||||
# Install node storage setup service
|
||||
echo "Installing node storage setup service..."
|
||||
FILES_DIR="$SCRIPT_DIR/files"
|
||||
cp "$FILES_DIR/setup-node-storage" "$BUILD_DIR/rootfs/usr/local/bin/"
|
||||
chmod +x "$BUILD_DIR/rootfs/usr/local/bin/setup-node-storage"
|
||||
cp "$FILES_DIR/setup-node-storage.service" "$BUILD_DIR/rootfs/etc/systemd/system/"
|
||||
# Enable the service (create symlink manually since we can't run systemctl)
|
||||
mkdir -p "$BUILD_DIR/rootfs/etc/systemd/system/multi-user.target.wants"
|
||||
ln -sf /etc/systemd/system/setup-node-storage.service \
|
||||
"$BUILD_DIR/rootfs/etc/systemd/system/multi-user.target.wants/setup-node-storage.service"
|
||||
|
||||
# Install DHCP hostname service
|
||||
echo "Installing DHCP hostname service..."
|
||||
cp "$FILES_DIR/set-hostname-from-dhcp" "$BUILD_DIR/rootfs/usr/local/bin/"
|
||||
chmod +x "$BUILD_DIR/rootfs/usr/local/bin/set-hostname-from-dhcp"
|
||||
cp "$FILES_DIR/set-hostname-from-dhcp.service" "$BUILD_DIR/rootfs/etc/systemd/system/"
|
||||
ln -sf /etc/systemd/system/set-hostname-from-dhcp.service \
|
||||
"$BUILD_DIR/rootfs/etc/systemd/system/multi-user.target.wants/set-hostname-from-dhcp.service"
|
||||
|
||||
# Download and install K3s binary
|
||||
echo "Downloading K3s binary..."
|
||||
K3S_VERSION="v1.34.3+k3s1"
|
||||
curl -sfL "https://github.com/k3s-io/k3s/releases/download/${K3S_VERSION}/k3s" \
|
||||
-o "$BUILD_DIR/rootfs/usr/local/bin/k3s"
|
||||
chmod +x "$BUILD_DIR/rootfs/usr/local/bin/k3s"
|
||||
echo "K3s $K3S_VERSION installed"
|
||||
|
||||
# Install K3s agent service
|
||||
echo "Installing K3s agent service..."
|
||||
# Create K3s directories first (will be bind-mounted from NVMe at runtime)
|
||||
mkdir -p "$BUILD_DIR/rootfs/etc/rancher/k3s"
|
||||
mkdir -p "$BUILD_DIR/rootfs/etc/rancher/node"
|
||||
mkdir -p "$BUILD_DIR/rootfs/var/lib/rancher/k3s/agent"
|
||||
cp "$FILES_DIR/k3s-agent.service" "$BUILD_DIR/rootfs/etc/systemd/system/"
|
||||
cp "$FILES_DIR/k3s-agent.env" "$BUILD_DIR/rootfs/etc/rancher/k3s/"
|
||||
# Enable the service
|
||||
ln -sf /etc/systemd/system/k3s-agent.service \
|
||||
"$BUILD_DIR/rootfs/etc/systemd/system/multi-user.target.wants/k3s-agent.service"
|
||||
|
||||
# Build initramfs while /proc/sys/dev are still mounted
|
||||
echo "Building custom netboot initramfs..."
|
||||
KERNEL_VERSION=$(ls -1 $BUILD_DIR/rootfs/boot/vmlinuz-* | sed 's|.*/vmlinuz-||' | head -1)
|
||||
|
||||
4
files/k3s-agent.env
Normal file
4
files/k3s-agent.env
Normal file
@@ -0,0 +1,4 @@
|
||||
# K3s agent configuration
|
||||
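# Server URL and token for cluster join
# NOTE(review): K3S_TOKEN is a cluster join credential committed in plaintext.
# Consider sourcing it from a sops-encrypted file under secrets/ (as is done for
# the root password hash) and rotating this token.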
|
||||
K3S_URL="https://192.168.100.1:6443"
|
||||
K3S_TOKEN="K106e2ea6914f7a019d1222c1fdd19c5065978377364701f60eb1f2a585e8c3924b::server:0a15c4d7a13df65b066f5b8eff710ecd"
|
||||
25
files/k3s-agent.service
Normal file
25
files/k3s-agent.service
Normal file
@@ -0,0 +1,25 @@
|
||||
[Unit]
|
||||
Description=Lightweight Kubernetes (K3s Agent)
|
||||
Documentation=https://k3s.io
|
||||
After=network-online.target setup-node-storage.service set-hostname-from-dhcp.service
|
||||
Wants=network-online.target
|
||||
Requires=setup-node-storage.service set-hostname-from-dhcp.service
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
EnvironmentFile=-/etc/rancher/k3s/k3s-agent.env
|
||||
ExecStartPre=/sbin/modprobe br_netfilter
|
||||
ExecStartPre=/sbin/modprobe overlay
|
||||
ExecStart=/usr/local/bin/k3s agent
|
||||
KillMode=process
|
||||
Delegate=yes
|
||||
LimitNOFILE=1048576
|
||||
LimitNPROC=infinity
|
||||
LimitCORE=infinity
|
||||
TasksMax=infinity
|
||||
TimeoutStartSec=0
|
||||
Restart=always
|
||||
RestartSec=5s
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
28
files/set-hostname-from-dhcp
Normal file
28
files/set-hostname-from-dhcp
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
# Apply the hostname handed out by DHCP.
# Polls the systemd-networkd lease files once per second for up to MAX_WAIT
# seconds; the first lease containing a HOSTNAME= entry wins. Always exits 0,
# so a missing DHCP hostname leaves the existing hostname in place.

set -euo pipefail

# Print to stdout and send the same message to syslog.
log() {
    echo "[hostname] $*"
    logger -t set-hostname "$*"
}

MAX_WAIT=60
attempt=1

while [ "$attempt" -le "$MAX_WAIT" ]; do
    for lease_file in /run/systemd/netif/leases/*; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [ -f "$lease_file" ] || continue

        # grep exits non-zero when the lease has no HOSTNAME entry; tolerate that.
        dhcp_name=$(grep -oP '^HOSTNAME=\K.*' "$lease_file" 2>/dev/null || true)
        [ -n "$dhcp_name" ] || continue

        log "Found hostname in DHCP lease: $dhcp_name"
        hostnamectl set-hostname "$dhcp_name"
        log "Hostname set to: $(hostname)"
        exit 0
    done

    sleep 1
    attempt=$((attempt + 1))
done

log "Warning: No DHCP hostname found after ${MAX_WAIT}s, using default"
exit 0
|
||||
15
files/set-hostname-from-dhcp.service
Normal file
15
files/set-hostname-from-dhcp.service
Normal file
@@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=Set hostname from DHCP lease
|
||||
Documentation=file:///usr/local/bin/set-hostname-from-dhcp
|
||||
After=network-online.target systemd-networkd.service
|
||||
Wants=network-online.target
|
||||
Before=k3s-agent.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/set-hostname-from-dhcp
|
||||
RemainAfterExit=yes
|
||||
TimeoutStartSec=90
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
236
files/setup-node-storage
Normal file
236
files/setup-node-storage
Normal file
@@ -0,0 +1,236 @@
|
||||
#!/bin/bash
|
||||
# Setup local NVMe storage for K3s node
|
||||
# Runs at boot via systemd service
|
||||
#
|
||||
# Logic:
|
||||
# - No NVMe: exit cleanly
|
||||
# - No partition table: auto-format (new drive)
|
||||
# - Has our labels: mount and exit (already configured)
|
||||
# - Has other partitions: prompt with 120s timeout (safety)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DEVICE="/dev/nvme0n1"
|
||||
CONTAINERD_SIZE="75GiB"
|
||||
CONTAINERD_LABEL="containerd"
|
||||
LONGHORN_LABEL="longhorn"
|
||||
CONTAINERD_MOUNT="/var/lib/containerd"
|
||||
LONGHORN_MOUNT="/var/lib/longhorn"
|
||||
MARKER_FILE=".netboot-storage"
|
||||
PROMPT_TIMEOUT=120
|
||||
|
||||
# Colors for console output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Log to both console and journald
|
||||
log() { echo -e "${GREEN}[storage]${NC} $*"; logger -t setup-node-storage "$*"; }
|
||||
warn() { echo -e "${YELLOW}[storage]${NC} $*"; logger -t setup-node-storage -p warning "$*"; }
|
||||
error() { echo -e "${RED}[storage]${NC} $*"; logger -t setup-node-storage -p err "$*"; }
|
||||
|
||||
# Check if NVMe exists
|
||||
if [ ! -b "$DEVICE" ]; then
|
||||
log "No NVMe device found at $DEVICE - skipping storage setup"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
DEVICE_SIZE=$(lsblk -b -d -n -o SIZE "$DEVICE" | awk '{printf "%.0fGB", $1/1000000000}')
|
||||
log "Found NVMe: $DEVICE ($DEVICE_SIZE)"
|
||||
|
||||
# Get partition names (handles nvme naming with 'p' prefix)
|
||||
if [[ "$DEVICE" == *"nvme"* ]]; then
|
||||
PART1="${DEVICE}p1"
|
||||
PART2="${DEVICE}p2"
|
||||
else
|
||||
PART1="${DEVICE}1"
|
||||
PART2="${DEVICE}2"
|
||||
fi
|
||||
|
||||
# Mount the already-formatted containerd and longhorn partitions by label,
# then set up the K3s bind mounts and report the size of each filesystem.
# A mount point that is already mounted is left alone.
# Returns 1 (after logging an error) if either partition fails to mount.
mount_storage() {
    log "Mounting existing storage..."

    mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"

    mountpoint -q "$CONTAINERD_MOUNT" || mount -L "$CONTAINERD_LABEL" "$CONTAINERD_MOUNT" || {
        error "Failed to mount containerd partition"
        return 1
    }

    mountpoint -q "$LONGHORN_MOUNT" || mount -L "$LONGHORN_LABEL" "$LONGHORN_MOUNT" || {
        error "Failed to mount longhorn partition"
        return 1
    }

    # Bind-mount the K3s agent data and node identity from NVMe so the node
    # keeps its registration across reboots.
    setup_k3s_persistence

    log "Storage mounted:"
    log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
    log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
    return 0
}
|
||||
|
||||
# Back the K3s state directories with storage on the containerd partition.
# For each (backing dir on NVMe, target path) pair: create both directories and
# bind-mount the backing dir onto the target unless the target is already a
# mount point.
setup_k3s_persistence() {
    # K3s agent data (containerd, kubelet certs, etc.).
    # Uses overlayfs internally, so it must sit on a real filesystem, not the overlay.
    K3S_AGENT="/var/lib/rancher/k3s/agent"
    K3S_AGENT_DATA="$CONTAINERD_MOUNT/k3s-agent"

    # K3s node identity (password file); must persist across reboots or the
    # node will be rejected.
    K3S_NODE="/etc/rancher/node"
    K3S_NODE_DATA="$CONTAINERD_MOUNT/k3s-node"

    # Kubelet data (pod volumes, projected tokens, etc.); on NVMe so kubelet
    # reports real disk capacity, not the 2G tmpfs overlay.
    KUBELET_DIR="/var/lib/kubelet"
    KUBELET_DATA="$CONTAINERD_MOUNT/kubelet"

    local backing target
    # Walk the pairs through the function's own positional parameters.
    set -- \
        "$K3S_AGENT_DATA" "$K3S_AGENT" \
        "$K3S_NODE_DATA" "$K3S_NODE" \
        "$KUBELET_DATA" "$KUBELET_DIR"

    while [ "$#" -ge 2 ]; do
        backing=$1
        target=$2
        shift 2

        mkdir -p "$backing" "$target"
        if ! mountpoint -q "$target"; then
            mount --bind "$backing" "$target"
            log " $target: bind mount to NVMe"
        fi
    done
}
|
||||
|
||||
# Function to format the drive
# Destroys everything on $DEVICE: wipes its signatures, writes a fresh GPT with
# two partitions (the first ending at $CONTAINERD_SIZE, the second using the
# rest), creates labelled ext4 filesystems, mounts them, drops a marker file on
# each, and finally sets up the K3s bind mounts.
# Exits the whole script with status 1 if the partition nodes do not show up.
format_storage() {
    log "Partitioning $DEVICE..."

    wipefs -af "$DEVICE"
    parted -s "$DEVICE" mklabel gpt
    parted -s "$DEVICE" mkpart primary ext4 1MiB "$CONTAINERD_SIZE"
    parted -s "$DEVICE" mkpart primary ext4 "$CONTAINERD_SIZE" 100%

    # Tell kernel to re-read partition table and wait for udev
    partprobe "$DEVICE"
    udevadm settle --timeout=10

    # Verify partitions appeared
    if [ ! -b "$PART1" ] || [ ! -b "$PART2" ]; then
        error "Partitions not found after partprobe: $PART1, $PART2"
        exit 1
    fi

    # -F: the wipefs call above only clears signatures on the whole-disk device,
    # so a new partition may still start on top of a stale filesystem signature
    # from the previous layout. Without -F, mkfs.ext4 may then stop at an
    # interactive "Proceed anyway?" prompt and stall this boot-time service.
    log "Formatting ${PART1} as ext4 (containerd, ${CONTAINERD_SIZE})..."
    mkfs.ext4 -F -L "$CONTAINERD_LABEL" -q "$PART1"

    log "Formatting ${PART2} as ext4 (longhorn, remaining)..."
    mkfs.ext4 -F -L "$LONGHORN_LABEL" -q "$PART2"

    # Mount the new partitions
    mkdir -p "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"
    mount "$PART1" "$CONTAINERD_MOUNT"
    mount "$PART2" "$LONGHORN_MOUNT"

    # Create marker files with metadata; later boots look for this file on the
    # containerd partition to recognise the drive as already configured.
    for mount_point in "$CONTAINERD_MOUNT" "$LONGHORN_MOUNT"; do
        cat > "${mount_point}/${MARKER_FILE}" <<EOF
# Netboot storage marker - DO NOT DELETE
formatted_date=$(date -Iseconds)
formatted_by=setup-node-storage
hostname=$(hostname)
device=$DEVICE
EOF
    done

    # K3s persistence: bind mount agent data and node identity from NVMe
    setup_k3s_persistence

    log "Storage formatted and mounted successfully"
    log " $CONTAINERD_MOUNT: $(df -h "$CONTAINERD_MOUNT" | tail -1 | awk '{print $2}')"
    log " $LONGHORN_MOUNT: $(df -h "$LONGHORN_MOUNT" | tail -1 | awk '{print $2}')"
}
|
||||
|
||||
# Detect whether $DEVICE already carries a partition table.
#   1. Ask blkid for the PTTYPE tag; any non-empty value means a table exists.
#   2. Otherwise fall back to parted: a successful `parted print` (judged by
#      its exit status, output discarded) also counts as a table.
# Returns 0 when a partition table is present, 1 when none is detected.
has_partition_table() {
    local table_type
    table_type=$(blkid -o value -s PTTYPE "$DEVICE" 2>/dev/null)

    [ -n "$table_type" ] && return 0
    parted -s "$DEVICE" print &>/dev/null && return 0
    return 1
}
|
||||
|
||||
# --- Decision flow -----------------------------------------------------------

# Case 1: no partition table at all -> treat as a fresh drive and format it.
if ! has_partition_table; then
    log "Empty drive detected (no partition table) - auto-formatting..."
    format_storage
    exit 0
fi

# Case 2: both of our filesystem labels are present -> mount and finish.
# The marker file is only informational here: with or without it the drive is
# mounted; the difference is a log line versus a warning.
if blkid -L "$CONTAINERD_LABEL" &>/dev/null && blkid -L "$LONGHORN_LABEL" &>/dev/null; then
    # Probe through a throwaway mount point so no stray mount is left behind.
    TEMP_MOUNT=$(mktemp -d)
    if mount -L "$CONTAINERD_LABEL" "$TEMP_MOUNT" 2>/dev/null; then
        marker_found=false
        if [ -f "${TEMP_MOUNT}/${MARKER_FILE}" ]; then
            marker_found=true
        fi

        umount "$TEMP_MOUNT"
        rmdir "$TEMP_MOUNT"

        if "$marker_found"; then
            log "Storage already configured (found labels and marker)"
        else
            # Has our labels but no marker - probably ours, mount it
            warn "Found labels but no marker file - assuming configured"
        fi
        mount_storage
        exit 0
    fi
    # Probe mount failed: clean up and fall through to the prompt below.
    rmdir "$TEMP_MOUNT" 2>/dev/null || true
fi

# Case 3: a partition table exists but it is not ours - this could contain data!
warn "NVMe has existing partitions but no netboot labels."
warn "This drive may contain important data!"
echo ""
lsblk "$DEVICE"
echo ""

# Prompt on console with timeout
for banner_line in \
    "========================================" \
    " Press ENTER within ${PROMPT_TIMEOUT}s to format " \
    " Or wait to skip (safe default) " \
    "========================================"; do
    echo -e "${CYAN}${banner_line}${NC}"
done
echo ""

# No answer within the timeout (or no readable input) leaves the drive untouched.
if ! read -t "$PROMPT_TIMEOUT" -p "Format $DEVICE? [press ENTER to confirm] " response; then
    echo ""
    warn "Timeout - skipping storage setup (drive left untouched)"
    warn "To format manually, reboot and press ENTER when prompted"
    exit 0
fi

echo ""
warn "Formatting in 5 seconds... Ctrl+C to abort"
sleep 5
format_storage
|
||||
26
files/setup-node-storage.service
Normal file
26
files/setup-node-storage.service
Normal file
@@ -0,0 +1,26 @@
|
||||
# systemd unit that runs /usr/local/bin/setup-node-storage once per boot.
[Unit]
Description=Setup local NVMe storage for K3s
Documentation=file:///usr/local/bin/setup-node-storage

# Skip the unit entirely when /var/lib/containerd is already a mount point.
ConditionPathIsMountPoint=!/var/lib/containerd

# Ordering: start after local filesystems and udev, finish before containerd.
After=local-fs.target
After=systemd-udevd.service
Before=containerd.service

[Service]
# One-shot job; the unit stays "active" after the script exits successfully.
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/local/bin/setup-node-storage

# The script may prompt on the console, so attach its stdin to tty1.
StandardInput=tty
TTYPath=/dev/tty1
TTYReset=yes

# Allow up to 180 seconds (3 minutes) for the run, including any wait for
# user input at the prompt.
TimeoutStartSec=180

[Install]
WantedBy=multi-user.target
|
||||
@@ -1,17 +1,5 @@
|
||||
#!ipxe
|
||||
echo Booting Ubuntu Noble K3s Node via iPXE
|
||||
|
||||
# Clear any previous images
|
||||
imgfree
|
||||
|
||||
echo Loading kernel from http://192.168.100.1:8800/vmlinuz
|
||||
kernel --name vmlinuz http://192.168.100.1:8800/vmlinuz
|
||||
|
||||
echo Loading initramfs from http://192.168.100.1:8800/initrd-netboot.img
|
||||
initrd --name initrd http://192.168.100.1:8800/initrd-netboot.img
|
||||
|
||||
echo Setting kernel arguments for HTTP root mounting
|
||||
imgargs vmlinuz initrd=initrd rdinit=/init boot=netboot root=http://192.168.100.1:8800/filesystem.squashfs rootfstype=squashfs overlayroot=tmpfs ip=dhcp console=tty0 console=ttyS0,115200 loglevel=7 panic=-1 break=mountroot
|
||||
|
||||
echo Booting system...
|
||||
boot vmlinuz
|
||||
echo Configuring network via DHCP...
|
||||
dhcp
|
||||
echo Chaining to dynamic boot script...
|
||||
chain http://192.168.100.1:8800/netboot.ipxe
|
||||
|
||||
@@ -1,207 +1,85 @@
|
||||
#!/bin/sh
|
||||
# Netboot HTTP root mounting - sourced by initramfs init (functions already loaded)
|
||||
# Netboot HTTP root mounting - HARDCODED VALUES - no cmdline parsing
|
||||
|
||||
export PATH=/usr/bin:/usr/sbin:/bin:/sbin
|
||||
|
||||
# HARDCODED CONFIGURATION
|
||||
ROOT_URL="http://192.168.100.1:8800/filesystem.squashfs"
|
||||
OVERLAYROOT="tmpfs"
|
||||
MOUNTPOINT=/root
|
||||
SQUASHFS_MOUNT=/mnt/squashfs
|
||||
OVERLAY_TMPFS=/mnt/overlay
|
||||
|
||||
# Hook functions for initramfs-tools boot script integration
|
||||
netboot_top()
|
||||
{
|
||||
if [ "${netboot_top_used}" != "yes" ]; then
|
||||
[ "$quiet" != "y" ] && log_begin_msg "Running /scripts/netboot-top"
|
||||
run_scripts /scripts/netboot-top
|
||||
[ "$quiet" != "y" ] && log_end_msg
|
||||
fi
|
||||
netboot_top_used=yes
|
||||
# Debug logging to console
|
||||
log() {
|
||||
echo "$@" > /dev/console 2>&1
|
||||
}
|
||||
|
||||
netboot_premount()
|
||||
{
|
||||
if [ "${netboot_premount_used}" != "yes" ]; then
|
||||
[ "$quiet" != "y" ] && log_begin_msg "Running /scripts/netboot-premount"
|
||||
run_scripts /scripts/netboot-premount
|
||||
[ "$quiet" != "y" ] && log_end_msg
|
||||
fi
|
||||
netboot_premount_used=yes
|
||||
}
|
||||
|
||||
netboot_bottom()
|
||||
{
|
||||
if [ "${netboot_premount_used}" = "yes" ] || [ "${netboot_top_used}" = "yes" ]; then
|
||||
[ "$quiet" != "y" ] && log_begin_msg "Running /scripts/netboot-bottom"
|
||||
run_scripts /scripts/netboot-bottom
|
||||
[ "$quiet" != "y" ] && log_end_msg
|
||||
fi
|
||||
netboot_premount_used=no
|
||||
netboot_top_used=no
|
||||
}
|
||||
|
||||
# Parse kernel command line for HTTP root
|
||||
parse_cmdline() {
|
||||
for x in $(cat /proc/cmdline); do
|
||||
case $x in
|
||||
root=http://*)
|
||||
export ROOT_URL="${x#root=}" ;;
|
||||
rootfstype=*)
|
||||
export ROOTFSTYPE="${x#rootfstype=}" ;;
|
||||
overlayroot=*)
|
||||
export OVERLAYROOT="${x#overlayroot=}" ;;
|
||||
ip=*)
|
||||
export BOOTIP="${x#ip=}" ;;
|
||||
*)
|
||||
: ;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
# Minimal hook functions
|
||||
netboot_top() { :; }
|
||||
netboot_premount() { :; }
|
||||
netboot_bottom() { :; }
|
||||
mount_top() { :; }
|
||||
mount_premount() { :; }
|
||||
mount_bottom() { :; }
|
||||
|
||||
mountroot() {
|
||||
rc=1
|
||||
log "NETBOOT: ========================================"
|
||||
log "NETBOOT: mountroot() HARDCODED VERSION"
|
||||
log "NETBOOT: ROOT_URL=${ROOT_URL}"
|
||||
log "NETBOOT: ========================================"
|
||||
|
||||
# Run hook scripts
|
||||
netboot_top
|
||||
netboot_premount
|
||||
# Load network module
|
||||
/sbin/modprobe af_packet
|
||||
|
||||
parse_cmdline
|
||||
|
||||
if test -z "${ROOT_URL}"; then
|
||||
log_failure_msg "No root URL defined (root=http://... not found)"
|
||||
return ${rc}
|
||||
fi
|
||||
|
||||
# Configure networking before attempting downloads
|
||||
log_begin_msg "Configuring network"
|
||||
modprobe af_packet || log_warning_msg "af_packet load failed"
|
||||
|
||||
# Load RTL8125 driver (already in module list but explicit load for debugging)
|
||||
modprobe r8125 || log_warning_msg "r8125 driver load failed, may use generic driver"
|
||||
# Wait for udev
|
||||
wait_for_udev 10
|
||||
|
||||
# Configure networking via DHCP
|
||||
log "NETBOOT: Calling configure_networking..."
|
||||
configure_networking
|
||||
udevadm trigger
|
||||
timeout 30 udevadm settle || log_warning_msg "udevadm settle timed out"
|
||||
export DEVICE
|
||||
log_end_msg
|
||||
|
||||
# Validate networking is up
|
||||
INTERFACE_UP=0
|
||||
for iface in $(ip link show | grep "^[0-9]" | awk -F: '{print $2}' | tr -d ' '); do
|
||||
if ip addr show "$iface" | grep -q "inet "; then
|
||||
INTERFACE_UP=1
|
||||
log_begin_msg "Interface $iface has IP address"
|
||||
ip addr show "$iface" | grep "inet " | awk '{print $2}'
|
||||
break
|
||||
fi
|
||||
done
|
||||
# Check we got an IP
|
||||
log "NETBOOT: Checking for IP address..."
|
||||
if ! ip addr show | grep -q "inet "; then
|
||||
log "NETBOOT: FATAL - no IP address"
|
||||
return 1
|
||||
fi
|
||||
log "NETBOOT: Network is up"
|
||||
|
||||
if [ $INTERFACE_UP -eq 0 ]; then
|
||||
log_failure_msg "No network interface obtained an IP address"
|
||||
return ${rc}
|
||||
# Download squashfs
|
||||
log "NETBOOT: Downloading ${ROOT_URL}..."
|
||||
if ! wget -O /filesystem.squashfs "${ROOT_URL}"; then
|
||||
log "NETBOOT: FATAL - wget failed"
|
||||
return 1
|
||||
fi
|
||||
log "NETBOOT: Download complete"
|
||||
|
||||
# Create mount points
|
||||
mkdir -p "${SQUASHFS_MOUNT}" "${OVERLAY_TMPFS}"
|
||||
|
||||
# Mount squashfs
|
||||
log "NETBOOT: Mounting squashfs..."
|
||||
if ! mount -t squashfs /filesystem.squashfs "${SQUASHFS_MOUNT}" -o ro; then
|
||||
log "NETBOOT: FATAL - squashfs mount failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Extract filename from URL
|
||||
FILE_NAME=$(basename "${ROOT_URL}")
|
||||
FILE_PATH="/${FILE_NAME}"
|
||||
# Mount tmpfs for overlay
|
||||
log "NETBOOT: Mounting tmpfs..."
|
||||
if ! mount -t tmpfs -o size=2G tmpfs "${OVERLAY_TMPFS}"; then
|
||||
log "NETBOOT: FATAL - tmpfs mount failed"
|
||||
return 1
|
||||
fi
|
||||
mkdir -p "${OVERLAY_TMPFS}/upper" "${OVERLAY_TMPFS}/work"
|
||||
|
||||
# Download the root filesystem with retries and timeouts
|
||||
log_begin_msg "Downloading root filesystem from ${ROOT_URL}"
|
||||
if wget --timeout=30 --tries=3 --waitretry=5 \
|
||||
--progress=dot:mega \
|
||||
"${ROOT_URL}" -O "${FILE_PATH}"; then
|
||||
log_end_msg
|
||||
else
|
||||
log_failure_msg "Failed to download from ${ROOT_URL} after retries"
|
||||
rm -f "${FILE_PATH}"
|
||||
return ${rc}
|
||||
# Mount overlay
|
||||
log "NETBOOT: Mounting overlay..."
|
||||
if ! mount -t overlay -o "lowerdir=${SQUASHFS_MOUNT},upperdir=${OVERLAY_TMPFS}/upper,workdir=${OVERLAY_TMPFS}/work" overlay "${MOUNTPOINT}"; then
|
||||
log "NETBOOT: FATAL - overlay mount failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Verify the downloaded file is a valid SquashFS
|
||||
if ! file "${FILE_PATH}" | grep -q "Squash"; then
|
||||
log_failure_msg "Downloaded file is not a valid SquashFS image"
|
||||
rm -f "${FILE_PATH}"
|
||||
return ${rc}
|
||||
fi
|
||||
|
||||
# Handle SquashFS images with overlay
|
||||
if echo "${FILE_NAME}" | grep -q squashfs; then
|
||||
log_begin_msg "Setting up SquashFS with overlay"
|
||||
|
||||
# Setup overlay if requested - need separate mount points
|
||||
if [ -n "${OVERLAYROOT}" ]; then
|
||||
# Create mount points
|
||||
mkdir -p "${SQUASHFS_MOUNT}" "${OVERLAY_TMPFS}"
|
||||
|
||||
# Mount read-only SquashFS to separate location
|
||||
if ! mount -t squashfs "${FILE_PATH}" "${SQUASHFS_MOUNT}" -o ro; then
|
||||
log_failure_msg "Failed to mount SquashFS at ${SQUASHFS_MOUNT}"
|
||||
rm -f "${FILE_PATH}"
|
||||
return ${rc}
|
||||
fi
|
||||
log_begin_msg "SquashFS mounted at ${SQUASHFS_MOUNT}"
|
||||
log_end_msg
|
||||
|
||||
log_begin_msg "Mounting tmpfs for overlay upper/work"
|
||||
|
||||
# Create tmpfs for upper and work directories
|
||||
if ! mount -t tmpfs -o size=2G tmpfs_overlay "${OVERLAY_TMPFS}"; then
|
||||
log_failure_msg "Failed to mount tmpfs for overlay"
|
||||
umount "${SQUASHFS_MOUNT}"
|
||||
rm -f "${FILE_PATH}"
|
||||
return ${rc}
|
||||
fi
|
||||
|
||||
# Create overlay structure
|
||||
mkdir -p "${OVERLAY_TMPFS}/upper" "${OVERLAY_TMPFS}/work"
|
||||
|
||||
# Mount overlay combining read-only lower + writable upper onto /root
|
||||
if ! mount -t overlay \
|
||||
-o "lowerdir=${SQUASHFS_MOUNT},upperdir=${OVERLAY_TMPFS}/upper,workdir=${OVERLAY_TMPFS}/work" \
|
||||
overlay_root "${MOUNTPOINT}"; then
|
||||
log_failure_msg "Failed to mount overlay filesystem"
|
||||
umount "${OVERLAY_TMPFS}"
|
||||
umount "${SQUASHFS_MOUNT}"
|
||||
rm -f "${FILE_PATH}"
|
||||
return ${rc}
|
||||
fi
|
||||
|
||||
log_end_msg
|
||||
log_begin_msg "Overlay mounted at ${MOUNTPOINT} (lower=${SQUASHFS_MOUNT})"
|
||||
log_end_msg
|
||||
|
||||
# Clean up downloaded image as it's now mounted
|
||||
rm -f "${FILE_PATH}"
|
||||
rc=0
|
||||
else
|
||||
# Direct SquashFS mount without overlay - mount directly to /root
|
||||
if ! mount -t squashfs "${FILE_PATH}" "${MOUNTPOINT}" -o ro; then
|
||||
log_failure_msg "Failed to mount SquashFS at ${MOUNTPOINT}"
|
||||
rm -f "${FILE_PATH}"
|
||||
return ${rc}
|
||||
fi
|
||||
log_begin_msg "Mounted SquashFS without overlay at ${MOUNTPOINT}"
|
||||
log_end_msg
|
||||
rc=0
|
||||
fi
|
||||
else
|
||||
log_failure_msg "Unknown filesystem type: ${FILE_NAME}"
|
||||
rm -f "${FILE_PATH}"
|
||||
fi
|
||||
|
||||
return ${rc}
|
||||
}
|
||||
|
||||
# Standard mount hook wrappers expected by initramfs init
|
||||
mount_top()
|
||||
{
|
||||
netboot_top
|
||||
}
|
||||
|
||||
mount_premount()
|
||||
{
|
||||
netboot_premount
|
||||
}
|
||||
|
||||
mount_bottom()
|
||||
{
|
||||
netboot_bottom
|
||||
log "NETBOOT: SUCCESS - root mounted at ${MOUNTPOINT}"
|
||||
return 0
|
||||
}
|
||||
|
||||
51
rebuild-initramfs.sh
Executable file
51
rebuild-initramfs.sh
Executable file
@@ -0,0 +1,51 @@
|
||||
#!/bin/bash
# Rebuild initramfs with updated netboot script
#
# Steps:
#   1. Copy the initramfs-tools script, hook, config and module list from
#      ./initramfs into the build rootfs (./build/rootfs).
#   2. Bind-mount /proc, /sys and /dev into the rootfs and run mkinitramfs
#      inside a chroot of it.
#   3. Publish the resulting image as ./http/initrd-netboot.img (mode 644).
#
# Must be run as root (bind mounts and chroot). Exits non-zero on the first
# failing step; any bind mounts made so far are removed on exit.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BUILD_DIR="$SCRIPT_DIR/build"
ROOTFS="$BUILD_DIR/rootfs"
HTTP_DIR="$SCRIPT_DIR/http"

# Fail before touching anything: mount/chroot below cannot work unprivileged.
if [ "$(id -u)" -ne 0 ]; then
    echo "ERROR: This script must be run as root (needs mount and chroot)" >&2
    exit 1
fi

if [ ! -d "$ROOTFS" ]; then
    echo "ERROR: Rootfs not found at $ROOTFS"
    echo "Run build-image.sh first"
    exit 1
fi

echo "=== Copying updated initramfs scripts ==="
cp "$SCRIPT_DIR/initramfs/scripts/netboot" "$ROOTFS/usr/share/initramfs-tools/scripts/netboot"
cp "$SCRIPT_DIR/initramfs/hooks/netboot" "$ROOTFS/usr/share/initramfs-tools/hooks/netboot"
cp "$SCRIPT_DIR/initramfs/initramfs.conf" "$ROOTFS/etc/initramfs-tools/initramfs.conf"
cp "$SCRIPT_DIR/initramfs/modules" "$ROOTFS/etc/initramfs-tools/modules"

echo "=== Getting kernel version ==="
# Use the first module directory in the rootfs (glob order), without parsing
# `ls` output. An empty result must be fatal: mkinitramfs would otherwise be
# invoked with an empty version argument.
KVER=""
for moddir in "$ROOTFS"/lib/modules/*/; do
    [ -d "$moddir" ] || continue
    KVER="$(basename "$moddir")"
    break
done
if [ -z "$KVER" ]; then
    echo "ERROR: No kernel modules directory found in $ROOTFS/lib/modules" >&2
    exit 1
fi
echo "Kernel version: $KVER"

# Bind mounts that were actually established, in mount order.
MOUNTED=()

# Unmount everything recorded in MOUNTED, newest first. Best-effort: a failed
# unmount is reported but does not change the script's exit status.
cleanup() {
    echo "=== Cleaning up mounts ==="
    local idx
    for (( idx = ${#MOUNTED[@]} - 1; idx >= 0; idx-- )); do
        umount "${MOUNTED[$idx]}" 2>/dev/null \
            || echo "WARNING: could not unmount ${MOUNTED[$idx]}" >&2
    done
}
# Register the handler BEFORE mounting so a failure on the second or third
# mount (set -e aborts immediately) does not leave earlier mounts behind.
trap cleanup EXIT

echo "=== Mounting filesystems for chroot ==="
for fs in proc sys dev; do
    mount --bind "/$fs" "$ROOTFS/$fs"
    MOUNTED+=("$ROOTFS/$fs")
done

echo "=== Rebuilding initramfs ==="
chroot "$ROOTFS" mkinitramfs -v -o /boot/initrd-netboot.img "$KVER"

echo "=== Copying to http directory ==="
# The http/ directory may not exist yet on a fresh checkout.
mkdir -p "$HTTP_DIR"
cp "$ROOTFS/boot/initrd-netboot.img" "$HTTP_DIR/"
chmod 644 "$HTTP_DIR/initrd-netboot.img"

echo ""
echo "=== Done! ==="
echo "New initramfs: $HTTP_DIR/initrd-netboot.img"
ls -lh "$HTTP_DIR/initrd-netboot.img"
echo ""
echo "Run 'make deploy' to sync to NAS"
|
||||
16
secrets/netboot.sops.yaml
Normal file
16
secrets/netboot.sops.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
root_password_hash: ENC[AES256_GCM,data:Oc1Kpg1S3NSG4dDoe0AiDmdWe4wdz9zSMn/WlTvURz3u62HcF9ddZh3yKbsXdc19WbGj/ZJa+MFzucgCg6ChT5OG2k4S+JuAVvRaNmB54XSjyIL2vDkambq8Pt4rg5rVxfv5H6uEd5IWUg==,iv:fO72qW/8JIWGubbfjZYsfhjL3XUq/7RbohGPd1avS+8=,tag:nXP7w2b49iYAcnWxM4WFlA==,type:str]
|
||||
sops:
|
||||
age:
|
||||
- recipient: age1gausnystsln7fpenw7arw7x79xe22z697jnauj38npy0usayqqxqc7td2y
|
||||
enc: |
|
||||
-----BEGIN AGE ENCRYPTED FILE-----
|
||||
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBDS1VrWkNZTmswYlRrVXAv
|
||||
ZC9FemRVWkc2bzlVL1BuQm9FaDlPVmVuVFZvCnUyb2xnaDdwQ3BsVkNmY0NxZktp
|
||||
Zk9qSlZVZk16UUhhOHdGRFN1Zno1V3cKLS0tIHV6YXE1bFBHZjMyVVdMbVZEMXlW
|
||||
YTN1RnJ3SjRkN21MYmhQK0hZZFB5Sk0KfxfMPUdJjZq/JDOE87oD2XBpQebvy0a5
|
||||
IAI5tdpEzNP6tF4oqunmh15fPc61Q0C/5ev+uz0QyHhTlTI13lYpGg==
|
||||
-----END AGE ENCRYPTED FILE-----
|
||||
lastmodified: "2026-02-05T20:16:15Z"
|
||||
mac: ENC[AES256_GCM,data:mTCLM3t35mMv9nLQHba65Gq3yAWnY4UKUDHEncMF22RnZKiVDaTMAV6tiaKGu7hHXdDu9fU/E7wPomR8pirGf6pJBUWxCflCe3Q3ZGK9/Aw3guz5ZD34H9nMaCjXME59r1rQdQdQlWP5aW4o+kqfD/bukFpW1HUY0YT8g8fqCpw=,iv:bG1M8Ghuc8JkMNQfODZ1FkMI/8Qs217xlN5ihDnz7hs=,tag:gCScQi1YYXFH4Xo/8Wq5+g==,type:str]
|
||||
unencrypted_suffix: _unencrypted
|
||||
version: 3.11.0
|
||||
Reference in New Issue
Block a user