diff --git a/.sops.yaml b/.sops.yaml new file mode 100644 index 0000000..bfae560 --- /dev/null +++ b/.sops.yaml @@ -0,0 +1,3 @@ +creation_rules: + - path_regex: secrets/.*\.yaml$ + age: age1gausnystsln7fpenw7arw7x79xe22z697jnauj38npy0usayqqxqc7td2y diff --git a/Makefile b/Makefile index 81a7022..063e561 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,19 @@ -.PHONY: build deploy clean help +.PHONY: build deploy clean help check-nas all NAS_HOST=phoenix NAS_PATH=/srv/netboot SCRIPT_DIR=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +# Source files that trigger a rebuild +BUILD_SOURCES := $(SCRIPT_DIR)/build-image.sh \ + $(wildcard $(SCRIPT_DIR)/initramfs/*) \ + $(wildcard $(SCRIPT_DIR)/initramfs/*/*) \ + $(wildcard $(SCRIPT_DIR)/files/*) \ + $(wildcard $(SCRIPT_DIR)/secrets/*.yaml) + +# Build artifact (used as target for dependency tracking) +BUILD_ARTIFACT := $(SCRIPT_DIR)/http/filesystem.squashfs + help: @echo "Netboot image build and deployment" @echo "" @@ -23,7 +33,8 @@ check-nas: @echo "Checking NAS connectivity..." @ping -c 1 $(NAS_HOST) > /dev/null 2>&1 && echo "✓ NAS is reachable" || (echo "✗ Cannot reach $(NAS_HOST)"; exit 1) -build: +# Build depends on source files - only rebuilds if sources changed +$(BUILD_ARTIFACT): $(BUILD_SOURCES) @echo "Building netboot image..." @echo "This will take 15-30 minutes..." sudo $(SCRIPT_DIR)/build-image.sh @@ -32,6 +43,8 @@ build: @echo "Artifacts ready in $(SCRIPT_DIR)/http/" @du -sh $(SCRIPT_DIR)/http/* +build: $(BUILD_ARTIFACT) + deploy: check-nas @echo "Deploying to NAS ($(NAS_HOST):$(NAS_PATH))..." @echo "Syncing http/ directory..." diff --git a/OPERATIONS.md b/OPERATIONS.md new file mode 100644 index 0000000..a0e462a --- /dev/null +++ b/OPERATIONS.md @@ -0,0 +1,333 @@ +# Netboot Operations Guide + +This document covers day-to-day operations for the netboot K3s cluster system. 
+ +## Quick Reference + +```bash +# Build new image (15-30 min, requires sudo) +cd /home/lindahl/git/netboot +sudo ./build-image.sh +make deploy + +# Rebuild initramfs only (faster, ~2 min) +sudo ./rebuild-initramfs.sh +make deploy + +# SSH to a node +ssh root@192.168.100.51 + +# Check node storage +ssh root@192.168.100.51 "lsblk && df -h /var/lib/containerd /var/lib/longhorn" +``` + +## Architecture Overview + +``` +┌─────────────────┐ HTTP (8800) ┌──────────────────┐ +│ Phoenix NAS │◄────────────────────►│ K3s Nodes │ +│ 192.168.100.1 │ │ 192.168.100.5x │ +├─────────────────┤ ├──────────────────┤ +│ /srv/netboot/ │ │ RAM (overlay) │ +│ http/ │ │ └─ / (root) │ +│ vmlinuz │ │ NVMe (persistent)│ +│ initrd-netboot.img │ ├─ containerd │ +│ filesystem.squashfs │ └─ longhorn │ +│ boot.ipxe │ └──────────────────┘ +└─────────────────┘ +``` + +**Boot sequence:** +1. Node PXE boots → loads iPXE +2. iPXE fetches `boot.ipxe` from phoenix +3. Downloads kernel + initramfs +4. Initramfs downloads squashfs root over HTTP +5. Mounts squashfs read-only with tmpfs overlay +6. `setup-node-storage.service` partitions/mounts local NVMe +7. System starts, K3s joins cluster + +## Building Images + +### Full Build + +Builds everything from scratch: debootstrap, packages, initramfs, squashfs. + +```bash +cd /home/lindahl/git/netboot +sudo ./build-image.sh +make deploy +``` + +**Time:** 15-30 minutes +**When to use:** Package changes, kernel updates, major configuration changes + +### Initramfs-Only Rebuild + +Faster rebuild when only changing boot/network logic. + +```bash +sudo ./rebuild-initramfs.sh +make deploy +``` + +**Time:** ~2 minutes +**When to use:** Changes to `initramfs/` scripts or hooks + +### Verify Build + +Check that all components are present and valid: + +```bash +./verify-image.sh +``` + +## Secret Management + +Secrets are encrypted with [sops](https://github.com/getsops/sops) using age encryption. The age key lives on phoenix. 
+ +### Encrypted Files + +| File | Contents | +|------|----------| +| `secrets/netboot.sops.yaml` | Root password hash for console login | + +### Viewing Secrets + +```bash +# From any machine with SSH access to phoenix +cat secrets/netboot.sops.yaml | ssh phoenix "sops -d --input-type yaml --output-type yaml /dev/stdin" +``` + +### Updating Root Password + +1. Generate new password hash: + ```bash + ssh phoenix "echo 'newpassword' | openssl passwd -6 -stdin" + ``` + +2. Update the encrypted file: + ```bash + ssh -t phoenix "cd /path/to/netboot && sops secrets/netboot.sops.yaml" + # Edit root_password_hash value, save + ``` + + Or recreate entirely: + ```bash + NEW_HASH=$(ssh phoenix "echo 'newpassword' | openssl passwd -6 -stdin") + ssh phoenix "echo 'root_password_hash: \"$NEW_HASH\"' | sops --input-type yaml --output-type yaml -e --age age1gausnystsln7fpenw7arw7x79xe22z697jnauj38npy0usayqqxqc7td2y /dev/stdin" > secrets/netboot.sops.yaml + ``` + +3. Rebuild and deploy: + ```bash + sudo ./build-image.sh + make deploy + ``` + +4. Reboot nodes to pick up new password + +### Adding New Secrets + +Edit `.sops.yaml` to add new file patterns, then create encrypted files on phoenix: + +```bash +ssh -t phoenix "cd /path/to/netboot && sops secrets/newfile.sops.yaml" +``` + +## Node Storage Setup + +Local NVMe is automatically partitioned on first boot by `setup-node-storage.service`. 
+ +### Partition Layout + +| Partition | Size | Label | Mount Point | Purpose | +|-----------|------|-------|-------------|---------| +| nvme0n1p1 | 75GB | containerd | /var/lib/containerd | Container images | +| nvme0n1p2 | Remaining | longhorn | /var/lib/longhorn | Distributed storage | + +### Automatic Behavior + +| Drive State | Action | +|-------------|--------| +| No partition table | Auto-format (no prompt) | +| Has our labels (containerd/longhorn) | Mount silently | +| Has unknown partitions | Prompt on tty1, 120s timeout, skip if no response | + +### Manual Intervention + +If a node has an unknown drive and you want to format it: + +1. Connect to physical console (tty1) +2. Reboot the node +3. Press ENTER when prompted (within 120 seconds) +4. Wait 5 seconds (abort window) +5. Drive is formatted and mounted + +### Checking Storage Status + +```bash +# On node +journalctl -u setup-node-storage +cat /var/lib/containerd/.netboot-storage # marker file with metadata +lsblk /dev/nvme0n1 +df -h /var/lib/containerd /var/lib/longhorn +``` + +## SSH Access + +### Authorized Keys + +Keys are baked into the image at build time. Current keys: + +| Key | Source | +|-----|--------| +| `ssh-ed25519 AAAAC3...y1J` | lindahl@lindahl-Legion-5-Pro-16ACH6H | +| `ssh-ed25519 AAAA...0tX` | lindahl@phoenix.home | + +To add/remove keys, edit the authorized-keys block in `build-image.sh` (the lines just above `chmod 600 /root/.ssh/authorized_keys`; exact line numbers shift as the script changes), then rebuild and redeploy the image. + +### Console Access + +Root password is set for physical console login only. SSH remains pubkey-only. + +```bash +# Physical console or IPMI +login: root +Password: +``` + +## Troubleshooting + +### Node Won't Boot + +1. Check phoenix HTTP server: + ```bash + ssh phoenix "curl -I http://localhost:8800/boot.ipxe" + ssh phoenix "ls -lh /srv/netboot/http/" + ``` + +2. Check nginx is running: + ```bash + ssh phoenix "systemctl status nginx" + ``` + +3. Verify image integrity: + ```bash + ./verify-image.sh + ``` + +### Node Boots But No Network + +1. 
Check if initramfs has network driver: + ```bash + lsinitramfs http/initrd-netboot.img | grep -E "r8169|r8125" + ``` + +2. Check kernel cmdline includes `ip=dhcp`: + ```bash + cat http/boot.ipxe + ``` + +### Storage Not Mounting + +1. Check service status: + ```bash + ssh root@node "systemctl status setup-node-storage" + ssh root@node "journalctl -u setup-node-storage" + ``` + +2. Check if NVMe exists: + ```bash + ssh root@node "lsblk" + ``` + +3. Check labels: + ```bash + ssh root@node "blkid -L containerd && blkid -L longhorn" + ``` + +### Overlay Filling Up + +The root overlay is only 2GB. If it fills: + +```bash +# Check what's using space +ssh root@node "du -sh /var/* | sort -h" + +# Temporary files should go to NVMe or tmpfs mounts +# /tmp, /var/tmp, /var/log are separate tmpfs +``` + +## File Reference + +| File | Purpose | +|------|---------| +| `build-image.sh` | Main build script | +| `rebuild-initramfs.sh` | Quick initramfs rebuild | +| `verify-image.sh` | Validate built image | +| `Makefile` | Build/deploy automation | +| `initramfs/` | Custom initramfs config for mkinitramfs | +| `initramfs/scripts/netboot` | HTTP root download and overlay mount | +| `files/setup-node-storage` | NVMe partitioning script | +| `files/setup-node-storage.service` | Systemd unit for storage setup | +| `secrets/netboot.sops.yaml` | Encrypted root password | +| `.sops.yaml` | Sops encryption config | +| `http/boot.ipxe` | iPXE boot configuration | + +## Network Configuration + +### IP Address Layout + +| Range | Purpose | +|-------|---------| +| .1 | phoenix (gateway, DHCP, HTTP) | +| .2-.19 | Reserved (future infrastructure) | +| .20-.29 | Infrastructure devices | +| .50-.59 | Static K3s nodes | +| .60-.100 | Dynamic DHCP pool | + +### Static Assignments + +| Host | IP | MAC | Role | +|------|-----|-----|------| +| phoenix | 192.168.100.1 | - | NAS, HTTP server, DHCP | +| usw-flex-2 | 192.168.100.21 | 94:2a:6f:4c:fc:72 | Managed switch | +| k3s-node-01 | 192.168.100.51 | 
78:55:36:04:e7:c8 | K3s worker | +| k3s-node-02 | 192.168.100.52 | 78:55:36:04:e7:1d | K3s worker | + +HTTP server: `http://192.168.100.1:8800/` + +### DHCP Reservations + +Static IP assignments are configured in `/etc/dnsmasq.d/pxe-netboot.conf` on phoenix: + +``` +dhcp-range=192.168.100.60,192.168.100.100,12h + +# Static DHCP reservations for K3s nodes +dhcp-host=78:55:36:04:e7:c8,192.168.100.51,k3s-node-01 +dhcp-host=78:55:36:04:e7:1d,192.168.100.52,k3s-node-02 + +# Infrastructure +dhcp-host=94:2a:6f:4c:fc:72,192.168.100.21,usw-flex-2 +``` + +To add a new node: + +1. Boot the node once to get its MAC (check leases): + ```bash + ssh phoenix "cat /var/lib/misc/dnsmasq.leases" + ``` + +2. Add reservation: + ```bash + ssh phoenix "sudo tee -a /etc/dnsmasq.d/pxe-netboot.conf << EOF + dhcp-host=XX:XX:XX:XX:XX:XX,192.168.100.5X,k3s-node-0X + EOF" + ``` + +3. Restart dnsmasq: + ```bash + ssh phoenix "sudo systemctl restart dnsmasq" + ``` + +To change the boot server IP, edit `http/boot.ipxe` and `initramfs/scripts/netboot`. diff --git a/build-image.sh b/build-image.sh index 38ef58c..69572fc 100755 --- a/build-image.sh +++ b/build-image.sh @@ -14,6 +14,22 @@ VERSION=$(date +%Y%m%d-%H%M) echo "Building netboot image version $VERSION" +# Decrypt secrets from phoenix (requires SSH access as the invoking user, not root) +echo "Decrypting secrets from phoenix..." 
+SECRETS_FILE="$SCRIPT_DIR/secrets/netboot.sops.yaml" +SUDO_USER_HOME=$(getent passwd "${SUDO_USER:-$USER}" | cut -d: -f6) +if [ -f "$SECRETS_FILE" ]; then + # Run SSH as the original user (not root) to use their SSH keys + ROOT_PW_HASH=$(sudo -u "${SUDO_USER:-$USER}" bash -c "cat '$SECRETS_FILE' | ssh phoenix 'sops -d --input-type yaml --output-type yaml /dev/stdin'" | grep root_password_hash | cut -d' ' -f2) + if [ -z "$ROOT_PW_HASH" ]; then + echo "WARNING: Failed to decrypt root password, console login will be disabled" + ROOT_PW_HASH="*" + fi +else + echo "WARNING: No secrets file found at $SECRETS_FILE, console login will be disabled" + ROOT_PW_HASH="*" +fi + # Clean previous build - unmount any stray mounts first if [ -d "$BUILD_DIR/rootfs" ]; then echo "Cleaning up previous build mounts..." @@ -40,6 +56,17 @@ debootstrap --arch=amd64 --variant=minbase --components=main,universe,multiverse noble $BUILD_DIR/rootfs \ http://archive.ubuntu.com/ubuntu +# Write root password hash to temp file for chroot to read +# Use /root/ not /tmp/ because systemd installation may mount tmpfs over /tmp +mkdir -p "$BUILD_DIR/rootfs/root" +if [ -n "$ROOT_PW_HASH" ] && [ "$ROOT_PW_HASH" != "*" ]; then + echo "$ROOT_PW_HASH" > "$BUILD_DIR/rootfs/root/.pw_hash" + echo "Root password hash written to rootfs" +else + echo "*" > "$BUILD_DIR/rootfs/root/.pw_hash" + echo "WARNING: No valid password hash, console login will be disabled" +fi + # Chroot and configure cat << 'CHROOT_SCRIPT' > $BUILD_DIR/rootfs/setup.sh #!/bin/bash @@ -118,7 +145,10 @@ apt-get install -y \ less \ rsync \ git \ - squashfs-tools + squashfs-tools \ + parted \ + fdisk \ + gdisk # Clean up apt-get clean @@ -148,11 +178,19 @@ EOF systemctl enable systemd-networkd systemctl enable systemd-resolved -# Configure SSH +# Configure SSH - disable socket activation, use traditional daemon sed -i 's/#PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config sed -i 
's/#PubkeyAuthentication.*/PubkeyAuthentication yes/' /etc/ssh/sshd_config +# Disable socket activation (Ubuntu 24.04 default) and use traditional sshd +systemctl disable ssh.socket 2>/dev/null || true +rm -f /etc/systemd/system/ssh.service.requires/ssh.socket 2>/dev/null || true +rm -f /etc/systemd/system/sockets.target.wants/ssh.socket 2>/dev/null || true systemctl enable ssh +# Fix SSH host key permissions (must be 0600 for private keys, sshd refuses otherwise) +chmod 600 /etc/ssh/ssh_host_*_key +chmod 644 /etc/ssh/ssh_host_*_key.pub + # Create SSH directory for root mkdir -p /root/.ssh chmod 700 /root/.ssh @@ -165,8 +203,10 @@ SSHKEY chmod 600 /root/.ssh/authorized_keys -# Disable password authentication completely -echo "root:*" | chpasswd -e +# Set root password from decrypted hash (for console login only) +ROOT_PW_HASH=$(cat /root/.pw_hash) +echo "root:$ROOT_PW_HASH" | chpasswd -e +rm -f /root/.pw_hash # Configure tmpfs mounts for ephemeral data cat >> /etc/fstab < "${mount_point}/${MARKER_FILE}" </dev/null) + if [ -n "$pttype" ]; then + return 0 # has partition table + fi + # Fallback: check if parted can read it + if parted -s "$DEVICE" print &>/dev/null; then + return 0 # has partition table + fi + return 1 # no partition table +} + +if ! has_partition_table; then + # No partition table - this is a fresh drive, auto-format + log "Empty drive detected (no partition table) - auto-formatting..." 
+ format_storage + exit 0 +fi + +# Has partition table - check if it's ours +if blkid -L "$CONTAINERD_LABEL" &>/dev/null && blkid -L "$LONGHORN_LABEL" &>/dev/null; then + # Check for marker file (belt and suspenders) + # Create temp mount to check marker without leaving dangling mount + TEMP_MOUNT=$(mktemp -d) + if mount -L "$CONTAINERD_LABEL" "$TEMP_MOUNT" 2>/dev/null; then + if [ -f "${TEMP_MOUNT}/${MARKER_FILE}" ]; then + umount "$TEMP_MOUNT" + rmdir "$TEMP_MOUNT" + log "Storage already configured (found labels and marker)" + mount_storage + exit 0 + else + umount "$TEMP_MOUNT" + rmdir "$TEMP_MOUNT" + # Has our labels but no marker - probably ours, mount it + warn "Found labels but no marker file - assuming configured" + mount_storage + exit 0 + fi + fi + rmdir "$TEMP_MOUNT" 2>/dev/null || true +fi + +# Has partitions but not ours - this could contain data! +warn "NVMe has existing partitions but no netboot labels." +warn "This drive may contain important data!" +echo "" +lsblk "$DEVICE" +echo "" + +# Prompt on console with timeout +echo -e "${CYAN}========================================${NC}" +echo -e "${CYAN} Press ENTER within ${PROMPT_TIMEOUT}s to format ${NC}" +echo -e "${CYAN} Or wait to skip (safe default) ${NC}" +echo -e "${CYAN}========================================${NC}" +echo "" + +if read -t "$PROMPT_TIMEOUT" -p "Format $DEVICE? [press ENTER to confirm] " response; then + echo "" + warn "Formatting in 5 seconds... 
Ctrl+C to abort" + sleep 5 + format_storage +else + echo "" + warn "Timeout - skipping storage setup (drive left untouched)" + warn "To format manually, reboot and press ENTER when prompted" + exit 0 +fi diff --git a/files/setup-node-storage.service b/files/setup-node-storage.service new file mode 100644 index 0000000..f6a99d8 --- /dev/null +++ b/files/setup-node-storage.service @@ -0,0 +1,26 @@ +[Unit] +Description=Setup local NVMe storage for K3s +Documentation=file:///usr/local/bin/setup-node-storage + +# Run early, after devices are available but before container services +After=local-fs.target systemd-udevd.service +Before=containerd.service + +# Only run if not already mounted +ConditionPathIsMountPoint=!/var/lib/containerd + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/setup-node-storage +RemainAfterExit=yes + +# Console access for interactive prompt +StandardInput=tty +TTYPath=/dev/tty1 +TTYReset=yes + +# Generous timeout for user interaction (3 minutes) +TimeoutStartSec=180 + +[Install] +WantedBy=multi-user.target diff --git a/secrets/netboot.sops.yaml b/secrets/netboot.sops.yaml new file mode 100644 index 0000000..ad4ffcb --- /dev/null +++ b/secrets/netboot.sops.yaml @@ -0,0 +1,16 @@ +root_password_hash: ENC[AES256_GCM,data:Oc1Kpg1S3NSG4dDoe0AiDmdWe4wdz9zSMn/WlTvURz3u62HcF9ddZh3yKbsXdc19WbGj/ZJa+MFzucgCg6ChT5OG2k4S+JuAVvRaNmB54XSjyIL2vDkambq8Pt4rg5rVxfv5H6uEd5IWUg==,iv:fO72qW/8JIWGubbfjZYsfhjL3XUq/7RbohGPd1avS+8=,tag:nXP7w2b49iYAcnWxM4WFlA==,type:str] +sops: + age: + - recipient: age1gausnystsln7fpenw7arw7x79xe22z697jnauj38npy0usayqqxqc7td2y + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBDS1VrWkNZTmswYlRrVXAv + ZC9FemRVWkc2bzlVL1BuQm9FaDlPVmVuVFZvCnUyb2xnaDdwQ3BsVkNmY0NxZktp + Zk9qSlZVZk16UUhhOHdGRFN1Zno1V3cKLS0tIHV6YXE1bFBHZjMyVVdMbVZEMXlW + YTN1RnJ3SjRkN21MYmhQK0hZZFB5Sk0KfxfMPUdJjZq/JDOE87oD2XBpQebvy0a5 + IAI5tdpEzNP6tF4oqunmh15fPc61Q0C/5ev+uz0QyHhTlTI13lYpGg== + -----END AGE ENCRYPTED FILE----- + 
lastmodified: "2026-02-05T20:16:15Z" + mac: ENC[AES256_GCM,data:mTCLM3t35mMv9nLQHba65Gq3yAWnY4UKUDHEncMF22RnZKiVDaTMAV6tiaKGu7hHXdDu9fU/E7wPomR8pirGf6pJBUWxCflCe3Q3ZGK9/Aw3guz5ZD34H9nMaCjXME59r1rQdQdQlWP5aW4o+kqfD/bukFpW1HUY0YT8g8fqCpw=,iv:bG1M8Ghuc8JkMNQfODZ1FkMI/8Qs217xlN5ihDnz7hs=,tag:gCScQi1YYXFH4Xo/8Wq5+g==,type:str] + unencrypted_suffix: _unencrypted + version: 3.11.0