From 6882281c24bea31bd38d4a4742bfef291c778092 Mon Sep 17 00:00:00 2001 From: Samantha Atkins Date: Sat, 18 Apr 2026 18:28:55 -0400 Subject: [PATCH] added garage, mattermost, etc --- docs/.#tasks.org | 1 - k3s/README.md | 39 ++++-- k3s/garage/garage-webui.yaml | 63 +++++++++ k3s/garage/garage.yaml | 192 +++++++++++++++++++++++++++ k3s/mattermost/mattermost.yaml | 2 +- k3s/mediawiki/mediawiki-db-init.yaml | 40 ++++++ k3s/mediawiki/mediawiki-install.yaml | 46 +++++++ k3s/mediawiki/mediawiki.yaml | 156 ++++++++++++++++++++++ k3s/nats/nats.yaml | 42 +++++- k3s/redis/redis.yaml | 118 ++++++++++++++++ k3s/scripts/check-health.sh | 72 ++++++++++ k3s/scripts/install-nfs-common.sh | 11 ++ k3s/scripts/k3s-control-command | 12 ++ k3s/scripts/on-all-nodes.sh | 22 +++ k3s/scripts/on-controls.sh | 22 +++ k3s/storage/nas-pv.yaml | 168 ++++++++++++++++++----- k3s/vaultwarden/vaultwarden.yaml | 6 +- services/nextcloud.yml | 18 +++ 18 files changed, 980 insertions(+), 50 deletions(-) delete mode 120000 docs/.#tasks.org create mode 100644 k3s/garage/garage-webui.yaml create mode 100644 k3s/garage/garage.yaml create mode 100644 k3s/mediawiki/mediawiki-db-init.yaml create mode 100644 k3s/mediawiki/mediawiki-install.yaml create mode 100644 k3s/mediawiki/mediawiki.yaml create mode 100644 k3s/redis/redis.yaml create mode 100755 k3s/scripts/check-health.sh create mode 100755 k3s/scripts/install-nfs-common.sh create mode 100755 k3s/scripts/k3s-control-command create mode 100755 k3s/scripts/on-all-nodes.sh create mode 100755 k3s/scripts/on-controls.sh create mode 100644 services/nextcloud.yml diff --git a/docs/.#tasks.org b/docs/.#tasks.org deleted file mode 120000 index 794227f..0000000 --- a/docs/.#tasks.org +++ /dev/null @@ -1 +0,0 @@ -samantha@fedora.2598412:1776023928 \ No newline at end of file diff --git a/k3s/README.md b/k3s/README.md index 4bcde09..ead854e 100644 --- a/k3s/README.md +++ b/k3s/README.md @@ -174,6 +174,24 @@ Caddy on each venture ingress VPS proxies to any node's WG IP 
+ NodePort. | 32371 | forgejo | git.sjasoft.com | | 32372 | authentik (HTTP) | auth.sjasoft.com — use this behind Caddy | | 32373 | authentik (HTTPS) | skip — Caddy handles TLS | +| 32374 | mattermost | planned | +| 32375 | listmonk | deployed | +| 32376 | n8n | deployed | +| 32377 | vaultwarden | planned | +| 32379 | monerod (RPC) | planned | +| 32380 | monerod (P2P) | planned | +| 32381 | snikket (HTTP) | planned | +| 32382 | snikket (C2S) | planned | +| 32383 | snikket (S2S) | planned | +| 32384 | snikket (proxy65) | planned | +| 32385 | synapse | planned | +| 32386 | nats (client) | planned | +| 32387 | nats (websocket) | planned | +| 32388 | nats (monitoring) | planned | +| 32389 | nats (leafnode) | planned | +| 32390 | garage (S3 API) | deployed | +| 32391 | garage-webui | deployed | +| 32392 | mediawiki | deployed | --- @@ -202,7 +220,7 @@ so ventures look unrelated from outside. See the WireGuard mesh table above for --- -## Current Deployment Status (2026-04-07) +## Current Deployment Status (2026-04-16) K3s v1.34.6 cluster fully operational. WireGuard full mesh (direct peer-to-peer over vmbr1, hub for external traffic). Headscale removed — too buggy (0.28.x dropped nodes randomly). @@ -222,20 +240,25 @@ hub for external traffic). Headscale removed — too buggy (0.28.x dropped nodes ### Running Services +Scheduler-assigned node in parens reflects current placement (unpinned services may +move on restart). Pinned services have `nodeName` in their manifest. 
+ | Service | Node | NodePort | Domain | Status | |---|---|---|---|---| | postgres:16 | pve-worker (pinned) | ClusterIP | — | running | | mariadb:11 | adder-worker (pinned) | ClusterIP | — | running | -| ghost1 | unpinned | 32368 | blog.the-fulfillment.org | running | -| ghost2 | unpinned | 32369 | blog.privacy-practice.com | running | -| ghost3 | unpinned | 32370 | blog.sjasoft.com | running | -| forgejo:9 | unpinned | 32371 | git.sjasoft.com | running | -| authentik server | unpinned | 32372 | auth.sjasoft.com | running | -| authentik worker | unpinned | — | — | running | +| ghost1 | unpinned (game-worker-ssd) | 32368 | blog.the-fulfillment.org | running | +| ghost2 | unpinned (pve-worker) | 32369 | blog.privacy-practice.com | running | +| ghost3 | unpinned (adder-worker) | 32370 | blog.sjasoft.com | running | +| forgejo:9 | unpinned (pve-worker) | 32371 | git.sjasoft.com | running | +| authentik server | unpinned (adder-worker) | 32372 | auth.sjasoft.com | running | +| authentik worker | unpinned (adder-worker) | — | — | running | +| listmonk | unpinned (pve-worker) | 32375 | — | running | +| n8n | unpinned (game-worker-ssd) | 32376 | — | running | ### Remaining Services to Deploy -n8n, nats, vaultwarden, synapse, snikket, monerod +nats, vaultwarden, synapse, snikket, monerod, mattermost ### Next Steps diff --git a/k3s/garage/garage-webui.yaml b/k3s/garage/garage-webui.yaml new file mode 100644 index 0000000..3233670 --- /dev/null +++ b/k3s/garage/garage-webui.yaml @@ -0,0 +1,63 @@ +# Garage Web UI — khairul169/garage-webui +# Talks to garage-admin (3903) + garage (3900) via in-cluster DNS. 
+# NodePort 32391 — log in as admin with the password stored in homelab/GARAGE_WEBUI_PASSWORD +# +# Deploy: +# # Generate bcrypt-hashed AUTH_USER_PASS from homelab/GARAGE_WEBUI_PASSWORD: +# PASSWORD=$(pass show homelab/GARAGE_WEBUI_PASSWORD) +# HASH=$(python3 -c "import bcrypt,sys; print(bcrypt.hashpw(sys.argv[1].encode(),bcrypt.gensalt(10)).decode())" "$PASSWORD") +# kubectl create secret generic garage-webui-secret \ +# --from-literal=auth-user-pass="admin:$HASH" +# kubectl apply -f garage-webui.yaml + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: garage-webui +spec: + replicas: 1 + selector: + matchLabels: + app: garage-webui + template: + metadata: + labels: + app: garage-webui + spec: + containers: + - name: garage-webui + image: khairul169/garage-webui:latest + env: + - name: API_BASE_URL + value: "http://garage-admin:3903" + - name: API_ADMIN_KEY + valueFrom: + secretKeyRef: + name: garage-secret + key: admin-token + - name: S3_ENDPOINT_URL + value: "https://s3.sjasoft.com" + - name: S3_REGION + value: "garage" + - name: AUTH_USER_PASS + valueFrom: + secretKeyRef: + name: garage-webui-secret + key: auth-user-pass + ports: + - containerPort: 3909 + +--- +apiVersion: v1 +kind: Service +metadata: + name: garage-webui +spec: + selector: + app: garage-webui + ports: + - port: 3909 + targetPort: 3909 + nodePort: 32391 + type: NodePort diff --git a/k3s/garage/garage.yaml b/k3s/garage/garage.yaml new file mode 100644 index 0000000..bface57 --- /dev/null +++ b/k3s/garage/garage.yaml @@ -0,0 +1,192 @@ +# Garage — S3-compatible object storage, single-node +# Metadata (LMDB, mmap-heavy → NFS-hostile) on local-path PVC +# Data blobs on NAS via nas-nfs (subdir "garage" on /volume1/samantha-private) +# Node affinity (hostname NotIn) excludes game-worker-hdd (slow spinning disk) +# +# Ports (fixed NodePort only for S3 API): +# 3900 — S3 API → NodePort 32390 +# 3901 — inter-node RPC (not exposed; single-node) +# 3902 — S3 web hosting (no fixed NodePort; the NodePort Service still auto-assigns one) +# 3903 — admin API (ClusterIP only via svc/garage-admin; kubectl port-forward to use) +# +# 
Deploy: +# kubectl create secret generic garage-secret \ +# --from-literal=rpc-secret="$(openssl rand -hex 32)" \ +# --from-literal=admin-token="$(openssl rand -hex 32)" \ +# --from-literal=metrics-token="$(openssl rand -hex 32)" +# kubectl apply -f garage.yaml +# +# First-time layout init (run once after pod is Ready): +# kubectl exec -n default deploy/garage -- garage status +# # copy the node ID from the output, then: +# kubectl exec -n default deploy/garage -- \ +# garage layout assign -z dc1 -c 500G NODE_ID +# kubectl exec -n default deploy/garage -- \ +# garage layout apply --version 1 + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: garage-config +data: + garage.toml: | + metadata_dir = "/meta" + data_dir = "/data" + db_engine = "lmdb" + + replication_factor = 1 + consistency_mode = "consistent" + + rpc_bind_addr = "[::]:3901" + rpc_public_addr = "127.0.0.1:3901" + + [s3_api] + api_bind_addr = "[::]:3900" + s3_region = "garage" + root_domain = ".s3.garage.homelab" + + [s3_web] + bind_addr = "[::]:3902" + root_domain = ".page.sjasoft.com" + index = "index.html" + + [admin] + api_bind_addr = "[::]:3903" + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: garage-meta-pvc +spec: + accessModes: [ReadWriteOnce] + storageClassName: local-path + resources: + requests: + storage: 10Gi + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: garage-data-pvc + annotations: + nfs.io/storage-path: "garage" +spec: + accessModes: [ReadWriteMany] + storageClassName: nas-nfs + resources: + requests: + storage: 500Gi + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: garage +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: garage + template: + metadata: + labels: + app: garage + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - game-worker-hdd + 
containers: + - name: garage + image: dxflrs/garage:v2.3.0 + command: ["/garage"] + args: ["server"] + env: + - name: GARAGE_RPC_SECRET + valueFrom: + secretKeyRef: + name: garage-secret + key: rpc-secret + - name: GARAGE_ADMIN_TOKEN + valueFrom: + secretKeyRef: + name: garage-secret + key: admin-token + - name: GARAGE_METRICS_TOKEN + valueFrom: + secretKeyRef: + name: garage-secret + key: metrics-token + - name: GARAGE_CONFIG_FILE + value: /etc/garage/garage.toml + ports: + - containerPort: 3900 + name: s3 + - containerPort: 3901 + name: rpc + - containerPort: 3902 + name: web + - containerPort: 3903 + name: admin + volumeMounts: + - name: config + mountPath: /etc/garage + - name: meta + mountPath: /meta + - name: data + mountPath: /data + volumes: + - name: config + configMap: + name: garage-config + - name: meta + persistentVolumeClaim: + claimName: garage-meta-pvc + - name: data + persistentVolumeClaim: + claimName: garage-data-pvc + +--- +apiVersion: v1 +kind: Service +metadata: + name: garage +spec: + selector: + app: garage + ports: + - name: s3 + port: 3900 + targetPort: 3900 + nodePort: 32390 + - name: web + port: 3902 + targetPort: 3902 + type: NodePort + +--- +# Admin API — ClusterIP only (not publicly reachable). 
+# Use via: kubectl port-forward -n default svc/garage-admin 3903:3903 +# Then hit http://localhost:3903 with bearer $(pass show homelab/GARAGE_ADMIN_TOKEN) +apiVersion: v1 +kind: Service +metadata: + name: garage-admin +spec: + selector: + app: garage + ports: + - name: admin + port: 3903 + targetPort: 3903 + type: ClusterIP diff --git a/k3s/mattermost/mattermost.yaml b/k3s/mattermost/mattermost.yaml index cb4b71a..d937ee8 100644 --- a/k3s/mattermost/mattermost.yaml +++ b/k3s/mattermost/mattermost.yaml @@ -53,7 +53,7 @@ spec: spec: containers: - name: mattermost - image: mattermost/mattermost-team-edition:10 + image: mattermost/mattermost-team-edition:10.11 env: - name: MM_DB_PASSWORD valueFrom: diff --git a/k3s/mediawiki/mediawiki-db-init.yaml b/k3s/mediawiki/mediawiki-db-init.yaml new file mode 100644 index 0000000..97188fc --- /dev/null +++ b/k3s/mediawiki/mediawiki-db-init.yaml @@ -0,0 +1,40 @@ +# MediaWiki DB init — creates mediawiki_user and mediawiki_db in MariaDB. +# MediaWiki's official image ships with mysqli/pdo_mysql but NOT pgsql, +# so we use MariaDB (already deployed) rather than the shared postgres. +# Run once before the install job. 
+ +apiVersion: batch/v1 +kind: Job +metadata: + name: mediawiki-db-init +spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: mediawiki-db-init + image: mariadb:11 + env: + - name: MYSQL_PWD + valueFrom: + secretKeyRef: + name: mariadb-secret + key: root-password + - name: MW_DB_PASSWORD + valueFrom: + secretKeyRef: + name: mediawiki-secret + key: db-password + command: + - /bin/sh + - -c + - | + mariadb -h mariadb -u root < "$wgResourceBasePath/resources/assets/change-your-logo.svg" ]; + + $wgEnableEmail = false; + $wgEmergencyContact = ""; + $wgPasswordSender = ""; + + $wgDBtype = "mysql"; + $wgDBserver = "mariadb"; + $wgDBname = "mediawiki_db"; + $wgDBuser = "mediawiki_user"; + $wgDBpassword = getenv('MW_DB_PASSWORD'); + $wgDBport = "3306"; + + $wgSecretKey = getenv('MW_SECRET_KEY'); + $wgUpgradeKey = getenv('MW_UPGRADE_KEY'); + + $wgUploadDirectory = "/var/www/html/images"; + $wgEnableUploads = true; + + $wgPingback = false; + + $wgDefaultSkin = "vector-2022"; + wfLoadSkin( 'Vector' ); + wfLoadSkin( 'MonoBook' ); + wfLoadSkin( 'Timeless' ); + wfLoadSkin( 'MinervaNeue' ); + + $wgLocaltimezone = "UTC"; + date_default_timezone_set($wgLocaltimezone); + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: mediawiki-images-pvc +spec: + accessModes: [ReadWriteOnce] + storageClassName: local-path + resources: + requests: + storage: 20Gi + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mediawiki +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: mediawiki + template: + metadata: + labels: + app: mediawiki + spec: + securityContext: + fsGroup: 33 # www-data — so PVC is group-writable by apache + containers: + - name: mediawiki + image: mediawiki:1.43 + env: + - name: MW_SERVER + value: "https://wiki.the-fulfillment.org" + - name: MW_DB_PASSWORD + valueFrom: + secretKeyRef: + name: mediawiki-secret + key: db-password + - name: MW_SECRET_KEY + valueFrom: + secretKeyRef: + name: 
mediawiki-secret + key: secret-key + - name: MW_UPGRADE_KEY + valueFrom: + secretKeyRef: + name: mediawiki-secret + key: upgrade-key + ports: + - containerPort: 80 + volumeMounts: + - name: localsettings + mountPath: /var/www/html/LocalSettings.php + subPath: LocalSettings.php + - name: images + mountPath: /var/www/html/images + volumes: + - name: localsettings + configMap: + name: mediawiki-localsettings + - name: images + persistentVolumeClaim: + claimName: mediawiki-images-pvc + +--- +apiVersion: v1 +kind: Service +metadata: + name: mediawiki +spec: + selector: + app: mediawiki + ports: + - port: 80 + targetPort: 80 + nodePort: 32392 + type: NodePort diff --git a/k3s/nats/nats.yaml b/k3s/nats/nats.yaml index 985795d..757ac5f 100644 --- a/k3s/nats/nats.yaml +++ b/k3s/nats/nats.yaml @@ -1,14 +1,25 @@ -# NATS — JetStream-enabled message broker +# NATS — JetStream-enabled message broker, leafnode-capable # JetStream enabled with persistent storage via local-path PVC # Unpinned — scheduler places freely -# NodePorts: 32376 (client), 32377 (websocket), 32378 (monitoring) +# NodePorts: 32386 (client), 32387 (websocket), 32388 (monitoring), 32389 (leaf) # # Deploy: +# kubectl create secret generic nats-leaf-secret \ +# --namespace \ +# --from-literal=password="$(openssl rand -base64 32)" # kubectl apply -f nats.yaml -n # # Internal cluster DNS: nats:4222 # WebSocket: nats:8080 # Monitoring: nats:8222 +# Leafnode: nats:7422 (user=leaf, password from secret) +# +# Leaf client config snippet (workstation / VPS): +# leafnodes { +# remotes = [ +# { urls: ["nats-leaf://leaf:PASSWORD@:32389"] } +# ] +# } --- apiVersion: v1 @@ -28,6 +39,14 @@ data: no_tls: true } + leafnodes { + port: 7422 + authorization { + user: leaf + password: $LEAF_PASSWORD + } + } + --- apiVersion: v1 kind: PersistentVolumeClaim @@ -59,11 +78,18 @@ spec: containers: - name: nats image: nats:latest - command: ["-c", "/etc/nats/nats.conf"] + args: ["-c", "/etc/nats/nats.conf"] + env: + - name: 
LEAF_PASSWORD + valueFrom: + secretKeyRef: + name: nats-leaf-secret + key: password ports: - containerPort: 4222 - containerPort: 8080 - containerPort: 8222 + - containerPort: 7422 volumeMounts: - name: nats-config mountPath: /etc/nats @@ -89,13 +115,17 @@ spec: - name: client port: 4222 targetPort: 4222 - nodePort: 32376 + nodePort: 32386 - name: websocket port: 8080 targetPort: 8080 - nodePort: 32377 + nodePort: 32387 - name: monitoring port: 8222 targetPort: 8222 - nodePort: 32378 + nodePort: 32388 + - name: leaf + port: 7422 + targetPort: 7422 + nodePort: 32389 type: NodePort diff --git a/k3s/redis/redis.yaml b/k3s/redis/redis.yaml new file mode 100644 index 0000000..00efaab --- /dev/null +++ b/k3s/redis/redis.yaml @@ -0,0 +1,118 @@ +# Redis — shared cluster cache/broker/session store +# Pinned to fatmama: host requires vm.overcommit_memory=1 (kernel tuning) +# 8GB maxmemory, noeviction (safe for broker use), AOF + RDB persistence +# +# Database allocation (convention, not enforced): +# 0 — default / ad-hoc cache +# 2 — Plane (Celery broker + cache) +# +# Deploy: +# kubectl create secret generic redis-secret \ +# --from-literal=password="$(openssl rand -hex 32)" +# kubectl apply -f redis.yaml +# +# Cluster DNS: redis:6379 +# Connection string: redis://:@redis:6379/ + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-config +data: + redis.conf: | + # Memory + maxmemory 8gb + maxmemory-policy noeviction + + # Persistence: AOF primary, RDB snapshots as backup + appendonly yes + appendfsync everysec + save 3600 1 + save 300 100 + save 60 10000 + + # Networking + bind 0.0.0.0 + protected-mode no + + loglevel notice + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: redis-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: local-path + resources: + requests: + storage: 20Gi + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + strategy: + type: 
Recreate + template: + metadata: + labels: + app: redis + spec: + nodeSelector: + kubernetes.io/hostname: fatmama + containers: + - name: redis + image: redis:7-alpine + args: + - "redis-server" + - "/etc/redis/redis.conf" + - "--requirepass" + - "$(REDIS_PASSWORD)" + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: redis-secret + key: password + ports: + - containerPort: 6379 + resources: + requests: + memory: "8Gi" + limits: + memory: "9Gi" + volumeMounts: + - name: redis-config + mountPath: /etc/redis + - name: redis-data + mountPath: /data + volumes: + - name: redis-config + configMap: + name: redis-config + - name: redis-data + persistentVolumeClaim: + claimName: redis-pvc + +--- +apiVersion: v1 +kind: Service +metadata: + name: redis +spec: + selector: + app: redis + ports: + - port: 6379 + targetPort: 6379 + type: ClusterIP diff --git a/k3s/scripts/check-health.sh b/k3s/scripts/check-health.sh new file mode 100755 index 0000000..13400ea --- /dev/null +++ b/k3s/scripts/check-health.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# K3s cluster health check — run from workstation. +# Checks: +# - Cluster (via pve-control): nodes Ready, no failing pods, coredns healthy +# - Per node: wg0 up, flannel.1 up, /etc/resolv.conf non-empty, host DNS resolves +# - Mesh: each node can ping every other node's WG IP +set -uo pipefail + +CONTROL="pve-control" +NODES=(pve-control adder-control game-control pve-worker adder-worker game-worker-hdd game-worker-ssd fat_mama) + +# SSH alias → WG IP (see k3s/README.md). SSH alias `fat_mama`; k8s node name `fatmama`. 
+declare -A WG_IP=( + [pve-control]=10.0.0.6 + [pve-worker]=10.0.0.7 + [adder-control]=10.0.0.8 + [adder-worker]=10.0.0.9 + [game-control]=10.0.0.10 + [game-worker-hdd]=10.0.0.11 + [game-worker-ssd]=10.0.0.12 + [fat_mama]=10.0.0.13 +) + +RC=0 +pass() { printf ' \033[32m[OK]\033[0m %s\n' "$*"; } +fail() { printf ' \033[31m[FAIL]\033[0m %s\n' "$*"; RC=1; } + +SSH="ssh -o ConnectTimeout=5 -o BatchMode=yes" + +echo "=== Cluster-level checks (via $CONTROL) ===" +not_ready=$($SSH "$CONTROL" "sudo kubectl get nodes --no-headers | awk '\$2 != \"Ready\" {print \$1\"(\"\$2\")\"}'" 2>/dev/null) || not_ready="(could not query $CONTROL)" # a failed ssh yields empty output, which must not read as "all Ready" +[[ -z "$not_ready" ]] && pass "all nodes Ready" || fail "not Ready: $not_ready" + +bad_pods=$($SSH "$CONTROL" "sudo kubectl get pods -A --no-headers | awk '\$4 != \"Running\" && \$4 != \"Completed\" {print \$1\"/\"\$2\"(\"\$4\")\"}'" 2>/dev/null) || bad_pods="(could not query $CONTROL)" # same guard: empty output from a failed ssh is not a pass +[[ -z "$bad_pods" ]] && pass "no failing pods" || fail "bad pods: $bad_pods" + +coredns=$($SSH "$CONTROL" "sudo kubectl get deploy coredns -n kube-system -o=jsonpath='{.status.readyReplicas}/{.status.replicas}'" 2>/dev/null) +[[ "$coredns" == "1/1" ]] && pass "coredns $coredns" || fail "coredns $coredns" + +echo +echo "=== Per-node checks ===" +for node in "${NODES[@]}"; do + echo "-- $node --" + if ! $SSH "$node" true 2>/dev/null; then + fail "ssh unreachable" + continue + fi + $SSH "$node" "ip -br a show wg0 2>/dev/null | grep -q .." 2>/dev/null \ + && pass "wg0 up" || fail "wg0 missing" + $SSH "$node" "ip -br a show flannel.1 2>/dev/null | grep -q .." 
2>/dev/null \ + && pass "flannel.1 up" || fail "flannel.1 missing" + $SSH "$node" "[ -s /etc/resolv.conf ]" 2>/dev/null \ + && pass "resolv.conf non-empty" || fail "resolv.conf empty" + $SSH "$node" "getent hosts registry-1.docker.io >/dev/null 2>&1" 2>/dev/null \ + && pass "host DNS resolves" || fail "host DNS broken" +done + +echo +echo "=== Mesh reachability (each node → every peer's WG IP) ===" +for src in "${NODES[@]}"; do + unreachable="" + for dst in "${NODES[@]}"; do + [[ "$src" == "$dst" ]] && continue + $SSH "$src" "ping -c1 -W2 ${WG_IP[$dst]} >/dev/null 2>&1" 2>/dev/null \ + || unreachable="$unreachable ${dst}(${WG_IP[$dst]})" + done + [[ -z "$unreachable" ]] && pass "$src → all peers" || fail "$src unreachable:$unreachable" +done + +echo +[[ $RC -eq 0 ]] && echo "ALL CHECKS PASSED" || echo "FAILURES DETECTED" +exit $RC diff --git a/k3s/scripts/install-nfs-common.sh b/k3s/scripts/install-nfs-common.sh new file mode 100755 index 0000000..5708ae2 --- /dev/null +++ b/k3s/scripts/install-nfs-common.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Idempotently install nfs-common. Run via on-all-nodes.sh. +set -euo pipefail + +if dpkg -s nfs-common >/dev/null 2>&1; then + echo "nfs-common already installed" + exit 0 +fi +apt-get update -qq +DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nfs-common +echo "installed" diff --git a/k3s/scripts/k3s-control-command b/k3s/scripts/k3s-control-command new file mode 100755 index 0000000..445c77d --- /dev/null +++ b/k3s/scripts/k3s-control-command @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Run one command on pve-control with sudo automatically prepended. 
+# Usage: k3s-control-command '' +# Example: k3s-control-command 'kubectl get pods -A' +set -euo pipefail + +[[ $# -ge 1 ]] || { echo "Usage: $(basename "$0") ''" >&2; exit 1; } + +cmd="$*" +[[ "$cmd" == sudo* ]] || cmd="sudo $cmd" + +exec ssh pve-control "$cmd" diff --git a/k3s/scripts/on-all-nodes.sh b/k3s/scripts/on-all-nodes.sh new file mode 100755 index 0000000..a45c060 --- /dev/null +++ b/k3s/scripts/on-all-nodes.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Copy a local script to /tmp on every k3s node (control + worker) and execute it with sudo. +# Usage: on-all-nodes.sh [args...] +set -uo pipefail + +SCRIPT="${1:?usage: $(basename "$0") [args...]}" +shift +[[ -f "$SCRIPT" ]] || { echo "not a file: $SCRIPT" >&2; exit 1; } + +NODES=(pve-control adder-control game-control pve-worker adder-worker game-worker-hdd game-worker-ssd fat_mama) +NAME="$(basename "$SCRIPT")" +REMOTE="/tmp/$NAME" + +rc=0 +for node in "${NODES[@]}"; do + echo "========== $node ==========" + if ! scp -q "$SCRIPT" "$node:$REMOTE"; then + echo "(scp failed on $node)" >&2; rc=1; continue + fi + ssh "$node" "sudo bash $REMOTE $*; ret=\$?; rm -f $REMOTE; exit \$ret" || rc=1 +done +exit $rc diff --git a/k3s/scripts/on-controls.sh b/k3s/scripts/on-controls.sh new file mode 100755 index 0000000..d9c43d3 --- /dev/null +++ b/k3s/scripts/on-controls.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Copy a local script to /tmp on each control-plane node and execute it there with sudo. +# Usage: on-controls.sh [args...] +set -uo pipefail + +SCRIPT="${1:?usage: $(basename "$0") [args...]}" +shift +[[ -f "$SCRIPT" ]] || { echo "not a file: $SCRIPT" >&2; exit 1; } + +CONTROLS=(pve-control adder-control game-control) +NAME="$(basename "$SCRIPT")" +REMOTE="/tmp/$NAME" + +rc=0 +for node in "${CONTROLS[@]}"; do + echo "========== $node ==========" + if ! 
scp -q "$SCRIPT" "$node:$REMOTE"; then + echo "(scp failed on $node)" >&2; rc=1; continue + fi + ssh "$node" "sudo bash $REMOTE $*; ret=\$?; rm -f $REMOTE; exit \$ret" || rc=1 +done +exit $rc diff --git a/k3s/storage/nas-pv.yaml b/k3s/storage/nas-pv.yaml index 56b9890..f849eb9 100644 --- a/k3s/storage/nas-pv.yaml +++ b/k3s/storage/nas-pv.yaml @@ -1,47 +1,153 @@ -# NAS PersistentVolume — Synology 425+ at 192.168.40.96 -# NFS share mounted cluster-wide — any pod can claim storage from it via PVC -# ReadWriteMany — multiple pods on different nodes can mount simultaneously +# NAS storage — Synology 425+ +# Share: 192.168.40.96:/volume1/samantha-private (the only NAS share k3s uses) # -# Prerequisites on every K3s worker VM: -# apt install nfs-common +# Uses kubernetes-sigs/nfs-subdir-external-provisioner: +# any PVC that asks for storageClassName: nas-nfs automatically gets a subdir +# carved out of the share. Subdir name = ${namespace}-${pvc}-${pv} by default, +# or see pathPattern annotation on a PVC to override. # -# Deploy (once, cluster-scoped — no namespace): +# Prerequisites (one-time): +# - apt install nfs-common on every k3s node (handled via +# k3s/scripts/install-nfs-common.sh + on-all-nodes.sh) +# +# Deploy (cluster-scoped): # kubectl apply -f nas-pv.yaml # -# Then any service can claim NAS storage with a PVC like: -# storageClassName: nas-nfs -# accessModes: [ReadWriteMany] -# -# Replace /volume1/k3s with your actual NAS share path. -# Create subdirectories on the NAS per service to keep data organised: -# /volume1/k3s/monerod -# /volume1/k3s/vaultwarden -# etc. 
+# Usage in a service manifest (note: storageClassName: nas-nfs): +# apiVersion: v1 +# kind: PersistentVolumeClaim +# metadata: +# name: foo-data +# annotations: +# nfs.io/storage-path: "foo" # subdir literal name (optional) +# spec: +# storageClassName: nas-nfs +# accessModes: [ReadWriteMany] +# resources: +# requests: +# storage: 10Gi --- apiVersion: v1 -kind: PersistentVolume +kind: Namespace metadata: - name: nas-pv + name: nfs-provisioner + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nfs-subdir-external-provisioner + namespace: nfs-provisioner + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: nfs-subdir-external-provisioner-runner +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "update", "patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: run-nfs-subdir-external-provisioner +subjects: + - kind: ServiceAccount + name: nfs-subdir-external-provisioner + namespace: nfs-provisioner +roleRef: + kind: ClusterRole + name: nfs-subdir-external-provisioner-runner + apiGroup: rbac.authorization.k8s.io + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: leader-locking-nfs-subdir-external-provisioner + namespace: nfs-provisioner +rules: + - apiGroups: [""] + resources: ["endpoints"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: leader-locking-nfs-subdir-external-provisioner + namespace: nfs-provisioner +subjects: + - kind: 
ServiceAccount + name: nfs-subdir-external-provisioner + namespace: nfs-provisioner +roleRef: + kind: Role + name: leader-locking-nfs-subdir-external-provisioner + apiGroup: rbac.authorization.k8s.io + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nfs-subdir-external-provisioner + namespace: nfs-provisioner spec: - capacity: - storage: 40Ti - accessModes: - - ReadWriteMany - persistentVolumeReclaimPolicy: Retain - storageClassName: nas-nfs - mountOptions: - - hard - - nfsvers=4.1 - nfs: - server: 192.168.40.96 - path: /volume1/k3s + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: nfs-subdir-external-provisioner + template: + metadata: + labels: + app: nfs-subdir-external-provisioner + spec: + serviceAccountName: nfs-subdir-external-provisioner + containers: + - name: nfs-subdir-external-provisioner + image: registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2 + volumeMounts: + - name: nfs-client-root + mountPath: /persistentvolumes + env: + - name: PROVISIONER_NAME + value: homelab.nas/nfs-subdir + - name: NFS_SERVER + value: 192.168.40.96 + - name: NFS_PATH + value: /volume1/samantha-private + volumes: + - name: nfs-client-root + nfs: + server: 192.168.40.96 + path: /volume1/samantha-private --- apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: name: nas-nfs -provisioner: kubernetes.io/no-provisioner -volumeBindingMode: Immediate +provisioner: homelab.nas/nfs-subdir +parameters: + archiveOnDelete: "true" + pathPattern: "${.PVC.annotations.nfs.io/storage-path}" reclaimPolicy: Retain +volumeBindingMode: Immediate diff --git a/k3s/vaultwarden/vaultwarden.yaml b/k3s/vaultwarden/vaultwarden.yaml index 447fc2c..204edec 100644 --- a/k3s/vaultwarden/vaultwarden.yaml +++ b/k3s/vaultwarden/vaultwarden.yaml @@ -1,7 +1,7 @@ # Vaultwarden — self-hosted Bitwarden-compatible password manager # SQLite backend — data persisted in local-path PVC # Unpinned — scheduler places freely -# NodePort 32375 +# NodePort 
32377 # Signups disabled — use admin panel to invite users # # Deploy: @@ -11,7 +11,7 @@ # kubectl apply -f vaultwarden.yaml -n # # Generate admin token with: openssl rand -base64 48 -# Admin panel: http://:32375/admin +# Admin panel: http://:32377/admin --- apiVersion: v1 @@ -79,5 +79,5 @@ spec: ports: - port: 8222 targetPort: 8222 - nodePort: 32375 + nodePort: 32377 type: NodePort diff --git a/services/nextcloud.yml b/services/nextcloud.yml new file mode 100644 index 0000000..dfbb201 --- /dev/null +++ b/services/nextcloud.yml @@ -0,0 +1,18 @@ +services: + nextcloud-aio-mastercontainer: + image: nextcloud/all-in-one:latest-arm64 + container_name: nextcloud-aio-mastercontainer + restart: always + ports: + # Publish only the AIO admin UI; host port 11000 (APACHE_PORT) is bound by the Apache container that AIO starts, so it must stay free here. + - "8080:8080" + volumes: + - nextcloud_aio_mastercontainer:/mnt/docker-aio-config + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + - APACHE_PORT=11000 + - SKIP_DOMAIN_VALIDATION=true + +volumes: + nextcloud_aio_mastercontainer: + name: nextcloud_aio_mastercontainer \ No newline at end of file