added garage, mattermost, etc

This commit is contained in:
Samantha Atkins 2026-04-18 18:28:55 -04:00
parent b7c9dc81a0
commit 6882281c24
18 changed files with 980 additions and 50 deletions

View file

@ -1 +0,0 @@
samantha@fedora.2598412:1776023928

View file

@ -174,6 +174,24 @@ Caddy on each venture ingress VPS proxies to any node's WG IP + NodePort.
| 32371 | forgejo | git.sjasoft.com | | 32371 | forgejo | git.sjasoft.com |
| 32372 | authentik (HTTP) | auth.sjasoft.com — use this behind Caddy | | 32372 | authentik (HTTP) | auth.sjasoft.com — use this behind Caddy |
| 32373 | authentik (HTTPS) | skip — Caddy handles TLS | | 32373 | authentik (HTTPS) | skip — Caddy handles TLS |
| 32374 | mattermost | planned |
| 32375 | listmonk | deployed |
| 32376 | n8n | deployed |
| 32377 | vaultwarden | planned |
| 32379 | monerod (RPC) | planned |
| 32380 | monerod (P2P) | planned |
| 32381 | snikket (HTTP) | planned |
| 32382 | snikket (C2S) | planned |
| 32383 | snikket (S2S) | planned |
| 32384 | snikket (proxy65) | planned |
| 32385 | synapse | planned |
| 32386 | nats (client) | planned |
| 32387 | nats (websocket) | planned |
| 32388 | nats (monitoring) | planned |
| 32389 | nats (leafnode) | planned |
| 32390 | garage (S3 API) | deployed |
| 32391 | garage-webui | deployed |
| 32392 | mediawiki | deployed |
--- ---
@ -202,7 +220,7 @@ so ventures look unrelated from outside. See the WireGuard mesh table above for
--- ---
## Current Deployment Status (2026-04-07) ## Current Deployment Status (2026-04-16)
K3s v1.34.6 cluster fully operational. WireGuard full mesh (direct peer-to-peer over vmbr1, K3s v1.34.6 cluster fully operational. WireGuard full mesh (direct peer-to-peer over vmbr1,
hub for external traffic). Headscale removed — too buggy (0.28.x dropped nodes randomly). hub for external traffic). Headscale removed — too buggy (0.28.x dropped nodes randomly).
@ -222,20 +240,25 @@ hub for external traffic). Headscale removed — too buggy (0.28.x dropped nodes
### Running Services ### Running Services
For unpinned services, the node in parentheses is where the scheduler currently runs
the pod; it may change on restart. Pinned services have `nodeName` in their manifest.
| Service | Node | NodePort | Domain | Status | | Service | Node | NodePort | Domain | Status |
|---|---|---|---|---| |---|---|---|---|---|
| postgres:16 | pve-worker (pinned) | ClusterIP | — | running | | postgres:16 | pve-worker (pinned) | ClusterIP | — | running |
| mariadb:11 | adder-worker (pinned) | ClusterIP | — | running | | mariadb:11 | adder-worker (pinned) | ClusterIP | — | running |
| ghost1 | unpinned | 32368 | blog.the-fulfillment.org | running | | ghost1 | unpinned (game-worker-ssd) | 32368 | blog.the-fulfillment.org | running |
| ghost2 | unpinned | 32369 | blog.privacy-practice.com | running | | ghost2 | unpinned (pve-worker) | 32369 | blog.privacy-practice.com | running |
| ghost3 | unpinned | 32370 | blog.sjasoft.com | running | | ghost3 | unpinned (adder-worker) | 32370 | blog.sjasoft.com | running |
| forgejo:9 | unpinned | 32371 | git.sjasoft.com | running | | forgejo:9 | unpinned (pve-worker) | 32371 | git.sjasoft.com | running |
| authentik server | unpinned | 32372 | auth.sjasoft.com | running | | authentik server | unpinned (adder-worker) | 32372 | auth.sjasoft.com | running |
| authentik worker | unpinned | — | — | running | | authentik worker | unpinned (adder-worker) | — | — | running |
| listmonk | unpinned (pve-worker) | 32375 | — | running |
| n8n | unpinned (game-worker-ssd) | 32376 | — | running |
### Remaining Services to Deploy ### Remaining Services to Deploy
n8n, nats, vaultwarden, synapse, snikket, monerod nats, vaultwarden, synapse, snikket, monerod, mattermost
### Next Steps ### Next Steps

View file

@ -0,0 +1,63 @@
# Garage Web UI — khairul169/garage-webui
# Talks to garage-admin (3903) via in-cluster DNS; S3 requests use the public endpoint https://s3.sjasoft.com.
# NodePort 32391 — log in with admin / <pass show homelab/GARAGE_WEBUI_PASSWORD>
#
# Deploy:
# # Generate bcrypt-hashed AUTH_USER_PASS from homelab/GARAGE_WEBUI_PASSWORD:
# PASSWORD=$(pass show homelab/GARAGE_WEBUI_PASSWORD)
# HASH=$(python3 -c "import bcrypt,sys; print(bcrypt.hashpw(sys.argv[1].encode(),bcrypt.gensalt(10)).decode())" "$PASSWORD")
# kubectl create secret generic garage-webui-secret \
# --from-literal=auth-user-pass="admin:$HASH"
# kubectl apply -f garage-webui.yaml
---
# Garage Web UI — single replica; the container listens on 3909.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: garage-webui
spec:
  replicas: 1
  selector:
    matchLabels:
      app: garage-webui
  template:
    metadata:
      labels:
        app: garage-webui
    spec:
      containers:
        - name: garage-webui
          # NOTE(review): ":latest" is a floating tag — consider pinning a release.
          image: khairul169/garage-webui:latest
          ports:
            - containerPort: 3909
          env:
            # Garage admin API, addressed by the garage-admin Service name.
            - name: API_BASE_URL
              value: "http://garage-admin:3903"
            # Admin bearer token, taken from the same Secret key the garage
            # server uses (garage-secret / admin-token).
            - name: API_ADMIN_KEY
              valueFrom:
                secretKeyRef:
                  name: garage-secret
                  key: admin-token
            - name: S3_ENDPOINT_URL
              value: "https://s3.sjasoft.com"
            - name: S3_REGION
              value: "garage"
            # "admin:<bcrypt hash>" login pair from garage-webui-secret.
            - name: AUTH_USER_PASS
              valueFrom:
                secretKeyRef:
                  name: garage-webui-secret
                  key: auth-user-pass
---
# Publishes the web UI (pod port 3909) on every node at NodePort 32391.
apiVersion: v1
kind: Service
metadata:
  name: garage-webui
spec:
  type: NodePort
  selector:
    app: garage-webui
  ports:
    - port: 3909
      targetPort: 3909
      nodePort: 32391

192
k3s/garage/garage.yaml Normal file
View file

@ -0,0 +1,192 @@
# Garage — S3-compatible object storage, single-node
# Metadata (LMDB, mmap-heavy → NFS-hostile) on local-path PVC
# Data blobs on NAS via nas-nfs (subdir "garage" on /volume1/samantha-private)
# Anti-affinity excludes game-worker-hdd (slow spinning disk)
#
# Ports (NodePort only for S3 API):
# 3900 — S3 API → NodePort 32390
# 3901 — inter-node RPC (not exposed; single-node)
# 3902 — S3 web hosting (not exposed for now)
# 3903 — admin API (bound to [::] in the pod; reachable in-cluster via the garage-admin ClusterIP Service, no NodePort)
#
# Deploy:
# kubectl create secret generic garage-secret \
# --from-literal=rpc-secret="$(openssl rand -hex 32)" \
# --from-literal=admin-token="$(openssl rand -hex 32)" \
# --from-literal=metrics-token="$(openssl rand -hex 32)"
# kubectl apply -f garage.yaml
#
# First-time layout init (run once after pod is Ready):
# kubectl exec -n default deploy/garage -- garage status
# # copy the node ID from the output, then:
# kubectl exec -n default deploy/garage -- \
# garage layout assign -z dc1 -c 500G <node-id>
# kubectl exec -n default deploy/garage -- \
# garage layout apply --version 1
---
# Garage server configuration; the Deployment mounts this ConfigMap at
# /etc/garage and points GARAGE_CONFIG_FILE at /etc/garage/garage.toml.
# Single-node setup: replication_factor = 1, RPC advertised on loopback.
# The RPC secret and the admin/metrics tokens are not stored here — the
# Deployment injects them as GARAGE_* environment variables from garage-secret.
apiVersion: v1
kind: ConfigMap
metadata:
  name: garage-config
data:
  garage.toml: |
    metadata_dir = "/meta"
    data_dir = "/data"
    db_engine = "lmdb"
    replication_factor = 1
    consistency_mode = "consistent"
    rpc_bind_addr = "[::]:3901"
    rpc_public_addr = "127.0.0.1:3901"
    [s3_api]
    api_bind_addr = "[::]:3900"
    s3_region = "garage"
    root_domain = ".s3.garage.homelab"
    [s3_web]
    bind_addr = "[::]:3902"
    root_domain = ".page.sjasoft.com"
    index = "index.html"
    [admin]
    api_bind_addr = "[::]:3903"
---
# Metadata volume (mounted at /meta): 10Gi on the local-path storage class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: garage-meta-pvc
spec:
  storageClassName: local-path
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
# Data-blob volume (mounted at /data): 500Gi from the nas-nfs storage class.
# The nfs.io/storage-path annotation requests the literal subdirectory "garage".
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: garage-data-pvc
  annotations:
    nfs.io/storage-path: "garage"
spec:
  storageClassName: nas-nfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 500Gi
---
# Garage server — one replica running `/garage server`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: garage
spec:
  replicas: 1
  selector:
    matchLabels:
      app: garage
  # Recreate: the old pod is terminated before its replacement starts.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: garage
    spec:
      # Hard scheduling rule: any node except game-worker-hdd.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - game-worker-hdd
      volumes:
        - name: config
          configMap:
            name: garage-config
        - name: meta
          persistentVolumeClaim:
            claimName: garage-meta-pvc
        - name: data
          persistentVolumeClaim:
            claimName: garage-data-pvc
      containers:
        - name: garage
          image: dxflrs/garage:v2.3.0
          command:
            - "/garage"
          args:
            - "server"
          ports:
            - name: s3
              containerPort: 3900
            - name: rpc
              containerPort: 3901
            - name: web
              containerPort: 3902
            - name: admin
              containerPort: 3903
          env:
            # Path of the TOML file supplied by the "config" volume below.
            - name: GARAGE_CONFIG_FILE
              value: /etc/garage/garage.toml
            # Secrets come from garage-secret rather than the ConfigMap.
            - name: GARAGE_RPC_SECRET
              valueFrom:
                secretKeyRef:
                  name: garage-secret
                  key: rpc-secret
            - name: GARAGE_ADMIN_TOKEN
              valueFrom:
                secretKeyRef:
                  name: garage-secret
                  key: admin-token
            - name: GARAGE_METRICS_TOKEN
              valueFrom:
                secretKeyRef:
                  name: garage-secret
                  key: metrics-token
          volumeMounts:
            - name: config
              mountPath: /etc/garage
            # /meta and /data match metadata_dir / data_dir in garage.toml.
            - name: meta
              mountPath: /meta
            - name: data
              mountPath: /data
---
# S3 API — the only Garage port published on the nodes (NodePort 32390).
#
# The S3 web-hosting port (3902) is intentionally NOT listed here: every port
# of a `type: NodePort` Service is given a node port, and a port without an
# explicit `nodePort` receives a random one from the node-port range. Listing
# 3902 here would therefore expose web hosting on every node. When web hosting
# is wanted, add the port back with a fixed nodePort, or give it its own
# ClusterIP Service.
apiVersion: v1
kind: Service
metadata:
  name: garage
spec:
  type: NodePort
  selector:
    app: garage
  ports:
    - name: s3
      port: 3900
      targetPort: 3900
      nodePort: 32390
---
# Admin API (3903) — ClusterIP, so it is reachable only from inside the cluster.
# From a workstation:
#   kubectl port-forward -n default svc/garage-admin 3903:3903
# then call http://localhost:3903 with the bearer token from
#   pass show homelab/GARAGE_ADMIN_TOKEN
apiVersion: v1
kind: Service
metadata:
  name: garage-admin
spec:
  type: ClusterIP
  selector:
    app: garage
  ports:
    - name: admin
      port: 3903
      targetPort: 3903

View file

@ -53,7 +53,7 @@ spec:
spec: spec:
containers: containers:
- name: mattermost - name: mattermost
image: mattermost/mattermost-team-edition:10 image: mattermost/mattermost-team-edition:10.11
env: env:
- name: MM_DB_PASSWORD - name: MM_DB_PASSWORD
valueFrom: valueFrom:

View file

@ -0,0 +1,40 @@
# MediaWiki DB init — creates mediawiki_user and mediawiki_db in MariaDB.
# MediaWiki's official image ships with mysqli/pdo_mysql but NOT pgsql,
# so we use MariaDB (already deployed) rather than the shared postgres.
# Run once before the install job.
# One-shot Job: connects to the "mariadb" service as root and creates the
# mediawiki_db database plus the mediawiki_user account (both IF NOT EXISTS).
apiVersion: batch/v1
kind: Job
metadata:
  name: mediawiki-db-init
spec:
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: mediawiki-db-init
          image: mariadb:11
          env:
            # Root password is supplied through MYSQL_PWD, so the client
            # invocation below carries no password option.
            - name: MYSQL_PWD
              valueFrom:
                secretKeyRef:
                  name: mariadb-secret
                  key: root-password
            # Expanded by the shell inside the (unquoted) heredoc below.
            - name: MW_DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mediawiki-secret
                  key: db-password
          command:
            - /bin/sh
            - -c
            - |
              mariadb -h mariadb -u root <<EOF
              CREATE DATABASE IF NOT EXISTS mediawiki_db
              CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
              CREATE USER IF NOT EXISTS 'mediawiki_user'@'%'
              IDENTIFIED BY '${MW_DB_PASSWORD}';
              GRANT ALL PRIVILEGES ON mediawiki_db.*
              TO 'mediawiki_user'@'%';
              FLUSH PRIVILEGES;
              EOF

View file

@ -0,0 +1,46 @@
# MediaWiki schema install — runs maintenance/install.php against MariaDB once.
# Creates the DB tables and the Admin user. LocalSettings.php written by
# install is discarded (we mount our own via ConfigMap in mediawiki.yaml).
# To re-run: delete this job first; install.php errors on an initialized DB.
# One-shot Job: runs MediaWiki's maintenance/install.php against the
# mediawiki_db database on the "mariadb" service, creating the "Admin" wiki
# account. It runs from /tmp inside the container.
apiVersion: batch/v1
kind: Job
metadata:
  name: mediawiki-install
spec:
  backoffLimit: 2
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: mediawiki-install
          image: mediawiki:1.43
          workingDir: /tmp
          env:
            # Database password for mediawiki_user (used for both --dbpass and
            # --installdbpass).
            - name: MW_DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mediawiki-secret
                  key: db-password
            # Password for the initial "Admin" wiki account.
            - name: MW_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mediawiki-secret
                  key: admin-password
          command:
            - /bin/sh
            - -c
            - |
              php /var/www/html/maintenance/install.php \
              --dbtype=mysql \
              --dbserver=mariadb \
              --dbname=mediawiki_db \
              --dbuser=mediawiki_user \
              --dbpass="${MW_DB_PASSWORD}" \
              --installdbuser=mediawiki_user \
              --installdbpass="${MW_DB_PASSWORD}" \
              --pass="${MW_ADMIN_PASSWORD}" \
              --scriptpath="" \
              --server="http://placeholder" \
              "Samantha's Wiki" \
              "Admin"

View file

@ -0,0 +1,156 @@
# MediaWiki — personal wiki, MariaDB-backed
# Unpinned; uploads PVC on local-path.
# LocalSettings.php supplied via ConfigMap — secrets read at runtime from env.
# NodePort 32392.
#
# Deploy flow (run in order, from /home/samantha/private/Knowledge/repos/homelab):
#
# # 1. Generate secrets and stash in pass + k8s:
# DB=$(openssl rand -hex 32)
# ADM=$(openssl rand -hex 16)
# SK=$(openssl rand -hex 32)
# UK=$(openssl rand -hex 16)
# echo "$DB" | pass insert -m -f homelab/MEDIAWIKI_DB_PASSWORD
# echo "$ADM" | pass insert -m -f homelab/MEDIAWIKI_ADMIN_PASSWORD
# echo "$SK" | pass insert -m -f homelab/MEDIAWIKI_SECRET_KEY
# echo "$UK" | pass insert -m -f homelab/MEDIAWIKI_UPGRADE_KEY
# k3s/scripts/k3s-control-command "sudo kubectl create secret generic mediawiki-secret -n default \
# --from-literal=db-password='$DB' \
# --from-literal=admin-password='$ADM' \
# --from-literal=secret-key='$SK' \
# --from-literal=upgrade-key='$UK'"
#
# # 2. Create db/user, run install.php, deploy:
# k3s/scripts/k3s-control-command 'kubectl apply -f /tmp/mediawiki-db-init.yaml' # scp first
# k3s/scripts/k3s-control-command 'kubectl wait --for=condition=complete job/mediawiki-db-init --timeout=60s'
# k3s/scripts/k3s-control-command 'kubectl apply -f /tmp/mediawiki-install.yaml'
# k3s/scripts/k3s-control-command 'kubectl wait --for=condition=complete job/mediawiki-install --timeout=120s'
# k3s/scripts/k3s-control-command 'kubectl apply -f /tmp/mediawiki.yaml'
---
# LocalSettings.php for MediaWiki; the Deployment mounts this single file at
# /var/www/html/LocalSettings.php. The PHP reads these values from the
# environment at request time instead of hard-coding them:
#   MW_SERVER       → $wgServer (falls back to http://localhost:32392)
#   MW_DB_PASSWORD  → $wgDBpassword
#   MW_SECRET_KEY   → $wgSecretKey
#   MW_UPGRADE_KEY  → $wgUpgradeKey
# Database settings point at the "mariadb" service (mysql driver).
apiVersion: v1
kind: ConfigMap
metadata:
  name: mediawiki-localsettings
data:
  LocalSettings.php: |
    <?php
    if (!defined('MEDIAWIKI')) { exit; }
    $wgSitename = "Samantha's Wiki";
    $wgMetaNamespace = "Samanthas_Wiki";
    $wgScriptPath = "";
    $wgServer = getenv('MW_SERVER') ?: "http://localhost:32392";
    $wgResourceBasePath = $wgScriptPath;
    $wgLogos = [ '1x' => "$wgResourceBasePath/resources/assets/change-your-logo.svg" ];
    $wgEnableEmail = false;
    $wgEmergencyContact = "";
    $wgPasswordSender = "";
    $wgDBtype = "mysql";
    $wgDBserver = "mariadb";
    $wgDBname = "mediawiki_db";
    $wgDBuser = "mediawiki_user";
    $wgDBpassword = getenv('MW_DB_PASSWORD');
    $wgDBport = "3306";
    $wgSecretKey = getenv('MW_SECRET_KEY');
    $wgUpgradeKey = getenv('MW_UPGRADE_KEY');
    $wgUploadDirectory = "/var/www/html/images";
    $wgEnableUploads = true;
    $wgPingback = false;
    $wgDefaultSkin = "vector-2022";
    wfLoadSkin( 'Vector' );
    wfLoadSkin( 'MonoBook' );
    wfLoadSkin( 'Timeless' );
    wfLoadSkin( 'MinervaNeue' );
    $wgLocaltimezone = "UTC";
    date_default_timezone_set($wgLocaltimezone);
---
# Upload storage (mounted at /var/www/html/images): 20Gi on local-path.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mediawiki-images-pvc
spec:
  storageClassName: local-path
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
---
# MediaWiki web pod — one replica, serving HTTP on container port 80.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mediawiki
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mediawiki
  # Recreate: the old pod is terminated before its replacement starts.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: mediawiki
    spec:
      securityContext:
        fsGroup: 33  # www-data — so PVC is group-writable by apache
      volumes:
        - name: localsettings
          configMap:
            name: mediawiki-localsettings
        - name: images
          persistentVolumeClaim:
            claimName: mediawiki-images-pvc
      containers:
        - name: mediawiki
          image: mediawiki:1.43
          ports:
            - containerPort: 80
          env:
            # Public base URL; LocalSettings.php assigns it to $wgServer.
            - name: MW_SERVER
              value: "https://wiki.the-fulfillment.org"
            # The three values below are read with getenv() in LocalSettings.php.
            - name: MW_DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mediawiki-secret
                  key: db-password
            - name: MW_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: mediawiki-secret
                  key: secret-key
            - name: MW_UPGRADE_KEY
              valueFrom:
                secretKeyRef:
                  name: mediawiki-secret
                  key: upgrade-key
          volumeMounts:
            # subPath mounts only the one file, leaving the rest of
            # /var/www/html from the image untouched.
            - name: localsettings
              mountPath: /var/www/html/LocalSettings.php
              subPath: LocalSettings.php
            - name: images
              mountPath: /var/www/html/images
---
# Publishes MediaWiki (pod port 80) on every node at NodePort 32392.
apiVersion: v1
kind: Service
metadata:
  name: mediawiki
spec:
  type: NodePort
  selector:
    app: mediawiki
  ports:
    - port: 80
      targetPort: 80
      nodePort: 32392

View file

@ -1,14 +1,25 @@
# NATS — JetStream-enabled message broker # NATS — JetStream-enabled message broker, leafnode-capable
# JetStream enabled with persistent storage via local-path PVC # JetStream enabled with persistent storage via local-path PVC
# Unpinned — scheduler places freely # Unpinned — scheduler places freely
# NodePorts: 32376 (client), 32377 (websocket), 32378 (monitoring) # NodePorts: 32386 (client), 32387 (websocket), 32388 (monitoring), 32389 (leaf)
# #
# Deploy: # Deploy:
# kubectl create secret generic nats-leaf-secret \
# --namespace <ns> \
# --from-literal=password="$(openssl rand -hex 32)" # hex, not base64: '/', '+', '=' would break the nats-leaf:// URL below
# kubectl apply -f nats.yaml -n <ns> # kubectl apply -f nats.yaml -n <ns>
# #
# Internal cluster DNS: nats:4222 # Internal cluster DNS: nats:4222
# WebSocket: nats:8080 # WebSocket: nats:8080
# Monitoring: nats:8222 # Monitoring: nats:8222
# Leafnode: nats:7422 (user=leaf, password from secret)
#
# Leaf client config snippet (workstation / VPS):
# leafnodes {
# remotes = [
# { urls: ["nats-leaf://leaf:PASSWORD@<any-node-wg-ip>:32389"] }
# ]
# }
--- ---
apiVersion: v1 apiVersion: v1
@ -28,6 +39,14 @@ data:
no_tls: true no_tls: true
} }
leafnodes {
port: 7422
authorization {
user: leaf
password: $LEAF_PASSWORD
}
}
--- ---
apiVersion: v1 apiVersion: v1
kind: PersistentVolumeClaim kind: PersistentVolumeClaim
@ -59,11 +78,18 @@ spec:
containers: containers:
- name: nats - name: nats
image: nats:latest image: nats:latest
command: ["-c", "/etc/nats/nats.conf"] args: ["-c", "/etc/nats/nats.conf"]
env:
- name: LEAF_PASSWORD
valueFrom:
secretKeyRef:
name: nats-leaf-secret
key: password
ports: ports:
- containerPort: 4222 - containerPort: 4222
- containerPort: 8080 - containerPort: 8080
- containerPort: 8222 - containerPort: 8222
- containerPort: 7422
volumeMounts: volumeMounts:
- name: nats-config - name: nats-config
mountPath: /etc/nats mountPath: /etc/nats
@ -89,13 +115,17 @@ spec:
- name: client - name: client
port: 4222 port: 4222
targetPort: 4222 targetPort: 4222
nodePort: 32376 nodePort: 32386
- name: websocket - name: websocket
port: 8080 port: 8080
targetPort: 8080 targetPort: 8080
nodePort: 32377 nodePort: 32387
- name: monitoring - name: monitoring
port: 8222 port: 8222
targetPort: 8222 targetPort: 8222
nodePort: 32378 nodePort: 32388
- name: leaf
port: 7422
targetPort: 7422
nodePort: 32389
type: NodePort type: NodePort

118
k3s/redis/redis.yaml Normal file
View file

@ -0,0 +1,118 @@
# Redis — shared cluster cache/broker/session store
# Pinned to fatmama: host requires vm.overcommit_memory=1 (kernel tuning)
# 8GB maxmemory, noeviction (safe for broker use), AOF + RDB persistence
#
# Database allocation (convention, not enforced):
# 0 — default / ad-hoc cache
# 2 — Plane (Celery broker + cache)
#
# Deploy:
# kubectl create secret generic redis-secret \
# --from-literal=password="$(openssl rand -hex 32)"
# kubectl apply -f redis.yaml
#
# Cluster DNS: redis:6379
# Connection string: redis://:<password>@redis:6379/<db-number>
---
# redis.conf, mounted by the Deployment at /etc/redis/redis.conf.
# The password is NOT set here; the Deployment appends --requirepass on the
# command line from redis-secret, which is what guards the instance given
# "bind 0.0.0.0" together with "protected-mode no".
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-config
data:
  redis.conf: |
    # Memory
    maxmemory 8gb
    maxmemory-policy noeviction
    # Persistence: AOF primary, RDB snapshots as backup
    appendonly yes
    appendfsync everysec
    save 3600 1
    save 300 100
    save 60 10000
    # Networking
    bind 0.0.0.0
    protected-mode no
    loglevel notice
---
# Persistence volume for Redis (mounted at /data): 20Gi on local-path.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: redis-pvc
spec:
  storageClassName: local-path
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 20Gi
---
# Redis server — one replica, scheduled only on the node named "fatmama".
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
spec:
  replicas: 1
  # Recreate: the old pod is terminated before its replacement starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      nodeSelector:
        kubernetes.io/hostname: fatmama
      volumes:
        - name: redis-config
          configMap:
            name: redis-config
        - name: redis-data
          persistentVolumeClaim:
            claimName: redis-pvc
      containers:
        - name: redis
          image: redis:7-alpine
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: redis-secret
                  key: password
          # $(REDIS_PASSWORD) is substituted by Kubernetes from the env entry
          # above before the container starts.
          args: ["redis-server", "/etc/redis/redis.conf", "--requirepass", "$(REDIS_PASSWORD)"]
          ports:
            - containerPort: 6379
          # Request equals redis.conf's maxmemory (8gb); the limit leaves 1Gi
          # on top of that.
          resources:
            requests:
              memory: "8Gi"
            limits:
              memory: "9Gi"
          volumeMounts:
            - name: redis-config
              mountPath: /etc/redis
            - name: redis-data
              mountPath: /data
---
# In-cluster access only: redis:6379 (ClusterIP, no NodePort).
apiVersion: v1
kind: Service
metadata:
  name: redis
spec:
  type: ClusterIP
  selector:
    app: redis
  ports:
    - port: 6379
      targetPort: 6379

72
k3s/scripts/check-health.sh Executable file
View file

@ -0,0 +1,72 @@
#!/usr/bin/env bash
# K3s cluster health check — run from workstation.
# Checks:
#   - Cluster (via pve-control): nodes Ready, no failing pods, coredns healthy
#   - Per node: wg0 up, flannel.1 up, /etc/resolv.conf non-empty, host DNS resolves
#   - Mesh: each node can ping every other node's WG IP
# Exit status: 0 when every check passed, 1 when any check failed.
# Note: no `set -e` — a failing check is recorded in RC instead of aborting.
set -uo pipefail

CONTROL="pve-control"
NODES=(pve-control adder-control game-control pve-worker adder-worker game-worker-hdd game-worker-ssd fat_mama)

# SSH alias → WG IP (see k3s/README.md). SSH alias `fat_mama`; k8s node name `fatmama`.
declare -A WG_IP=(
  [pve-control]=10.0.0.6
  [pve-worker]=10.0.0.7
  [adder-control]=10.0.0.8
  [adder-worker]=10.0.0.9
  [game-control]=10.0.0.10
  [game-worker-hdd]=10.0.0.11
  [game-worker-ssd]=10.0.0.12
  [fat_mama]=10.0.0.13
)

RC=0
pass() { printf ' \033[32m[OK]\033[0m %s\n' "$*"; }
fail() { printf ' \033[31m[FAIL]\033[0m %s\n' "$*"; RC=1; }

# BatchMode: never prompt for a password; give up on a host after 5 seconds.
SSH=(ssh -o ConnectTimeout=5 -o BatchMode=yes)

# remote_check <node> <remote-command> <ok-message> <fail-message>
# Runs the command on the node and reports pass/fail from its exit status.
remote_check() {
  local node=$1 cmd=$2 ok=$3 bad=$4
  if "${SSH[@]}" "$node" "$cmd" 2>/dev/null; then
    pass "$ok"
  else
    fail "$bad"
  fi
}

echo "=== Cluster-level checks (via $CONTROL) ==="

# The awk filters run locally. Together with `pipefail` this makes a failed
# ssh/kubectl fail the whole substitution, so an unreachable control node is
# reported as a failure instead of producing empty output that looks like
# "nothing is wrong".
if not_ready=$("${SSH[@]}" "$CONTROL" "sudo kubectl get nodes --no-headers" 2>/dev/null \
    | awk '$2 != "Ready" {print $1 "(" $2 ")"}'); then
  if [[ -z "$not_ready" ]]; then
    pass "all nodes Ready"
  else
    fail "not Ready: $not_ready"
  fi
else
  fail "could not list nodes via $CONTROL"
fi

# Column 4 of `kubectl get pods -A --no-headers` is STATUS.
if bad_pods=$("${SSH[@]}" "$CONTROL" "sudo kubectl get pods -A --no-headers" 2>/dev/null \
    | awk '$4 != "Running" && $4 != "Completed" {print $1 "/" $2 "(" $4 ")"}'); then
  if [[ -z "$bad_pods" ]]; then
    pass "no failing pods"
  else
    fail "bad pods: $bad_pods"
  fi
else
  fail "could not list pods via $CONTROL"
fi

# Anything other than exactly "1/1" (including empty output from a failed
# query) counts as a failure.
coredns=$("${SSH[@]}" "$CONTROL" "sudo kubectl get deploy coredns -n kube-system -o=jsonpath='{.status.readyReplicas}/{.status.replicas}'" 2>/dev/null)
if [[ "$coredns" == "1/1" ]]; then
  pass "coredns $coredns"
else
  fail "coredns ${coredns:-(no answer from $CONTROL)}"
fi

echo
echo "=== Per-node checks ==="
for node in "${NODES[@]}"; do
  echo "-- $node --"
  if ! "${SSH[@]}" "$node" true 2>/dev/null; then
    fail "ssh unreachable"
    continue
  fi
  # `grep -q ..` succeeds when `ip` printed anything, i.e. the interface exists.
  remote_check "$node" "ip -br a show wg0 2>/dev/null | grep -q .." "wg0 up" "wg0 missing"
  remote_check "$node" "ip -br a show flannel.1 2>/dev/null | grep -q .." "flannel.1 up" "flannel.1 missing"
  remote_check "$node" "[ -s /etc/resolv.conf ]" "resolv.conf non-empty" "resolv.conf empty"
  remote_check "$node" "getent hosts registry-1.docker.io >/dev/null 2>&1" "host DNS resolves" "host DNS broken"
done

echo
echo "=== Mesh reachability (each node → every peer's WG IP) ==="
for src in "${NODES[@]}"; do
  # Without this probe, a source that is down over ssh would wait for one
  # ssh timeout per peer and then be reported as unable to reach any of them.
  if ! "${SSH[@]}" "$src" true 2>/dev/null; then
    fail "$src ssh unreachable — mesh not tested from this node"
    continue
  fi
  unreachable=""
  for dst in "${NODES[@]}"; do
    [[ "$src" == "$dst" ]] && continue
    "${SSH[@]}" "$src" "ping -c1 -W2 ${WG_IP[$dst]} >/dev/null 2>&1" 2>/dev/null \
      || unreachable="$unreachable ${dst}(${WG_IP[$dst]})"
  done
  if [[ -z "$unreachable" ]]; then
    pass "$src → all peers"
  else
    fail "$src unreachable:$unreachable"
  fi
done

echo
if [[ $RC -eq 0 ]]; then
  echo "ALL CHECKS PASSED"
else
  echo "FAILURES DETECTED"
fi
exit $RC

View file

@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Idempotently install nfs-common. Run via on-all-nodes.sh.
# Must run as root (apt-get); on-all-nodes.sh invokes it through sudo.
set -euo pipefail

# `dpkg -s` also exits 0 for a package that was removed but not purged
# (status "deinstall ok config-files"), which would wrongly skip the install.
# Compare the full status string instead; `|| true` keeps `set -e` from
# aborting when dpkg has never heard of the package.
status="$(dpkg-query -W -f='${Status}' nfs-common 2>/dev/null || true)"
if [[ "$status" == "install ok installed" ]]; then
  echo "nfs-common already installed"
  exit 0
fi

apt-get update -qq
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq nfs-common
echo "installed"

12
k3s/scripts/k3s-control-command Executable file
View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Run one command on pve-control with sudo automatically prepended.
# Usage: k3s-control-command '<command>'
# Example: k3s-control-command 'kubectl get pods -A'
# All arguments are joined with spaces into a single remote command line;
# this script replaces itself with ssh, so the exit status is ssh's.
set -euo pipefail

[[ $# -ge 1 ]] || { echo "Usage: $(basename "$0") '<command>'" >&2; exit 1; }

cmd="$*"
# Prepend sudo unless the command already starts with the word "sudo".
# Matching the whole word (not the glob sudo*) stops a command that merely
# begins with those letters from being passed through unprivileged.
case "$cmd" in
  sudo | "sudo "*) ;;
  *) cmd="sudo $cmd" ;;
esac

exec ssh pve-control "$cmd"

22
k3s/scripts/on-all-nodes.sh Executable file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Copy a local script to /tmp on every k3s node (control + worker) and execute it with sudo.
# Usage: on-all-nodes.sh <local-script> [args...]
# Arguments are passed to the script verbatim (they are shell-quoted for the
# remote side, so spaces and metacharacters survive and are not re-expanded).
# Exit status: 0 if the script succeeded on every node, 1 otherwise.
# Note: no `set -e` — a failure on one node must not stop the remaining nodes.
set -uo pipefail

SCRIPT="${1:?usage: $(basename "$0") <local-script> [args...]}"
shift
[[ -f "$SCRIPT" ]] || { echo "not a file: $SCRIPT" >&2; exit 1; }

NODES=(pve-control adder-control game-control pve-worker adder-worker game-worker-hdd game-worker-ssd fat_mama)
NAME="$(basename "$SCRIPT")"
REMOTE="/tmp/$NAME"

# ssh hands its command to the remote shell as one string, so quote the
# path and every argument here. The guard matters: `printf '%q'` with no
# arguments would still emit '' and pass a spurious empty argument.
printf -v remote_q '%q' "$REMOTE"
args_q=""
if (( $# > 0 )); then
  printf -v args_q ' %q' "$@"
fi

rc=0
for node in "${NODES[@]}"; do
  echo "========== $node =========="
  if ! scp -q "$SCRIPT" "$node:$REMOTE"; then
    echo "(scp failed on $node)" >&2; rc=1; continue
  fi
  # Run, remember the script's status, always remove the copy, then report it.
  ssh "$node" "sudo bash ${remote_q}${args_q}; ret=\$?; rm -f ${remote_q}; exit \$ret" || rc=1
done
exit $rc

22
k3s/scripts/on-controls.sh Executable file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Copy a local script to /tmp on each control-plane node and execute it there with sudo.
# Usage: on-controls.sh <local-script> [args...]
# Arguments are passed to the script verbatim (they are shell-quoted for the
# remote side, so spaces and metacharacters survive and are not re-expanded).
# Exit status: 0 if the script succeeded on every node, 1 otherwise.
# Note: no `set -e` — a failure on one node must not stop the remaining nodes.
set -uo pipefail

SCRIPT="${1:?usage: $(basename "$0") <local-script> [args...]}"
shift
[[ -f "$SCRIPT" ]] || { echo "not a file: $SCRIPT" >&2; exit 1; }

CONTROLS=(pve-control adder-control game-control)
NAME="$(basename "$SCRIPT")"
REMOTE="/tmp/$NAME"

# ssh hands its command to the remote shell as one string, so quote the
# path and every argument here. The guard matters: `printf '%q'` with no
# arguments would still emit '' and pass a spurious empty argument.
printf -v remote_q '%q' "$REMOTE"
args_q=""
if (( $# > 0 )); then
  printf -v args_q ' %q' "$@"
fi

rc=0
for node in "${CONTROLS[@]}"; do
  echo "========== $node =========="
  if ! scp -q "$SCRIPT" "$node:$REMOTE"; then
    echo "(scp failed on $node)" >&2; rc=1; continue
  fi
  # Run, remember the script's status, always remove the copy, then report it.
  ssh "$node" "sudo bash ${remote_q}${args_q}; ret=\$?; rm -f ${remote_q}; exit \$ret" || rc=1
done
exit $rc

View file

@ -1,47 +1,153 @@
# NAS PersistentVolume — Synology 425+ at 192.168.40.96 # NAS storage — Synology 425+
# NFS share mounted cluster-wide — any pod can claim storage from it via PVC # Share: 192.168.40.96:/volume1/samantha-private (the only NAS share k3s uses)
# ReadWriteMany — multiple pods on different nodes can mount simultaneously
# #
# Prerequisites on every K3s worker VM: # Uses kubernetes-sigs/nfs-subdir-external-provisioner:
# apt install nfs-common # any PVC that asks for storageClassName: nas-nfs automatically gets a subdir
# carved out of the share. Subdir name = ${namespace}-${pvc}-${pv} by default,
# or see pathPattern annotation on a PVC to override.
# #
# Deploy (once, cluster-scoped — no namespace): # Prerequisites (one-time):
# - apt install nfs-common on every k3s node (handled via
# k3s/scripts/install-nfs-common.sh + on-all-nodes.sh)
#
# Deploy (cluster-scoped):
# kubectl apply -f nas-pv.yaml # kubectl apply -f nas-pv.yaml
# #
# Then any service can claim NAS storage with a PVC like: # Usage in a service manifest (note: storageClassName: nas-nfs):
# storageClassName: nas-nfs # apiVersion: v1
# accessModes: [ReadWriteMany] # kind: PersistentVolumeClaim
# # metadata:
# Replace /volume1/k3s with your actual NAS share path. # name: foo-data
# Create subdirectories on the NAS per service to keep data organised: # annotations:
# /volume1/k3s/monerod # nfs.io/storage-path: "foo" # subdir literal name (optional)
# /volume1/k3s/vaultwarden # spec:
# etc. # storageClassName: nas-nfs
# accessModes: [ReadWriteMany]
# resources:
# requests:
# storage: 10Gi
--- ---
apiVersion: v1 apiVersion: v1
kind: PersistentVolume kind: Namespace
metadata: metadata:
name: nas-pv name: nfs-provisioner
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfs-subdir-external-provisioner
namespace: nfs-provisioner
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: nfs-subdir-external-provisioner-runner
rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: run-nfs-subdir-external-provisioner
subjects:
- kind: ServiceAccount
name: nfs-subdir-external-provisioner
namespace: nfs-provisioner
roleRef:
kind: ClusterRole
name: nfs-subdir-external-provisioner-runner
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: leader-locking-nfs-subdir-external-provisioner
namespace: nfs-provisioner
rules:
- apiGroups: [""]
resources: ["endpoints"]
verbs: ["get", "list", "watch", "create", "update", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: leader-locking-nfs-subdir-external-provisioner
namespace: nfs-provisioner
subjects:
- kind: ServiceAccount
name: nfs-subdir-external-provisioner
namespace: nfs-provisioner
roleRef:
kind: Role
name: leader-locking-nfs-subdir-external-provisioner
apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: nfs-subdir-external-provisioner
namespace: nfs-provisioner
spec: spec:
capacity: replicas: 1
storage: 40Ti strategy:
accessModes: type: Recreate
- ReadWriteMany selector:
persistentVolumeReclaimPolicy: Retain matchLabels:
storageClassName: nas-nfs app: nfs-subdir-external-provisioner
mountOptions: template:
- hard metadata:
- nfsvers=4.1 labels:
nfs: app: nfs-subdir-external-provisioner
server: 192.168.40.96 spec:
path: /volume1/k3s serviceAccountName: nfs-subdir-external-provisioner
containers:
- name: nfs-subdir-external-provisioner
image: registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2
volumeMounts:
- name: nfs-client-root
mountPath: /persistentvolumes
env:
- name: PROVISIONER_NAME
value: homelab.nas/nfs-subdir
- name: NFS_SERVER
value: 192.168.40.96
- name: NFS_PATH
value: /volume1/samantha-private
volumes:
- name: nfs-client-root
nfs:
server: 192.168.40.96
path: /volume1/samantha-private
--- ---
apiVersion: storage.k8s.io/v1 apiVersion: storage.k8s.io/v1
kind: StorageClass kind: StorageClass
metadata: metadata:
name: nas-nfs name: nas-nfs
provisioner: kubernetes.io/no-provisioner provisioner: homelab.nas/nfs-subdir
volumeBindingMode: Immediate parameters:
archiveOnDelete: "true"
pathPattern: "${.PVC.annotations.nfs.io/storage-path}"
reclaimPolicy: Retain reclaimPolicy: Retain
volumeBindingMode: Immediate

View file

@ -1,7 +1,7 @@
# Vaultwarden — self-hosted Bitwarden-compatible password manager # Vaultwarden — self-hosted Bitwarden-compatible password manager
# SQLite backend — data persisted in local-path PVC # SQLite backend — data persisted in local-path PVC
# Unpinned — scheduler places freely # Unpinned — scheduler places freely
# NodePort 32375 # NodePort 32377
# Signups disabled — use admin panel to invite users # Signups disabled — use admin panel to invite users
# #
# Deploy: # Deploy:
@ -11,7 +11,7 @@
# kubectl apply -f vaultwarden.yaml -n <ns> # kubectl apply -f vaultwarden.yaml -n <ns>
# #
# Generate admin token with: openssl rand -base64 48 # Generate admin token with: openssl rand -base64 48
# Admin panel: http://<any-node-mesh-ip>:32375/admin # Admin panel: http://<any-node-mesh-ip>:32377/admin
--- ---
apiVersion: v1 apiVersion: v1
@ -79,5 +79,5 @@ spec:
ports: ports:
- port: 8222 - port: 8222
targetPort: 8222 targetPort: 8222
nodePort: 32375 nodePort: 32377
type: NodePort type: NodePort

18
services/nextcloud.yml Normal file
View file

@ -0,0 +1,18 @@
# Nextcloud All-in-One — master container only. The master container uses the
# mounted Docker socket to create and manage the remaining AIO containers.
services:
  nextcloud-aio-mastercontainer:
    image: nextcloud/all-in-one:latest-arm64
    # AIO expects exactly this container name.
    container_name: nextcloud-aio-mastercontainer
    restart: always
    ports:
      # AIO admin interface.
      # Host port 11000 is deliberately NOT published here: APACHE_PORT=11000
      # tells AIO to publish that port from the Apache container it creates,
      # and a mapping on the master container would already occupy it.
      - "8080:8080"
    volumes:
      # AIO expects exactly this volume name for its configuration.
      - nextcloud_aio_mastercontainer:/mnt/docker-aio-config
      - /var/run/docker.sock:/var/run/docker.sock:ro
    environment:
      # Port of the AIO-managed Apache container; point the reverse proxy here.
      - APACHE_PORT=11000
      - SKIP_DOMAIN_VALIDATION=true

volumes:
  nextcloud_aio_mastercontainer:
    name: nextcloud_aio_mastercontainer