diff --git a/deploy/kustomize/base/configmap.yaml b/deploy/kustomize/base/configmap.yaml index ca57e4b..925de97 100644 --- a/deploy/kustomize/base/configmap.yaml +++ b/deploy/kustomize/base/configmap.yaml @@ -21,7 +21,16 @@ data: # containers) so the in-memory record set + secondary indices + FTS fit. # Keep ~25% headroom below the container's memory limit for ephemeral # work (request handling, gitsheets tree mutations, etc.). - NODE_OPTIONS: "--max-old-space-size=1536" + # + # 2048 (was 1536): a fresh boot rebuilding in-memory state from the full + # `published` import (~31.8k people + ~10.4k tag-assignments + secondary + # indices) OOM'd at the 1536 ceiling. The native FTS5 store is off-heap; + # the V8 heap holds the record maps + indices. 2048 boots cleanly and runs + # stable; it is deliberately kept modest because these nodes are only + # ~3.9Gi — an earlier 3072/3.5Gi trial let the pod grow until it starved + # the node's kubelet (NodeNotReady). See the memory-optimization issue for + # the (suspiciously large ~60x) on-disk-to-heap expansion worth reducing. Heads-up: 2048 of the 2560Mi limit is ~20% headroom, under the ~25% above. + NODE_OPTIONS: "--max-old-space-size=2048" PORT: "3001" STORAGE_BACKEND: "filesystem" # The runtime-served branch. `published` is the long-term sandbox + prod diff --git a/deploy/kustomize/base/deployment.yaml b/deploy/kustomize/base/deployment.yaml index 2486e3a..bddf30b 100644 --- a/deploy/kustomize/base/deployment.yaml +++ b/deploy/kustomize/base/deployment.yaml @@ -64,12 +64,19 @@ spec: resources: requests: cpu: 100m - memory: 768Mi + memory: 1Gi limits: cpu: 1000m # Holds the full public dataset + secondary indices + FTS in - # memory (~31k people, 268 projects, 10k tag-assignments, …) - memory: 2Gi + # memory (~31.8k people, 268 projects, 10.4k tag-assignments, …). 
+ # 2.5Gi (was 2Gi): a cold boot rebuilding state from the full + # `published` import exceeded the prior 1536Mi heap; raised to + # NODE_OPTIONS=--max-old-space-size=2048 (see configmap.yaml), + # with the container limit just above that for the off-heap FTS5 + # store + ephemeral request work. Kept at 2.5Gi (not higher) so + # a single pod can't starve a node — these nodes are only ~3.9Gi + # and a 3.5Gi trial drove one NodeNotReady. ~1.4Gi node headroom. + memory: 2560Mi securityContext: runAsNonRoot: true runAsUser: 1000