9643cb197b
- Pin codex@0.142.0 + opencode-ai@1.17.9 in the job image (was @latest, causing dev/prod drift) - Worker now pulls the job image once per process so prod stops running a stale Codex - Surface Codex error/turn.failed events instead of swallowing them, so the real failure reason is reported rather than 'no assistant response' - Harden the Codex JSON parser to also handle the legacy msg-wrapped shape - Fix the docker-in-docker workdir: bind-mount identical host:container path and set SPOON_AGENT_HOST_WORKDIR (named volume can't be mounted by sibling job containers) - Add docs/compose.prod.yml as a documented reference deployment
163 lines
7.3 KiB
YAML
163 lines
7.3 KiB
YAML
# Production Compose for Spoon
|
|
# -----------------------------------------------------------------------------
|
|
# Reference deployment for the production host. Copy this to the server and run
|
|
# with `docker compose -f compose.prod.yml up -d` (alongside your prod `.env`).
|
|
#
|
|
# Two things in here are load-bearing for the agent ("run a thread") to work.
|
|
# If you change them, read the comments first:
|
|
#
|
|
# 1. AGENT WORKDIR (spoon-agent-worker): the worker is containerized but
|
|
# launches the Codex job container by talking to the HOST Docker daemon.
|
|
# The host can only bind-mount real HOST paths, so the work directory MUST
|
|
# be a bind mount whose path is IDENTICAL inside and outside the container,
|
|
# and SPOON_AGENT_HOST_WORKDIR must match it. A named volume does NOT work
|
|
# here because its real host path is hidden from the worker. All three
|
|
# references to /var/lib/spoon-agent/work below must stay in sync; change
|
|
# them together if you want the data somewhere else.
|
|
#
|
|
# 2. IMAGE FRESHNESS: services use `pull_policy: always` + Watchtower labels so
|
|
# a redeploy / new push always lands. The Codex *job* image is pulled by the
|
|
# worker itself on startup (see SPOON_AGENT_JOB_IMAGE); restarting the worker
|
|
# (which Watchtower does on a new image) re-pulls a fresh job image.
|
|
|
|
networks:
  # Pre-existing (external) network; Compose will not create it. Every service
  # in this file attaches to `${NETWORK:-nginx-bridge}`, so rename this key to
  # the network you plan to use.
  nginx-bridge:
    external: true
|
|
|
|
services:
|
|
spoon-next:
  # Web application container. Image name, container name and hostname are all
  # derived from NEXT_CONTAINER_NAME.
  image: git.gbrown.org/gib/${NEXT_CONTAINER_NAME}:latest
  # Re-pull on every `up` so a redeploy always lands the newest image.
  pull_policy: always
  restart: unless-stopped
  container_name: ${NEXT_CONTAINER_NAME}
  hostname: ${NEXT_CONTAINER_NAME}
  domainname: ${NEXT_DOMAIN}
  tty: true
  stdin_open: true
  networks:
    - '${NETWORK:-nginx-bridge}'
  # Uncomment to publish the app port directly on the host.
  # ports:
  #   - '${NEXT_PORT}:${NEXT_PORT}'
  depends_on:
    - 'spoon-backend'
    - 'spoon-postgres'
  labels:
    # Opt this container in to Watchtower-managed updates.
    - 'com.centurylinklabs.watchtower.enable=true'
  environment:
    - NODE_ENV=${NODE_ENV}
    - SENTRY_AUTH_TOKEN=${SENTRY_AUTH_TOKEN}
    # Public URLs fall back to in-network / localhost defaults when unset.
    - NEXT_PUBLIC_SITE_URL=${NEXT_PUBLIC_SITE_URL:-http://localhost:${NEXT_PORT:-3000}}
    - NEXT_PUBLIC_CONVEX_URL=${NEXT_PUBLIC_CONVEX_URL:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-3210}}
    - NEXT_PUBLIC_PLAUSIBLE_URL=${NEXT_PUBLIC_PLAUSIBLE_URL:-https://plausible.gbrown.org}
    - NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN}
    - NEXT_PUBLIC_SENTRY_URL=${NEXT_PUBLIC_SENTRY_URL}
    - NEXT_PUBLIC_SENTRY_ORG=${NEXT_PUBLIC_SENTRY_ORG:-sentry}
    - NEXT_PUBLIC_SENTRY_PROJECT_NAME=${NEXT_PUBLIC_SENTRY_PROJECT_NAME}
    # Agent worker endpoint and the tokens passed alongside it; the same token
    # variables are also handed to the spoon-agent-worker service.
    - SPOON_AGENT_WORKER_URL=${SPOON_AGENT_WORKER_URL:-http://spoon-agent-worker:3921}
    - SPOON_AGENT_WORKER_INTERNAL_TOKEN=${SPOON_AGENT_WORKER_INTERNAL_TOKEN}
    - SPOON_WORKER_TOKEN=${SPOON_WORKER_TOKEN}
|
|
|
|
spoon-agent-worker:
  # Containerized agent worker. It launches Codex job containers through the
  # HOST Docker daemon (see the docker.sock mount below and header note (1)).
  image: git.gbrown.org/gib/spoon-agent-worker:latest
  pull_policy: always
  restart: unless-stopped
  container_name: spoon-agent-worker
  hostname: spoon-agent-worker
  domainname: worker.${NEXT_DOMAIN:-spoon.gbrown.org}
  tty: true
  stdin_open: true
  networks:
    - '${NETWORK:-nginx-bridge}'
  labels:
    # Opt this container in to Watchtower-managed updates.
    - 'com.centurylinklabs.watchtower.enable=true'
  volumes:
    # Host Docker socket: lets the worker start sibling job containers.
    - /var/run/docker.sock:/var/run/docker.sock
    # Host and container paths are deliberately identical so sibling Codex job
    # containers can bind-mount the workspace via the host daemon. Do NOT
    # replace this with a named volume. See header note (1).
    - /var/lib/spoon-agent/work:/var/lib/spoon-agent/work
  environment:
    - GITHUB_APP_ID=${GITHUB_APP_ID}
    - GITHUB_APP_PRIVATE_KEY=${GITHUB_APP_PRIVATE_KEY}
    - NEXT_PUBLIC_CONVEX_URL=https://api.spoon.gbrown.org
    - SPOON_AGENT_WORKER_ID=${SPOON_AGENT_WORKER_ID:-production-worker}
    - SPOON_AGENT_JOB_IMAGE=${SPOON_AGENT_JOB_IMAGE:-git.gbrown.org/gib/spoon-agent-job:latest}
    - SPOON_AGENT_RUNTIME=docker
    - SPOON_AGENT_NETWORK=${NETWORK:-nginx-bridge}
    # Both workdir variables and the bind mount above MUST name the same
    # absolute path; change all three together. See header note (1).
    - SPOON_AGENT_WORKDIR=/var/lib/spoon-agent/work
    - SPOON_AGENT_HOST_WORKDIR=/var/lib/spoon-agent/work
    - SPOON_AGENT_WORKER_HTTP_PORT=${SPOON_AGENT_WORKER_HTTP_PORT:-3921}
    - SPOON_AGENT_WORKER_INTERNAL_TOKEN=${SPOON_AGENT_WORKER_INTERNAL_TOKEN}
    - SPOON_AGENT_MAX_CONCURRENT_JOBS=${SPOON_AGENT_MAX_CONCURRENT_JOBS:-1}
    # 1800000 ms = 30 minutes.
    - SPOON_AGENT_JOB_TIMEOUT_MS=${SPOON_AGENT_JOB_TIMEOUT_MS:-1800000}
    - SPOON_WORKER_TOKEN=${SPOON_WORKER_TOKEN}
|
|
|
|
spoon-backend:
  # Self-hosted Convex backend. Data lives in ./volumes/convex and the service
  # is handed POSTGRES_URL, so it must not start before Postgres is ready.
  image: ghcr.io/get-convex/convex-backend:${BACKEND_TAG:-latest}
  container_name: ${BACKEND_CONTAINER_NAME:-spoon-backend}
  hostname: ${BACKEND_CONTAINER_NAME:-spoon-backend}
  domainname: ${BACKEND_DOMAIN:-convex.spoon.gbrown.org}
  networks:
    - '${NETWORK:-nginx-bridge}'
  # user: '1000:1000'
  # ports: ['${BACKEND_PORT:-3210}:3210','${SITE_PROXY_PORT:-3211}:3211']
  volumes:
    - ./volumes/convex:/convex/data
  pull_policy: always
  environment:
    - INSTANCE_NAME=${INSTANCE_NAME}
    - CONVEX_CLOUD_ORIGIN=${CONVEX_CLOUD_ORIGIN:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-3210}}
    - CONVEX_SITE_ORIGIN=${CONVEX_SITE_ORIGIN:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${SITE_PROXY_PORT:-3211}}
    - DISABLE_BEACON=${DISABLE_BEACON:-true}
    - REDACT_LOGS_TO_CLIENT=${REDACT_LOGS_TO_CLIENT:-true}
    - DO_NOT_REQUIRE_SSL=${DO_NOT_REQUIRE_SSL:-false}
    - POSTGRES_URL=${POSTGRES_URL}
  # Wait for spoon-postgres to pass its healthcheck rather than merely be
  # started; the short list form only orders container creation. This matches
  # how spoon-dashboard already waits on this service.
  depends_on:
    spoon-postgres:
      condition: service_healthy
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  stdin_open: true
  tty: true
  restart: unless-stopped
  # spoon-dashboard gates its own startup on this check (service_healthy).
  healthcheck:
    test: curl -f http://localhost:3210/version
    interval: 5s
    start_period: 10s
  stop_grace_period: 10s
  stop_signal: SIGINT
|
|
|
|
spoon-dashboard:
  # Convex dashboard UI; started only once spoon-backend reports healthy.
  image: ghcr.io/get-convex/convex-dashboard:${DASHBOARD_TAG:-latest}
  container_name: ${DASHBOARD_CONTAINER_NAME:-spoon-dashboard}
  hostname: ${DASHBOARD_CONTAINER_NAME:-spoon-dashboard}
  domainname: ${DASHBOARD_DOMAIN:-dashboard.${BACKEND_DOMAIN:-spoon.gbrown.org}}
  networks:
    - '${NETWORK:-nginx-bridge}'
  # user: 1000:1000
  # ports: ['${DASHBOARD_PORT:-6791}:6791']
  pull_policy: always
  environment:
    # Default backend URL. BACKEND_PORT is the variable the other services in
    # this file use for the backend port, so it is consulted first; the older
    # PORT name is kept as a fallback so existing .env files keep working.
    - NEXT_PUBLIC_DEPLOYMENT_URL=${NEXT_PUBLIC_DEPLOYMENT_URL:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-${PORT:-3210}}}
  depends_on:
    spoon-backend:
      condition: service_healthy
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  stdin_open: true
  tty: true
  restart: unless-stopped
  stop_grace_period: 10s
  stop_signal: SIGINT
|
|
|
|
spoon-postgres:
  # PostgreSQL 17 instance; data is persisted under ./volumes/postgres.
  image: postgres:17
  restart: unless-stopped
  container_name: ${POSTGRES_CONTAINER_NAME:-spoon-postgres}
  hostname: ${POSTGRES_CONTAINER_NAME:-spoon-postgres}
  domainname: postgres.${NEXT_DOMAIN:-spoon.gbrown.org}
  tty: true
  stdin_open: true
  networks:
    - '${NETWORK:-nginx-bridge}'
  # Uncomment to reach the database from the host.
  # ports:
  #   - "5434:5432"
  volumes:
    - './volumes/postgres:/var/lib/postgresql/data'
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  environment:
    - POSTGRES_USER=${POSTGRES_USER:-spoon}
    - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
    - POSTGRES_DB=${POSTGRES_DB:-spoon_convex}
  healthcheck:
    # `$$` escapes Compose interpolation, so the variables are expanded by the
    # shell inside the container (from the environment set above).
    test:
      - 'CMD-SHELL'
      - 'pg_isready -d $${POSTGRES_DB} -U $${POSTGRES_USER}'
    timeout: 5s
    retries: 5
    interval: 30s
    start_period: 20s
|