Fix agent empty-response in prod: workdir mount, image freshness, error surfacing
- Pin codex@0.142.0 + opencode-ai@1.17.9 in the job image (was @latest, causing dev/prod drift)
- Worker now pulls the job image once per process so prod stops running a stale Codex
- Surface Codex error/turn.failed events instead of swallowing them, so the real failure reason is reported rather than 'no assistant response'
- Harden the Codex JSON parser to also handle the legacy msg-wrapped shape
- Fix the docker-in-docker workdir: bind-mount an identical host:container path and set SPOON_AGENT_HOST_WORKDIR (a named volume can't be mounted by sibling job containers)
- Add docs/compose.prod.yml as a documented reference deployment
This commit is contained in:
@@ -0,0 +1,162 @@
|
||||
# Production Compose for Spoon
# -----------------------------------------------------------------------------
# Reference deployment for the production host. Copy this to the server and run
# with `docker compose -f compose.prod.yml up -d` (alongside your prod `.env`).
#
# Two things in here are load-bearing for the agent ("run a thread") to work.
# If you change them, read the comments first:
#
#   1. AGENT WORKDIR (spoon-agent-worker): the worker is containerized but
#      launches the Codex job container by talking to the HOST Docker daemon.
#      The host can only bind-mount real HOST paths, so the work directory MUST
#      be a bind mount whose path is IDENTICAL inside and outside the container,
#      and SPOON_AGENT_HOST_WORKDIR must match it. A named volume does NOT work
#      here because its real host path is hidden from the worker. All three
#      references to /var/lib/spoon-agent/work below must stay in sync; change
#      them together if you want the data somewhere else.
#
#   2. IMAGE FRESHNESS: services use `pull_policy: always` + Watchtower labels so
#      a redeploy / new push always lands. The Codex *job* image is pulled by the
#      worker itself on startup (see SPOON_AGENT_JOB_IMAGE); restarting the worker
#      (which Watchtower does on a new image) re-pulls a fresh job image.
|
||||
|
||||
networks:
  # Pre-existing network (`external: true`), so Compose attaches to it rather
  # than creating it. Change this key to the network you plan to use; the
  # services attach to `${NETWORK:-nginx-bridge}`, so the two must agree.
  nginx-bridge:
    external: true
|
||||
|
||||
services:
|
||||
spoon-next:
  # Web front end. Image and container name both come from NEXT_CONTAINER_NAME.
  image: git.gbrown.org/gib/${NEXT_CONTAINER_NAME}:latest
  container_name: ${NEXT_CONTAINER_NAME}
  hostname: ${NEXT_CONTAINER_NAME}
  domainname: ${NEXT_DOMAIN}
  networks:
    - '${NETWORK:-nginx-bridge}'
  # ports: ['${NEXT_PORT}:${NEXT_PORT}']
  pull_policy: always
  environment:
    - NODE_ENV=${NODE_ENV}
    - SENTRY_AUTH_TOKEN=${SENTRY_AUTH_TOKEN}
    - NEXT_PUBLIC_SITE_URL=${NEXT_PUBLIC_SITE_URL:-http://localhost:${NEXT_PORT:-3000}}
    - NEXT_PUBLIC_CONVEX_URL=${NEXT_PUBLIC_CONVEX_URL:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-3210}}
    - NEXT_PUBLIC_PLAUSIBLE_URL=${NEXT_PUBLIC_PLAUSIBLE_URL:-https://plausible.gbrown.org}
    - NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN}
    - NEXT_PUBLIC_SENTRY_URL=${NEXT_PUBLIC_SENTRY_URL}
    - NEXT_PUBLIC_SENTRY_ORG=${NEXT_PUBLIC_SENTRY_ORG:-sentry}
    - NEXT_PUBLIC_SENTRY_PROJECT_NAME=${NEXT_PUBLIC_SENTRY_PROJECT_NAME}
    # The default tracks the worker's configurable listen port
    # (SPOON_AGENT_WORKER_HTTP_PORT) so the two cannot drift apart; it still
    # resolves to :3921 when nothing is set.
    - SPOON_AGENT_WORKER_URL=${SPOON_AGENT_WORKER_URL:-http://spoon-agent-worker:${SPOON_AGENT_WORKER_HTTP_PORT:-3921}}
    - SPOON_AGENT_WORKER_INTERNAL_TOKEN=${SPOON_AGENT_WORKER_INTERNAL_TOKEN}
    - SPOON_WORKER_TOKEN=${SPOON_WORKER_TOKEN}
  # Wait for the healthchecks of both dependencies instead of only ordering
  # container start, matching how spoon-dashboard waits for spoon-backend.
  depends_on:
    spoon-backend:
      condition: service_healthy
    spoon-postgres:
      condition: service_healthy
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  tty: true
  stdin_open: true
  restart: unless-stopped
|
||||
|
||||
spoon-agent-worker:
  # Agent worker. It runs in a container but starts the Codex job containers
  # through the host Docker daemon (socket mounted under `volumes`).
  image: git.gbrown.org/gib/spoon-agent-worker:latest
  container_name: spoon-agent-worker
  hostname: spoon-agent-worker
  domainname: worker.${NEXT_DOMAIN:-spoon.gbrown.org}
  networks:
    - '${NETWORK:-nginx-bridge}'
  pull_policy: always
  environment:
    - GITHUB_APP_ID=${GITHUB_APP_ID}
    - GITHUB_APP_PRIVATE_KEY=${GITHUB_APP_PRIVATE_KEY}
    - NEXT_PUBLIC_CONVEX_URL=https://api.spoon.gbrown.org
    - SPOON_AGENT_WORKER_ID=${SPOON_AGENT_WORKER_ID:-production-worker}
    - SPOON_AGENT_JOB_IMAGE=${SPOON_AGENT_JOB_IMAGE:-git.gbrown.org/gib/spoon-agent-job:latest}
    - SPOON_AGENT_RUNTIME=docker
    - SPOON_AGENT_NETWORK=${NETWORK:-nginx-bridge}
    # Both workdir variables and the bind mount under `volumes` MUST name the
    # same absolute path. See header note (1).
    - SPOON_AGENT_WORKDIR=/var/lib/spoon-agent/work
    - SPOON_AGENT_HOST_WORKDIR=/var/lib/spoon-agent/work
    - SPOON_AGENT_WORKER_HTTP_PORT=${SPOON_AGENT_WORKER_HTTP_PORT:-3921}
    - SPOON_AGENT_WORKER_INTERNAL_TOKEN=${SPOON_AGENT_WORKER_INTERNAL_TOKEN}
    - SPOON_AGENT_MAX_CONCURRENT_JOBS=${SPOON_AGENT_MAX_CONCURRENT_JOBS:-1}
    - SPOON_AGENT_JOB_TIMEOUT_MS=${SPOON_AGENT_JOB_TIMEOUT_MS:-1800000}
    - SPOON_WORKER_TOKEN=${SPOON_WORKER_TOKEN}
  volumes:
    # Host Docker socket: this is how the worker reaches the host daemon.
    - /var/run/docker.sock:/var/run/docker.sock
    # The path is the same on the host and in the container, so sibling Codex
    # job containers can bind-mount the workspace through the host daemon.
    # Do NOT replace this with a named volume. See header note (1).
    - /var/lib/spoon-agent/work:/var/lib/spoon-agent/work
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  tty: true
  stdin_open: true
  restart: unless-stopped
|
||||
|
||||
spoon-backend:
  # Convex backend (ghcr.io/get-convex/convex-backend), given POSTGRES_URL
  # and a local data directory.
  image: ghcr.io/get-convex/convex-backend:${BACKEND_TAG:-latest}
  container_name: ${BACKEND_CONTAINER_NAME:-spoon-backend}
  hostname: ${BACKEND_CONTAINER_NAME:-spoon-backend}
  domainname: ${BACKEND_DOMAIN:-convex.spoon.gbrown.org}
  networks:
    - '${NETWORK:-nginx-bridge}'
  # user: '1000:1000'
  # ports: ['${BACKEND_PORT:-3210}:3210','${SITE_PROXY_PORT:-3211}:3211']
  volumes:
    - ./volumes/convex:/convex/data
  pull_policy: always
  environment:
    - INSTANCE_NAME=${INSTANCE_NAME}
    - CONVEX_CLOUD_ORIGIN=${CONVEX_CLOUD_ORIGIN:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-3210}}
    - CONVEX_SITE_ORIGIN=${CONVEX_SITE_ORIGIN:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${SITE_PROXY_PORT:-3211}}
    - DISABLE_BEACON=${DISABLE_BEACON:-true}
    - REDACT_LOGS_TO_CLIENT=${REDACT_LOGS_TO_CLIENT:-true}
    - DO_NOT_REQUIRE_SSL=${DO_NOT_REQUIRE_SSL:-false}
    - POSTGRES_URL=${POSTGRES_URL}
  # Start only once the spoon-postgres healthcheck (pg_isready) passes, so the
  # backend does not come up against a database that is still initializing.
  depends_on:
    spoon-postgres:
      condition: service_healthy
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  stdin_open: true
  tty: true
  restart: unless-stopped
  # spoon-dashboard waits on this check via `condition: service_healthy`.
  healthcheck:
    test: curl -f http://localhost:3210/version
    interval: 5s
    start_period: 10s
  stop_grace_period: 10s
  stop_signal: SIGINT
|
||||
|
||||
spoon-dashboard:
  # Convex dashboard (ghcr.io/get-convex/convex-dashboard) for spoon-backend.
  image: ghcr.io/get-convex/convex-dashboard:${DASHBOARD_TAG:-latest}
  container_name: ${DASHBOARD_CONTAINER_NAME:-spoon-dashboard}
  hostname: ${DASHBOARD_CONTAINER_NAME:-spoon-dashboard}
  domainname: ${DASHBOARD_DOMAIN:-dashboard.${BACKEND_DOMAIN:-spoon.gbrown.org}}
  networks:
    - '${NETWORK:-nginx-bridge}'
  # user: 1000:1000
  # ports: ['${DASHBOARD_PORT:-6791}:6791']
  pull_policy: always
  environment:
    # The default uses BACKEND_PORT, the same variable every other backend URL
    # in this file is built from, rather than the generic PORT.
    - NEXT_PUBLIC_DEPLOYMENT_URL=${NEXT_PUBLIC_DEPLOYMENT_URL:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-3210}}
  # Wait until the spoon-backend healthcheck passes before starting.
  depends_on:
    spoon-backend:
      condition: service_healthy
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  stdin_open: true
  tty: true
  restart: unless-stopped
  stop_grace_period: 10s
  stop_signal: SIGINT
|
||||
|
||||
spoon-postgres:
  # PostgreSQL 17 instance; data is kept in ./volumes/postgres on the host.
  image: postgres:17
  container_name: ${POSTGRES_CONTAINER_NAME:-spoon-postgres}
  hostname: ${POSTGRES_CONTAINER_NAME:-spoon-postgres}
  domainname: postgres.${NEXT_DOMAIN:-spoon.gbrown.org}
  networks:
    - '${NETWORK:-nginx-bridge}'
  # ports: ["5434:5432"]
  environment:
    - POSTGRES_USER=${POSTGRES_USER:-spoon}
    - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
    - POSTGRES_DB=${POSTGRES_DB:-spoon_convex}
  volumes:
    - './volumes/postgres:/var/lib/postgresql/data'
  labels:
    - 'com.centurylinklabs.watchtower.enable=true'
  tty: true
  stdin_open: true
  restart: unless-stopped
  healthcheck:
    # `$$` stops Compose from interpolating these, so the container's shell
    # expands POSTGRES_DB / POSTGRES_USER from its own environment.
    test:
      - 'CMD-SHELL'
      - 'pg_isready -d $${POSTGRES_DB} -U $${POSTGRES_USER}'
    start_period: 20s
    interval: 30s
    retries: 5
    timeout: 5s
|
||||
Reference in New Issue
Block a user