Fix agent empty-response in prod: workdir mount, image freshness, error surfacing

- Pin codex@0.142.0 + opencode-ai@1.17.9 in the job image (was @latest,
  causing dev/prod drift)
- Worker now pulls the job image once per process so prod stops
  running a stale Codex
- Surface Codex error/turn.failed events instead of swallowing them, so the
  real failure reason is reported rather than 'no assistant response'
- Harden the Codex JSON parser to also handle the legacy msg-wrapped shape
- Fix the docker-in-docker workdir: bind-mount identical host:container path
  and set SPOON_AGENT_HOST_WORKDIR (named volume can't be mounted by sibling
  job containers)
- Add docs/compose.prod.yml as a documented reference deployment
This commit is contained in:
Gabriel Brown
2026-06-24 05:38:35 -04:00
parent 980a2c07e8
commit 9643cb197b
8 changed files with 315 additions and 8 deletions
+1 -1
View File
@@ -17,7 +17,7 @@ RUN apt-get update \
&& corepack enable \
&& corepack prepare pnpm@latest --activate \
&& corepack prepare yarn@stable --activate \
&& npm install -g bun@1.3.10 opencode-ai@latest @openai/codex@latest \
&& npm install -g bun@1.3.10 opencode-ai@1.17.9 @openai/codex@0.142.0 \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /workspace
+4 -2
View File
@@ -77,11 +77,14 @@ services:
- SPOON_AGENT_MAX_CONCURRENT_JOBS=${SPOON_AGENT_MAX_CONCURRENT_JOBS:-1}
- SPOON_AGENT_JOB_TIMEOUT_MS=${SPOON_AGENT_JOB_TIMEOUT_MS:-1800000}
- SPOON_AGENT_WORKDIR=${SPOON_AGENT_WORKDIR:-/var/lib/spoon-agent/work}
# See compose.yml: the host-side path must match SPOON_AGENT_WORKDIR so the
# sibling job containers' bind mounts resolve on the host Docker daemon.
- SPOON_AGENT_HOST_WORKDIR=${SPOON_AGENT_HOST_WORKDIR:-/var/lib/spoon-agent/work}
- GITHUB_APP_ID=${GITHUB_APP_ID}
- GITHUB_APP_PRIVATE_KEY=${GITHUB_APP_PRIVATE_KEY}
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- agent-work:/var/lib/spoon-agent/work
- ${SPOON_AGENT_HOST_WORKDIR:-/var/lib/spoon-agent/work}:/var/lib/spoon-agent/work
depends_on:
convex-backend:
condition: service_healthy
@@ -90,4 +93,3 @@ services:
volumes:
postgres-data:
convex-data:
agent-work:
+11 -4
View File
@@ -20,6 +20,7 @@ services:
image: spoon-next:latest
#image: git.gbrown.org/gib/spoon-next:latest
container_name: ${NEXT_CONTAINER_NAME}
labels: ['com.centurylinklabs.watchtower.enable=true']
environment:
- NODE_ENV=${NODE_ENV}
- SENTRY_AUTH_TOKEN=${SENTRY_AUTH_TOKEN}
@@ -95,6 +96,7 @@ services:
image: spoon-agent-worker:latest
container_name: ${AGENT_WORKER_CONTAINER_NAME:-spoon-agent-worker}
hostname: ${AGENT_WORKER_CONTAINER_NAME:-spoon-agent-worker}
labels: ['com.centurylinklabs.watchtower.enable=true']
networks: ['${NETWORK:-nginx-bridge}']
environment:
- NEXT_PUBLIC_CONVEX_URL=${CONVEX_SELF_HOSTED_URL:-http://${BACKEND_CONTAINER_NAME:-spoon-backend}:${BACKEND_PORT:-3210}}
@@ -108,15 +110,20 @@ services:
- SPOON_AGENT_MAX_CONCURRENT_JOBS=${SPOON_AGENT_MAX_CONCURRENT_JOBS:-1}
- SPOON_AGENT_JOB_TIMEOUT_MS=${SPOON_AGENT_JOB_TIMEOUT_MS:-1800000}
- SPOON_AGENT_WORKDIR=${SPOON_AGENT_WORKDIR:-/var/lib/spoon-agent/work}
# Required when the worker controls the host Docker socket: bind-mount
# source paths are resolved on the host, not inside this container, so the
# worker must know the host-side path backing SPOON_AGENT_WORKDIR. We bind
# the same host path at the same location below so they are identical.
- SPOON_AGENT_HOST_WORKDIR=${SPOON_AGENT_HOST_WORKDIR:-/var/lib/spoon-agent/work}
- GITHUB_APP_ID=${GITHUB_APP_ID}
- GITHUB_APP_PRIVATE_KEY=${GITHUB_APP_PRIVATE_KEY}
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- spoon-agent-work:/var/lib/spoon-agent/work
# Host bind mount (not a named volume) so the path is identical on the
# host and inside the worker, which is what the sibling job containers
# need for their `-v <path>:/workspace` mounts to resolve correctly.
- ${SPOON_AGENT_HOST_WORKDIR:-/var/lib/spoon-agent/work}:/var/lib/spoon-agent/work
depends_on:
spoon-backend:
condition: service_healthy
restart: unless-stopped
volumes:
spoon-agent-work: