Fix agent empty-response in prod: workdir mount, image freshness, error surfacing

- Pin codex@0.142.0 + opencode-ai@1.17.9 in the job image (was @latest,
  causing dev/prod drift)
- Worker now s the job image once per process so prod stops
  running a stale Codex
- Surface Codex error/turn.failed events instead of swallowing them, so the
  real failure reason is reported rather than 'no assistant response'
- Harden the Codex JSON parser to also handle the legacy msg-wrapped shape
- Fix the docker-in-docker workdir: bind-mount identical host:container path
  and set SPOON_AGENT_HOST_WORKDIR (named volume can't be mounted by sibling
  job containers)
- Add docs/compose.prod.yml as a documented reference deployment
This commit is contained in:
Gabriel Brown
2026-06-24 05:38:35 -04:00
parent 980a2c07e8
commit 9643cb197b
8 changed files with 315 additions and 8 deletions
@@ -26,6 +26,32 @@ describe('agent event normalization', () => {
).toContainEqual({ kind: 'assistant_delta', content: 'hello' });
});
test('normalizes legacy codex-rs msg-wrapped events', () => {
expect(
normalizeCodexJsonLine(
JSON.stringify({
id: '0',
msg: { type: 'agent_message', message: 'hello there' },
}),
),
).toContainEqual({ kind: 'assistant_delta', content: 'hello there\n\n' });
expect(
normalizeCodexJsonLine(
JSON.stringify({
id: '1',
msg: { type: 'error', message: 'usage limit reached' },
}),
),
).toContainEqual({ kind: 'error', message: 'usage limit reached' });
expect(
normalizeCodexJsonLine(
JSON.stringify({ id: '2', msg: { type: 'task_complete' } }),
),
).toContainEqual({ kind: 'assistant_completed' });
});
test('normalizes Codex CLI thread lifecycle events', () => {
expect(
normalizeCodexJsonLine(