Fix agent empty-response in prod: workdir mount, image freshness, error surfacing

- Pin codex@0.142.0 + opencode-ai@1.17.9 in the job image (was @latest,
  causing dev/prod drift)
- Worker now s the job image once per process so prod stops
  running a stale Codex
- Surface Codex error/turn.failed events instead of swallowing them, so the
  real failure reason is reported rather than 'no assistant response'
- Harden the Codex JSON parser to also handle the legacy msg-wrapped shape
- Fix the docker-in-docker workdir: bind-mount identical host:container path
  and set SPOON_AGENT_HOST_WORKDIR (named volume can't be mounted by sibling
  job containers)
- Add docs/compose.prod.yml as a documented reference deployment
This commit is contained in:
Gabriel Brown
2026-06-24 05:38:35 -04:00
parent 980a2c07e8
commit 9643cb197b
8 changed files with 315 additions and 8 deletions
+23
View File
@@ -18,6 +18,26 @@ const networkArgs = () => (env.network ? ['--network', env.network] : []);
const containerRuntime = () => env.containerRuntime;
// `docker run` reuses a stale local `:latest` forever, so without an explicit
// pull the job image never updates in production. Pull once per worker process
// (i.e. once per deploy/restart) so a fresh worker always runs a fresh job
// image. Best-effort: if the registry is unreachable we fall back to whatever
// image is present locally rather than failing the job.
let jobImagePullPromise: Promise<void> | undefined;
export const ensureJobImagePulled = () => {
jobImagePullPromise ??= (async () => {
try {
await execa(containerRuntime(), ['pull', env.jobImage], {
reject: false,
stdin: 'ignore',
});
} catch {
// Ignore: keep running with the locally cached image.
}
})();
return jobImagePullPromise;
};
const hostWorkspacePath = (workdir: string) => {
if (!env.hostWorkdir) return workdir;
const workerRoot = path.resolve(env.workdir);
@@ -46,6 +66,7 @@ export const runInJobContainer = async (args: {
redact: (value: string) => string;
timeoutMs: number;
}): Promise<CommandResult> => {
await ensureJobImagePulled();
const result = await execa(
containerRuntime(),
[
@@ -84,6 +105,7 @@ export const startWorkspaceContainer = async (args: {
command?: string[];
publishTcpPort?: number;
}) => {
await ensureJobImagePulled();
await execa(
containerRuntime(),
[
@@ -180,6 +202,7 @@ export const streamInJobContainer = async (args: {
onStdoutLine?: (line: string) => Promise<void>;
onStderrLine?: (line: string) => Promise<void>;
}): Promise<CommandResult> => {
await ensureJobImagePulled();
const subprocess = execa(
containerRuntime(),
[