Fix worker image missing docker CLI; harden spawn-failure handling
Root cause of the prod empty-response: the spoon-agent-worker image shipped
without a docker CLI binary, so it could never launch the codex job container.
On Debian trixie (the bun base) 'docker.io' + --no-install-recommends installs
the daemon package but omits the client (split into 'docker-cli'), leaving no
'docker' on PATH. execa('docker', ...) hit ENOENT, and with reject:false that
resolves with exitCode undefined -> coerced to 0 -> looked like a successful
empty run -> 'Codex completed without producing an assistant response'.
- agent-worker.Dockerfile: drop docker.io, install the official static docker
CLI client pinned to 29.5.3 (matches the host daemon) to /usr/local/bin/docker
- runtime/docker.ts: add normalizeRunResult() so a spawn failure (exitCode
undefined/null) is always reported as a non-zero exit carrying the real reason,
never a silent empty success
- tests: cover the spawn-failure and normal-result paths
This commit is contained in:
@@ -38,6 +38,31 @@ export const ensureJobImagePulled = () => {
|
||||
return jobImagePullPromise;
|
||||
};
|
||||
|
||||
/**
 * Turns a raw container-runtime result into a `CommandResult` that can never
 * masquerade as a successful empty run.
 *
 * execa with `reject: false` resolves (does not throw) even when the runtime
 * binary is missing (ENOENT) — `exitCode` is then `undefined`. Coercing that to
 * 0 makes a failed spawn look like a successful empty run, which is exactly how
 * a worker image without a `docker` CLI silently produced empty agent
 * responses. Any result without an exit code is therefore reported as exit
 * code 1, with the failure reason appended to whatever output was captured.
 *
 * @param result - The settled process result; only `exitCode` and
 *   `shortMessage` are read.
 * @param output - Captured process output, or `undefined` when none exists.
 * @param redact - Applied to the final output text (including the appended
 *   failure reason) before it is returned.
 * @returns The real exit code with redacted output, or exit code 1 with the
 *   redacted output plus failure reason when the process never reported one.
 */
export const normalizeRunResult = (
  // Declared optional *and* nullable on purpose: execa's types claim these are
  // always present, but on a spawn failure (e.g. missing `docker` binary)
  // `exitCode`/`all` are actually undefined at runtime.
  result: { exitCode?: number | null; shortMessage?: string | null },
  output: string | undefined,
  redact: (value: string) => string,
): CommandResult => {
  const text = output ?? '';
  const { exitCode, shortMessage } = result;

  // Normal path: the process ran and reported an exit code (0 included).
  if (exitCode != null) {
    return { exitCode, output: redact(text) };
  }

  // A blank reason would still leave the caller with nothing to act on, so
  // treat it the same as a missing one.
  const reason =
    shortMessage != null && shortMessage.trim() !== ''
      ? shortMessage
      : 'container runtime failed to start';
  const separator = text ? '\n' : '';

  return {
    exitCode: 1,
    output: redact(`${text}${separator}${reason}`),
  };
};
|
||||
|
||||
const hostWorkspacePath = (workdir: string) => {
|
||||
if (!env.hostWorkdir) return workdir;
|
||||
const workerRoot = path.resolve(env.workdir);
|
||||
@@ -92,10 +117,7 @@ export const runInJobContainer = async (args: {
|
||||
timeout: args.timeoutMs,
|
||||
},
|
||||
);
|
||||
return {
|
||||
exitCode: result.exitCode ?? 0,
|
||||
output: args.redact(result.all),
|
||||
};
|
||||
return normalizeRunResult(result, result.all, args.redact);
|
||||
};
|
||||
|
||||
export const startWorkspaceContainer = async (args: {
|
||||
@@ -279,10 +301,7 @@ export const streamInJobContainer = async (args: {
|
||||
if (stderrBuffer && args.onStderrLine) {
|
||||
await args.onStderrLine(args.redact(stderrBuffer));
|
||||
}
|
||||
return {
|
||||
exitCode: result.exitCode ?? 0,
|
||||
output: args.redact(output.join('')),
|
||||
};
|
||||
return normalizeRunResult(result, output.join(''), args.redact);
|
||||
};
|
||||
|
||||
export const stopWorkspaceContainer = async (containerName: string) => {
|
||||
|
||||
Reference in New Issue
Block a user