Fix agent empty-response in prod: workdir mount, image freshness, error surfacing
- Pin codex@0.142.0 + opencode-ai@1.17.9 in the job image (was @latest, causing dev/prod drift) - Worker now pulls the job image once per process so prod stops running a stale Codex - Surface Codex error/turn.failed events instead of swallowing them, so the real failure reason is reported rather than 'no assistant response' - Harden the Codex JSON parser to also handle the legacy msg-wrapped shape - Fix the docker-in-docker workdir: bind-mount identical host:container path and set SPOON_AGENT_HOST_WORKDIR (named volume can't be mounted by sibling job containers) - Add docs/compose.prod.yml as a documented reference deployment
This commit is contained in:
@@ -18,6 +18,26 @@ const networkArgs = () => (env.network ? ['--network', env.network] : []);
|
||||
|
||||
// Name of the container CLI executable, read from `env.containerRuntime` on
// every call; it is passed to `execa` as the program to run.
const containerRuntime = () => {
  return env.containerRuntime;
};
|
||||
|
||||
// `docker run` reuses a stale local `:latest` forever, so without an explicit
// pull the job image never updates in production. Pull once per worker process
// (i.e. once per deploy/restart) so a fresh worker always runs a fresh job
// image. Best-effort: if the registry is unreachable we fall back to whatever
// image is present locally rather than failing the job.
let jobImagePullPromise: Promise<void> | undefined;

/**
 * Pulls `env.jobImage` with the configured container runtime, at most once per
 * process. The first call starts the pull; that call and every later call get
 * the same cached promise, so concurrent jobs share a single pull.
 *
 * The returned promise never rejects: a failed pull is reported with a warning
 * and callers carry on with whatever image is cached locally. The outcome is
 * cached either way, so a failed pull is not retried until the process
 * restarts.
 *
 * @returns A promise that settles once the pull attempt has finished.
 */
export const ensureJobImagePulled = () => {
  jobImagePullPromise ??= (async () => {
    try {
      // `reject: false` makes execa resolve with a failed result instead of
      // throwing when the command exits non-zero.
      const pull = await execa(containerRuntime(), ['pull', env.jobImage], {
        reject: false,
        stdin: 'ignore',
      });
      if (pull.failed) {
        // Make the fallback visible: otherwise a stale image is used silently.
        console.warn(
          `Job image pull failed for ${env.jobImage} (exit code ${pull.exitCode ?? 'unknown'}); continuing with the locally cached image.`,
        );
      }
    } catch (error: unknown) {
      // Anything thrown despite `reject: false` (e.g. the runtime binary could
      // not be started). Keep running with the locally cached image.
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `Job image pull could not be run for ${env.jobImage}: ${reason}; continuing with the locally cached image.`,
      );
    }
  })();
  return jobImagePullPromise;
};
|
||||
|
||||
const hostWorkspacePath = (workdir: string) => {
|
||||
if (!env.hostWorkdir) return workdir;
|
||||
const workerRoot = path.resolve(env.workdir);
|
||||
@@ -46,6 +66,7 @@ export const runInJobContainer = async (args: {
|
||||
redact: (value: string) => string;
|
||||
timeoutMs: number;
|
||||
}): Promise<CommandResult> => {
|
||||
await ensureJobImagePulled();
|
||||
const result = await execa(
|
||||
containerRuntime(),
|
||||
[
|
||||
@@ -84,6 +105,7 @@ export const startWorkspaceContainer = async (args: {
|
||||
command?: string[];
|
||||
publishTcpPort?: number;
|
||||
}) => {
|
||||
await ensureJobImagePulled();
|
||||
await execa(
|
||||
containerRuntime(),
|
||||
[
|
||||
@@ -180,6 +202,7 @@ export const streamInJobContainer = async (args: {
|
||||
onStdoutLine?: (line: string) => Promise<void>;
|
||||
onStderrLine?: (line: string) => Promise<void>;
|
||||
}): Promise<CommandResult> => {
|
||||
await ensureJobImagePulled();
|
||||
const subprocess = execa(
|
||||
containerRuntime(),
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user