From 93bdf83184338170d7f9590f3cc5780fccee7e6f Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 22 Jun 2026 16:44:48 +0100 Subject: [PATCH 1/3] feat(webapp): trace compute template creation at deploy --- .../computeTemplateCreation.server.ts | 182 +++++++++++------- 1 file changed, 115 insertions(+), 67 deletions(-) diff --git a/apps/webapp/app/v3/services/computeTemplateCreation.server.ts b/apps/webapp/app/v3/services/computeTemplateCreation.server.ts index 8d7277a82f..8342291a32 100644 --- a/apps/webapp/app/v3/services/computeTemplateCreation.server.ts +++ b/apps/webapp/app/v3/services/computeTemplateCreation.server.ts @@ -13,9 +13,26 @@ import { isOrgMigrated } from "~/runEngine/concerns/computeMigration.server"; import { backingForQueue, workerRegionRegistry } from "~/v3/workerRegions.server"; import { globalFlagsRegistry } from "~/v3/globalFlagsRegistry.server"; import { getEntitlement } from "~/services/platform.v3.server"; +import { startActiveSpan, attributesFromAuthenticatedEnv } from "~/v3/tracer.server"; type TemplateCreationMode = "required" | "shadow" | "skip"; +// Why the mode was chosen — slices the compute.template.create span by path. +type TemplateModeReason = + | "no-client" + | "no-project" + | "microvm-native" + | "migrated" + | "compute-access" + | "rollout" + | "none"; + +type ResolvedTemplateMode = { + mode: TemplateCreationMode; + migrated: boolean; + reason: TemplateModeReason; +}; + type ResolvedPreset = { name: MachinePresetName; cpu: number; @@ -60,89 +77,116 @@ export class ComputeTemplateCreationService { prisma: PrismaClientOrTransaction; writer?: WritableStreamDefaultWriter; }): Promise { - const mode = await this.resolveMode(options.projectId, options.prisma); + return startActiveSpan("compute.template.create", async (span) => { + const { mode, migrated, reason } = await this.resolveMode( + options.projectId, + options.prisma + ); - if (mode === "skip") { - return; - } + span.setAttributes({ + ...attributesFromAuthenticatedEnv(options.authenticatedEnv), + "compute.template.mode": mode, + "compute.template.migrated": migrated, + "compute.template.reason": reason, + "compute.template.deployment_id": options.deploymentFriendlyId, + "compute.template.presets_total": this.presets.length, + "compute.template.presets_required": this.requiredPresets.size, + }); - if (mode === "shadow") { - this.createTemplate(options.imageReference, { background: true }) - .then((outcome) => { - if (outcome.error) { - logger.error("Shadow template creation failed", { + if (mode === "skip") { + span.setAttribute("compute.template.result", "skipped"); + return; + } + + if (mode === "shadow") { + // Shadow is fire-and-forget (background build), so the span only records + // that it was dispatched — the build outcome lands server-side later. + span.setAttribute("compute.template.result", "shadow_dispatched"); + this.createTemplate(options.imageReference, { background: true }) + .then((outcome) => { + if (outcome.error) { + logger.error("Shadow template creation failed", { + id: options.deploymentFriendlyId, + imageReference: options.imageReference, + error: outcome.error, + }); + } + }) + .catch((error) => { + logger.error("Shadow template creation threw unexpectedly", { id: options.deploymentFriendlyId, imageReference: options.imageReference, - error: outcome.error, + error: error instanceof Error ? error.message : String(error), }); - } - }) - .catch((error) => { - logger.error("Shadow template creation threw unexpectedly", { - id: options.deploymentFriendlyId, - imageReference: options.imageReference, - error: error instanceof Error ? error.message : String(error), }); - }); - return; - } - - // Required mode - if (options.writer) { - try { - await options.writer.write( - `event: log\ndata: ${JSON.stringify({ message: "Building compute template..." })}\n\n` - ); - } catch { - // Stream may be closed if client disconnected - continue with template creation + return; } - } - - logger.info("Creating compute template (required mode)", { - id: options.deploymentFriendlyId, - imageReference: options.imageReference, - presets: this.presets.map((p) => p.name), - requiredPresets: [...this.requiredPresets], - }); - const outcome = await this.createTemplate(options.imageReference); - const failureMessage = this.failureMessageForRequiredMode( - outcome, - options.deploymentFriendlyId, - options.imageReference - ); + // Required mode + if (options.writer) { + try { + await options.writer.write( + `event: log\ndata: ${JSON.stringify({ message: "Building compute template..." })}\n\n` + ); + } catch { + // Stream may be closed if client disconnected - continue with template creation + } + } - if (failureMessage) { - logger.error("Compute template creation failed", { + logger.info("Creating compute template (required mode)", { id: options.deploymentFriendlyId, imageReference: options.imageReference, - error: failureMessage, + presets: this.presets.map((p) => p.name), + requiredPresets: [...this.requiredPresets], }); - const failService = new FailDeploymentService(); - await failService.call(options.authenticatedEnv, options.deploymentFriendlyId, { - error: { - name: "TemplateCreationFailed", - message: `Failed to create compute template: ${failureMessage}`, - }, - }); + const outcome = await this.createTemplate(options.imageReference); + span.setAttribute("compute.template.presets_built", outcome.results.length); - throw new ServiceValidationError(`Compute template creation failed: ${failureMessage}`); - } + const failureMessage = this.failureMessageForRequiredMode( + outcome, + options.deploymentFriendlyId, + options.imageReference + ); + + if (failureMessage) { + span.setAttributes({ + "compute.template.result": "failed", + "compute.template.failure": failureMessage, + }); - logger.info("Compute template created", { - id: options.deploymentFriendlyId, - imageReference: options.imageReference, - results: outcome.results.length, + logger.error("Compute template creation failed", { + id: options.deploymentFriendlyId, + imageReference: options.imageReference, + error: failureMessage, + }); + + const failService = new FailDeploymentService(); + await failService.call(options.authenticatedEnv, options.deploymentFriendlyId, { + error: { + name: "TemplateCreationFailed", + message: `Failed to create compute template: ${failureMessage}`, + }, + }); + + throw new ServiceValidationError(`Compute template creation failed: ${failureMessage}`); + } + + span.setAttribute("compute.template.result", "created"); + logger.info("Compute template created", { + id: options.deploymentFriendlyId, + imageReference: options.imageReference, + results: outcome.results.length, + }); }); } async resolveMode( projectId: string, prisma: PrismaClientOrTransaction - ): Promise { + ): Promise { if (!this.client) { - return "skip"; + return { mode: "skip", migrated: false, reason: "no-client" }; } const project = await prisma.project.findFirst({ @@ -158,11 +202,11 @@ export class ComputeTemplateCreationService { }); if (!project) { - return "skip"; + return { mode: "skip", migrated: false, reason: "no-project" }; } if (project.defaultWorkerGroup?.workloadType === "MICROVM") { - return "required"; + return { mode: "required", migrated: false, reason: "microvm-native" }; } // Migrated orgs route runs to the compute backing even though their stored @@ -194,22 +238,26 @@ export class ComputeTemplateCreationService { } if (migrated) { // required => template built at deploy (deploy fails on error); off => shadow. - return decision.flags?.computeMigrationRequireTemplate ? "required" : "shadow"; + return { + mode: decision.flags?.computeMigrationRequireTemplate ? "required" : "shadow", + migrated: true, + reason: "migrated", + }; } } const hasComputeAccess = await resolveComputeAccess(prisma, project.organization.featureFlags); if (hasComputeAccess) { - return "shadow"; + return { mode: "shadow", migrated: false, reason: "compute-access" }; } const rolloutPct = Number(env.COMPUTE_TEMPLATE_SHADOW_ROLLOUT_PCT ?? "0"); if (rolloutPct > 0 && Math.random() * 100 < rolloutPct) { - return "shadow"; + return { mode: "shadow", migrated: false, reason: "rollout" }; } - return "skip"; + return { mode: "skip", migrated: false, reason: "none" }; } async createTemplate( From ff69578ff1d3757ab6078dfe887bd05289eeaab4 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 22 Jun 2026 16:57:26 +0100 Subject: [PATCH 2/3] feat(webapp): record deployment outcome span at terminal status --- ...eateDeploymentBackgroundWorkerV4.server.ts | 9 ++++ .../services/deploymentIndexFailed.server.ts | 11 ++++ .../app/v3/services/failDeployment.server.ts | 11 ++++ .../v3/services/finalizeDeployment.server.ts | 10 ++++ .../recordDeploymentOutcome.server.ts | 50 +++++++++++++++++++ .../v3/services/timeoutDeployment.server.ts | 11 ++++ 6 files changed, 102 insertions(+) create mode 100644 apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts index d34db841cd..9bf1707d0d 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts @@ -15,6 +15,7 @@ import { } from "./createBackgroundWorker.server"; import { findOrCreateBackgroundWorker } from "./createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server"; import { TimeoutDeploymentService } from "./timeoutDeployment.server"; +import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server"; import { env } from "~/env.server"; export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { @@ -297,6 +298,14 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { // sibling attempt may have just enqueued it as part of a successful // BUILDING → DEPLOYING transition. await TimeoutDeploymentService.dequeue(deployment.id, this._prisma); + + recordDeploymentOutcome({ + status: "FAILED", + deploymentFriendlyId: deployment.friendlyId, + projectId: deployment.projectId, + environmentId: deployment.environmentId, + reason: error.message, + }); } throw error; diff --git a/apps/webapp/app/v3/services/deploymentIndexFailed.server.ts b/apps/webapp/app/v3/services/deploymentIndexFailed.server.ts index ad1ff09726..e8c0acdb90 100644 --- a/apps/webapp/app/v3/services/deploymentIndexFailed.server.ts +++ b/apps/webapp/app/v3/services/deploymentIndexFailed.server.ts @@ -3,6 +3,7 @@ import { BaseService } from "./baseService.server"; import { logger } from "~/services/logger.server"; import { type WorkerDeploymentStatus } from "@trigger.dev/database"; import { DeploymentService } from "./deployment.server"; +import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server"; const FINAL_DEPLOYMENT_STATUSES: WorkerDeploymentStatus[] = [ "CANCELED", @@ -74,6 +75,16 @@ export class DeploymentIndexFailed extends BaseService { }, }); + recordDeploymentOutcome({ + status: "FAILED", + deploymentFriendlyId: deployment.friendlyId, + organizationId: deployment.environment.project.organizationId, + projectId: deployment.environment.projectId, + environmentId: deployment.environmentId, + environmentType: deployment.environment.type, + reason: error.message, + }); + const deploymentService = new DeploymentService(); await deploymentService .appendToEventLog(deployment.environment.project, failedDeployment, [ diff --git a/apps/webapp/app/v3/services/failDeployment.server.ts b/apps/webapp/app/v3/services/failDeployment.server.ts index b26cc77d4d..87b7618d76 100644 --- a/apps/webapp/app/v3/services/failDeployment.server.ts +++ b/apps/webapp/app/v3/services/failDeployment.server.ts @@ -5,6 +5,7 @@ import { type WorkerDeploymentStatus } from "@trigger.dev/database"; import { type FailDeploymentRequestBody } from "@trigger.dev/core/v3/schemas"; import { type AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { DeploymentService } from "./deployment.server"; +import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server"; export const FINAL_DEPLOYMENT_STATUSES: WorkerDeploymentStatus[] = [ "CANCELED", @@ -51,6 +52,16 @@ export class FailDeploymentService extends BaseService { }, }); + recordDeploymentOutcome({ + status: "FAILED", + deploymentFriendlyId: friendlyId, + organizationId: authenticatedEnv.organizationId, + projectId: authenticatedEnv.projectId, + environmentId: authenticatedEnv.id, + environmentType: authenticatedEnv.type, + reason: params.error.message, + }); + const deploymentService = new DeploymentService(); await deploymentService .appendToEventLog(authenticatedEnv.project, failedDeployment, [ diff --git a/apps/webapp/app/v3/services/finalizeDeployment.server.ts b/apps/webapp/app/v3/services/finalizeDeployment.server.ts index 6cbfc323e7..4cd01cb3db 100644 --- a/apps/webapp/app/v3/services/finalizeDeployment.server.ts +++ b/apps/webapp/app/v3/services/finalizeDeployment.server.ts @@ -10,6 +10,7 @@ import { projectPubSub } from "./projectPubSub.server"; import { FailDeploymentService } from "./failDeployment.server"; import { TimeoutDeploymentService } from "./timeoutDeployment.server"; import { DeploymentService } from "./deployment.server"; +import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server"; import { engine } from "../runEngine.server"; import { tryCatch } from "@trigger.dev/core"; @@ -78,6 +79,15 @@ export class FinalizeDeploymentService extends BaseService { }, }); + recordDeploymentOutcome({ + status: "DEPLOYED", + deploymentFriendlyId: deployment.friendlyId, + organizationId: authenticatedEnv.organizationId, + projectId: authenticatedEnv.projectId, + environmentId: authenticatedEnv.id, + environmentType: authenticatedEnv.type, + }); + const deploymentService = new DeploymentService(); await deploymentService .appendToEventLog(authenticatedEnv.project, finalizedDeployment, [ diff --git a/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts b/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts new file mode 100644 index 0000000000..3073636cd5 --- /dev/null +++ b/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts @@ -0,0 +1,50 @@ +import { SpanStatusCode } from "@opentelemetry/api"; +import { type WorkerDeploymentStatus } from "@trigger.dev/database"; +import { logger } from "~/services/logger.server"; +import { tracer } from "~/v3/tracer.server"; + +type TerminalDeploymentStatus = Extract< + WorkerDeploymentStatus, + "DEPLOYED" | "FAILED" | "TIMED_OUT" | "CANCELED" +>; + +/** + * Records a deployment's terminal status as a `deployment.outcome` span so + * deploy success/failure is queryable from traces (no DB read). Call after each + * terminal-status write. Org/project/env are best-effort; never throws. + */ +export function recordDeploymentOutcome(params: { + status: TerminalDeploymentStatus; + deploymentFriendlyId: string; + organizationId?: string; + projectId?: string; + environmentId?: string; + environmentType?: string; + reason?: string; +}): void { + try { + const span = tracer.startSpan("deployment.outcome", { + attributes: { + "$trigger.org.id": params.organizationId, + "$trigger.project.id": params.projectId, + "$trigger.env.id": params.environmentId, + "$trigger.env.type": params.environmentType, + "deployment.outcome.status": params.status, + "deployment.outcome.success": params.status === "DEPLOYED", + "deployment.outcome.deployment_id": params.deploymentFriendlyId, + "deployment.outcome.reason": params.reason, + }, + }); + + if (params.status !== "DEPLOYED") { + span.setStatus({ code: SpanStatusCode.ERROR, message: params.reason }); + } + + span.end(); + } catch (error) { + logger.debug("recordDeploymentOutcome failed", { + deploymentFriendlyId: params.deploymentFriendlyId, + error: error instanceof Error ? error.message : String(error), + }); + } +} diff --git a/apps/webapp/app/v3/services/timeoutDeployment.server.ts b/apps/webapp/app/v3/services/timeoutDeployment.server.ts index 79d1fd9e33..512576836a 100644 --- a/apps/webapp/app/v3/services/timeoutDeployment.server.ts +++ b/apps/webapp/app/v3/services/timeoutDeployment.server.ts @@ -5,6 +5,7 @@ import { PerformDeploymentAlertsService } from "./alerts/performDeploymentAlerts import { type PrismaClientOrTransaction } from "~/db.server"; import { workerQueue } from "~/services/worker.server"; import { DeploymentService } from "./deployment.server"; +import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server"; export class TimeoutDeploymentService extends BaseService { public async call(id: string, fromStatus: string, errorMessage: string) { @@ -48,6 +49,16 @@ export class TimeoutDeploymentService extends BaseService { }, }); + recordDeploymentOutcome({ + status: "TIMED_OUT", + deploymentFriendlyId: deployment.friendlyId, + organizationId: deployment.environment.project.organizationId, + projectId: deployment.environment.projectId, + environmentId: deployment.environmentId, + environmentType: deployment.environment.type, + reason: errorMessage, + }); + const deploymentService = new DeploymentService(); await deploymentService .appendToEventLog(deployment.environment.project, timedOutDeployment, [ From 58b0f613c16bf1631d4c4934e4d4f0ed30c874ff Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Mon, 22 Jun 2026 18:24:32 +0100 Subject: [PATCH 3/3] fix(webapp): consistent deployment.outcome attributes (env on bgworker-fail; drop unused CANCELED) --- ...eateDeploymentBackgroundWorkerV4.server.ts | 24 ++++++++++++------- .../recordDeploymentOutcome.server.ts | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts index 9bf1707d0d..9ed6c3518a 100644 --- a/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts +++ b/apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts @@ -112,7 +112,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { if (findOrCreateError instanceof ServiceValidationError) { // `#failBackgroundWorkerDeployment` already throws its argument; the // outer `throw` covers the non-SVE branch. - await this.#failBackgroundWorkerDeployment(deployment, findOrCreateError); + await this.#failBackgroundWorkerDeployment(deployment, findOrCreateError, environment); } throw findOrCreateError; } @@ -145,7 +145,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { const serviceError = new ServiceValidationError("Error creating background worker files"); - await this.#failBackgroundWorkerDeployment(deployment, serviceError); + await this.#failBackgroundWorkerDeployment(deployment, serviceError, environment); throw serviceError; } @@ -168,7 +168,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { error: resourcesError.message, }); - await this.#failBackgroundWorkerDeployment(deployment, resourcesError); + await this.#failBackgroundWorkerDeployment(deployment, resourcesError, environment); throw resourcesError; } @@ -180,7 +180,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { "Error creating background worker resources" ); - await this.#failBackgroundWorkerDeployment(deployment, serviceError); + await this.#failBackgroundWorkerDeployment(deployment, serviceError, environment); throw serviceError; } @@ -207,7 +207,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { error: schedulesError.message, }); - await this.#failBackgroundWorkerDeployment(deployment, schedulesError); + await this.#failBackgroundWorkerDeployment(deployment, schedulesError, environment); throw schedulesError; } @@ -221,7 +221,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { const serviceError = new ServiceValidationError("Error syncing declarative schedules"); - await this.#failBackgroundWorkerDeployment(deployment, serviceError); + await this.#failBackgroundWorkerDeployment(deployment, serviceError, environment); throw serviceError; } @@ -265,7 +265,11 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { }); } - async #failBackgroundWorkerDeployment(deployment: WorkerDeployment, error: Error) { + async #failBackgroundWorkerDeployment( + deployment: WorkerDeployment, + error: Error, + environment: AuthenticatedEnvironment + ) { // Guarded BUILDING → FAILED transition, symmetric with the BUILDING → DEPLOYING // transition in `call()`. With idempotent retries, two attempts can run side-by-side; // without the predicate, one attempt's failure could downgrade the deployment after @@ -302,8 +306,10 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService { recordDeploymentOutcome({ status: "FAILED", deploymentFriendlyId: deployment.friendlyId, - projectId: deployment.projectId, - environmentId: deployment.environmentId, + organizationId: environment.organizationId, + projectId: environment.projectId, + environmentId: environment.id, + environmentType: environment.type, reason: error.message, }); } diff --git a/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts b/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts index 3073636cd5..e66a7a6a9f 100644 --- a/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts +++ b/apps/webapp/app/v3/services/recordDeploymentOutcome.server.ts @@ -5,7 +5,7 @@ import { tracer } from "~/v3/tracer.server"; type TerminalDeploymentStatus = Extract< WorkerDeploymentStatus, - "DEPLOYED" | "FAILED" | "TIMED_OUT" | "CANCELED" + "DEPLOYED" | "FAILED" | "TIMED_OUT" >; /**